Low-level programming language that is translated into the architecture's machine code. Here we will use the x86_64 architecture.
What is x86_64?
64-bit architecture that supports 32-bit. Used by most modern computers.
eax - accumulator
ebx - base
ecx - counter
edx - data
edi - destination
esi - source
esp - stack pointer
ebp - base stack frame pointer
High speed memory used to store information temporarily
* not accessible like the other registers
The names do not matter for the use of the registers, but sometimes are hints to how they are used.
Same as x86 but now we have more and larger registers!
Here's the big picture, but we don't need all of these!
Floating Point Registers
Flags
And a bunch of other stuff...
rax - 64-bits, 8-bytes, quad-word (qword)
eax - 32-bits, 4-bytes, double-word (dword)
ax - 16-bits, 2-bytes, word
al/ah - 8-bits, 1-byte, byte
- eax is the lower 32-bits of rax
- ax is the lower 16-bits of eax and rax
- And so on
- This is true for ebx, ecx, edx, and the numbered registers as well.
- Not all registers have byte-sized references in 32-bit code, such as esp and ebp (64-bit mode adds spl and bpl)
We will focus on Intel syntax, but know that AT&T syntax exists.
Main difference is in the source and destination operand order
edi - destination esi - source
mov edi, esi
Intel
mov %esi, %edi
AT&T
In both examples, the contents of the esi register are copied to the edi register
If interested in disassembly then this diagram is useful to you!
We will use libraries that handle disassembly for us, but you should be familiar with the concept.
mov eax, 0x01 ;put 1 into eax
mov dword [eax], 0x01 ;put the 32-bit value 1 at the address in eax
; (a size such as dword is required: neither operand implies one)
mov eax, [esi] ;put contents of the address in esi into eax
push eax ;put contents of eax on top of stack
push 0x01 ;put 1 on top of stack
; and dec the stack pointer (the stack grows toward lower addresses)
pop eax ;put contents of the top of the stack into eax,
; and inc the stack pointer
[] indicates an access to memory*
[base + index*size + offset]
; size can only be 1,2,4,8
[arr + esi*4 + 0] ;array of int
*does not mean the memory is actually accessed
What could the offset be used for?
lea eax, ecx ;invalid: the source must be a memory operand written in []
lea eax, [ecx] ;valid, equivalent to mov eax, ecx
lea eax, [ecx + edx] ;eax = ecx + edx*1 (implicit scale of 1)
lea eax, [ecx + edx*3] ;invalid, valid scale values are 1,2,4,8
lea eax, [eax + edx*4] ;can be thought of as
; eax = &((DWORD *)eax)[edx] (the element's address, not its value) why?
lea does not access memory, even though its operand is written with []! It only does the pointer arithmetic with no dereference!
jmp addr ;addr could be a register
; with an address or a label
this_is_a_label:
call addr ; functions are just labels (addresses), with a calling convention
ret ; using the correct calling convention,
; ret returns from the called function
syscall ; 32-bit code more commonly uses 'int' (interrupt) for this
je addr ; or jz -- if zero flag is set
jg addr ; -- if greater (signed); ja is the unsigned version (above)
jl addr ; -- if less (signed); jb is the unsigned version (below)
jge addr ; -- if greater or equal to (signed); jae is the unsigned version
jle addr ; -- if less or equal to (signed); jbe is the unsigned version
js addr ; -- if sign bit is set (if negative)
carry -- used to indicate carry in arithmetic operation
zero -- if a value is zero or comparison equals 0
sign -- if negative
overflow -- if overflow occurred
Each flag is set by certain instructions
/*
 * foo - allocate an uninitialized array of d ints.
 *
 * c: only its type is used (sizeof(c) is the element size); its value is
 *    never read.
 * d: number of elements to allocate.
 *
 * Returns the pointer from malloc (NULL if the allocation fails). The
 * caller owns the memory.
 */
int *foo(int c, int d) {
    char e;                                 /* never read or written here */
    void *yeet = malloc(sizeof(c) * d);     /* sizeof(int) * d bytes */
    /* Stop! */
    return (int *)yeet;
}
/*
 * main - demo driver: sets up two ints and calls foo with them.
 * Always returns 0; argc and argv are unused.
 */
int main(int argc, char *argv[]) {
    int a = 5;
    int b = 7;
    /* (b,a) is the comma operator: b is evaluated and discarded and the
       value is a, so this call is foo(a, b). */
    char *bar = (char *)foo((b,a),b);
    return 0;
}
;-----------------------------------------------------------------------
; int *foo(int c, int d)
; 32-bit code, arguments passed on the stack (the caller removes them).
; In:    [ebp + 8] = c, [ebp + 12] = d   (offsets valid after the prologue)
; Out:   eax = pointer returned by malloc(sizeof(int) * d)
; Clobb: ecx, edx, flags, plus whatever malloc clobbers
;-----------------------------------------------------------------------
foo:
    push ebp
    mov ebp, esp
    sub esp, 8              ;make room for the locals (e, yeet)
    ; [ebp] = saved ebp, [ebp + 4] = return address, so args start at +8
    mov ecx, [ebp + 8]      ;get c
    mov edx, [ebp + 12]     ;get d
    mov eax, 4              ;sizeof(int)
    mul edx                 ;edx:eax = sizeof(int)*d
    push eax                ;arg to malloc
    call malloc
    add esp, 4              ;clean up arg
    mov [esp], eax          ;store in yeet; eax stays as the return value
    add esp, 8              ;clean up locals
    pop ebp
    ret
;-----------------------------------------------------------------------
; int main(int argc, char *argv[])
; 32-bit code, arguments passed on the stack (the caller removes them).
; Locals: [ebp - 4] = a, [ebp - 8] = b, [ebp - 12] = bar
; Out:    eax = 0
; Clobb:  eax, edx, flags, plus whatever foo clobbers
;-----------------------------------------------------------------------
main:
    push ebp
    mov ebp, esp
    push 5                  ;a   -> [ebp - 4]
    push 7                  ;b   -> [ebp - 8]
    sub esp, 4              ;bar -> [ebp - 12]
    mov eax, [ebp - 4]      ;get a
    mov edx, [ebp - 8]      ;get b (edx is scratch; ebx would need saving)
    push edx                ;d = b  (args are pushed right to left)
    push eax                ;c = (b,a) = a  (comma operator yields a)
    call foo
    add esp, 8              ;clean up args
    mov [ebp - 12], eax     ;store in bar
    add esp, 12             ;clean up locals
    mov eax, 0              ;return 0
    pop ebp
    ret