Please sit on the right half of the room
--->
The majority of time in class will be spent on labs. This is an applied course. The weekly homeworks (50% of your grade) will require you to demonstrate mastery of the techniques taught through the labs.
In lieu of a midterm exam, students will present their reverse engineering efforts against malware samples assigned to each group.
In lieu of a final exam, students will work on the NSA Codebreaker Challenge during the last two weeks of class.
Assignment | Percentage % |
---|---|
Homework | 50% |
Quizzes | 10% |
Participation | 10% |
Team Presentation | 10% |
Final Project | 20% |
Total | 100% |
Do not attempt to use what you learn in this class to commit illegal acts.
You will learn things in this course that you potentially can use to 'steal' intellectual property and exploit commercial software.
It's not our intent to train a new generation of criminals.
Use your best judgement; what you choose to do with this knowledge is on you.
In the United States even if an artifact or process is protected by trade secrets, reverse-engineering the artifact or process is often lawful as long as it has been legitimately obtained.
Reverse engineering of computer software in the US often falls under contract law as a breach of contract. This is because most EULAs specifically prohibit it.
A person who is in legal possession of a program is permitted to reverse-engineer it and circumvent its protection if this is necessary in order to achieve "interoperability" (to include publication of results).
HACS408E.umd.edu
discord.gg/ETzn4AUu
The process by which a man-made object is deconstructed to reveal its designs, architecture, or to extract knowledge from the object.
#include <stdio.h>
// Function to calculate the factorial of a number (recursive).
// Returns 1 for every n <= 1, so a negative argument terminates immediately
// instead of recursing without ever reaching the base case.
// The result is an int, so it is only meaningful while n! fits in an int.
int factorial(int n) {
    if (n <= 1) {
        return 1;
    }
    return n * factorial(n - 1);
}
// Function to check if a number is prime (trial division).
// Returns 1 when num is prime and 0 otherwise; every num <= 1 is reported as not prime.
int isPrime(int num) {
    if (num <= 1) return 0;
    // "i <= num / i" is the same bound as "i * i <= num" but cannot overflow
    // a signed int when num is close to INT_MAX.
    for (int i = 2; i <= num / i; i++) {
        if (num % i == 0) return 0;
    }
    return 1;
}
// Print the first n Fibonacci numbers (starting at 0) on one line,
// each followed by a space, then end the line with a newline.
// For n <= 0 only the newline is printed.
void printFibonacci(int n) {
    int current = 0;
    int upcoming = 1;
    int emitted = 0;

    while (emitted < n) {
        printf("%d ", current);
        int sum = current + upcoming;
        current = upcoming;
        upcoming = sum;
        emitted++;
    }
    printf("\n");
}
int main() {
int choice, number;
// Menu for the user to select an operation
printf("Choose an option:\n");
printf("1. Factorial\n");
printf("2. Prime check\n");
printf("3. Fibonacci series\n");
printf("Enter your choice: ");
scanf("%d", &choice);
section .data
hello db 'Hello, World!',0 ; Null-terminated string
hello_len equ $-hello ; Length of the string
section .bss
num resb 1 ; Reserve 1 byte for input
section .text
global _start
_start:
; Write the "Hello, World!" string to stdout
mov eax, 4 ; syscall number for sys_write
mov ebx, 1 ; file descriptor 1 (stdout)
mov ecx, hello ; pointer to the string
mov edx, hello_len ; length of the string
int 0x80 ; call kernel
; Read a single character from stdin
mov eax, 3 ; syscall number for sys_read
mov ebx, 0 ; file descriptor 0 (stdin)
mov ecx, num ; pointer to buffer
mov edx, 1 ; number of bytes to read
int 0x80 ; call kernel
; Check if the entered character is 'y' (yes)
mov al, [num] ; move the input into al
cmp al, 'y' ; compare with 'y'
je print_yes ; jump if equal to "yes"
; Exit the program
mov eax, 1 ; syscall number for sys_exit
xor ebx, ebx ; return 0
int 0x80 ; call kernel
print_yes:
; Write "You pressed 'y'!" to stdout
mov eax, 4 ; syscall number for sys_write
mov ebx, 1 ; file des
A3B9F4C81D75E60B9A2F0D48E71C53BA
8C4F1E29B76D3A58E90C7B14F2E83D6A
5E7A0F1C4D2938B7A3E69C2D51F48B03
1D6C9A5B7E30F42D8C1B94A6E503D28F
F834B2A9E1F560D72B4A9C5F18372C4D
AB13E9C0482F6D7519BCEA073D49258F
9F42D8A5B17C60F9D3B4E2C78A1F5E0D
C74B1A29E38F6D05A9C17E2B5F803D14
6B1E3A479C5D028F47B9E6A0C18F35D2
AF05B27C8E39D6142F3A7B9E1C5406D8
D27B5F4C8A1E9037B6D23A149C85F0E2
8A2F67B94D01C3E5F2784A3B0D9E5F1C
F9D324B85C7E3A019D0A4E5B76C13F28
C8A1B3F04D5E72B93C6A8D14F29E603D
A9F1837B25C04E1D9A32F8B7D5603C14
E5D37B4A291C0F1E36C85D9B42A7F6E0
0D74C9A51F2B8E3A57D9C601E4B32F78
B0E53A7D9F6C18B2D45A91C02E47B3F9
D18A29F3C57B46E04D92B1A7F60C85E3
5B7A2F4E1C9360D9A8B25C07F3E14D8A
4F08C1E72A9B5D6F04D3B28F9E157C60
7D3A6E1F5B94C08D24B9F1A35C76E20F
A18C2B3E9F74D05B6A2C1E4F3D57B8F9
4D5A29F3B7E1C06D29A8F4E52B7D9C13
B74D5F9C01A23E86B9D0F4A3E27C581F
F018C3D27E5B96A14C2F9D83B57A40E6
A1B9E54D02F378C5D19B7E0A43F62C8D
E29A51B3F7C048D6B94C8A2E73D05F19
C7D28B1F5E9034A6D3C19F47B2A05E8F
F09D32A47B85C16E9A0F38D25B4C7E12
3B8A7F6C1D04E92F05D3A9B2F4E18C57
C15B8F03E27D4A96B9D01C3E74A58F02
9E7D34F8B1A02C57E6A3F4D95C18B0D2
B58A02F9D7C34E01A2D69B43E7F85C12
E93A17C5B0D2F4E6A2F15B8D04C39E70
F1A25C08D3B74F29A03E68B95D14C7E2
2B94F3D7C61E0A58D39A42F1C85B76E0
D8C14B7A3F96E2D0A15F32C9E48B07D5
C91F5B8E72A0D4F6B19C4A35D27F38E0
5A9C27B4F18E63D02A5B94F3E7C81D0F
F6A34E2D19B58C70D13F9B72C4A0E5B7
C7E19B05D3A68F24B1D7F03E4A96C2F8
A92C5B07F34E6D1A08F59B2D4E13C7F6
B73A5D94F1E20C68A35B9F7C48D02E16
9A5F7E3D1C0A42B8D49B26F0E13C5A78
E13B58D2C97A06F4D1A03F9B72C48E5F
4F9B27C6E1A05D3A8C19F4E7B25D03F6
B4D8F1C36A29E07B5A93F2D0E14C67F8
A3D27B4E5C19F68A0F1B7E3D29C54F08
C84A5E19F37B62D0A4C18B7F03D25E9F
B75D34F19A02C8E6B29F1A3D58C07E4F
1F94B37C2A06D5E1A9B08C3F27D64F52
A9E31D5F28C07B6D42F1A9C3E04B57F8
7B1F5C9A34E0D26A98F2B03D47F5E1C6
Source
Assembly
Machine Code
#include <stdio.h>
// Function to calculate the factorial of a number (recursive).
// Returns 1 for every n <= 1, so a negative argument terminates immediately
// instead of recursing without ever reaching the base case.
// The result is an int, so it is only meaningful while n! fits in an int.
int factorial(int n) {
    if (n <= 1) {
        return 1;
    }
    return n * factorial(n - 1);
}
// Function to check if a number is prime (trial division).
// Returns 1 when num is prime and 0 otherwise; every num <= 1 is reported as not prime.
int isPrime(int num) {
    if (num <= 1) return 0;
    // "i <= num / i" is the same bound as "i * i <= num" but cannot overflow
    // a signed int when num is close to INT_MAX.
    for (int i = 2; i <= num / i; i++) {
        if (num % i == 0) return 0;
    }
    return 1;
}
// Print the first n Fibonacci numbers (starting at 0) on one line,
// each followed by a space, then end the line with a newline.
// For n <= 0 only the newline is printed.
void printFibonacci(int n) {
    int current = 0;
    int upcoming = 1;
    int emitted = 0;

    while (emitted < n) {
        printf("%d ", current);
        int sum = current + upcoming;
        current = upcoming;
        upcoming = sum;
        emitted++;
    }
    printf("\n");
}
int main() {
int choice, number;
// Menu for the user to select an operation
printf("Choose an option:\n");
printf("1. Factorial\n");
printf("2. Prime check\n");
printf("3. Fibonacci series\n");
printf("Enter your choice: ");
scanf("%d", &choice);
section .data
hello db 'Hello, World!',0 ; Null-terminated string
hello_len equ $-hello ; Length of the string
section .bss
num resb 1 ; Reserve 1 byte for input
section .text
global _start
_start:
; Write the "Hello, World!" string to stdout
mov eax, 4 ; syscall number for sys_write
mov ebx, 1 ; file descriptor 1 (stdout)
mov ecx, hello ; pointer to the string
mov edx, hello_len ; length of the string
int 0x80 ; call kernel
; Read a single character from stdin
mov eax, 3 ; syscall number for sys_read
mov ebx, 0 ; file descriptor 0 (stdin)
mov ecx, num ; pointer to buffer
mov edx, 1 ; number of bytes to read
int 0x80 ; call kernel
; Check if the entered character is 'y' (yes)
mov al, [num] ; move the input into al
cmp al, 'y' ; compare with 'y'
je print_yes ; jump if equal to "yes"
; Exit the program
mov eax, 1 ; syscall number for sys_exit
xor ebx, ebx ; return 0
int 0x80 ; call kernel
print_yes:
; Write "You pressed 'y'!" to stdout
mov eax, 4 ; syscall number for sys_write
mov ebx, 1 ; file des
A3B9F4C81D75E60B9A2F0D48E71C53BA
8C4F1E29B76D3A58E90C7B14F2E83D6A
5E7A0F1C4D2938B7A3E69C2D51F48B03
1D6C9A5B7E30F42D8C1B94A6E503D28F
F834B2A9E1F560D72B4A9C5F18372C4D
AB13E9C0482F6D7519BCEA073D49258F
9F42D8A5B17C60F9D3B4E2C78A1F5E0D
C74B1A29E38F6D05A9C17E2B5F803D14
6B1E3A479C5D028F47B9E6A0C18F35D2
AF05B27C8E39D6142F3A7B9E1C5406D8
D27B5F4C8A1E9037B6D23A149C85F0E2
8A2F67B94D01C3E5F2784A3B0D9E5F1C
F9D324B85C7E3A019D0A4E5B76C13F28
C8A1B3F04D5E72B93C6A8D14F29E603D
A9F1837B25C04E1D9A32F8B7D5603C14
E5D37B4A291C0F1E36C85D9B42A7F6E0
0D74C9A51F2B8E3A57D9C601E4B32F78
B0E53A7D9F6C18B2D45A91C02E47B3F9
D18A29F3C57B46E04D92B1A7F60C85E3
5B7A2F4E1C9360D9A8B25C07F3E14D8A
4F08C1E72A9B5D6F04D3B28F9E157C60
7D3A6E1F5B94C08D24B9F1A35C76E20F
A18C2B3E9F74D05B6A2C1E4F3D57B8F9
4D5A29F3B7E1C06D29A8F4E52B7D9C13
B74D5F9C01A23E86B9D0F4A3E27C581F
F018C3D27E5B96A14C2F9D83B57A40E6
A1B9E54D02F378C5D19B7E0A43F62C8D
E29A51B3F7C048D6B94C8A2E73D05F19
C7D28B1F5E9034A6D3C19F47B2A05E8F
F09D32A47B85C16E9A0F38D25B4C7E12
3B8A7F6C1D04E92F05D3A9B2F4E18C57
C15B8F03E27D4A96B9D01C3E74A58F02
9E7D34F8B1A02C57E6A3F4D95C18B0D2
B58A02F9D7C34E01A2D69B43E7F85C12
E93A17C5B0D2F4E6A2F15B8D04C39E70
F1A25C08D3B74F29A03E68B95D14C7E2
2B94F3D7C61E0A58D39A42F1C85B76E0
D8C14B7A3F96E2D0A15F32C9E48B07D5
C91F5B8E72A0D4F6B19C4A35D27F38E0
5A9C27B4F18E63D02A5B94F3E7C81D0F
F6A34E2D19B58C70D13F9B72C4A0E5B7
C7E19B05D3A68F24B1D7F03E4A96C2F8
A92C5B07F34E6D1A08F59B2D4E13C7F6
B73A5D94F1E20C68A35B9F7C48D02E16
9A5F7E3D1C0A42B8D49B26F0E13C5A78
E13B58D2C97A06F4D1A03F9B72C48E5F
4F9B27C6E1A05D3A8C19F4E7B25D03F6
B4D8F1C36A29E07B5A93F2D0E14C67F8
A3D27B4E5C19F68A0F1B7E3D29C54F08
C84A5E19F37B62D0A4C18B7F03D25E9F
B75D34F19A02C8E6B29F1A3D58C07E4F
1F94B37C2A06D5E1A9B08C3F27D64F52
A9E31D5F28C07B6D42F1A9C3E04B57F8
7B1F5C9A34E0D26A98F2B03D47F5E1C6
Source
Assembly
Machine Code
This class is focused on the reverse engineering of compiled software and related formats.
General-purpose, imperative, low-level, memory management, compiled
"C was originally developed at Bell Labs by Dennis Ritchie between 1972 and 1973 to make utilities running on Unix" -Wiki
if ( 1 && !0 ){
printf("Very true!\n");
} else {
printf("Very false!\n");
}
while ("Is this non-zero?") {
printf("Who thinks yes?\n");
}
int i; /* C99 */
for (; i < 10; i++) {
print("What is wrong with this?\n");
}
What are pointers used for in C?
What are pointers used for in Java?
//stack memory, how many bytes are used?
int a[10] = {1, 2};
//pointer to stack memory
int *pa = a;
//pointer to heap memory
int *b = malloc(sizeof(int)*10);
//pointer to heap memory pointing to pointers,
// how many bytes are used?
int **c = malloc(sizeof(int*)*3);
c[0] = b;
c[1] = b;
c[2] = a+1;
**(c+2) = 72;
printf("What is c[2][0]? %d\n", c[2][0]);
printf("What is c[2][-1]? %d\n", c[2][-1]);
Referencing memory on the stack or heap
Arrays are contiguous blocks of memory. Static 2D Arrays are contiguous.
These data structures and how they are represented in memory are important!
/*
 * foo: when both x and y are positive, write x*y through z and return SUCCESS.
 * Otherwise *z is left untouched and FAILURE is returned.
 * The int return value carries the status; the product travels back through
 * the pointer argument z.
 */
int foo(int x, int y, int *z) {
    if (x <= 0 || y <= 0) {
        return FAILURE;
    }
    *z = x * y;
    return SUCCESS;
}
/*
 * main: calls foo(1, 2, &product). On FAILURE it exits with status 1;
 * otherwise it prints the value foo stored and returns that value.
 */
int main() {
    int product = -1;   /* overwritten by foo only when foo succeeds */

    if (foo(1, 2, &product) != FAILURE) {
        printf("%d\n", product);
        return product;
    }
    return 1;
}
How do you return multiple items from a function?
Sometimes one return value is not enough, so pointers passed as arguments can help receive more data from a function.
What is the Stack?
What is the Heap?
*if you know where to look
/* 1. What is wrong here? */
int *baz(){
int i = 10;
return &i;
}
/* 2. Is this valid? */
int *bar() {
static int i = 5;
return &i;
}
/* 3. Is this valid? */
int *foo() {
void *yeet = malloc(sizeof(double)*10);
return (int *)yeet;
}
/* 4. Is this valid? */
int main() {
char *yeett = (char *)foo();
printf(yeett);
/* 5. Missing something? */
return 0;
}
Why do we care about details like these?
/*
 * foo: requests sizeof(c) * d bytes from malloc and returns the result cast to int *.
 * c is used only for its size (sizeof(c)); d is the multiplier.
 * The malloc result is not checked and the block is not freed in this function.
 */
int *foo(int c, int d) {
/* e is declared but never read or written in this function. */
char e;
void *yeet = malloc(sizeof(c)*d);
/* Stop! */
return (int *)yeet;
}
/*
 * main: sets a = 5 and b = 7, calls foo(a, b), keeps the returned pointer
 * in bar, and returns 0. argc and argv are not used.
 */
int main(int argc, char *argv[]) {
    int a = 5;
    int b = 7;
    /* foo returns int *; converting it to char * requires an explicit cast. */
    char *bar = (char *)foo(a, b);
    return 0;
}
High
Low
argv
argc
ret addr
old base
5
7
7
junk
5
ret addr
old base
junk
junk
Low-level programming language that is translated into the architecture's machine code. Here we will use the x86_64 architecture.
What is x86_64?
64-bit architecture that supports 32-bit. Used by most modern computers.
eax - accumulator
ebx - base
ecx - counter
edx - data
edi - destination
esi - source
esp - stack pointer
ebp - base stack frame pointer
High speed memory used to store information temporarily
* not accessible like the other registers
The names do not matter for the use of the registers, but sometimes are hints to how they are used.
Same as x86 but now we have more and larger registers!
Here's the big picture, but we don't need all these!
Floating Point Registers
Flags
And a bunch of other stuff...
rax - 64-bits, 8-bytes, quad-word (qword)
eax - 32-bits, 4-bytes, double-word (dword)
ax - 16-bits, 2-bytes, word
al/ah - 8-bits, 1-byte, byte
- eax is the lower 32-bits of rax
- ax is the lower 16-bits of eax and rax
- And so on
- This is true for ebx, ecx, edx, and the numbered registers as well.
- Not all registers have byte sized references, such as esp and ebp
We will focus on Intel syntax, but know that AT&T syntax exists.
Main difference is in the source and destination operand order
edi - destination esi - source
mov edi, esi
Intel
mov %esi, %edi
AT&T
In both examples, the contents of the esi register are copied to the edi register
; Intel syntax, 32-bit x86: basic data movement and stack instructions
mov eax, 0x01 ;put 1 into eax
mov dword [eax], 0x01 ;put 1 into the 4 bytes of memory at the address in eax
; (a store of an immediate to memory needs an explicit size)
mov eax, [esi] ;put the contents of memory at the address in esi into eax
push eax ;put contents of eax on top of stack
push 0x01 ;put 1 on top of stack
; and decrement the stack pointer (the stack grows toward lower addresses)
pop eax ;put contents of the top of the stack into eax,
; and increment the stack pointer
[] indicates an access to memory
[base + index*size + offset]
; size can only be 1,2,4,8
[arr + esi*4 + 0] ;array of int
What could the offset be used for?
lea eax, ecx ;invalid
lea eax, [ecx] ;valid, equivalent to mov eax, ecx
lea eax, [ecx + edx] ;mov eax, ecx + edx*1 (implicit 1)
lea eax, [ecx + edx*3] ;invalid, valid numbers are 1,2,4,8
lea eax, [eax + edx*4] ;can be thought of as
; eax = (DWORD *)eax[edx] why?
lea does not access memory, even though its operand is written with the [] memory syntax! It only does the pointer arithmetic, with no dereference!
jmp addr ;addr could be a register
; with an address or a label
this_is_a_label:
call addr ; functions are just labels (addresses), with a calling convention
ret ; using the correct calling convention,
; ret returns from the called function
syscall ; more commonly seen as 'int' for interrupt
je addr ; or jz -- if zero flag is set
jg addr ; or ja -- if greater (jg: signed compare, ja: unsigned compare)
jl addr ; or jb -- if less (jl: signed compare, jb: unsigned compare)
jge addr ; -- if greater or equal to (signed compare)
jle addr ; -- if less or equal to (signed compare)
js addr ; -- if sign bit is set (if negative)
carry -- used to indicate carry in arithmetic operation
zero -- if a value is zero or comparison equals 0
sign -- if negative
overflow -- if overflow occurred
Each flag is set from certain instructions
/*
 * foo: requests sizeof(c) * d bytes from malloc and returns the result cast to int *.
 * c is used only for its size (sizeof(c)); d is the multiplier.
 * The malloc result is not checked and the block is not freed in this function.
 */
int *foo(int c, int d) {
    char e;   /* declared but never read or written in this function */
    void *yeet = malloc(sizeof(c)*d);
    /* Stop! */
    return (int *)yeet;
}
/*
 * main: sets a = 5 and b = 7, calls foo(a, b), keeps the returned pointer
 * in bar, and returns 0. argc and argv are not used.
 */
int main(int argc, char *argv[]) {
    int a = 5;
    int b = 7;
    /* foo returns int *; converting it to char * requires an explicit cast. */
    char *bar = (char *)foo(a, b);
    return 0;
}
; int *foo(int c, int d)
; 32-bit x86, cdecl, Intel syntax.
; In:    [ebp + 8] = c, [ebp + 12] = d
;        ([ebp] = saved ebp, [ebp + 4] = return address)
; Out:   eax = pointer returned by malloc(sizeof(int) * d)
; Stack: 8 bytes of locals reserved below ebp (e and yeet)
foo:
push ebp
mov ebp, esp
sub esp, 8 ;make room for the locals
mov ecx, [ebp + 8] ;get c (first argument, just above the return address)
mov edx, [ebp + 12] ;get d (second argument)
mov eax, 4 ;sizeof(int)
mul edx ;eax = sizeof(int)*d (mul also overwrites edx)
push eax ;arg to malloc
call malloc
add esp, 4 ;clean up arg
mov [esp], eax ;store in yeet
add esp, 8 ;clean up locals
pop ebp
ret
; int main(int argc, char *argv[])
; 32-bit x86, cdecl, Intel syntax.
; Calls foo(a, b) with a = 5 and b = 7, stores the result in bar, returns 0.
; Locals once bar is reserved: [esp + 8] = a, [esp + 4] = b, [esp] = bar
main:
push ebp
mov ebp, esp
push 5 ;a
push 7 ;b
sub esp, 4 ;bar
mov eax, [esp + 4] ;get b
mov edx, [esp + 8] ;get a (edx is caller-saved, so main need not preserve it)
push eax ;d = b (cdecl pushes arguments right to left)
push edx ;c = a
call foo
add esp, 8 ;clean up args
mov [esp], eax ;store in bar
add esp, 12 ;clean up locals
mov eax, 0 ;return 0
pop ebp
ret