Applied Reverse Engineering (HACS408E)

Please sit on the right half of the room

--->

Course Introduction

Labs

The majority of time in class will be spent on labs. This is an applied course. The weekly homeworks (50% of your grade) will require you to demonstrate mastery of the techniques taught through the labs.

Team Presentation

In lieu of a midterm exam students will present their reverse engineering efforts against malware samples assigned to each group.

Final Project

In lieu of a final exam students will work on the NSA Codebreaker Challenge during the last two weeks of class.

Grading

Assignment Percentage %
Homework 50%
Quizzes 10%
Participation 10%
Team Presentation 10%
Final Project 20%
Total 100%

Ethics

  • Do not attempt to use what you learn in this class to commit illegal acts.

  • You will learn things in this course that you potentially can use to 'steal' intellectual property and exploit commercial software.

  • It's not our intent to train a new generation of criminals.

  • Use your best judgement; what you choose to do with this knowledge is on you.

Legal

  • In the United States even if an artifact or process is protected by trade secrets, reverse-engineering the artifact or process is often lawful as long as it has been legitimately obtained.

  • Reverse engineering of computer software in the US often falls under contract law as a breach of contract. This is because most EULAs specifically prohibit it.

  • A person who is in legal possession of a program is permitted to reverse-engineer it and circumvent its protection if this is necessary in order to achieve "interoperability" (to include publication of results).

Website

HACS408E.umd.edu

Discord

discord.gg/ETzn4AUu

Questions?

Survey

Introduction to Reverse Engineering

What is Reverse Engineering

The process by which a man-made object is deconstructed to reveal its designs, architecture, or to extract knowledge from the object.

Compilation Stack

#include <stdio.h>

// Function to calculate the factorial of a number
// Returns 1 for every n <= 1, so a negative argument terminates immediately
// instead of recursing without bound; otherwise returns n * (n - 1)!.
// Note: the result no longer fits in a 32-bit int once n exceeds 12.
int factorial(int n) {
    if (n <= 1) {
        return 1;
    }
    return n * factorial(n - 1);
}

// Function to check if a number is prime
// Returns 1 when num is prime and 0 otherwise (including every num <= 1).
int isPrime(int num) {
    if (num <= 1) return 0;
    // Trial division up to sqrt(num). "i <= num / i" is the same bound as
    // "i * i <= num" but cannot overflow int when num is close to INT_MAX.
    for (int i = 2; i <= num / i; i++) {
        if (num % i == 0) return 0;
    }
    return 1;
}

// Print the first n Fibonacci numbers, starting from 0, on a single line.
// Each number is followed by one space and the line ends with a newline;
// when n <= 0 only the newline is printed.
void printFibonacci(int n) {
    int current = 0;
    int upcoming = 1;

    for (int remaining = n; remaining > 0; remaining--) {
        printf("%d ", current);
        int sum = current + upcoming;   // next term of the sequence
        current = upcoming;
        upcoming = sum;
    }
    printf("\n");
}

int main() {
    int choice, number;

    // Menu for the user to select an operation
    printf("Choose an option:\n");
    printf("1. Factorial\n");
    printf("2. Prime check\n");
    printf("3. Fibonacci series\n");
    printf("Enter your choice: ");
    scanf("%d", &choice);
; Prints a greeting, reads one byte from stdin and branches to print_yes
; when that byte is 'y'; otherwise exits with status 0.
; Syntax: NASM / Intel. Kernel interface: int 0x80 with the call number in eax
; and arguments in ebx, ecx, edx.
section .data
    hello db 'Hello, World!'    ; Message text sent to stdout
    hello_len equ $-hello        ; Length of the text only (terminator excluded)
    db 0                         ; Null terminator, stored after the text but never written

section .bss
    num resb 1                  ; Reserve 1 byte for input

section .text
    global _start

_start:
    ; Write the "Hello, World!" string to stdout
    mov eax, 4                  ; syscall number for sys_write
    mov ebx, 1                  ; file descriptor 1 (stdout)
    mov ecx, hello              ; pointer to the string
    mov edx, hello_len          ; number of bytes to write (no trailing NUL)
    int 0x80                    ; call kernel

    ; Read a single character from stdin
    mov eax, 3                  ; syscall number for sys_read
    mov ebx, 0                  ; file descriptor 0 (stdin)
    mov ecx, num                ; pointer to buffer
    mov edx, 1                  ; number of bytes to read
    int 0x80                    ; call kernel

    ; Check if the entered character is 'y' (yes)
    mov al, [num]               ; move the input into al
    cmp al, 'y'                 ; compare with 'y'
    je print_yes                ; input was 'y': go print the confirmation

    ; Exit the program
    mov eax, 1                  ; syscall number for sys_exit
    xor ebx, ebx                ; return 0
    int 0x80                    ; call kernel

print_yes:
    ; Write "You pressed 'y'!" to stdout
    mov eax, 4                  ; syscall number for sys_write
    mov ebx, 1                  ; file des
A3B9F4C81D75E60B9A2F0D48E71C53BA
8C4F1E29B76D3A58E90C7B14F2E83D6A
5E7A0F1C4D2938B7A3E69C2D51F48B03
1D6C9A5B7E30F42D8C1B94A6E503D28F
F834B2A9E1F560D72B4A9C5F18372C4D
AB13E9C0482F6D7519BCEA073D49258F
9F42D8A5B17C60F9D3B4E2C78A1F5E0D
C74B1A29E38F6D05A9C17E2B5F803D14
6B1E3A479C5D028F47B9E6A0C18F35D2
AF05B27C8E39D6142F3A7B9E1C5406D8
D27B5F4C8A1E9037B6D23A149C85F0E2
8A2F67B94D01C3E5F2784A3B0D9E5F1C
F9D324B85C7E3A019D0A4E5B76C13F28
C8A1B3F04D5E72B93C6A8D14F29E603D
A9F1837B25C04E1D9A32F8B7D5603C14
E5D37B4A291C0F1E36C85D9B42A7F6E0
0D74C9A51F2B8E3A57D9C601E4B32F78
B0E53A7D9F6C18B2D45A91C02E47B3F9
D18A29F3C57B46E04D92B1A7F60C85E3
5B7A2F4E1C9360D9A8B25C07F3E14D8A
4F08C1E72A9B5D6F04D3B28F9E157C60
7D3A6E1F5B94C08D24B9F1A35C76E20F
A18C2B3E9F74D05B6A2C1E4F3D57B8F9
4D5A29F3B7E1C06D29A8F4E52B7D9C13
B74D5F9C01A23E86B9D0F4A3E27C581F
F018C3D27E5B96A14C2F9D83B57A40E6
A1B9E54D02F378C5D19B7E0A43F62C8D
E29A51B3F7C048D6B94C8A2E73D05F19
C7D28B1F5E9034A6D3C19F47B2A05E8F
F09D32A47B85C16E9A0F38D25B4C7E12
3B8A7F6C1D04E92F05D3A9B2F4E18C57
C15B8F03E27D4A96B9D01C3E74A58F02
9E7D34F8B1A02C57E6A3F4D95C18B0D2
B58A02F9D7C34E01A2D69B43E7F85C12
E93A17C5B0D2F4E6A2F15B8D04C39E70
F1A25C08D3B74F29A03E68B95D14C7E2
2B94F3D7C61E0A58D39A42F1C85B76E0
D8C14B7A3F96E2D0A15F32C9E48B07D5
C91F5B8E72A0D4F6B19C4A35D27F38E0
5A9C27B4F18E63D02A5B94F3E7C81D0F
F6A34E2D19B58C70D13F9B72C4A0E5B7
C7E19B05D3A68F24B1D7F03E4A96C2F8
A92C5B07F34E6D1A08F59B2D4E13C7F6
B73A5D94F1E20C68A35B9F7C48D02E16
9A5F7E3D1C0A42B8D49B26F0E13C5A78
E13B58D2C97A06F4D1A03F9B72C48E5F
4F9B27C6E1A05D3A8C19F4E7B25D03F6
B4D8F1C36A29E07B5A93F2D0E14C67F8
A3D27B4E5C19F68A0F1B7E3D29C54F08
C84A5E19F37B62D0A4C18B7F03D25E9F
B75D34F19A02C8E6B29F1A3D58C07E4F
1F94B37C2A06D5E1A9B08C3F27D64F52
A9E31D5F28C07B6D42F1A9C3E04B57F8
7B1F5C9A34E0D26A98F2B03D47F5E1C6

Source

Assembly

Machine Code

Reverse Engineering

#include <stdio.h>

// Function to calculate the factorial of a number
// Returns 1 for every n <= 1, so a negative argument terminates immediately
// instead of recursing without bound; otherwise returns n * (n - 1)!.
// Note: the result no longer fits in a 32-bit int once n exceeds 12.
int factorial(int n) {
    if (n <= 1) {
        return 1;
    }
    return n * factorial(n - 1);
}

// Function to check if a number is prime
// Returns 1 when num is prime and 0 otherwise (including every num <= 1).
int isPrime(int num) {
    if (num <= 1) return 0;
    // Trial division up to sqrt(num). "i <= num / i" is the same bound as
    // "i * i <= num" but cannot overflow int when num is close to INT_MAX.
    for (int i = 2; i <= num / i; i++) {
        if (num % i == 0) return 0;
    }
    return 1;
}

// Print the first n Fibonacci numbers, starting from 0, on a single line.
// Each number is followed by one space and the line ends with a newline;
// when n <= 0 only the newline is printed.
void printFibonacci(int n) {
    int current = 0;
    int upcoming = 1;

    for (int remaining = n; remaining > 0; remaining--) {
        printf("%d ", current);
        int sum = current + upcoming;   // next term of the sequence
        current = upcoming;
        upcoming = sum;
    }
    printf("\n");
}

int main() {
    int choice, number;

    // Menu for the user to select an operation
    printf("Choose an option:\n");
    printf("1. Factorial\n");
    printf("2. Prime check\n");
    printf("3. Fibonacci series\n");
    printf("Enter your choice: ");
    scanf("%d", &choice);
; Prints a greeting, reads one byte from stdin and branches to print_yes
; when that byte is 'y'; otherwise exits with status 0.
; Syntax: NASM / Intel. Kernel interface: int 0x80 with the call number in eax
; and arguments in ebx, ecx, edx.
section .data
    hello db 'Hello, World!'    ; Message text sent to stdout
    hello_len equ $-hello        ; Length of the text only (terminator excluded)
    db 0                         ; Null terminator, stored after the text but never written

section .bss
    num resb 1                  ; Reserve 1 byte for input

section .text
    global _start

_start:
    ; Write the "Hello, World!" string to stdout
    mov eax, 4                  ; syscall number for sys_write
    mov ebx, 1                  ; file descriptor 1 (stdout)
    mov ecx, hello              ; pointer to the string
    mov edx, hello_len          ; number of bytes to write (no trailing NUL)
    int 0x80                    ; call kernel

    ; Read a single character from stdin
    mov eax, 3                  ; syscall number for sys_read
    mov ebx, 0                  ; file descriptor 0 (stdin)
    mov ecx, num                ; pointer to buffer
    mov edx, 1                  ; number of bytes to read
    int 0x80                    ; call kernel

    ; Check if the entered character is 'y' (yes)
    mov al, [num]               ; move the input into al
    cmp al, 'y'                 ; compare with 'y'
    je print_yes                ; input was 'y': go print the confirmation

    ; Exit the program
    mov eax, 1                  ; syscall number for sys_exit
    xor ebx, ebx                ; return 0
    int 0x80                    ; call kernel

print_yes:
    ; Write "You pressed 'y'!" to stdout
    mov eax, 4                  ; syscall number for sys_write
    mov ebx, 1                  ; file des
A3B9F4C81D75E60B9A2F0D48E71C53BA
8C4F1E29B76D3A58E90C7B14F2E83D6A
5E7A0F1C4D2938B7A3E69C2D51F48B03
1D6C9A5B7E30F42D8C1B94A6E503D28F
F834B2A9E1F560D72B4A9C5F18372C4D
AB13E9C0482F6D7519BCEA073D49258F
9F42D8A5B17C60F9D3B4E2C78A1F5E0D
C74B1A29E38F6D05A9C17E2B5F803D14
6B1E3A479C5D028F47B9E6A0C18F35D2
AF05B27C8E39D6142F3A7B9E1C5406D8
D27B5F4C8A1E9037B6D23A149C85F0E2
8A2F67B94D01C3E5F2784A3B0D9E5F1C
F9D324B85C7E3A019D0A4E5B76C13F28
C8A1B3F04D5E72B93C6A8D14F29E603D
A9F1837B25C04E1D9A32F8B7D5603C14
E5D37B4A291C0F1E36C85D9B42A7F6E0
0D74C9A51F2B8E3A57D9C601E4B32F78
B0E53A7D9F6C18B2D45A91C02E47B3F9
D18A29F3C57B46E04D92B1A7F60C85E3
5B7A2F4E1C9360D9A8B25C07F3E14D8A
4F08C1E72A9B5D6F04D3B28F9E157C60
7D3A6E1F5B94C08D24B9F1A35C76E20F
A18C2B3E9F74D05B6A2C1E4F3D57B8F9
4D5A29F3B7E1C06D29A8F4E52B7D9C13
B74D5F9C01A23E86B9D0F4A3E27C581F
F018C3D27E5B96A14C2F9D83B57A40E6
A1B9E54D02F378C5D19B7E0A43F62C8D
E29A51B3F7C048D6B94C8A2E73D05F19
C7D28B1F5E9034A6D3C19F47B2A05E8F
F09D32A47B85C16E9A0F38D25B4C7E12
3B8A7F6C1D04E92F05D3A9B2F4E18C57
C15B8F03E27D4A96B9D01C3E74A58F02
9E7D34F8B1A02C57E6A3F4D95C18B0D2
B58A02F9D7C34E01A2D69B43E7F85C12
E93A17C5B0D2F4E6A2F15B8D04C39E70
F1A25C08D3B74F29A03E68B95D14C7E2
2B94F3D7C61E0A58D39A42F1C85B76E0
D8C14B7A3F96E2D0A15F32C9E48B07D5
C91F5B8E72A0D4F6B19C4A35D27F38E0
5A9C27B4F18E63D02A5B94F3E7C81D0F
F6A34E2D19B58C70D13F9B72C4A0E5B7
C7E19B05D3A68F24B1D7F03E4A96C2F8
A92C5B07F34E6D1A08F59B2D4E13C7F6
B73A5D94F1E20C68A35B9F7C48D02E16
9A5F7E3D1C0A42B8D49B26F0E13C5A78
E13B58D2C97A06F4D1A03F9B72C48E5F
4F9B27C6E1A05D3A8C19F4E7B25D03F6
B4D8F1C36A29E07B5A93F2D0E14C67F8
A3D27B4E5C19F68A0F1B7E3D29C54F08
C84A5E19F37B62D0A4C18B7F03D25E9F
B75D34F19A02C8E6B29F1A3D58C07E4F
1F94B37C2A06D5E1A9B08C3F27D64F52
A9E31D5F28C07B6D42F1A9C3E04B57F8
7B1F5C9A34E0D26A98F2B03D47F5E1C6

Source

Assembly

Machine Code

Why RE software?

  • Vulnerability Analysis
  • Cryptanalysis
  • Malware Analysis
  • Interoperability

Other Software Stacks

  • Python/Ruby → Interpreter
  • Java → Java bytecode → JVM
  • TypeScript → JavaScript → V8 engine
  • LLVM → WASM
  • And so on

Scope of this class

This class is focused on the reverse engineering of compiled software and related formats.

Linux Binary Analysis

  • ELF
  • Static/Dynamic
  • Malware

Some Windows

  • PE vs ELF
  • Calling conventions

Some Mobile

  • TBD

Class VM (Kali + Tools)

(or your own machine)

Tools we'll learn

  • Binaryninja
  • Ghidra
  • Gdb
  • x64dbg
  • Windbg
  • Readelf
  • Objdump
  • Binwalk
  • And more

Questions?

C Programming

General-purpose, imperative, low-level, memory management, compiled

"C was originally developed at Bell Labs by Dennis Ritchie between 1972 and 1973 to make utilities running on Unix" -Wiki

Control Statements

  • If-statements
  • While-loops
  • Do-whiles
  • For-loops
  • Switch-statements
  • Gotos
if ( 1 && !0 ){
    printf("Very true!\n");
} else {
    printf("Very false!\n");
}
while ("Is this non-zero?") {
    printf("Who thinks yes?\n");
} 
int i; /* C99 */
for (; i < 10; i++) {
    print("What is wrong with this?\n");
}

Pointers & Dynamic Memory

What are pointers used for in C?

  • referencing memory on the stack or heap
  • referencing arrays
  • extra return values

What are pointers used for in Java?

  • Objects, Objects, Objects
//stack memory, how many bytes are used?
int a[10] = {1, 2}; 

//pointer to stack memory
int *pa = a;

//pointer to heap memory
int *b = malloc(sizeof(int)*10); 

//pointer to heap memory pointing to pointers,
// how many bytes are used?
int **c = malloc(sizeof(int*)*3);

c[0] = b;
c[1] = b;
c[2] = a+1;

**(c+2) = 72;

printf("What is c[2][0]? %d\n", c[2][0]);
printf("What is c[2][-1]? %d\n", c[2][-1]);

Referencing memory on the stack or heap

Arrays are contiguous blocks of memory. Static 2D Arrays are contiguous.

Pointers & Dynamic Memory

These data structures and how they are represented in memory are important! 

int foo(int x, int y, int *z) {
    if (x > 0 && y > 0){
        *z = x*y;
        return SUCCESS;
    }
    return FAILURE;
}

int main() {
    int a = -1;
    if (foo(1, 2, &a) == FAILURE) {
        return 1;
    }

    printf("%d\n", a);
    return a;
}

How do you return multiple items from a function?

Sometimes one return value is not enough, so pointers passed as arguments can help receive more data from a function. 

Pointers & Dynamic Memory

Stack and Heap

What is the Stack?

 

 

 

 

 

What is the Heap?

*if you know where to look

  • Memory used to separate function frames for local memory usage
  • Starts at a high address and grows down
  • Dynamic memory that is globally accessible*
  • Must be allocated & freed manually

C experts now?

/* 1. What is wrong here? */
int *baz(){
    int i = 10;
    return &i;
}
/* 2. Is this valid? */
int *bar() {
    static int i = 5;
    return &i;
}    
/* 3. Is this valid? */
int *foo() {
    void *yeet = malloc(sizeof(double)*10);
    return (int *)yeet;
}

/* 4. Is this valid? */
int main() {
    char *yeett = (char *)foo();
    printf(yeett);
    
    /* 5. Missing something? */
    return 0;
}

Why do we care about details like these?

What does the stack look like?

int *foo(int c, int d) {
    char e;
    void *yeet = malloc(sizeof(c)*d);
    /* Stop! */
    return (int *)yeet;
}

int main(int argc, char *argv[]) {
    int a = 5;
    int b = 7;
    char *bar = foo(a,b);
    
    return 0;
}

High

Low

argv

argc

ret addr

old base

5

7

7

junk

5

ret addr

old base

junk

junk

Lab 1

x86 Assembly

Assembly

Low-level programming language that is translated into the architecture's machine code. Here we will use the x86_64 architecture.

What is x86_64?

64-bit architecture that supports 32-bit. Used by most modern computers.

Registers x86

  • eax - accumulator
  • ebx - base
  • ecx - counter
  • edx - data
  • edi - destination
  • esi - source
  • esp - stack pointer
  • ebp - base (stack frame) pointer
  • eip* - instruction pointer
  • Flags - set from instructions

High speed memory used to store information temporarily

* not accessible like the other registers

The names do not matter for the use of the registers, but sometimes are hints to how they are used.

Registers x64

Same as x86 but now we have more and larger registers! 

Here's the big picture, but we don't need all these!

Floating Point Registers

Flags

And a bunch of other stuff...

Sizes

  • rax   - 64-bits, 8-bytes, quad-word (qword) 
  • eax   - 32-bits, 4-bytes, double-word (dword)
  • ax    - 16-bits, 2-bytes, word   
  • al/ah - 8-bits,  1-byte,  byte

- eax is the lower 32-bits of rax

- ax is the lower 16-bits of eax and rax

- And so on

- This is true for ebx, ecx, edx, and the numbered registers as well. 

- Not all registers have byte sized references, such as esp and ebp

Intel vs AT&T

We will focus on Intel syntax, but know that AT&T syntax exists.

 

Main difference is in the source and destination operand order

 

edi - destination
esi - source
mov edi, esi

Intel

mov %esi, %edi

AT&T

In both examples, the contents of the esi register are copied to the edi register

mov & push/pop

mov eax, 0x01          ;put 1 into eax
mov dword [eax], 0x01  ;put 1 into the address in eax
                       ; (a size such as dword is required: the
                       ;  assembler cannot infer it from an immediate)
mov eax, [esi]         ;put contents of address (esi) into eax

push eax               ;put contents of eax on top of stack
push 0x01              ;put 1 on top of stack
                       ; and dec the stack pointer (stack grows down)

pop eax                ;put contents top of the stack into eax,
                       ; and inc the stack pointer

Displacement

[] indicates an access to memory

[base + index*size + offset]
; size can only be 1,2,4,8

[arr + esi*4 + 0]     ;array of int

What could the offset be used for?

lea

lea eax, ecx   ;invalid
lea eax, [ecx] ;valid, equivalent to mov eax, ecx

lea eax, [ecx + edx]   ;mov eax, ecx + edx*1 (implicit 1)
lea eax, [ecx + edx*3] ;invalid, valid numbers are 1,2,4,8

lea eax, [eax + edx*4] ;can be thought of as 
                       ; eax = (DWORD *)eax[edx] why?

Displacement

lea does not access memory with the displacement operator! It only does the pointer arithmetic with no dereference! 

Branching

jmp addr     ;addr could be a register
             ; with an address or a label

this_is_a_label:

call addr    ; functions are just labels (addresses), with a calling convention
ret          ; using the correct calling convention, 
             ;  ret returns from the called function
syscall      ; more commonly seen as 'int' for interrupt
je addr  ; or jz  -- if zero flag is set
jg addr  ; or ja  -- if greater - signed or unsigned 
jl addr  ; or jb  -- if less    - signed or unsigned
jge addr ;        -- if greater or equal to
jle addr ;        -- if less or equal to
js addr  ;        -- if sign bit is set (if negative)

Conditional branching

Flags

carry    -- used to indicate carry in arithmetic operation                    
zero     -- if a value is zero or comparison equals 0
sign     -- if negative
overflow -- if overflow occurred

Each flag is set from certain instructions

// Allocate heap space for d ints and return it as an int pointer.
// c contributes only its type: sizeof(c) is sizeof(int) and never reads c.
// e is unused; it is kept because it occupies a slot in foo's stack frame.
// Returns whatever malloc returns; the caller owns the memory.
int *foo(int c, int d) {
    char e;
    void *yeet = malloc(sizeof(c)*d);
    /* Stop! */
    return (int *)yeet;
}

int main(int argc, char *argv[]) {
    int a = 5;
    int b = 7;
    char *bar = foo(a,b);
    
    return 0;
}
;-----------------------------------------------------------------------
; int *foo(int c, int d)   -- 32-bit, arguments passed on the stack
; In:    [ebp + 8]  = c, [ebp + 12] = d
;        ([ebp] is the saved ebp, [ebp + 4] is the return address)
; Out:   eax = pointer returned by malloc(sizeof(int) * d)
; Frame: 8 bytes of locals; yeet is the slot at [esp] after the prologue
;-----------------------------------------------------------------------
foo:
    push ebp
    mov ebp, esp

    sub esp, 8            ;make room for locals (e, yeet)
    mov ecx, [ebp + 8]    ;get c (first arg, just above the return address)
    mov edx, [ebp + 12]   ;get d (second arg)

    mov eax, 4     ;sizeof(int)
    mul edx        ;eax = sizeof(int)*d (edx receives the high half)

    push eax       ;arg to malloc
    call malloc
    add esp, 4     ;clean up arg
    mov [esp], eax ;store in yeet

    add esp, 8     ;clean up locals
    pop ebp
    ret

;-----------------------------------------------------------------------
; int main(int argc, char *argv[])   -- 32-bit, arguments passed on the stack
; Locals, in push order: a = 5, b = 7, bar (4 bytes, at [esp])
; Calls foo(a, b). Arguments are pushed right-to-left, so d (= b) is
; pushed first and c (= a) ends up closest to the return address.
; Out:   eax = 0
;-----------------------------------------------------------------------
main:
    push ebp
    mov ebp, esp

    push 5       ;a
    push 7       ;b
    sub esp, 4   ;bar

    mov eax, [esp + 4]      ;get b
    mov ecx, [esp + 8]      ;get a (ecx is caller-saved; ebx would have to be preserved)

    push eax         ;d = b (last argument is pushed first)
    push ecx         ;c = a
    call foo
    add esp, 8       ;clean up args
    mov [esp], eax   ;store in bar

    add esp, 12      ;clean up locals
    mov eax, 0       ;return 0
    pop ebp
    ret

Lab 2

Feedback

Week 1

By Chase Kanipe