CPSC 355: Tutorial 8

Memory & The Stack

PhD Student

Fall 2017

Outline

  • We've been doing most of our work so far with registers...
  • What happens when we run out of registers?
  • We need to start thinking about how to use memory to overcome the limitations of using registers alone

Memory

Addresses are 64-bit numbers that reference a particular byte in memory:

0xAA
0x00
0xFE
.
.
.

 
0x00
0x00
0x0000000000000000
0x0000000000000001
0x0000000000000002





0xFFFFFFFFFFFFFFFE
0xFFFFFFFFFFFFFFFF

Memory

Address

Memory

OS
Program

Heap
 

Free memory
 

Stack
 

low

high

The stack is used for local variables, static memory allocation, and return addresses. We'll use it for assignment 3.

Heap is for dynamically allocated memory

Memory

OS
Program

Heap
 

Free memory
 

Stack
 

low

high

today

The Stack

OS
Program

Heap
 

Free memory
 

Stack
 

low

high

Used to store local variables and arrays whose sizes are known at compile time


local vars,
return addrs,
frame pointers
etc...
 

The Stack

high

The register $sp keeps track of what address is currently at the top of the stack.


Free memory
 

local vars,
return addrs,
frame pointers
etc...
 

 $sp

The stack grows upwards in this diagram, i.e. toward lower addresses, as we allocate more data on it (so the address held in $sp gets smaller)

The Stack

high

We've actually already been using this


Free memory
 

local vars,
return addrs,
frame pointers
etc...
 
stp  x29, x30, [sp, -16]!

Add -16 to $sp before store

Then STore the Pair x29, x30

The Stack

high

We've actually already been using this


Free memory
 

local vars,
return addrs,
frame pointers
etc...
 
stp  x29, x30, [sp, -16]!

 $sp

Free memory
x29 ($fp)
x30 ($lr)

local vars,
return addrs,
frame pointers
etc...
 

 $sp

The Stack

Additionally

ldp  x29, x30, [sp], 16
Free memory
x29 ($fp)
x30 ($lr)

local vars,
return addrs,
frame pointers
etc...
 

 $sp

Free memory
x29 ($fp)
x30 ($lr)

local vars,
return addrs,
frame pointers
etc...
 

 $sp

x29 and x30 are restored from the stack

add 16 to $sp after load

The Stack

high

Allocating more space for variables


Free memory
 
old stack data
stp  x29, x30, [sp, -40 & -16]!

 $sp

Free memory
x29 ($fp)
x30 ($lr)
???
???
old stack data

 $sp

The Stack

Allocating more space for variables

mov x19, 777
str x19, [sp, 16]
Free memory
x29 ($fp)
x30 ($lr)
???
???
old stack data

 $sp

Free memory
x29 ($fp)
x30 ($lr)
777
???
old stack data

 $sp

What if the stack pointer changes?

The Stack

high

Allocating more space for variables, the correct way


Free memory
 
old stack data
stp  x29, x30, [sp, -40 & -16]!
mov  x29, sp

 $sp

Free memory
x29 ($fp)
x30 ($lr)
???
???
old stack data

 $sp

$x29

The Stack

Allocating more space for variables, the correct way

mov x19, 777
str x19, [x29, 16]
Free memory
x29 ($fp)
x30 ($lr)
???
???
old stack data

 x29

Free memory
x29 ($fp)
x30 ($lr)
777
???
old stack data

x29

x29 is always preserved by function calls -- it's a frame of reference, i.e. the frame pointer, or $fp

The Stack

#include <stdio.h>

/* Adds up the integers 1 through 99 and prints the total. */
int main(int argc, char *argv[])
{
    int i;          /* loop counter */
    int sum;        /* running total */

    sum = 0;
    for (i = 1; i < 100; i = i + 1) {
        sum = sum + i;
    }

    printf("1 + 2 + ... + 98 + 99 = %d", sum);

    return 0;
}

The Stack

alloc   = -(16 + 8) & -16       // 16 bytes for FP/LR + 8 for i and sum, rounded down to a multiple of 16 (= -32)
dealloc = -alloc                // positive amount that releases the frame again (= 32)

.text
output_string:  .string "1 + 2 + ... + 98 + 99 = %d"
                .balign 4
                .global main

// int main(): skeleton for summing 1..99 with i and sum kept on the stack.
// Frame layout relative to x29:
//   [x29 +  0] saved x29 (FP)
//   [x29 +  8] saved x30 (LR)
//   [x29 + 16] sum (4 bytes)
//   [x29 + 20] i   (4 bytes)
main:

                // Save FP and LR to the stack, alloc 8 bytes for i and sum (4 bytes each)
                stp  x29, x30, [sp, alloc]!           // sp += alloc first, then store the pair
                mov  x29, sp                          // Set FP to the stack addr

                mov w19, 0
                mov w20, 1

                str w19, [x29, 16]                    // sum = 0
                str w20, [x29, 20]                    // i = 1

                // for(i = 1; i < 100; i++) { <- you write this part in assembly
                //    sum = sum + i;
                // }

                ldr x0, =output_string                // 1st printf argument: format string
                ldr w1, [x29, 16]                     // 2nd printf argument: sum (offset 16, not i at 20)
                bl printf

                mov w0, 0                             // return 0
                ldp x29, x30, [sp], dealloc           // restore FP/LR, then sp += dealloc (must be positive)
                ret

The Stack

alloc   = -(16 + 8) & -16       // 16 bytes for FP/LR + 8 for i and sum, rounded down to a multiple of 16 (= -32)
dealloc = -alloc                // positive amount that releases the frame again (= 32)

.text
output_string:  .string "1 + 2 + ... + 98 + 99 = %d\n"
                .balign 4
                .global main

// int main(): sums 1..99 keeping i and sum in the stack frame, then prints sum.
// Frame layout relative to x29:
//   [x29 +  0] saved x29 (FP)
//   [x29 +  8] saved x30 (LR)
//   [x29 + 16] sum (4 bytes)
//   [x29 + 20] i   (4 bytes)
main:

                // Save FP and LR to the stack, alloc 8 bytes for i and sum
                stp  x29, x30, [sp, alloc]!           // sp += alloc first, then store the pair
                mov  x29, sp                          // Set FP to the stack addr

                mov w19, 0
                mov w20, 1

                str w19, [x29, 16]                    // sum = 0
                str w20, [x29, 20]                    // i = 1

loop_test:
                ldr w20, [x29, 20]                    // reload i from the stack
                cmp w20, 100
                b.ge loop_exit                        // leave the loop once i >= 100

                ldr w19, [x29, 16]                    // reload sum from the stack

                add w19, w19, w20                     // sum = sum + i
                add w20, w20, 1                       // i++

                str w20, [x29, 20]                    // write i back
                str w19, [x29, 16]                    // write sum back
                b loop_test
loop_exit:

                ldr x0, =output_string                // 1st printf argument: format string
                ldr w1, [x29, 16]                     // 2nd printf argument: sum
                bl printf

                mov w0, 0                             // return 0
                ldp x29, x30, [sp], dealloc           // restore FP/LR, then sp += dealloc (must be positive)
                ret

The Stack

define(i_v, 20)      // Define variables with respect to the frame pointer
define(sum_v, 16)

alloc   = -(16 + 8) & -16       // 16 bytes for FP/LR + 8 for the two locals, rounded down to a multiple of 16 (= -32)
dealloc = -alloc                // positive amount that releases the frame again (= 32)

fp    .req     x29
lr    .req     x30

.text
output_string:  .string "1 + 2 + ... + 98 + 99 = %d\n"
                .balign 4
                .global main

// int main(): sums 1..99 keeping both locals in the stack frame, then prints the sum.
// The saved FP/LR pair occupies the first 16 bytes of the frame; the two
// 4-byte locals sit directly above it at the offsets named by the macros above.
main:

                // Save FP and LR to the stack, alloc 8 bytes for i and sum
                stp  fp, lr, [sp, alloc]!            // sp += alloc first, then store the pair
                mov  fp, sp                          // Set FP to the stack addr

                mov w19, 0
                mov w20, 1

                str w19, [fp, sum_v]                 // sum = 0
                str w20, [fp, i_v]                   // i = 1

loop_test:
                ldr w20, [fp, i_v]                   // reload i from the stack
                cmp w20, 100
                b.ge loop_exit                       // leave the loop once i >= 100

                ldr w19, [fp, sum_v]                 // reload sum from the stack

                add w19, w19, w20                    // sum = sum + i
                add w20, w20, 1                      // i++

                str w20, [fp, i_v]                   // write i back
                str w19, [fp, sum_v]                 // write sum back
                b loop_test
loop_exit:

                ldr x0, =output_string               // 1st printf argument: the string above
                ldr w1, [fp, sum_v]                  // 2nd printf argument: sum
                bl printf

                mov w0, 0                            // return 0
                ldp fp, lr, [sp], dealloc            // restore FP/LR, then sp += dealloc (must be positive)
                ret

The Stack

#include <stdio.h>

/* Prints a message when input exceeds 100, using a variable scoped to the if-block. */
int main(int argc, char *argv[])
{
    int input;

    input = 200;

    if (100 < input) {
        /* temp exists only inside this block */
        int temp;

        temp = input - 100;
        printf("Your input %d was > 100!\nThus, %d = %d - 100 > 0\n",
               input, temp, input);
    }

    return 0;
}

What if you need to allocate a new variable?

The Stack

What if you need to allocate a new variable?

define(input_v, 24)      // Define variables with respect to the frame pointer
define(temp_v, -4)

alloc   = -(16 + 4) & -16       // 16 bytes for FP/LR + 4 for one int, rounded down to a multiple of 16 (= -32)
dealloc = -alloc                // positive amount that releases the frame again (= 32)

fp    .req     x29
lr    .req     x30

.text
output_string:  .string "Your input %d was > 100!\nThus, %d = %d - 100 > 0\n"
                .balign 4
                .global main

// int main(): stores input = 200 in the frame; if input > 100 it allocates a
// second int below the frame pointer for input - 100, prints both, and
// releases that extra space again before returning 0.
main:

                // Save FP and LR to the stack, alloc 4 bytes for input
                stp  fp, lr, [sp, alloc]!            // sp += alloc first, then store the pair
                mov  fp, sp                          // Set FP to the stack addr

                mov w19, 200
                str w19, [fp, input_v]                 // input = 200

                cmp w19, 100
                b.le if_exit                           // skip the block unless input > 100

if_begin:
                add sp, sp, -4 & -16  // Allocate 4 bytes on the stack for an integer (sp moves down by 16)
                sub w20, w19, 100     // subtract 100
                str w20, [fp, temp_v] // store it on the stack, just below the frame pointer

                ldr x0, =output_string // 1st printf argument: the string above
                ldr w1, [fp, input_v]  // 2nd: input
                ldr w2, [fp, temp_v]   // 3rd: input - 100
                ldr w3, [fp, input_v]  // 4th: input again
                bl printf

                sub sp, sp, -4 & -16  // restore the stack (subtracting -16 moves sp back up by 16)

if_exit:
                mov w0, 0                            // return 0
                ldp fp, lr, [sp], dealloc            // restore FP/LR, then sp += dealloc (must be positive)
                ret

Assignment 3

#include <stdio.h>
#include <stdlib.h>   /* rand() */
#define SIZE 50

/*
 * Fills an array of SIZE ints with values from rand() masked to their low
 * 8 bits, prints it, sorts it in ascending order with insertion sort, and
 * prints the sorted array.
 */
int main()
{
    int v[SIZE], i, j=0, temp=0;

    /* Fill and print the unsorted array; & 0xFF keeps each value in 0..255. */
    for(i=0; i < SIZE; i++) {
        v[i] = rand() & 0xFF;
        printf("v[%d]: %d\n", i, v[i]);
    }

    // sort the array using insertion sort
    for(i = 1; i < SIZE; i++) { 
        temp = v[i];
        /* Shift larger elements one slot right until temp's position is found. */
        for(j = i; j > 0 && temp < v[j-1]; j--) {
            v[j] = v[j - 1];
        }
        v[j] = temp;
    }

    printf("\nSorted array: \n");
    for(i = 0; i < SIZE; i++)
        printf("v[%d] = %d\n", i, v[i]);

    return 0;
}

The Stack

A few notes:

Generally, using a register is faster than a memory access

CPU cache helps this, but this is architecture dependent (in GPU architectures, a memory access is incredibly expensive, cache is only helpful if you're lucky)

The point of this assignment is to show you how to use the stack in case you don't have any registers free.

If you're writing professional assembly code, minimize number of instructions, minimize memory accesses, maximize register use.

Next Day

  • Arrays on the stack
  • Linear arrays
  • Multidimensional arrays

CPSC 355: Tutorial 8

By Joshua Horacsek

CPSC 355: Tutorial 8

  • 1,718