CPSC 355: Tutorial 8
Memory & The Stack
PhD Student
Fall 2017
Outline
- We've been doing most of our work so far with registers...
- What happens when we run out of registers?
- We need to start thinking about how to use memory to overcome the limitations of using registers alone
Memory
Addresses are 64-bit numbers that reference a particular byte in memory:
0xAA |
0x00 |
0xFE |
. . . |
0x00 |
0x00 |
0x0000000000000000 |
---|
0x0000000000000001 |
0x0000000000000002 |
0xFFFFFFFFFFFFFFFE 0xFFFFFFFFFFFFFFFF |
Memory
Address
Memory
OS |
Program |
Heap |
Free memory |
Stack |
low
high
The stack is used for local variables, static memory allocation, return addresses. We'll use it for assignment 3.
Heap is for dynamically allocated memory
Memory
OS |
Program |
Heap |
Free memory |
Stack |
low
high
today
The Stack
OS |
Program |
Heap |
Free memory |
Stack |
low
high
Used to store local variables and arrays whose sizes are known at run time
local vars, return addrs, frame pointers etc... |
The Stack
high
The register $sp keeps track of what address is currently at the top of the stack.
Free memory |
local vars, return addrs, frame pointers etc... |
$sp
The stack grows upwards in this diagram (toward lower addresses) as we allocate more data on it (i.e. the address held in $sp decreases)
The Stack
high
We've actually already been using this
Free memory |
local vars, return addrs, frame pointers etc... |
stp x29, x30, [sp, -16]!
Add -16 to $sp before store
Then STore the Pair x29, x30
The Stack
high
We've actually already been using this
Free memory |
local vars, return addrs, frame pointers etc... |
stp x29, x30, [sp, -16]!
$sp
Free memory |
x29 ($fp) |
x30 ($lr) |
local vars, return addrs, frame pointers etc... |
$sp
The Stack
Additionally
ldp x29, x30, [sp], 16
Free memory |
x29 ($fp) |
x30 ($lr) |
local vars, return addrs, frame pointers etc... |
$sp
Free memory |
x29 ($fp) |
x30 ($lr) |
local vars, return addrs, frame pointers etc... |
$sp
x29 and x30 are restored from the stack
add 16 to $sp after load
The Stack
high
Allocating more space for variables
Free memory |
old stack data |
stp x29, x30, [sp, -40 & -16]!
$sp
Free memory |
x29 ($fp) |
x30 ($lr) |
??? |
??? |
old stack data |
$sp
The Stack
Allocating more space for variables
mov x19, 777
str x19, [sp, 16]
Free memory |
x29 ($fp) |
x30 ($lr) |
??? |
??? |
old stack data |
$sp
Free memory |
x29 ($fp) |
x30 ($lr) |
777 |
??? |
old stack data |
$sp
What if the stack pointer changes?
The Stack
high
Allocating more space for variables, the correct way
Free memory |
old stack data |
stp x29, x30, [sp, -40 & -16]!
mov x29, sp
$sp
Free memory |
x29 ($fp) |
x30 ($lr) |
??? |
??? |
old stack data |
$sp
$x29
The Stack
Allocating more space for variables, the correct way
mov x19, 777
str x19, [x29, 16]
Free memory |
x29 ($fp) |
x30 ($lr) |
??? |
??? |
old stack data |
x29
Free memory |
x29 ($fp) |
x30 ($lr) |
777 |
??? |
old stack data |
x29
x29 is always preserved by function calls -- it's a reference frame, i.e. the frame pointer, or $fp
The Stack
#include <stdio.h>

/*
 * Adds up the integers 1 through 99 and prints the total.
 * Both locals (the counter and the running total) are the values that the
 * assembly versions of this program keep in the stack frame.
 */
int main(int argc, char *argv[])
{
    int sum = 0;   /* running total */
    int i = 1;     /* loop counter, runs 1..99 */

    while (i < 100) {
        sum = sum + i;
        i = i + 1;
    }

    printf("1 + 2 + ... + 98 + 99 = %d", sum);
    return 0;
}
The Stack
// Skeleton for the summation example (AArch64, GNU as syntax).
// The locals i and sum live in the stack frame above the saved FP/LR pair:
//   [x29, 16] = sum (4 bytes)      [x29, 20] = i (4 bytes)
// The loop itself is left as an exercise.

alloc   = -(16 + 8) & -16               // 16 bytes for FP/LR + 8 for locals, rounded to a multiple of 16 (= -32)
dealloc = -alloc                        // positive frame size, used to pop the frame (= 32)

        .text
output_string: .string "1 + 2 + ... + 98 + 99 = %d"
        .balign 4
        .global main
main:
        // Save FP and LR to the stack, alloc 8 bytes for i and sum (4 bytes each)
        stp     x29, x30, [sp, alloc]!
        mov     x29, sp                 // Set FP to the stack addr

        // w9/w10 are caller-saved scratch registers, so main does not
        // clobber any callee-saved register (x19-x28) of its caller.
        mov     w9, 0
        mov     w10, 1
        str     w9, [x29, 16]           // sum = 0
        str     w10, [x29, 20]          // i = 1

        // for(i = 1; i < 100; i++) {   <- you write this part in assembly
        //     sum = sum + i;
        // }

        adrp    x0, output_string       // x0 = address of the format string (PC-relative)
        add     x0, x0, :lo12:output_string
        ldr     w1, [x29, 16]           // w1 = sum (offset 16; offset 20 would be i)
        bl      printf

        mov     w0, 0                   // return 0
        ldp     x29, x30, [sp], dealloc // restore FP/LR, then add the frame size back to sp
        ret
The Stack
// Sums the integers 1..99 and prints the total (AArch64, GNU as syntax).
// The locals i and sum live in the stack frame above the saved FP/LR pair:
//   [x29, 16] = sum (4 bytes)      [x29, 20] = i (4 bytes)
// Every loop iteration reloads and stores both locals through the frame pointer.

alloc   = -(16 + 8) & -16               // 16 bytes for FP/LR + 8 for locals, rounded to a multiple of 16 (= -32)
dealloc = -alloc                        // positive frame size, used to pop the frame (= 32)

        .text
output_string: .string "1 + 2 + ... + 98 + 99 = %d\n"
        .balign 4
        .global main
main:
        // Save FP and LR to the stack, alloc 8 bytes for i and sum
        stp     x29, x30, [sp, alloc]!
        mov     x29, sp                 // Set FP to the stack addr

        // w9/w10 are caller-saved scratch registers; nothing is live across
        // the printf call, so no callee-saved register is needed.
        mov     w9, 0
        mov     w10, 1
        str     w9, [x29, 16]           // sum = 0
        str     w10, [x29, 20]          // i = 1

loop_test:
        ldr     w10, [x29, 20]          // w10 = i
        cmp     w10, 100
        b.ge    loop_exit               // leave the loop once i >= 100

        ldr     w9, [x29, 16]           // w9 = sum
        add     w9, w9, w10             // sum + i
        add     w10, w10, 1             // i + 1
        str     w10, [x29, 20]          // i = i + 1
        str     w9, [x29, 16]           // sum = sum + i
        b       loop_test

loop_exit:
        adrp    x0, output_string       // x0 = address of the format string (PC-relative)
        add     x0, x0, :lo12:output_string
        ldr     w1, [x29, 16]           // w1 = sum
        bl      printf

        mov     w0, 0                   // return 0
        ldp     x29, x30, [sp], dealloc // restore FP/LR, then add the frame size back to sp
        ret
The Stack
// Sums the integers 1..99 and prints the total (AArch64, GNU as syntax, run through m4 first).
// Same program as the plain version, but the frame offsets and the FP/LR
// registers are referred to by name.

define(i_v, 20)                         // Define variables with respect to the frame pointer
define(sum_v, 16)

alloc   = -(16 + 8) & -16               // 16 bytes for FP/LR + 8 for locals, rounded to a multiple of 16 (= -32)
dealloc = -alloc                        // positive frame size, used to pop the frame (= 32)

fp      .req x29
lr      .req x30

        .text
output_string: .string "1 + 2 + ... + 98 + 99 = %d\n"
        .balign 4
        .global main
main:
        // Save FP and LR to the stack, alloc 8 bytes for i and sum
        stp     fp, lr, [sp, alloc]!
        mov     fp, sp                  // Set FP to the stack addr

        // w9/w10 are caller-saved scratch registers; nothing is live across
        // the printf call, so no callee-saved register is needed.
        mov     w9, 0
        mov     w10, 1
        str     w9, [fp, sum_v]         // sum = 0
        str     w10, [fp, i_v]          // i = 1

loop_test:
        ldr     w10, [fp, i_v]          // load i
        cmp     w10, 100
        b.ge    loop_exit               // leave the loop once i >= 100

        ldr     w9, [fp, sum_v]         // load sum
        add     w9, w9, w10             // sum + i
        add     w10, w10, 1             // i + 1
        str     w10, [fp, i_v]          // store the incremented counter
        str     w9, [fp, sum_v]         // store the new total
        b       loop_test

loop_exit:
        adrp    x0, output_string       // x0 = address of the format string (PC-relative)
        add     x0, x0, :lo12:output_string
        ldr     w1, [fp, sum_v]         // second printf argument: the total
        bl      printf

        mov     w0, 0                   // return 0
        ldp     fp, lr, [sp], dealloc   // restore FP/LR, then add the frame size back to sp
        ret
The Stack
#include <stdio.h>

/*
 * Shows a variable that only exists inside a nested block: temp is
 * introduced when the if-body is entered and is gone when it is left.
 */
int main(int argc, char *argv[])
{
    int input = 200;

    if (input > 100) {
        /* Variable introduced in new code block */
        int temp;

        temp = input - 100;
        printf("Your input %d was > 100!\nThus, %d = %d - 100 > 0\n",
               input,
               temp,
               input);
    }

    return 0;
}
What if you need to allocate a new variable?
The Stack
What if you need to allocate a new variable?
// Allocating a new variable inside an if-block (AArch64, GNU as syntax, run through m4 first).
// input lives in the main frame; temp is allocated below the frame pointer
// only while the if-body runs, and is released before the block is left.

define(input_v, 24)                     // Define variables with respect to the frame pointer
define(temp_v, -4)                      // negative: temp sits below FP, in the space allocated inside the if

alloc        = -(16 + 4) & -16          // 16 bytes for FP/LR + 4 for input, rounded to a multiple of 16 (= -32)
dealloc      = -alloc                   // positive frame size, used to pop the frame (= 32)
temp_alloc   = -4 & -16                 // 4 bytes for temp, rounded so sp stays 16-byte aligned (= -16)
temp_dealloc = -temp_alloc              // (= 16)

fp      .req x29
lr      .req x30

        .text
output_string: .string "Your input %d was > 100!\nThus, %d = %d - 100 > 0\n"
        .balign 4
        .global main
main:
        // Save FP and LR to the stack, alloc 4 bytes for input
        stp     fp, lr, [sp, alloc]!
        mov     fp, sp                  // Set FP to the stack addr

        // w9/w10 are caller-saved scratch registers; every printf argument is
        // reloaded from the stack, so no callee-saved register is needed.
        mov     w9, 200
        str     w9, [fp, input_v]       // input = 200

        cmp     w9, 100
        b.le    if_exit                 // skip the block unless input > 100

if_begin:
        add     sp, sp, temp_alloc      // Allocate room on the stack for one integer
        sub     w10, w9, 100            // input - 100
        str     w10, [fp, temp_v]       // store it on the stack

        adrp    x0, output_string       // x0 = address of the format string (PC-relative)
        add     x0, x0, :lo12:output_string
        ldr     w1, [fp, input_v]
        ldr     w2, [fp, temp_v]
        ldr     w3, [fp, input_v]
        bl      printf

        add     sp, sp, temp_dealloc    // restore the stack

if_exit:
        mov     w0, 0                   // return 0
        ldp     fp, lr, [sp], dealloc   // restore FP/LR, then add the frame size back to sp
        ret
Assignment 3
#include <stdio.h>
#include <stdlib.h>   /* rand() */
#define SIZE 50

/*
 * Assignment 3 reference program.
 * Fills an array of SIZE ints with values in 0..255 taken from rand(),
 * prints them, sorts the array in ascending order with insertion sort,
 * and prints the sorted result. All variables are locals of main.
 */
int main()
{
    int v[SIZE], i, j=0, temp=0;

    /* Fill the array; the mask keeps only the low 8 bits of each value. */
    for(i=0; i < SIZE; i++) {
        v[i] = rand() & 0xFF;
        printf("v[%d]: %d\n", i, v[i]);
    }

    // sort the array using insertion sort
    for(i = 1; i < SIZE; i++) {
        temp = v[i];
        /* Shift larger elements one slot right until temp's position is found. */
        for(j = i; j > 0 && temp < v[j-1]; j--) {
            v[j] = v[j - 1];
        }
        v[j] = temp;
    }

    printf("\nSorted array: \n");
    for(i = 0; i < SIZE; i++)
        printf("v[%d] = %d\n", i, v[i]);

    return 0;
}
The Stack
A few notes:
Generally, using a register is faster than a memory access
CPU cache helps this, but this is architecture dependent (in GPU architectures, a memory access is incredibly expensive, cache is only helpful if you're lucky)
The point of this assignment is to show you how to use the stack in case you don't have any registers free.
If you're writing professional assembly code, minimize number of instructions, minimize memory accesses, maximize register use.
Next Day
- Arrays on the stack
- Linear arrays
- Multidimensional arrays
CPSC 355: Tutorial 8
By Joshua Horacsek
CPSC 355: Tutorial 8
- 1,733