More Syscalls and Floating Point operations
PhD Student
Fall 2017
I want to do one more example before we move on to floating point arithmetic
File copy example
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Copy the file named by argv[1] to the file named by argv[2], one byte at a
 * time, using the raw openat/read/write/close calls.
 *
 * Returns 0 when the whole input was copied, -1 if a read or write failed.
 * Exits with -1 on a usage error or if either file cannot be opened.
 */
int main(int argc, char *argv[]) {
    int fd_in, fd_out, bytes_read, total_bytes = 0;
    int status = 0;
    char data;

    if(argc != 3) {
        printf("usage: %s input_file output_file\n", argv[0]);
        exit(-1);
    }

    fd_in = openat(AT_FDCWD, argv[1], O_RDONLY);
    if(fd_in < 0) {
        printf("Couldn't open \"%s\" for reading!\n", argv[1]);
        exit(-1);
    }

    /* O_TRUNC: without it, copying over a longer existing file would leave
     * the old tail of that file after the copied data. */
    fd_out = openat(AT_FDCWD, argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if(fd_out < 0) {
        printf("Couldn't open \"%s\" for writing!\n", argv[2]);
        close(fd_in);
        exit(-1);
    }

    bytes_read = read(fd_in, &data, sizeof(char));
    if(bytes_read <= 0) { // Nothing to write (EOF), or the read failed
        if(bytes_read < 0) {
            printf("Error reading from \"%s\".\n", argv[1]);
            status = -1;
        }
        close(fd_in);
        close(fd_out);
        return status;
    }

    do {
        if(write(fd_out, &data, sizeof(char)) != 1) {
            printf("Error writing to \"%s\".\n", argv[2]);
            status = -1;
            break;
        }
        total_bytes++; // count only bytes that really reached the output
        bytes_read = read(fd_in, &data, sizeof(char));
    } while(bytes_read == 1);

    // read() returns -1 on error; 0 simply means end of file
    if(bytes_read < 0) {
        printf("Error reading from \"%s\".\n", argv[1]);
        status = -1;
    }

    printf("Wrote %d bytes!\n", total_bytes);
    close(fd_in);
    close(fd_out);
    return status;
}
// File: Copy.asm
// Description:
// Takes two file arguments on the command line
// copies the data in the first to the second
// NOTE(review): the two macro definitions further down use m4 syntax, so this
// file presumably has to be run through m4 before it is assembled - confirm
// against the build instructions.
.data
// Declare all strings
// Each one is a printf format string; main passes its address in x0.
fmt1: .string "usage: %s input_file output_file\n"
fmt2: .string "Couldn't open %s for reading\n"
fmt3: .string "Couldn't open %s for writing\n"
fmt4: .string "Error writing to file %s\n"
fmt5: .string "Wrote %d bytes\n"
// Define macros
// Register aliases: main copies x0 (argc) and x1 (argv) into these registers
// so the values are still available after calls to printf and after syscalls.
define(argc_r, x19)
define(argv_r, x20)
.text
.global main
// Declare variable locations on the stack
// Each value is a byte offset from the frame pointer x29. Bytes 0-15 of the
// frame hold the saved x29/x30 pair, so the locals start at offset 16.
fd_out_s = 16 // output file descriptor (stored as a 32-bit word)
fd_in_s = 20 // input file descriptor (stored as a 32-bit word)
bytes_read_s = 24 // result of the most recent read (32-bit word)
total_bytes_s = 28 // running byte count printed at the end (32-bit word)
data_s = 32 // one-byte buffer handed to read and write
// ---------------------------------------------------------------------------
// int main(int argc, char *argv[])
// ABI:  AAPCS64, Linux AArch64 syscalls (number in x8, arguments in x0-x5,
//       svc 0, result in x0; a negative result signals an error)
// In:   w0 = argc, x1 = argv
// Out:  w0 = 0 once both files were opened; exit(-1) on usage/open errors
// Regs: x19 = argc, x20 = argv (callee-saved, so they are spilled and restored)
// Copies argv[1] to argv[2] one byte at a time and prints how many bytes
// were written.
// ---------------------------------------------------------------------------
saved_regs_s = 48               // spill slot for the x19/x20 pair (16 bytes)
// Syscall numbers and openat() constants
sys_openat = 56
sys_close = 57
sys_read = 63
sys_write = 64
at_fdcwd = -100                 // AT_FDCWD: resolve paths relative to the cwd
open_out_flags = 01101          // O_WRONLY | O_CREAT | O_TRUNC (octal)
open_out_mode = 0666            // rw for user, group and other (octal)
main:
        stp     x29, x30, [sp, -64]!            // 16 (fp/lr) + 32 (locals) + 16 (x19/x20)
        mov     x29, sp
        stp     x19, x20, [x29, saved_regs_s]   // preserve the caller's x19/x20

        // Keep argc and argv in callee-saved registers across calls
        mov     argc_r, x0
        mov     argv_r, x1
        cmp     w0, 3                           // argc is a 32-bit int: compare the low word only
        b.eq    success_argc

        // Wrong number of arguments: print usage and bail
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        ldr     x1, [argv_r]                    // x1 = argv[0], the executable name
        bl      printf                          // printf(fmt1, argv[0])
        mov     w0, -1
        bl      exit                            // exit(-1), does not return

success_argc:
        // fd_in = openat(AT_FDCWD, argv[1], O_RDONLY)
        mov     w0, at_fdcwd
        ldr     x1, [argv_r, 8]                 // x1 = argv[1]
        mov     w2, wzr                         // O_RDONLY = 0
        mov     x8, sys_openat
        svc     0
        str     w0, [x29, fd_in_s]              // fd_in = w0
        cmp     w0, 0
        b.ge    success_open_input              // fd_in >= 0 means the open worked

        adrp    x0, fmt2
        add     x0, x0, :lo12:fmt2
        ldr     x1, [argv_r, 8]                 // x1 = argv[1]
        bl      printf                          // printf(fmt2, argv[1])
        mov     w0, -1
        bl      exit                            // exit(-1)

success_open_input:
        // fd_out = openat(AT_FDCWD, argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666)
        // O_TRUNC keeps stale data from surviving past the end of the copy
        // when the output file already exists and is longer than the input.
        mov     w0, at_fdcwd
        ldr     x1, [argv_r, 16]                // x1 = argv[2]
        mov     w2, open_out_flags
        mov     w3, open_out_mode
        mov     x8, sys_openat
        svc     0
        str     w0, [x29, fd_out_s]             // fd_out = w0
        cmp     w0, 0
        b.ge    success_open_output             // fd_out >= 0: go copy the data

        // Could not open the output: report, release the input, bail
        adrp    x0, fmt3
        add     x0, x0, :lo12:fmt3
        ldr     x1, [argv_r, 16]                // x1 = argv[2]
        bl      printf                          // printf(fmt3, argv[2])
        ldr     w0, [x29, fd_in_s]
        mov     x8, sys_close
        svc     0                               // close(fd_in)
        mov     w0, -1
        bl      exit                            // exit(-1)

success_open_output:
        str     wzr, [x29, total_bytes_s]       // total_bytes = 0, nothing written yet

        // bytes_read = read(fd_in, &data, 1)
        ldr     w0, [x29, fd_in_s]
        add     x1, x29, data_s                 // x1 = &data
        mov     w2, 1
        mov     x8, sys_read
        svc     0
        str     w0, [x29, bytes_read_s]
        cmp     w0, 1
        b.ne    done_loop                       // 0 = empty file, negative = read error

doloop_top:
        // write(fd_out, &data, 1)
        ldr     w0, [x29, fd_out_s]
        add     x1, x29, data_s                 // x1 = &data
        mov     w2, 1
        mov     x8, sys_write
        svc     0
        cmp     w0, 1
        b.eq    success_write                   // exactly one byte went out

        // Write failed: report it and stop copying
        adrp    x0, fmt4
        add     x0, x0, :lo12:fmt4
        ldr     x1, [argv_r, 16]                // x1 = argv[2]
        bl      printf                          // printf(fmt4, argv[2])
        b       done_loop

success_write:
        // Count only bytes that actually reached the output file
        ldr     w1, [x29, total_bytes_s]
        add     w1, w1, 1
        str     w1, [x29, total_bytes_s]        // total_bytes += 1

        // bytes_read = read(fd_in, &data, 1)
        ldr     w0, [x29, fd_in_s]
        add     x1, x29, data_s                 // x1 = &data
        mov     w2, 1
        mov     x8, sys_read
        svc     0
        str     w0, [x29, bytes_read_s]
        cmp     w0, 1
        b.eq    doloop_top                      // got another byte: write it

done_loop:
        adrp    x0, fmt5
        add     x0, x0, :lo12:fmt5
        ldr     w1, [x29, total_bytes_s]
        bl      printf                          // printf(fmt5, total_bytes)

        ldr     w0, [x29, fd_in_s]
        mov     x8, sys_close
        svc     0                               // close(fd_in)
        ldr     w0, [x29, fd_out_s]
        mov     x8, sys_close
        svc     0                               // close(fd_out)

        mov     w0, wzr                         // return 0
        ldp     x19, x20, [x29, saved_regs_s]   // restore the caller's x19/x20
        ldp     x29, x30, [sp], 64              // stack teardown
        ret
ARMv8 has 32 floating-point registers (sort of; they're actually 128-bit, but ignore that for now)
An example...
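Recall the Taylor series of an infinitely differentiable function \(f\) about a point \(a\) (the series equals \(f\) when \(f\) is analytic, e.g. holomorphic): \[ f(x) = \sum_{n=0}^{\infty} \frac{f^{(n)}(a)}{n!} (x - a)^n \]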
Or its Maclaurin expansion (the Taylor series about \(a = 0\)): \[ f(x) = \sum_{n=0}^{\infty} \frac{f^{(n)}(0)}{n!} x^n \]
For \(e^x\) we have \[ e^x = \sum_{n=0}^{\infty} \frac{x^n}{n!} = 1 + x + \frac{x^2}{2!} + \frac{x^3}{3!} + \cdots \]
We can stop after some amount of terms in the sum
In a machine, we eventually start to lose precision, so we stop once adding another term changes the approximation by less than a threshold
#include <math.h>
#include <stdio.h>
/*
 * Approximate e^x with its Maclaurin series, sum over n of x^n / n!.
 *
 * Each term is derived from the previous one (term_n = term_{n-1} * x / n)
 * instead of dividing a separately accumulated x^n by n!: both of those
 * overflow to infinity for large x long before the quotient does.
 *
 * The series is summed for |x|, where every term is positive, and the
 * identity e^(-x) = 1 / e^x is applied afterwards. Summing the alternating
 * series for a negative x directly loses all accuracy to cancellation.
 *
 * Summation stops once a new term changes the sum by at most 1e-10.
 */
double taylor_exp(double x) {
    double ax = fabs(x);
    double n = 0.0;
    double term = 1.0;   /* x^0 / 0! */
    double sum = 1.0;
    double sum_old;

    do {
        n += 1.0;
        term *= ax / n;  /* |x|^n / n! */
        sum_old = sum;
        sum += term;
    } while(fabs(sum_old - sum) > 1e-10);

    return (x < 0.0) ? 1.0 / sum : sum;
}
/* Print the series approximation of e^x for x = 0, 1 and 2, one per line. */
int main(int argc, char *argv[]) {
    const double inputs[] = { 0.0, 1.0, 2.0 };
    int i;

    for(i = 0; i < 3; i++) {
        double approx = taylor_exp(inputs[i]);
        printf("%f\n", approx);
    }
}
.data
fmt1: .string "%f\n"             // printf format used by main for each result
// The string above is 4 bytes long, so without explicit alignment the
// 8-byte constant below would not sit on its natural 8-byte boundary.
.balign 8
threshold: .double 0r1e-10      // A double prec. constant: loop stop threshold
.text
.balign 4
.global main
// ---------------------------------------------------------------------------
// int main(void)
// ABI:  AAPCS64. Prints taylor_exp(0.0), taylor_exp(1.0) and taylor_exp(2.0),
//       one per line, then returns 0.
// Each call returns its result in d0, which is also where printf expects the
// first floating point argument, so the value is passed straight through.
// ---------------------------------------------------------------------------
main:
        stp     x29, x30, [sp, -16]!
        mov     x29, sp

        fmov    d0, xzr                 // d0 = 0.0
        bl      taylor_exp
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        bl      printf                  // printf(fmt1, taylor_exp(0.0))

        fmov    d0, 1.0
        bl      taylor_exp
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        bl      printf                  // printf(fmt1, taylor_exp(1.0))

        fmov    d0, 2.0
        bl      taylor_exp
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        bl      printf                  // printf(fmt1, taylor_exp(2.0))

        mov     w0, wzr                 // return 0 rather than whatever printf left in w0
        ldp     x29, x30, [sp], 16
        ret
// ---------------------------------------------------------------------------
// double taylor_exp(double x)
// ABI:   AAPCS64
// In:    d0 = x
// Out:   d0 = approximation of e^x from its Maclaurin series
// Clobb: x0, d1-d7, flags
//
// Each term is built from the previous one (term = term * |x| / n) so that
// neither x^n nor n! is ever formed on its own; both overflow for large x
// long before their quotient does. The series is summed for |x| and the
// reciprocal is taken at the end when x is negative, because summing the
// alternating series directly loses accuracy to cancellation.
// d0 is never written inside the loop, so x stays available for the final
// sign test (this function makes no calls).
// ---------------------------------------------------------------------------
taylor_exp:                             // d0 is the first argument
n_s = 16
term_s = 24
exp_s = 32
exp_old_s = 40
absx_s = 48
        stp     x29, x30, [sp, -64]!    // Standard stack setup/teardown
        mov     x29, sp

        fabs    d1, d0
        str     d1, [x29, absx_s]       // absx = |x|
        fmov    d1, xzr                 // Convert 0 to a double
        str     d1, [x29, n_s]          // n = 0.0
        fmov    d1, 1.0
        str     d1, [x29, term_s]       // term = 1.0 (x^0 / 0!)
        str     d1, [x29, exp_s]        // exp = 1.0

        // Loop invariants: load them once instead of on every iteration
        adrp    x0, threshold           // Load the addr of the threshold
        add     x0, x0, :lo12:threshold
        ldr     d6, [x0]                // d6 = threshold
        fmov    d7, 1.0                 // d7 = 1.0, the increment for n

exp_do_top:
        ldr     d1, [x29, n_s]          // loading and storing d registers works just as it does for x
        fadd    d1, d1, d7
        str     d1, [x29, n_s]          // n = n + 1.0

        ldr     d2, [x29, absx_s]
        fdiv    d2, d2, d1              // d2 = |x| / n
        ldr     d3, [x29, term_s]
        fmul    d3, d3, d2
        str     d3, [x29, term_s]       // term = term * |x| / n = |x|^n / n!

        ldr     d4, [x29, exp_s]
        str     d4, [x29, exp_old_s]    // exp_old = exp
        fadd    d5, d4, d3
        str     d5, [x29, exp_s]        // exp = exp + term

        fsub    d1, d5, d4              // d1 = exp - exp_old
        fabs    d1, d1                  // d1 = fabs(exp - exp_old)
        fcmp    d1, d6                  // fabs(exp - exp_old) cmp threshold
        b.gt    exp_do_top              // still changing by more than the threshold: keep going

        ldr     d1, [x29, exp_s]        // d1 = ~exp(|x|)
        fmov    d2, xzr
        fcmp    d0, d2                  // was the original x negative?
        b.ge    exp_done
        fmov    d2, 1.0
        fdiv    d1, d2, d1              // x < 0: exp(x) = 1 / exp(|x|)
exp_done:
        fmov    d0, d1                  // return value goes in d0
        ldp     x29, x30, [sp], 64      // stack teardown
        ret
Floating point stuff/Work Day