CPSC 355: Tutorial 16
More Syscalls and Floating Point operations
PhD Student
Fall 2017
Syscalls
I want to do one more example before we move on to floating point arithmetic
File copy example
- Open a file, (check if it's valid)
- Create a file (check if it's valid)
- Copy each byte from the first file to the second file
Copy a file
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Copy the file named by argv[1] to the file named by argv[2], one byte at
 * a time, using the file-descriptor calls openat/read/write/close.
 *
 * The output file is created if needed and truncated if it already exists,
 * so the result is an exact copy rather than an overwrite of a prefix.
 *
 * Returns 0 on success. Exits with -1 on a usage error or if either file
 * cannot be opened; returns -1 if a read or write fails part-way through.
 */
int main(int argc, char *argv[]) {
    int fd_in, fd_out, total_bytes = 0;
    int status = 0;
    ssize_t bytes_read;
    char data;

    if (argc != 3) {
        printf("usage: %s input_file output_file\n", argv[0]);
        exit(-1);
    }

    fd_in = openat(AT_FDCWD, argv[1], O_RDONLY);
    if (fd_in < 0) {
        printf("Couldn't open \"%s\" for reading!\n", argv[1]);
        exit(-1);
    }

    /* O_TRUNC: discard old contents so no stale bytes survive the copy. */
    fd_out = openat(AT_FDCWD, argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if (fd_out < 0) {
        printf("Couldn't open \"%s\" for writing!\n", argv[2]);
        close(fd_in);
        exit(-1);
    }

    /*
     * read() returns 1 when a byte was read, 0 at end of file and -1 on
     * error. Only a return of exactly 1 means `data` holds a byte to write.
     */
    while ((bytes_read = read(fd_in, &data, sizeof(char))) == 1) {
        if (write(fd_out, &data, sizeof(char)) != 1) {
            printf("Error writing to \"%s\".\n", argv[2]);
            status = -1;
            break;
        }
        total_bytes++;  /* count only bytes that were actually written */
    }

    if (bytes_read < 0) {
        printf("Error reading from \"%s\".\n", argv[1]);
        status = -1;
    }

    printf("Wrote %d bytes!\n", total_bytes);
    close(fd_in);
    close(fd_out);
    return status;
}
// File: Copy.asm
// Description:
// Takes two file arguments on the command line
// copies the data in the first to the second
//
// Usage:  copy input_file output_file
// Target: AArch64 Linux, GNU assembler syntax, run through m4 first
//         (for the register-alias macros below).
// Exit status: 0 on success, -1 on a usage error, if a file cannot be
//         opened, or if a read or write fails part-way through the copy.

        .data
// Declare all strings
fmt1:   .string "usage: %s input_file output_file\n"
fmt2:   .string "Couldn't open %s for reading\n"
fmt3:   .string "Couldn't open %s for writing\n"
fmt4:   .string "Error writing to file %s\n"
fmt5:   .string "Wrote %d bytes\n"
fmt6:   .string "Error reading from file %s\n"

// Define macros
// argc is a 32-bit int, so it lives in the w view of x19; argv is a pointer.
define(argc_r, w19)
define(argv_r, x20)

        .text
        .balign 4
        .global main

// Linux AArch64 syscall numbers
SYS_openat = 56
SYS_close  = 57
SYS_read   = 63
SYS_write  = 64

// openat arguments
AT_FDCWD   = -100                       // resolve paths relative to the cwd
IN_FLAGS   = 0                          // O_RDONLY
OUT_FLAGS  = 01101                      // O_WRONLY | O_CREAT | O_TRUNC (octal)
OUT_MODE   = 0666                       // rw for user, group and other (octal)

// Declare variable locations on the stack (offsets from x29)
//   0..15  saved x29, x30
//  16..31  saved x19, x20
saved_regs_s  = 16
fd_out_s      = 32
fd_in_s       = 36
bytes_read_s  = 40
total_bytes_s = 44
data_s        = 48                      // one-byte copy buffer
status_s      = 52                      // value main returns
alloc         = -64                     // frame size, multiple of 16
dealloc       = 64

main:
        stp     x29, x30, [sp, alloc]!          // Stack setup
        mov     x29, sp
        stp     x19, x20, [x29, saved_regs_s]   // x19/x20 are callee-saved: preserve them

        // Save argc and argv into callee saved registers
        mov     argc_r, w0
        mov     argv_r, x1

        str     wzr, [x29, status_s]            // status = 0
        str     wzr, [x29, total_bytes_s]       // total_bytes = 0

        cmp     argc_r, 3                       // if we have 3 arguments, continue
        b.eq    success_argc

        // Otherwise bail
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        ldr     x1, [argv_r]                    // argv[0] = pointer to the exe name
        bl      printf                          // printf(fmt1, argv[0])
        mov     w0, -1
        bl      exit                            // exit(-1), does not return

success_argc:
        // fd_in = openat(AT_FDCWD, argv[1], O_RDONLY)
        mov     w0, AT_FDCWD
        ldr     x1, [argv_r, 8]                 // x1 = argv[1]
        mov     w2, IN_FLAGS
        mov     x8, SYS_openat
        svc     0
        str     w0, [x29, fd_in_s]              // fd_in = w0
        cmp     w0, 0
        b.ge    success_open_input              // fd >= 0: the file is open

        adrp    x0, fmt2
        add     x0, x0, :lo12:fmt2
        ldr     x1, [argv_r, 8]                 // x1 = argv[1]
        bl      printf                          // printf(fmt2, argv[1])
        mov     w0, -1
        bl      exit                            // exit(-1)

success_open_input:
        // fd_out = openat(AT_FDCWD, argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0666)
        mov     w0, AT_FDCWD
        ldr     x1, [argv_r, 16]                // x1 = argv[2]
        mov     w2, OUT_FLAGS                   // truncate so no stale bytes survive
        mov     w3, OUT_MODE
        mov     x8, SYS_openat
        svc     0
        str     w0, [x29, fd_out_s]             // fd_out = w0
        cmp     w0, 0
        b.ge    success_open_output             // fd_out >= 0: go copy the data

        // Again, bail if we couldn't open the file
        adrp    x0, fmt3
        add     x0, x0, :lo12:fmt3
        ldr     x1, [argv_r, 16]                // x1 = argv[2]
        bl      printf                          // printf(fmt3, argv[2])
        ldr     w0, [x29, fd_in_s]
        mov     x8, SYS_close
        svc     0                               // close(fd_in)
        mov     w0, -1
        bl      exit                            // exit(-1)

success_open_output:
doloop_top:
        // bytes_read = read(fd_in, &data, 1)
        ldr     w0, [x29, fd_in_s]
        add     x1, x29, data_s                 // &data on the stack
        mov     x2, 1                           // one byte
        mov     x8, SYS_read
        svc     0
        str     w0, [x29, bytes_read_s]
        cmp     w0, 1
        b.ne    read_stopped                    // 0 = end of file, negative = error

        // write(fd_out, &data, 1)
        ldr     w0, [x29, fd_out_s]
        add     x1, x29, data_s
        mov     x2, 1
        mov     x8, SYS_write
        svc     0
        cmp     w0, 1                           // if we wrote one byte
        b.eq    success_write                   // count it and read another

        // otherwise report the failure and stop copying
        adrp    x0, fmt4
        add     x0, x0, :lo12:fmt4
        ldr     x1, [argv_r, 16]                // x1 = argv[2]
        bl      printf                          // printf(fmt4, argv[2])
        mov     w0, -1
        str     w0, [x29, status_s]             // status = -1
        b       done_loop

success_write:
        ldr     w1, [x29, total_bytes_s]
        add     w1, w1, 1
        str     w1, [x29, total_bytes_s]        // total_bytes += 1 (bytes written)
        b       doloop_top

read_stopped:
        ldr     w0, [x29, bytes_read_s]
        cmp     w0, 0
        b.ge    done_loop                       // 0: clean end of file

        // negative return: the read itself failed
        adrp    x0, fmt6
        add     x0, x0, :lo12:fmt6
        ldr     x1, [argv_r, 8]                 // x1 = argv[1]
        bl      printf                          // printf(fmt6, argv[1])
        mov     w0, -1
        str     w0, [x29, status_s]             // status = -1

done_loop:
        adrp    x0, fmt5
        add     x0, x0, :lo12:fmt5
        ldr     w1, [x29, total_bytes_s]
        bl      printf                          // printf(fmt5, total_bytes)

        ldr     w0, [x29, fd_in_s]
        mov     x8, SYS_close
        svc     0                               // close(fd_in)
        ldr     w0, [x29, fd_out_s]
        mov     x8, SYS_close
        svc     0                               // close(fd_out)

        ldr     w0, [x29, status_s]             // return status
        ldp     x19, x20, [x29, saved_regs_s]   // restore callee-saved registers
        ldp     x29, x30, [sp], dealloc         // Stack teardown
        ret
Floating Point Arithmetic
ARMv8 has 32 floating-point registers (strictly speaking they are 128-bit SIMD registers, but we only use their lower 64 or 32 bits for now)
- d0-d31 are 64 bit double precision registers
- s0-s31 are the lower 32 bits of these registers and are used for single precision FP operations
- d8-d15 are callee saved registers
- d0-d7 and d16-d31 may be overwritten by subroutines.
- d0-d7 are used to pass floating point arguments into a function
Floating Point Arithmetic
- fmov S/Dd, S/Dn: move S/Dn into S/Dd
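- fmov S/Dd, #fpimm: move immediate value into S/Dd. The set of encodable immediates is quite limited: the value must be of the form \(\pm \frac{n}{16} \times 2^r\) where \(n \in [16, 31]\) and \(r \in [-3, 4]\) (so 1.0, 2.0 and 0.5 are fine, but 0.0 and 1e-10 are not)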
- fcvt Sd, Dn: Convert Dn to single prec. Sd
- fcvt Dd, Sn: Convert Sn to double prec. Dd
- fadd, fsub S/Dd, S/Dn, S/Dm: Add or subtract S/Dn, S/Dm, place result in S/Dd
- fmul S/Dd, S/Dn, S/Dm: S/Dd = S/Dn * S/Dm
- fdiv S/Dd, S/Dn, S/Dm: S/Dd = S/Dn / S/Dm
- fmadd S/Dd, S/Dn, S/Dm, S/Da: S/Dd = S/Da + S/Dn * S/Dm
Floating Point Arithmetic
- fabs S/Dd, S/Dn: S/Dd = abs(S/Dn)
- fneg S/Dd, S/Dn: S/Dd = -(S/Dn)
- fcmp S/Dn, S/Dm: Compares two registers, stores result in condition flags
- fcmp S/Dn, #fpimm: Compares register and immediate value, stores result in condition flags
An example...
Taylor Series
Recall the Taylor series, about a point \(a\), of an analytic function \(f\) (one that is infinitely differentiable and equal to its Taylor series near \(a\))
\[ f(x) = \sum_{n=0}^{\infty} \frac{f^{(n)}(a)}{n!}(x-a)^n \]
Or its Maclaurin expansion
\[ f(x) = \sum_{n=0}^{\infty} \frac{f^{(n)}(0)}{n!}x^n \]
For \(e^x\) we have
\[ e^x = \sum_{n=0}^{\infty} \frac{x^n}{n!} \]
Taylor Series
We can approximate the function by stopping after a finite number of terms in the sum
\[ e^x \approx \sum_{n=0}^{j} \frac{x^n}{n!} \]
On a machine we only have finite precision, so later terms eventually become too small to matter. We therefore stop as soon as adding a term changes the approximation by no more than a chosen threshold (here \(10^{-10}\))
Taylor Series
#include <math.h>
#include <stdio.h>
/*
 * Approximate e^x with its Maclaurin series, sum over n >= 0 of x^n / n!.
 *
 * Terms are added until one of them changes the running sum by no more
 * than 1e-10.
 *
 * For negative x the series alternates in sign and its large intermediate
 * terms cancel, which destroys the result (x = -20 is wrong by more than
 * 100%). To avoid that, the series is summed for |x|, where every term is
 * positive, and e^x = 1 / e^|x| is returned when x is negative.
 *
 * x:       the exponent
 * returns: the approximation of e^x
 */
double taylor_exp(double x) {
    double ax = fabs(x);    /* sum the series for |x| */
    double n = 0;           /* index of the current term */
    double nfact = 1;       /* n! */
    double xx = 1;          /* |x|^n */
    double sum = 1.0;       /* running sum; the n = 0 term is 1 */
    double sum_old;

    do {
        n += 1.0;
        nfact *= n;
        xx *= ax;
        sum_old = sum;
        sum += xx / nfact;
    } while (fabs(sum_old - sum) > 1e-10);

    return (x < 0) ? 1.0 / sum : sum;
}
/* Driver: print the series approximation of e^x for x = 0, 1 and 2. */
int main(int argc, char *argv[]) {
    static const double inputs[] = { 0.0, 1.0, 2.0 };
    const int count = (int)(sizeof inputs / sizeof inputs[0]);
    int i;

    (void)argc;     /* command-line arguments are not used */
    (void)argv;

    for (i = 0; i < count; i++) {
        printf("%f\n", taylor_exp(inputs[i]));
    }
    return 0;
}
Taylor Series
// Driver for taylor_exp: prints the approximation of e^x for x = 0, 1, 2.
// Target: AArch64 Linux, GNU assembler syntax.
        .data
fmt1:   .string "%f\n"
        .balign 8                       // keep the double below 8-byte aligned
threshold: .double 0r1e-10              // A double prec. constant

        .text
        .balign 4
        .global main

// int main(void)
// Calls taylor_exp three times; each result comes back in d0 and is passed
// straight on to printf as its floating point argument. Returns 0.
main:
        stp     x29, x30, [sp, -16]!    // save FP/LR, main makes calls
        mov     x29, sp

        fmov    d0, xzr                 // d0 = 0.0 (not encodable as an fmov immediate)
        bl      taylor_exp              // d0 = taylor_exp(0.0)
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        bl      printf                  // printf(fmt1, d0)

        fmov    d0, 1.0
        bl      taylor_exp              // d0 = taylor_exp(1.0)
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        bl      printf                  // printf(fmt1, d0)

        fmov    d0, 2.0
        bl      taylor_exp              // d0 = taylor_exp(2.0)
        adrp    x0, fmt1
        add     x0, x0, :lo12:fmt1
        bl      printf                  // printf(fmt1, d0)

        mov     w0, wzr                 // return 0, not whatever printf left in w0
        ldp     x29, x30, [sp], 16
        ret
Taylor Series
// double taylor_exp(double x)
//
// Approximates e^x with its Maclaurin series, sum over n >= 0 of x^n / n!,
// adding terms until one changes the running sum by no more than the
// value stored at the label threshold.
//
// For negative x the series alternates in sign and its large terms cancel,
// which ruins the result. So the series is summed for |x| and the
// reciprocal is returned when x is negative: e^x = 1 / e^|x|.
//
// In:    d0 = x
// Out:   d0 = approximation of e^x
// Uses:  x0, d1-d6 and the condition flags (all caller-saved)
// Leaf function: makes no calls.
taylor_exp:                             // d0 is the first argument
        n_s       = 16
        nfact_s   = 24
        exp_s     = 32
        exp_old_s = 40
        xx_s      = 48
        x_s       = 56                  // the caller's original, signed x

        stp     x29, x30, [sp, -64]!    // Standard stack setup/teardown
        mov     x29, sp

        str     d0, [x29, x_s]          // remember the signed x for the end
        fabs    d0, d0                  // from here on d0 = |x|

        adrp    x0, threshold           // Load the addr of the threshold
        add     x0, x0, :lo12:threshold
        ldr     d6, [x0]                // d6 = threshold, constant for the whole loop

        fmov    d1, xzr                 // Convert 0 to a double
        str     d1, [x29, n_s]          // n = 0.0
        fmov    d1, 1.0
        str     d1, [x29, nfact_s]      // nfact = 1.0
        str     d1, [x29, xx_s]         // xx = 1.0
        str     d1, [x29, exp_s]        // exp = 1.0

exp_do_top:
        ldr     d1, [x29, n_s]          // loading and storing to addrs works just as it does for x
        fmov    d2, 1.0
        fadd    d1, d1, d2              // d1 = d1 + 1
        str     d1, [x29, n_s]          // n = d1

        // Build up the factorial over each iteration of the loop
        ldr     d2, [x29, nfact_s]
        fmul    d2, d2, d1
        str     d2, [x29, nfact_s]      // nfact = nfact * n = n * (n-1)!

        ldr     d3, [x29, xx_s]
        fmul    d3, d0, d3
        str     d3, [x29, xx_s]         // xx = |x|*xx = |x|^n

        // Taylor Series (listing continues from the previous slide)
        ldr     d4, [x29, exp_s]
        str     d4, [x29, exp_old_s]    // exp_old = exp
        fdiv    d2, d3, d2              // d2 = |x|^n / nfact
        fadd    d5, d4, d2              // d5 = exp + |x|^n/nfact
        str     d5, [x29, exp_s]        // exp = d5

        ldr     d1, [x29, exp_s]
        ldr     d2, [x29, exp_old_s]
        fsub    d1, d1, d2              // d1 = exp - exp_old
        fabs    d1, d1                  // d1 = fabs(exp - exp_old)
        fcmp    d1, d6                  // fabs(exp - exp_old) cmp threshold
        b.gt    exp_do_top              // the last term still moved the sum by more than the threshold

        ldr     d0, [x29, exp_s]        // d0 = approximation of e^|x|
        ldr     d1, [x29, x_s]          // d1 = original x
        fcmp    d1, 0.0
        b.ge    exp_done                // x >= 0: d0 is already the answer
        fmov    d1, 1.0
        fdiv    d0, d1, d0              // x < 0 (or NaN): e^x = 1 / e^|x|

exp_done:
        ldp     x29, x30, [sp], 64      // stack teardown
        ret
Next Day
Floating point stuff/Work Day
CPSC 355: Tutorial 16
By Joshua Horacsek
CPSC 355: Tutorial 16
- 1,729