CPSC 355: Tutorial 16

More Syscalls and Floating Point operations

J. J. Horacsek

PhD Student

Fall 2017

Syscalls

I want to do one more example before we move on to floating point arithmetic

File copy example

Open a file, (check if it's valid)
Create a file (check if it's valid)
Copy each byte from the first file to the second file

Copy a file

#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Copy the file named by argv[1] into the file named by argv[2], one byte
 * at a time, using the openat/read/write/close system-call wrappers.
 *
 * Returns 0 on success and -1 if a read or a write fails. Calls exit(-1) on
 * a usage error or when either file cannot be opened.
 */
int main(int argc, char *argv[]) {
    int fd_in, fd_out, bytes_read, total_bytes = 0;
    int status = 0;
    char data;

    if(argc != 3) {
        printf("usage: %s input_file output_file\n", argv[0]);
        exit(-1);
    }

    fd_in  = openat(AT_FDCWD, argv[1], O_RDONLY);
    if(fd_in < 0) {
        printf("Couldn't open \"%s\" for reading!\n", argv[1]);
        exit(-1);
    }

    // O_TRUNC discards any previous contents, so overwriting a longer
    // file does not leave its old tail behind the copied data.
    fd_out = openat(AT_FDCWD, argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if(fd_out < 0) {
        printf("Couldn't open \"%s\" for writing!\n", argv[2]);
        close(fd_in);
        exit(-1);
    }

    bytes_read = read(fd_in, &data, sizeof(char));
    if(bytes_read != sizeof(char)) { // 0: nothing to write, < 0: read error
        close(fd_in);
        close(fd_out);
        return bytes_read < 0 ? -1 : 0;
    }
    do {
        if(write(fd_out, &data, sizeof(char)) != sizeof(char)) {
            printf("Error writing to \"%s\".\n", argv[2]);
            status = -1;
            break;
        }
        total_bytes += sizeof(char);    // count only bytes actually written
        bytes_read = read(fd_in, &data, sizeof(char));
    } while(bytes_read == sizeof(char));

    if(bytes_read < 0) {                // loop ended on a read error, not EOF
        status = -1;
    }

    printf("Wrote %d bytes!\n", total_bytes);

    close(fd_in);
    close(fd_out);
    return status;
}

// File: Copy.asm
// Description: 
// Takes two file names as command-line arguments and
// copies the data in the first file to the second

.data
// NUL-terminated format strings passed to printf by main
fmt1: .asciz "usage: %s input_file output_file\n"
fmt2: .asciz "Couldn't open %s for reading\n"
fmt3: .asciz "Couldn't open %s for writing\n"
fmt4: .asciz "Error writing to file %s\n"
fmt5: .asciz "Wrote %d bytes\n"

// m4 aliases: registers that keep the two arguments of main across calls
define(argc_r, x19)
define(argv_r, x20)

.text
.global main

// Byte offsets from the frame pointer (x29) of each local variable of main.
// Offsets 0-15 of the frame hold the saved x29/x30 pair.
.equ fd_out_s,      16      // int  fd_out
.equ fd_in_s,       20      // int  fd_in
.equ bytes_read_s,  24      // int  bytes_read
.equ total_bytes_s, 28      // int  total_bytes
.equ data_s,        32      // char data

// int main(int argc, char *argv[])
//
// Copies the file named by argv[1] into the file named by argv[2] one byte
// at a time using raw Linux AArch64 system calls (number in x8, arguments
// in x0-x3, result in x0; a negative result is an error code).
//
// In:     w0 = argc, x1 = argv
// Out:    w0 = 0 on success, -1 if a read or a write failed
//         (calls exit(-1) on a usage error or if a file cannot be opened)
// Frame:  64 bytes = saved x29/x30, the locals at the *_s offsets, one
//         status word, and the saved x19/x20 pair
// Keeps:  x19/x20 are callee-saved, so they are saved on entry and restored
//         before returning

// Linux AArch64 system call numbers used below
SYS_openat   = 56
SYS_close    = 57
SYS_read     = 63
SYS_write    = 64

// Extra frame slots owned by this function
status_s     = 36           // int status: value returned from main
saved_regs_s = 48           // saved x19/x20 pair (16 bytes)

main:
   stp x29, x30, [sp, -64]! // Stack setup
   mov x29, sp
   stp argc_r, argv_r, [x29, saved_regs_s] // preserve the caller's x19/x20

   // Save argc and argv into callee saved registers
   sxtw argc_r, w0          // argc is a 32-bit int: bits 32-63 of x0 are unspecified
   mov argv_r, x1
   str wzr, [x29, status_s] // status = 0

   cmp argc_r, 3     // if we have 3 arguments, continue
   b.eq success_argc

   // Otherwise bail
   adrp x0, fmt1
   add x0, x0, :lo12:fmt1
   ldr x1, [argv_r]  // Load the first element of argv
                     // which is a pointer to a string containing the exe name
   bl printf         // printf(fmt1, argv[0]);

   mov w0, -1
   bl exit           // exit(-1), does not return

success_argc:
   // setup for fd_in = openat(AT_FDCWD, argv[1], O_RDONLY)
   mov w0, -100            // AT_FDCWD: resolve the path relative to the cwd
   ldr x1, [argv_r, 8]     // load x1 = argv[1]
   mov w2, wzr             // O_RDONLY = 0

   // setup syscall
   mov x8, SYS_openat
   svc 0                   // w0 = openat(-100, argv[1], 0);

   str w0, [x29, fd_in_s]  // fd_in = w0
   cmp w0, 0
   b.ge success_open_input // if fd >= 0 we successfully opened the file

   adrp x0, fmt2
   add x0, x0, :lo12:fmt2
   ldr x1, [argv_r, 8]     // x1 = argv[1]
   bl printf               // printf(fmt2, argv[1])

   mov w0, -1
   bl exit                 // exit(-1), does not return

success_open_input:
   // Now we setup to open the output file
   mov w0, -100             // AT_FDCWD
   ldr x1, [argv_r, 16]     // x1 = argv[2]
   mov w2, 01101            // O_WRONLY | O_CREAT | O_TRUNC = 01 | 0100 | 01000 (octal);
                            // O_TRUNC drops old contents so no stale tail survives
   mov w3, 0666             // w3 = 0666 in octal, which means everyone can read/write to this file

   mov x8, SYS_openat
   svc 0                    // w0 = openat(-100, argv[2], 01101, 0666)

   str w0, [x29, fd_out_s]  // fd_out = w0
   cmp w0, 0
   b.ge success_open_output // if fd_out >= 0 continue to read/write data

   // Again, bail if we couldn't open the file
   adrp x0, fmt3
   add x0, x0, :lo12:fmt3
   ldr x1, [argv_r, 16]     // x1 = argv[2]
   bl printf                // printf(fmt3, argv[2])

   ldr w0, [x29, fd_in_s]
   mov x8, SYS_close
   svc 0                    // close(fd_in)

   mov w0, -1
   bl exit                  // exit(-1), does not return

success_open_output:
   str wzr, [x29, total_bytes_s] // total_bytes = 0 (counts bytes written)

   // setup arguments for read
   ldr w0, [x29, fd_in_s]   // read from fd_in
   add x1, x29, data_s      // &data = address of data variable on the stack
   mov w2, 1                // read one byte

   // setup syscall
   mov x8, SYS_read
   svc 0                    // w0 = read(fd_in, &data, 1)

   str w0, [x29, bytes_read_s] // bytes_read = w0

   cmp w0, 1                // anything other than one byte means
   b.ne read_finished       // end of file (0) or a read error (negative)

doloop_top:
   ldr w0, [x29, fd_out_s] // fd_out is the file we want
   add x1, x29, data_s     // tell write() to write data
                           // from &data = address of data variable on stack
   mov w2, 1               // tell write() to write 1 byte

   mov x8, SYS_write
   svc 0                   // w0 = write(fd_out, &data, 1)

   cmp w0, 1               // if we wrote one byte
   b.eq success_write      // move on to reading another

   // otherwise report the failure and stop copying
   adrp x0, fmt4
   add x0, x0, :lo12:fmt4
   ldr x1, [argv_r, 16]    // x1 = argv[2]
   bl printf               // printf(fmt4, argv[2])

   mov w0, -1
   str w0, [x29, status_s] // status = -1
   b done_loop

success_write:
   ldr w1, [x29, total_bytes_s]
   add w1, w1, 1
   str w1, [x29, total_bytes_s] // total_bytes += 1, only after a successful write

   // setup arguments for read
   ldr w0, [x29, fd_in_s]  // read() from file fd_in
   add x1, x29, data_s     // read() into address &data
   mov w2, 1               // read() one byte

   // setup syscall
   mov x8, SYS_read
   svc 0                   // w0 = read(fd_in, &data, 1)

   str w0, [x29, bytes_read_s]  // bytes_read = w0

   cmp w0, 1                    // if bytes_read == 1
   b.eq doloop_top              // goto top and try to write it to the output

read_finished:
   ldr w0, [x29, bytes_read_s]
   cmp w0, 0
   b.ge done_loop               // 0 = clean end of file
   mov w0, -1                   // negative = the kernel reported a read error
   str w0, [x29, status_s]      // status = -1

done_loop:
   adrp x0, fmt5
   add x0, x0, :lo12:fmt5
   ldr w1, [x29, total_bytes_s]
   bl printf                    // printf(fmt5, total_bytes)

   ldr w0, [x29, fd_in_s]
   mov x8, SYS_close
   svc 0                        // close(fd_in)

   ldr w0, [x29, fd_out_s]
   mov x8, SYS_close
   svc 0                        // close(fd_out)

   ldr w0, [x29, status_s]      // return value: 0, or -1 after an I/O error
   ldp argc_r, argv_r, [x29, saved_regs_s] // restore the caller's x19/x20
   ldp x29, x30, [sp], 64       // Stack teardown
   ret

Floating Point Arithmetic

ARMv8 has 32 floating point registers (sort of, they're actually 128bit, but ignore that for now)

d0-d31 are 64 bit double precision registers
s0-s31 are the lower 32 bits of these registers and are used for single precision FP operations
d8-d15 are callee saved registers
d0-d7 and d16-d31 may be overwritten by subroutines.
d0-d7 are used to pass floating point arguments into a function

Floating Point Arithmetic

fmov S/Dd, S/Dn: move S/Dn into S/Dd
fmov S/Dd, #fpimm: move an immediate value into S/Dd (the encodable values are limited: the immediate must be of the form \(\pm \frac{n}{16} \times 2^r\), where \(n \in [16, 31]\) and \(r \in [-3, 4]\) are integers)
fcvt Sd, Dn: Convert Dn to single prec. Sd
fcvt Dd, Sn: Convert Sn to double prec. Dd
fadd, fsub S/Dd, S/Dn, S/Dm: Add or subtract S/Dn, S/Dm, place result in S/Dd
fmul S/Dd, S/Dn, S/Dm: S/Dd = S/Dn * S/Dm
fdiv S/Dd, S/Dn, S/Dm: S/Dd = S/Dn / S/Dm
fmadd S/Dd, S/Dn, S/Dm, S/Da: S/Dd = S/Da + S/Dn * S/Dm

Floating Point Arithmetic

fabs S/Dd, S/Dn: S/Dd = abs(S/Dn)
fneg S/Dd, S/Dn: S/Dd = -(S/Dn)
fcmp S/Dn, S/Dm: Compares two registers, stores result in condition flags
fcmp S/Dn, #fpimm: Compares register and immediate value, stores result in condition flags

An example...

Taylor Series

Recall the Taylor series of an infinitely differentiable function \(f\) about a point \(a\) (when \(f\) is analytic, the series converges to \(f\) near \(a\))

\[ f(x) = \sum_{n=0}^{\infty} \frac{f^{(n)}(a)}{n!}(x-a)^n \]

Or its Maclaurin expansion

\[ f(x) = \sum_{n=0}^{\infty} \frac{f^{(n)}(0)}{n!}x^n \]

For \(e^x\) we have

\[ e^x = \sum_{n=0}^{\infty} \frac{x^n}{n!} \]

Taylor Series

We can stop after some amount of terms in the sum

\[ e^x \approx \sum_{n=0}^{j} \frac{x^n}{n!} \]

On a machine we eventually run out of floating-point precision, so we stop once adding a new term changes the approximation by less than some threshold

Taylor Series

#include <math.h>
#include <stdio.h>

/*
 * Approximate e^x with its Maclaurin series 1 + x + x^2/2! + x^3/3! + ...
 * Terms are added until one of them changes the running sum by no more
 * than 1e-10; at least one term beyond the leading 1 is always added.
 */
double taylor_exp(double x) {
    const double tolerance = 1e-10;
    double order = 0;          /* n, kept as a double */
    double factorial = 1;      /* n! */
    double power = 1;          /* x^n */
    double sum = 1.0;          /* running approximation */
    double previous;           /* approximation before the latest term */

    for (;;) {
        order += 1.0;
        factorial *= order;
        power *= x;

        previous = sum;
        sum += power / factorial;

        /* Negated '>' (not '<=') so a NaN difference also ends the loop. */
        if (!(fabs(previous - sum) > tolerance)) {
            break;
        }
    }
    return sum;
}

/* Print the series approximation of e^x for x = 0, 1 and 2, one per line. */
int main(int argc, char *argv[]) {
    static const double inputs[] = { 0.0, 1.0, 2.0 };
    size_t i;

    for (i = 0; i < sizeof inputs / sizeof inputs[0]; i++) {
        printf("%f\n", taylor_exp(inputs[i]));
    }
    return 0;   /* same status C99 gives when main falls off the end */
}

Taylor Series

.data
fmt1: .string "%f\n"          // printf format: one double followed by a newline
      .balign 8               // fmt1 is 4 bytes long; pad so the double below is 8-byte aligned
threshold: .double 0r1e-10    // A double prec. constant: the loop in taylor_exp stops
                              // once a term changes the sum by no more than this

.text
.balign 4                     // instructions must start on a 4-byte boundary
.global main
// int main(void)
// Prints taylor_exp(0.0), taylor_exp(1.0) and taylor_exp(2.0), one per line.
// Each double travels in d0: it is the argument and the return value of
// taylor_exp, and also the first floating point argument of printf.
// Out: w0 = 0
main:
   stp x29, x30, [sp, -16]!   // Stack setup
   mov x29, sp

   fmov d0, xzr               // d0 = 0.0 (all zero bits)
   bl taylor_exp              // d0 = taylor_exp(0.0)

   adrp x0, fmt1
   add x0, x0, :lo12:fmt1
   bl printf                  // printf(fmt1, d0)

   fmov d0, 1.0
   bl taylor_exp              // d0 = taylor_exp(1.0)

   adrp x0, fmt1
   add x0, x0, :lo12:fmt1
   bl printf                  // printf(fmt1, d0)

   fmov d0, 2.0
   bl taylor_exp              // d0 = taylor_exp(2.0)

   adrp x0, fmt1
   add x0, x0, :lo12:fmt1
   bl printf                  // printf(fmt1, d0)

   mov w0, wzr                // return 0; otherwise w0 still holds the result of printf
   ldp x29, x30, [sp], 16     // Stack teardown
   ret

Taylor Series

taylor_exp: // double taylor_exp(double x): x arrives in d0 and stays there for the whole loop
// Offsets from x29 of the five double locals (0-15 hold the saved x29/x30)
.equ n_s,       16          // n:       index of the current term
.equ nfact_s,   24          // nfact:   n!
.equ exp_s,     32          // exp:     running approximation
.equ exp_old_s, 40          // exp_old: approximation before the current term
.equ xx_s,      48          // xx:      x^n

   stp x29, x30, [sp, -64]! // 64-byte frame: x29/x30 pair plus the locals
   mov x29, sp

   str xzr, [x29, n_s]      // n = 0.0 (a double made of all zero bits)

   fmov d16, 1.0
   str d16, [x29, exp_s]    // exp = 1.0, the n = 0 term of the series
   str d16, [x29, xx_s]     // xx = 1.0 = x^0
   str d16, [x29, nfact_s]  // nfact = 1.0 = 0!

exp_do_top:
   // Step to the next term: n = n + 1
   fmov d16, 1.0
   ldr d1, [x29, n_s]
   fadd d1, d1, d16
   str d1, [x29, n_s]       // d1 = n

   // Extend the running power of x
   ldr d3, [x29, xx_s]
   fmul d3, d0, d3
   str d3, [x29, xx_s]      // d3 = xx = x*xx = x^n

   // Extend the running factorial
   ldr d2, [x29, nfact_s]
   fmul d2, d2, d1
   str d2, [x29, nfact_s]   // d2 = nfact = nfact * n = n * (n-1)!

Taylor Series


   // Current term = x^n / n!, read back from the xx and nfact locals
   ldr d4, [x29, xx_s]
   ldr d5, [x29, nfact_s]
   fdiv d4, d4, d5          // d4 = xx / nfact

   ldr d6, [x29, exp_s]
   str d6, [x29, exp_old_s] // exp_old = exp
   fadd d7, d6, d4
   str d7, [x29, exp_s]     // exp = exp_old + xx/nfact

   fsub d1, d7, d6          // d1 = exp - exp_old
   fabs d1, d1              // d1 = fabs(exp - exp_old)

   adrp x9, threshold       // address of the tolerance constant
   add x9, x9, :lo12:threshold
   ldr d2, [x9]             // d2 = threshold

   fcmp d1, d2              // compare the change against the threshold
   b.gt exp_do_top          // still changing by more than the threshold: add another term

   fmov d0, d7              // return value: the approximation just stored in exp
   ldp x29, x30, [sp], 64   // stack teardown
   ret

Next Day

Floating point stuff/Work Day