Principles of Computer Systems
Winter 2021
Stanford University
Computer Science Department
Lecturer: Chris Gregg and
Nick Troccoli
Layering: decomposing systems into components with well-defined responsibilities, specifying repcise APIs between them (above and below)
$cat people.txt | sort | uniq > list.txt
vnode
abstraction of a file within the kernelprocess control blocks
, and they are stored in the process table
file descriptor table
read, write,
and close
)$ cat in.txt > out.txt
works)$ ./main 1> log.txt 2> log.txt
$ ./main 1> log.txt 2>&1
Opens log.txt twice (two file table entries)
Opens log.txt once, two descriptors for same file table entry
// file: testfd.c
#include <stdio.h>
#include <unistd.h>
#include <string.h>
int main(int argc, char **argv)
{
const char* error = "One plus one is\ntwo.\n";
const char* msg = "One plus two is\n";
write(2, error, strlen(error));
write(1, msg, strlen(msg));
return 0;
}
1
2
pos: 0
pos: 0
log.txt
fd table
file table
vnode
1
2
pos: 0
log.txt
fd table
file table
vnode
cgregg@myth60:$ ./testfd 1> log.txt 2> log.txt
cgregg@myth60:$ cat log.txt
One plus two is
two.
cgregg@myth60:$
cgregg@myth60:$ ./testfd 1> log.txt 2>&1
cgregg@myth60:$ cat log.txt
One plus one is
two.
One plus two is
cgregg@myth60:$
bash
shell calls make
, which itself calls g++
, each of them inserts text into the same terminal window: those three files could be stdin, stdout, and stderr for a terminal0xFFFFFFFFFFFFFFFF
0x0
libc.so
bash
heap
stack
libdl.so
data
read()
and write()
operate on the buffer cachesync()
system call flushes buffers associated with filelibc.so
libc.so
Process A
Process B
Buffer Cache
mmap()
libc.so
libc.so
Process A
Process B
Buffer Cache
Program: code you write to execute tasks
Process: an instance of your program running; consists of program and execution state.
Key idea: multiple processes can run the same program
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
printf("Goodbye!\n");
return 0;
}
Process 5621
Your computer runs many processes simultaneously - even with just 1 processor core (how?)
When you run a program from the terminal, it runs in a new process.
// getpid.c
#include <stdio.h>
#include <unistd.h>
int main(int argc, char *argv[]) {
pid_t myPid = getpid();
printf("My process ID is %d\n", myPid);
return 0;
}
$ ./getpid
My process ID is 18814
$ ./getpid
My process ID is 18831
$ ./myprogram
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
$ ./myprogram
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
$ ./myprogram
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process B
$ ./myprogram
Hello, world!
Goodbye!
Goodbye!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process B
$ ./myprogram2
fork()
fork() creates a second process that is a clone of the first:
Process A
pid_t fork();
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
$ ./myprogram2
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
Process A
pid_t fork();
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
$ ./myprogram2
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
$ ./myprogram2
Hello, world!
Goodbye, 2!
Goodbye, 2!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
fork()
Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
(Am I the parent or the child?)
Is there a way for the processes to tell which is the parent and which is the child?
Key Idea: the return value of fork() is different in the parent and the child.
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
$ ./myprogram
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork();
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
$ ./myprogram2
Hello, world!
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork();
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
$ ./myprogram2
Hello, world!
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 0
fork returned 111
OR
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 0
fork returned 111
OR
We can no longer assume the order in which our program will execute! The OS decides the order.
fork()
// basic-fork.c
int main(int argc, char *argv[]) {
printf("Greetings from process %d! (parent %d)\n", getpid(), getppid());
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
printf("Bye-bye from process %d! (parent %d)\n", getpid(), getppid());
return 0;
}
$ ./basic-fork
Greetings from process 29686! (parent 29351)
Bye-bye from process 29686! (parent 29351)
Bye-bye from process 29687! (parent 29686)
$ ./basic-fork
Greetings from process 29688! (parent 29351)
Bye-bye from process 29689! (parent 29688
Bye-bye from process 29688! (parent 29351)
How do I debug two processes at once? gdb
has built-in support for debugging multiple processes
set detach-on-fork off
gdb
to capture any fork
'd processes, though it pauses them upon the fork
.
info inferiors
gdb
has captured.inferior X
detach inferior X
gdb
to stop watching the process, and continue itbasic-fork
program right here.Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Parent
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Parent
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 2
GChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 1
Parent
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 2
GChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 1
Parent
Child 3
GChild 2
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
GChild 3
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
GGChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Observations:
a
is printed by the original process.$ ./fork-puzzle
a
b
c
b
c
c
c
$ ./fork-puzzle
a
b
c
b
c
c
$ c
What happened here?
$ ./fork-puzzle
a
b
b
c
c
c
c
Questions:
Child 2
GChild 1
Child 1
Parent
Child 3
GChild 2
GChild 3
GGChild 1
-> 4: parent, child 1, child 2, Gchild 1
-> Child 3, GGchild 1, Gchild 3, Gchild 2
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Next time: the power of the fork()
Questions:
// fork-puzzle-full.c
static const char *kTrail = "abcd";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Questions:
// fork-puzzle-full.c
static const char *kTrail = "abcd";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 2
GChild 1
Child 1
Parent
Child 3
GChild 2
GChild 3
GGChild 1
From earlier fork tree: