Principles of Computer Systems
Winter 2021
Stanford University
Computer Science Department
Lecturers: Chris Gregg and Nick Troccoli
Layering: decomposing systems into components with well-defined responsibilities, specifying precise APIs between them (above and below)
$ cat people.txt | sort | uniq > list.txt
vnode abstraction of a file within the kernel
process control blocks, and they are stored in the process table
file descriptor table
read, write, and close)
$ cat in.txt > out.txt works)
$ ./main 1> log.txt 2> log.txt
$ ./main 1> log.txt 2>&1
Opens log.txt twice (two file table entries)
Opens log.txt once, two descriptors for same file table entry
// file: testfd.c
#include <stdio.h>
#include <unistd.h>
#include <string.h>
int main(int argc, char **argv)
{
const char* error = "One plus one is\ntwo.\n";
const char* msg = "One plus two is\n";
write(2, error, strlen(error));
write(1, msg, strlen(msg));
return 0;
}1
2
pos: 0
pos: 0
log.txt
fd table
file table
vnode
1
2
pos: 0
log.txt
fd table
file table
vnode
cgregg@myth60:$ ./testfd 1> log.txt 2> log.txt
cgregg@myth60:$ cat log.txt
One plus two is
two.
cgregg@myth60:$
cgregg@myth60:$ ./testfd 1> log.txt 2>&1
cgregg@myth60:$ cat log.txt
One plus one is
two.
One plus two is
cgregg@myth60:$
bash shell calls make, which itself calls g++; each of them inserts text into the same terminal window: those three files could be stdin, stdout, and stderr for a terminal
0xFFFFFFFFFFFFFFFF
0x0
libc.so
bash
heap
stack
libdl.so
data
read() and write() operate on the buffer cache
sync() system call flushes buffers associated with file
libc.so
libc.so
Process A
Process B
Buffer Cache
mmap()
libc.so
libc.so
Process A
Process B
Buffer Cache
Program: code you write to execute tasks
Process: an instance of your program running; consists of program and execution state.
Key idea: multiple processes can run the same program
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
printf("Goodbye!\n");
return 0;
}Process 5621
Your computer runs many processes simultaneously - even with just 1 processor core (how?)
When you run a program from the terminal, it runs in a new process.
// getpid.c
#include <stdio.h>
#include <unistd.h>
int main(int argc, char *argv[]) {
pid_t myPid = getpid();
printf("My process ID is %d\n", myPid);
return 0;
}$ ./getpid
My process ID is 18814
$ ./getpid
My process ID is 18831
$ ./myprogram
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process A
$ ./myprogram
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process A
$ ./myprogram
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process A
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process B
$ ./myprogram
Hello, world!
Goodbye!
Goodbye!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}Process B
$ ./myprogram2
fork()
fork() creates a second process that is a clone of the first:
Process A
pid_t fork();int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}$ ./myprogram2
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
Process A
pid_t fork();int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}$ ./myprogram2
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}$ ./myprogram2
Hello, world!
Goodbye, 2!
Goodbye, 2!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}fork()
fork() creates a second process that is a clone of the first:
pid_t fork();fork()
Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}(Am I the parent or the child?)
Is there a way for the processes to tell which is the parent and which is the child?
Key Idea: the return value of fork() is different in the parent and the child.
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();$ ./myprogramfork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork();
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 110
$ ./myprogram2
Hello, world!
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork();
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 110
$ ./myprogram2
Hello, world!
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 111
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 111
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 111
$ ./myprogram
Hello, world!
fork returned 0
fork returned 111
OR
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}Process 111
$ ./myprogram
Hello, world!
fork returned 0
fork returned 111
OR
We can no longer assume the order in which our program will execute! The OS decides the order.
fork()
// basic-fork.c
int main(int argc, char *argv[]) {
printf("Greetings from process %d! (parent %d)\n", getpid(), getppid());
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
printf("Bye-bye from process %d! (parent %d)\n", getpid(), getppid());
return 0;
}$ ./basic-fork
Greetings from process 29686! (parent 29351)
Bye-bye from process 29686! (parent 29351)
Bye-bye from process 29687! (parent 29686)
$ ./basic-fork
Greetings from process 29688! (parent 29351)
Bye-bye from process 29689! (parent 29688)
Bye-bye from process 29688! (parent 29351)
How do I debug two processes at once? gdb has built-in support for debugging multiple processes.
set detach-on-fork off
gdb to capture any fork'd processes, though it pauses them upon the fork.
info inferiors
gdb has captured.
inferior X
detach inferior X
gdb to stop watching the process, and continue it
basic-fork program right here.
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Parent
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Child 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Parent
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Child 2
GChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Child 1
Parent
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Child 2
GChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Child 1
Parent
Child 3
GChild 2
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}GChild 3
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}GGChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Observations:
a is printed by the original process.
$ ./fork-puzzle
a
b
c
b
c
c
c
$ ./fork-puzzle
a
b
c
b
c
c
$ c
What happened here?
$ ./fork-puzzle
a
b
b
c
c
c
c
Questions:
Child 2
GChild 1
Child 1
Parent
Child 3
GChild 2
GChild 3
GGChild 1
-> 4: parent, child 1, child 2, Gchild 1
-> Child 3, GGchild 1, Gchild 3, Gchild 2
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Next time: the power of the fork()
Questions:
// fork-puzzle-full.c
static const char *kTrail = "abcd";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Questions:
// fork-puzzle-full.c
static const char *kTrail = "abcd";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}Child 2
GChild 1
Child 1
Parent
Child 3
GChild 2
GChild 3
GGChild 1
From earlier fork tree: