CS110: Principles of Computer Systems
Winter 2021-2022
Stanford University
Instructors: Nick Troccoli and Jerry Cain
Illustration courtesy of Roz Cyrus.
Creating processes and running other programs
Inter-process communication
Signals
Race Conditions
assign3: implement multiprocessing programs like "trace" (to trace another program's behavior) and "farm" (parallelize tasks)
assign4: implement your own shell!
Today's Ed Thread: https://edstem.org/us/courses/16701/discussion/1002824
Today's Ed Thread: https://edstem.org/us/courses/16701/discussion/1002824
Program: code you write to execute tasks
Process: an instance of your program running; consists of program and execution state.
Key idea: multiple processes can run the same program
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
printf("Goodbye!\n");
return 0;
}
Process 5621
Your computer runs many processes simultaneously - even with just 1 processor core (how?)
When you run a program from the terminal, it runs in a new process.
// getpid.c
#include <stdio.h>
#include <unistd.h>
// Reports the ID of the process that is running this program, then exits
// successfully. Each run of the program is a new process, so the number
// printed differs from run to run.
int main(int argc, char *argv[]) {
    const pid_t ownPid = getpid();

    printf("My process ID is %d\n", ownPid);
    return 0;
}
$ ./getpid
My process ID is 18814
$ ./getpid
My process ID is 18831
Today's Ed Thread: https://edstem.org/us/courses/16701/discussion/1002824
$ ./myprogram
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
$ ./myprogram
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
$ ./myprogram
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process B
$ ./myprogram
Hello, world!
Goodbye!
Goodbye!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process A
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
fork();
printf("Goodbye!\n");
return 0;
}
Process B
$ ./myprogram2
fork()
fork() creates a second process that is a clone of the first:
Process A
pid_t fork();
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
$ ./myprogram2
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
Process A
pid_t fork();
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
$ ./myprogram2
Hello, world!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
$ ./myprogram2
Hello, world!
Goodbye, 2!
Goodbye, 2!
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
Illustration courtesy of Roz Cyrus.
The parent process’ file descriptor table is cloned on fork and the reference counts within the relevant open file table entries are incremented. This explains how the child can still output to the same terminal!
Illustration courtesy of Roz Cyrus.
fork()
Process B
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
Process A
int main(int argc, char *argv[]) {
int x = 2;
printf("Hello, world!\n");
fork();
printf("Goodbye, %d!\n", x);
return 0;
}
(Am I the parent or the child?)
Is there a way for the processes to tell which is the parent and which is the child?
Key Idea: the return value of fork() is different in the parent and the child.
fork()
fork() creates a second process that is a clone of the first:
pid_t fork();
$ ./myprogram
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork();
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
$ ./myprogram
Hello, world!
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork();
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
$ ./myprogram
Hello, world!
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 0
fork returned 111
OR
$ ./myprogram
Hello, world!
fork returned 111
fork returned 0
fork()
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 111
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 110
int main(int argc, char *argv[]) {
printf("Hello, world!\n");
pid_t pidOrZero = fork(); // 0
printf("fork returned %d\n", pidOrZero);
return 0;
}
Process 111
$ ./myprogram
Hello, world!
fork returned 0
fork returned 111
OR
We can no longer assume the order in which our program will execute! The OS decides the order.
fork()
// basic-fork.c
// Prints a greeting with this process's ID and its parent's ID, calls fork(),
// and then prints a farewell with the same two IDs. The greeting runs before
// the fork, so it is printed once; the farewell comes after the fork, so it is
// printed by both the original process and the new child.
int main(int argc, char *argv[]) {
printf("Greetings from process %d! (parent %d)\n", getpid(), getppid());
pid_t pidOrZero = fork();
// fork() reports failure with a negative return value; stop here if it failed.
assert(pidOrZero >= 0);
// Reached by both processes: getpid()/getppid() are re-evaluated in each one.
printf("Bye-bye from process %d! (parent %d)\n", getpid(), getppid());
return 0;
}
$ ./basic-fork
Greetings from process 29686! (parent 29351)
Bye-bye from process 29686! (parent 29351)
Bye-bye from process 29687! (parent 29686)
$ ./basic-fork
Greetings from process 29688! (parent 29351)
Bye-bye from process 29689! (parent 29688)
Bye-bye from process 29688! (parent 29351)
What happens to variables and addresses?
// Shows what happens to a local buffer across fork(): str is filled in before
// the fork, the child then overwrites its str while the parent sleeps, and the
// parent prints its own str afterwards. Both processes print str's address so
// the addresses and the contents can be compared.
int main(int argc, char *argv[]) {
char str[128];
strcpy(str, "Hello");
printf("str's address is %p\n", str);
pid_t pid = fork();
// fork() returns 0 in the child and a nonzero value in the parent.
// NOTE(review): the return value is not checked for failure, so a failed
// fork() (negative return) would fall into the parent branch below.
if (pid == 0) {
// The child should modify str
printf("I am the child. str's address is %p\n", str);
strcpy(str, "Howdy");
printf("I am the child and I changed str to %s. str's address is still %p\n", str, str);
} else {
// The parent should sleep and print out str
printf("I am the parent. str's address is %p\n", str);
printf("I am the parent, and I'm going to sleep for 2 seconds.\n");
// The sleep gives the child time to change its str before the parent prints.
sleep(2);
printf("I am the parent. I just woke up. str's address is %p, and its value is %s\n", str, str);
}
return 0;
}
How can the parent and child use the same address to store different data?
$ ./fork-copy
str's address is 0x7ffc8cfa9990
I am the parent. str's address is 0x7ffc8cfa9990
I am the parent, and I'm going to sleep for 2 seconds.
I am the child. str's address is 0x7ffc8cfa9990
I am the child and I changed str to Howdy. str's address is still 0x7ffc8cfa9990
I am the parent. I just woke up. str's address is 0x7ffc8cfa9990, and its value is Hello
Isn't it expensive to make copies of all memory when forking?
$ ./fork-copy
str's address is 0x7ffc8cfa9990
I am the parent. str's address is 0x7ffc8cfa9990
I am the parent, and I'm going to sleep for 2 seconds.
I am the child. str's address is 0x7ffc8cfa9990
I am the child and I changed str to Howdy. str's address is still 0x7ffc8cfa9990
I am the parent. I just woke up. str's address is 0x7ffc8cfa9990, and its value is Hello
Key Idea: all state is copied from the parent to the child, even the random number generator seed! Both the parent and child will get the same return value from random().
// Seeds the random number generator once, forks, and then has the parent and
// the child each roll a six-sided die with random(). The seed is set before
// the fork, so both processes start from the same generator state.
int main(int argc, char *argv[]) {
// Initialize the random number generator with a "seed value";
// this seed state is used to generate future random numbers
srandom(time(NULL));
printf("This program will make you question what 'randomness' means...\n");
pid_t pidOrZero = fork();
// Parent goes first - both processes *always* get the same roll (why?)
if (pidOrZero != 0) {
// random() % 6 is in 0..5, so adding 1 gives a roll in 1..6.
int diceRoll = (random() % 6) + 1;
printf("I am the parent and I rolled a %d\n", diceRoll);
// NOTE(review): presumably keeps the parent alive until the child has
// printed its line - confirm the intent.
sleep(1);
} else {
// The child waits before rolling so that the parent's line comes first.
sleep(1);
int diceRoll = (random() % 6) + 1;
printf("I am the child and I'm guessing the parent rolled a %d\n", diceRoll);
}
return 0;
}
How do I debug two processes at once? gdb has built-in support for debugging multiple processes:
set detach-on-fork off: tells gdb to capture any fork'd processes, though it pauses them upon the fork.
info inferiors: lists the processes that gdb has captured.
inferior X: switches gdb to process X so that you can debug it.
detach inferior X: tells gdb to stop watching the process, and continue it.
There is a sample debugging session for the basic-fork program right here.
Today's Ed Thread: https://edstem.org/us/courses/16701/discussion/1002824
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Parent
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Parent
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 2
GChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 1
Parent
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 2
GChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 1
Parent
Child 3
GChild 2
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
GChild 3
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
GGChild 1
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Here's a useful (but mind-melting) example of a program where child processes themselves call fork():
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Observations:
a is printed by the original process.
$ ./fork-puzzle
a
b
c
b
c
c
c
$ ./fork-puzzle
a
b
c
b
c
c
$ c
What happened here?
$ ./fork-puzzle
a
b
b
c
c
c
c
Questions:
Child 2
GChild 1
Child 1
Parent
Child 3
GChild 2
GChild 3
GGChild 1
-> 4: parent, child 1, child 2, Gchild 1
-> Child 3, GGchild 1, Gchild 3, Gchild 2
// fork-puzzle.c
static const char *kTrail = "abc";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Today's Ed Thread: https://edstem.org/us/courses/16701/discussion/1002824
waitpid()
A function that a parent can call to wait for its child to exit:
pid_t waitpid(pid_t pid, int *status, int options);
waitpid()
// Demonstrates a parent waiting for its child with waitpid().
//
// The child sleeps for two seconds and then prints its message. The parent
// blocks in waitpid() until that child has exited, so the parent's message is
// always printed after the child's.
//
// Returns 0 in both processes.
int main(int argc, char *argv[]) {
    printf("Before.\n");
    pid_t pidOrZero = fork();
    if (pidOrZero == 0) {
        // Child: fork() returned 0.
        sleep(2);
        printf("I (the child) slept and the parent still waited up for me.\n");
    } else {
        // Parent: fork() returned the child's PID. A NULL status pointer means
        // the child's exit status is not needed here; the return value is not
        // used either, so it is not stored in a local.
        waitpid(pidOrZero, NULL, 0);
        printf("I (the parent) finished waiting for the child. This always prints last.\n");
    }
    return 0;
}
$ ./waitpid
Before.
I (the child) slept and the parent still waited up for me.
I (the parent) finished waiting for the child. This always prints last.
$
waitpid()
The output will be the same every time! The parent will always wait for the child to finish before continuing.
// Demonstrates retrieving a child's exit status through waitpid().
//
// The child prints a message and exits with status 110. The parent waits for
// it, passing the address of an int so that waitpid() can fill in the child's
// status, and then reports whether the child exited normally.
//
// Returns 110 in the child. In the parent, returns 0 after reporting on the
// child, or 1 if waitpid() itself fails.
int main(int argc, char *argv[]) {
    pid_t pid = fork();
    if (pid == 0) {
        printf("I'm the child, and the parent will wait up for me.\n");
        return 110; // contrived exit status (not a bad number, though)
    } else {
        int status;
        // waitpid() returns a pid_t: the child's PID on success, -1 on error.
        pid_t result = waitpid(pid, &status, 0);
        if (result < 0) {
            // status was never filled in, so it must not be inspected.
            perror("waitpid");
            return 1;
        }
        if (WIFEXITED(status)) {
            // WEXITSTATUS is only meaningful when WIFEXITED is true.
            printf("Child exited with status %d.\n", WEXITSTATUS(status));
        } else {
            printf("Child terminated abnormally.\n");
        }
        return 0;
    }
}
Pass in the address of an integer as the second parameter to get the child's status.
$ ./separate
I'm the child, and the parent will wait up for me.
Child exited with status 110.
$
Next time: more waitpid(), execvp() and writing our first shell program
int main(int argc, char *argv[]) {
printf("Starting the program\n");
pid_t pidOrZero1 = fork();
pid_t pidOrZero2 = fork();
if (pidOrZero1 != 0 && pidOrZero2 != 0) {
printf("Hello\n");
}
if (pidOrZero2 != 0) {
printf("Hi there\n");
}
return 0;
}
How many processes run in total?
a) 1 b) 2 c) 3 d) 4
How many times is "Hello" printed?
a) 1 b) 2 c) 3 d) 4
How many times is "Hi there" printed?
a) 1 b) 2 c) 3 d) 4
Parent
pidOrZero1 = nonzero
pidOrZero2 = nonzero
int main(int argc, char *argv[]) {
printf("Starting the program\n");
pid_t pidOrZero1 = fork();
pid_t pidOrZero2 = fork();
if (pidOrZero1 != 0 && pidOrZero2 != 0) {
printf("Hello\n");
}
if (pidOrZero2 != 0) {
printf("Hi there\n");
}
return 0;
}
First Child
pidOrZero1 = 0
pidOrZero2 = nonzero
Grandchild
pidOrZero1 = 0
pidOrZero2 = 0
Second Child
pidOrZero1 = nonzero
pidOrZero2 = 0
Questions:
// fork-puzzle-full.c
static const char *kTrail = "abcd";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Questions:
// fork-puzzle-full.c
static const char *kTrail = "abcd";
int main(int argc, char *argv[]) {
for (int i = 0; i < strlen(kTrail); i++) {
printf("%c\n", kTrail[i]);
pid_t pidOrZero = fork();
assert(pidOrZero >= 0);
}
return 0;
}
Child 2
GChild 1
Child 1
Parent
Child 3
GChild 2
GChild 3
GGChild 1
From earlier fork tree: