Principles of Computer Systems
Spring 2019
Stanford University
Computer Science Department
Lecturer: Chris Gregg
// job-list-fixed.c
// SIGCHLD handler for job-list-fixed: reaps every child that has already
// exited, without blocking, and announces each one. Stops as soon as waitpid
// reports that nothing more can be reaped right now (0) or fails (-1).
static void reapProcesses(int sig) {
  pid_t reaped;
  while ((reaped = waitpid(-1, NULL, WNOHANG)) > 0) {
    printf("Job %d removed from job list.\n", reaped);
  }
}
// NULL-terminated argument vector passed to execvp by each child in main:
// the program name "date" with no further arguments.
char * const kArguments[] = {"date", NULL};
// job-list-fixed: launches three children that each exec kArguments, printing
// a "Job ... added" line for every child. SIGCHLD is blocked from just before
// fork until after that line is printed, so the handler's "removed" message
// can never precede the matching "added" message.
//
// Returns 0 on success, 1 if fork fails.
int main(int argc, char *argv[]) {
  signal(SIGCHLD, reapProcesses);
  sigset_t set;
  sigemptyset(&set);
  sigaddset(&set, SIGCHLD);
  for (size_t i = 0; i < 3; i++) {
    sigprocmask(SIG_BLOCK, &set, NULL);
    pid_t pid = fork();
    if (pid == 0) {
      // The child would otherwise carry the SIGCHLD block into the new program.
      sigprocmask(SIG_UNBLOCK, &set, NULL);
      execvp(kArguments[0], kArguments);
      // execvp returns only on failure. Without an explicit exit the child
      // would fall through and run the parent's loop, forking children of its
      // own. _exit avoids flushing stdio buffers copied from the parent.
      perror(kArguments[0]);
      _exit(127);
    }
    if (pid == -1) {
      // No child was created, so there is no job to record.
      perror("fork");
      sigprocmask(SIG_UNBLOCK, &set, NULL);
      return 1;
    }
    sleep(1); // force parent off CPU
    printf("Job %d added to job list.\n", pid);
    sigprocmask(SIG_UNBLOCK, &set, NULL);
  }
  return 0;
}
execvp
had a child of its own, and that child ended.execvp
all of the original code is gone. Therefore, the signal handler cannot be called, because it doesn't exist any longer.execvp
boundary.
int kill(pid_t pid, int signum);
int raise(int signum); // equivalent to kill(getpid(), signum);
kill
and raise
kill
system call. And processes can even send themselves signals using raise
.kill
system call is analogous to the /bin/kill
shell command.
kill
implies SIGKILL
implies death.kill
and raise
. Just make sure you call it properly.pid
parameter is overloaded to provide more flexible signaling.
pid
is a positive number, the target is the process with that pid.pid
is a negative number less than -1, the targets are all processes within the process group abs(pid)
. We'll rely on this in Assignment 4.pid
can also be 0 or -1, but we don't need to worry about those. See the man page for kill
if you're curious.job-list-broken
and job-list-fixed
examples from the prior slide deck highlight a key issue that comes with the introduction of signals and signal handling.
job-list-broken
nor job-list-fixed
can anticipate when a child process will finish up. That means it has no control over when SIGCHLD
signals arrive.SIGCHLD
signals.
SIGCHLD
handlers to surface information about what process exited. We've seen a lot of that already.job-list-fixed
program does.simplesh
example from last week. The full program is right here.
simplesh
, we had no choice, because we hadn't learned about signals or signal handlers yet.
// simplesh.c
// simplesh read-eval loop: forks one child per command. A background command
// is reported by pid and the loop continues immediately; a foreground command
// makes the shell block in waitpid until that child has finished.
int main(int argc, char *argv[]) {
  while (true) {
    // code to initialize command, argv, and isbg omitted for brevity
    pid_t pid = fork();
    if (pid == 0) {
      execvp(argv[0], argv);
      // execvp returns only on failure. Exit here so the child does not fall
      // through and start acting as a second copy of the shell.
      printf("%s: Command not found\n", argv[0]);
      exit(0);
    }
    if (isbg) {
      printf("%d %s\n", pid, command);
    } else {
      waitpid(pid, NULL, 0);
    }
  }
  printf("\n");
  return 0;
}
// simplesh-with-redundancy.c
// SIGCHLD handler: keeps reaping exited children without blocking, and
// returns once waitpid yields 0 (nothing ready) or -1 (error / no children).
static void reapProcesses(int sig) {
  for (;;) {
    pid_t reaped = waitpid(-1, NULL, WNOHANG);
    if (reaped <= 0) return;
  }
}
// simplesh-with-redundancy: installs reapProcesses for SIGCHLD, then loops
// forever launching one child per command. Foreground commands are still
// waited on directly from here; background commands are just announced.
int main(int argc, char *argv[]) {
  signal(SIGCHLD, reapProcesses);
  for (;;) {
    // code to initialize command, argv, and isbg omitted for brevity
    pid_t pid = fork();
    if (pid == 0) {
      execvp(argv[0], argv);
      // Only reached when execvp could not start the program.
      printf("%s: Command not found\n", argv[0]);
      exit(0);
    }
    if (!isbg) {
      // Foreground: block until this particular child is gone.
      waitpid(pid, NULL, 0);
      continue;
    }
    // Background: report the job and go straight back to the prompt.
    printf("%d %s\n", pid, command);
  }
  printf("\n");
  return 0;
}
waitpid
to halt the shell until its foreground process has exited.
waitpid
call to block until that process has terminated.SIGCHLD
handler is invoked, and its waitpid
call is the one that culls the foreground process's resources.SIGCHLD
handler exits, normal execution resumes, and the original call to waitpid
returns -1 to state that there is no trace of a process with the supplied pid
.waitpid
from main
just to block until the foreground process vanishes.waitpid
—i.e. invoking a system call when you know it will fail—the waitpid
call is redundant and replicates functionality better managed in the SIGCHLD
handler.
waitpid
in one place: the SIGCHLD
handler.stsh
) where multiple processes are running in the foreground as part of a pipeline (e.g. more words.txt | tee copy.txt | sort | uniq
)waitpid
from only one place.
// simplesh-with-race-and-spin.c
// Pid of the current foreground process; initially 0, and 0 means there is no
// foreground process. Declared volatile because it is written from the SIGCHLD
// handler and polled from normal execution flow, so the compiler must reload
// it on every read instead of caching it across the wait loop.
static volatile pid_t fgpid = 0;
static void reapProcesses(int sig) {
while (true) {
pid_t pid = waitpid(-1, NULL, WNOHANG);
if (pid <= 0) break;
if (pid == fgpid) fgpid = 0; // clear foreground process
}
}
// Records pid as the foreground process and returns once fgpid no longer
// equals pid (reapProcesses resets it to 0 when it reaps that child).
// NOTE: if reapProcesses reaps pid before the assignment below runs, fgpid is
// never cleared afterwards and the loop does not terminate.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
// Busy-wait: the loop body is empty, so the shell keeps the CPU while waiting.
while (fgpid == pid) {;}
}
// simplesh-with-race-and-spin: all reaping happens in the SIGCHLD handler.
// Background commands are announced; foreground commands are handed to
// waitForForegroundProcess.
//
// SIGCHLD is deliberately NOT blocked around fork here, so the handler may run
// before waitForForegroundProcess records the pid. That is the race this
// variant exists to demonstrate, and it is left in place.
int main(int argc, char *argv[]) {
  signal(SIGCHLD, reapProcesses);
  while (true) {
    // code to initialize command, argv, and isbg omitted for brevity
    pid_t pid = fork();
    if (pid == 0) {
      execvp(argv[0], argv);
      // execvp returns only on failure. Exit so the child does not fall
      // through and continue running the shell loop itself.
      printf("%s: Command not found\n", argv[0]);
      exit(0);
    }
    if (isbg) {
      printf("%d %s\n", pid, command);
    } else {
      waitForForegroundProcess(pid);
    }
  }
  printf("\n");
  return 0;
}
fgpid
to hold the process id of the foreground process. When there's no foreground process, fgpid
is 0.
reapProcesses
, we have no choice but to make fgpid
a global.fgpid
is set to hold that process's pid. The shell then blocks by spinning in place until fgpid
is cleared by reapProcesses
.waitpid
code to reside in the handler and nowhere else.reapProcesses
is invoked on its behalf before
normal execution flow updates fgpid
. If that happens, the shell will spin forever and never advance up to the shell prompt. This is a race condition, and race conditions are no-nos.while (fgpid == pid) {;}
is also a no-no. This allows the shell to spin on the CPU even when it can't do any meaningful work.
simplesh
to yield the CPU and to only be considered for CPU time when there's a chance the foreground process has exited.SIGCHLD
before forking, and only lifting that block after the global fgpid
has been set.
// simplesh-with-spin.c
// code for reapProcesses omitted, because it's the same as before
// Records pid as the foreground process, then lifts the SIGCHLD block and
// waits until fgpid no longer equals pid. Setting fgpid before unblocking
// means the handler cannot run until the foreground pid has been recorded.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
unblockSIGCHLD(); // lift only after fgpid has been set
// Still a busy-wait: the empty loop body keeps the shell on the CPU.
while (fgpid == pid) {;}
}
// simplesh-with-spin: blocks SIGCHLD before every fork so the handler cannot
// run until the parent has finished its bookkeeping. The block is lifted
//   - in the child, right before execvp, so the new program does not inherit it;
//   - for a background job, after its pid/command line has been printed;
//   - for a foreground job, inside waitForForegroundProcess.
int main(int argc, char *argv[]) {
  signal(SIGCHLD, reapProcesses);
  while (true) {
    // code to initialize command, argv, and isbg omitted for brevity
    blockSIGCHLD();
    pid_t pid = fork();
    if (pid == 0) {
      unblockSIGCHLD();
      execvp(argv[0], argv);
      // execvp returns only on failure. Exit so the child does not fall
      // through and continue running the shell loop itself.
      printf("%s: Command not found\n", argv[0]);
      exit(0);
    }
    if (isbg) {
      printf("%d %s\n", pid, command);
      unblockSIGCHLD();
    } else {
      waitForForegroundProcess(pid);
    }
  }
}
// simplesh-utils.c
// includes a collection of helper functions
// Applies `how` (SIG_BLOCK or SIG_UNBLOCK, passed straight to sigprocmask) to
// a signal set that contains only SIGCHLD.
static void toggleSIGCHLDBlock(int how) {
  sigset_t mask;
  sigemptyset(&mask);
  sigaddset(&mask, SIGCHLD);
  sigprocmask(how, &mask, NULL);
}

// Adds SIGCHLD to the caller's set of blocked signals.
void blockSIGCHLD(void) {
  toggleSIGCHLDBlock(SIG_BLOCK);
}

// Removes SIGCHLD from the caller's set of blocked signals.
void unblockSIGCHLD(void) {
  toggleSIGCHLDBlock(SIG_UNBLOCK);
}
Note that we call unblockSIGCHLD
in the child, before the execvp
call. We do so, because the child will otherwise inherit the signal block.
blockSIGCHLD
before fork
, and we don't lift the block until fgpid
has been set to the pid
of the new foreground process.unblockSIGCHLD
in the child right before the execvp
call.
fork
and rely on SIGCHLD
signals and signal handling.unblockSIGCHLD
, the child process inherits the SIGCHLD
block across the execvp
boundary. That would compromise the child's ability to work properly.
unblockSIGCHLD
for background processes. We do so after bookkeeping information is printf
-ed to the screen, as we did for job-list-fixed
.while (fgpid == pid) {;}
while (fgpid == pid) {usleep(100000);}
, as we have in this version.usleep
call will push the shell off the CPU every time it realizes it shouldn't have gotten it in the first place. But we'd really prefer to keep the shell off the CPU until the OS has some information suggesting the foreground process is done.pause
function, which forces the process to sleep until some unblocked signal arrives. This sounds promising, because we know fgpid
can only be changed because a SIGCHLD
signal comes in and reapProcesses
is executed.
simplesh
whose waitForForegroundProcess
implementation relies on pause
is presented below on the left.SIGCHLD
may arrive after fgpid == pid
evaluates to true
but before the call to pause
it's committed to. That would be unfortunate, because it's possible simplesh
isn't managing any other processes, which means that no other signals, much less SIGCHLD
signals, will arrive to lift simplesh
out of its pause
call. That would leave simplesh
in a state of deadlock.
// simplesh-with-pause-1.c
// Records pid as the foreground process, lifts the SIGCHLD block, and sleeps
// in pause() until fgpid no longer equals pid.
// NOTE: SIGCHLD can be delivered after the loop test has evaluated to true but
// before pause() is entered; if no further signal arrives, pause() never
// returns.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
unblockSIGCHLD();
while (fgpid == pid) {
// Sleep until some unblocked signal arrives, then re-test the condition.
pause();
}
}
// simplesh-with-pause-2.c
// Records pid as the foreground process and waits for fgpid to change, keeping
// SIGCHLD blocked while the loop condition is tested and unblocking it only
// around the pause() call.
// NOTE: unblocking and pausing are two separate steps, so SIGCHLD can still be
// delivered between unblockSIGCHLD() and pause(); in that case pause() has
// nothing left to wake it.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
while (fgpid == pid) {
unblockSIGCHLD();
pause();
// Re-block so the next loop test runs with SIGCHLD blocked.
blockSIGCHLD();
}
// SIGCHLD is blocked on loop exit; lift the block before returning.
unblockSIGCHLD();
}
waitForForegroundProcess
on the prior slide is that each lifts the block on SIGCHLD
before going to sleep via pause
.SIGCHLD
you're relying on to notify the parent that the child has finished could very well arrive in the narrow space between lift and sleep. That would inspire deadlock.pause
called sigsuspend
, which asks that the OS change the blocked set to the one provided, but only after the caller has been forced off the CPU. When some unblocked signal arrives, the process gets the CPU, the signal is handled, the original blocked set is restored, and sigsuspend
returns.farm
and your Assignment 4 stsh
.
// simplesh-all-better.c
// Records pid as the foreground process and sleeps until it has been reaped.
// Each sigsuspend call swaps in an empty blocked set for the duration of the
// sleep, so there is no separate "unblock, then sleep" step. The SIGCHLD block
// is lifted for good only after the loop has finished.
static void waitForForegroundProcess(pid_t pid) {
  fgpid = pid;

  sigset_t none;
  sigemptyset(&none);

  for (;;) {
    if (fgpid != pid) break;
    sigsuspend(&none);
  }

  unblockSIGCHLD();
}
printf
calls succeed, and that all calls to printf
are atomic. Assume nothing about scheduling or time slice durations.
static void bat(int unused) {
printf("pirate\n");
exit(0);
}
// Installs bat as the SIGUSR1 handler, forks, and has the parent send SIGUSR1
// to the child. The child prints "ghost" and the parent prints "ninja".
// NOTE(review): fork's return value is not checked for -1 before it is passed
// to kill.
int main(int argc, char *argv[]) {
// The handler is registered before fork() is called.
signal(SIGUSR1, bat);
pid_t pid = fork();
if (pid == 0) {
// Child branch: print and return from main.
printf("ghost\n");
return 0;
}
// Parent branch: pid is the value fork returned, so the signal targets the child.
kill(pid, SIGUSR1);
printf("ninja\n"); return 0;
}
printf
calls succeed, and that all calls to printf
are atomic. Assume nothing about scheduling or time slice durations.
static void bat(int unused) {
printf("pirate\n");
exit(0);
}
// Installs bat as the SIGUSR1 handler, forks, and has the parent send SIGUSR1
// to the child. The child prints "ghost" and the parent prints "ninja".
// NOTE(review): fork's return value is not checked for -1 before it is passed
// to kill.
int main(int argc, char *argv[]) {
// The handler is registered before fork() is called.
signal(SIGUSR1, bat);
pid_t pid = fork();
if (pid == 0) {
// Child branch: print and return from main.
printf("ghost\n");
return 0;
}
// Parent branch: pid is the value fork returned, so the signal targets the child.
kill(pid, SIGUSR1);
printf("ninja\n"); return 0;
}
printf
calls succeed, and that all calls to printf
are atomic. Assume nothing about scheduling or time slice durations.
int main(int argc, char *argv[]) {
pid_t pid;
int counter = 0;
while (counter < 2) {
pid = fork();
if (pid > 0) break;
counter++;
printf("%d", counter);
}
if (counter > 0) printf("%d", counter);
if (pid > 0) {
waitpid(pid, NULL, 0);
counter += 5;
printf("%d", counter);
}
return 0;
}
printf
calls succeed, and that all calls to printf
are atomic. Assume nothing about scheduling or time slice durations.
int main(int argc, char *argv[]) {
pid_t pid;
int counter = 0;
while (counter < 2) {
pid = fork();
if (pid > 0) break;
counter++;
printf("%d", counter);
}
if (counter > 0) printf("%d", counter);
if (pid > 0) {
waitpid(pid, NULL, 0);
counter += 5;
printf("%d", counter);
}
return 0;
}
>
of the counter
> 0
test is changed to a >=
, then counter
values of zeroes would be included in each possible output. How many different outputs are now possible? (No need to list the outputs—just present the number.)printf
calls succeed, and that all calls to printf
are atomic. Assume nothing about scheduling or time slice durations.
int main(int argc, char *argv[]) {
pid_t pid;
int counter = 0;
while (counter < 2) {
pid = fork();
if (pid > 0) break;
counter++;
printf("%d", counter);
}
if (counter > 0) printf("%d", counter);
if (pid > 0) {
waitpid(pid, NULL, 0);
counter += 5;
printf("%d", counter);
}
return 0;
}
>
of the counter
> 0
test is changed to a >=
, then counter
values of zeroes would be included in each possible output. How many different outputs are now possible? (No need to list the outputs—just present the number.)