Principles of Computer Systems
Spring 2019
Stanford University
Computer Science Department
Lecturer: Chris Gregg
// job-list-fixed.c
// SIGCHLD handler for job-list-fixed.
// Reaps every child that has already terminated and announces each one.
// waitpid is called with WNOHANG, so the handler never blocks; the loop ends
// as soon as waitpid returns 0 or -1.
static void reapProcesses(int sig) {
  pid_t reaped;
  while ((reaped = waitpid(-1, NULL, WNOHANG)) > 0) {
    printf("Job %d removed from job list.\n", reaped);
  }
}
// Argument vector that main hands to execvp: the program name "date" followed by
// the NULL terminator, so each child runs date with no extra arguments.
char * const kArguments[] = {"date", NULL};
// job-list-fixed: launches three children (each running kArguments) and prints an
// "added" line for every one of them.
//
// SIGCHLD is blocked from just before fork until after the "added" line has been
// printed, so reapProcesses cannot print the matching "removed" line first.
// Returns 0 on success, 1 if fork fails.
int main(int argc, char *argv[]) {
  signal(SIGCHLD, reapProcesses);
  sigset_t set;
  sigemptyset(&set);
  sigaddset(&set, SIGCHLD);
  for (size_t i = 0; i < 3; i++) {
    sigprocmask(SIG_BLOCK, &set, NULL);
    pid_t pid = fork();
    if (pid == -1) {
      // no child was created, so there is no job to add
      perror("fork");
      sigprocmask(SIG_UNBLOCK, &set, NULL);
      return 1;
    }
    if (pid == 0) {
      // the child inherits the block; lift it before handing control to the new program
      sigprocmask(SIG_UNBLOCK, &set, NULL);
      execvp(kArguments[0], kArguments);
      // execvp returns only on failure. Without this exit the child would fall
      // through into the parent's loop and start forking children of its own.
      perror(kArguments[0]);
      _exit(1);
    }
    sleep(1); // force parent off CPU
    printf("Job %d added to job list.\n", pid);
    sigprocmask(SIG_UNBLOCK, &set, NULL);
  }
  return 0;
}
execvp had a child of its own, and that child ended.
After execvp, all of the original code is gone. Therefore, the signal handler cannot be called, because it doesn't exist any longer.
execvp boundary.
int kill(pid_t pid, int signum);
int raise(int signum); // equivalent to kill(getpid(), signum);
kill and raise
kill system call. And processes can even send themselves signals using raise.
The kill system call is analogous to the /bin/kill shell command.
kill implies SIGKILL implies death.
kill and raise. Just make sure you call it properly.
The pid parameter is overloaded to provide more flexible signaling.
When pid is a positive number, the target is the process with that pid.
When pid is a negative number less than -1, the targets are all processes within the process group abs(pid). We'll rely on this in Assignment 4.
pid can also be 0 or -1, but we don't need to worry about those. See the man page for kill if you're curious.
The job-list-broken and job-list-fixed examples from the prior slide deck highlight a key issue that comes with the introduction of signals and signal handling.
Neither job-list-broken nor job-list-fixed can anticipate when a child process will finish up. That means it has no control over when SIGCHLD signals arrive.
SIGCHLD signals.
SIGCHLD handlers to surface information about what process exited. We've seen a lot of that already.
job-list-fixed program does.
simplesh example from last week. The full program is right here.
simplesh, we had no choice, because we hadn't learned about signals or signal handlers yet.
// simplesh.c
// simplesh: reads one command per iteration, runs it in a child process, and
// either reports it as a background job or waits for it to finish.
int main(int argc, char *argv[]) {
  while (true) {
    // code to initialize command, argv, and isbg omitted for brevity
    pid_t pid = fork();
    if (pid == 0) {
      execvp(argv[0], argv);
      // execvp returns only on failure. Exit here so the child does not fall
      // through and carry on as a second copy of the shell.
      printf("%s: Command not found\n", argv[0]);
      exit(0);
    }
    if (isbg) {
      // background job: print its pid and command, then prompt again right away
      printf("%d %s\n", pid, command);
    } else {
      // foreground job: block until this particular child has exited
      waitpid(pid, NULL, 0);
    }
  }
  printf("\n");
  return 0;
}
// simplesh-with-redundancy.c
// SIGCHLD handler: silently reaps every child that has already exited.
// Each waitpid call is nonblocking (WNOHANG); the handler returns as soon as
// waitpid reports 0 or -1.
static void reapProcesses(int sig) {
  for (;;) {
    pid_t reaped = waitpid(-1, NULL, WNOHANG);
    if (reaped <= 0) return;
  }
}
// simplesh-with-redundancy: installs reapProcesses as the SIGCHLD handler, then
// runs one child per command, waiting in place for foreground commands.
int main(int argc, char *argv[]) {
// every child that exits is reaped by the handler
signal(SIGCHLD, reapProcesses);
while (true) {
// code to initialize command, argv, and isbg omitted for brevity
pid_t pid = fork();
if (pid == 0) {
execvp(argv[0], argv);
// reached only when execvp returned, i.e. the command could not be run
printf("%s: Command not found\n", argv[0]);
exit(0);
}
if (isbg) {
// background job: print its pid and command and do not wait
printf("%d %s\n", pid, command);
} else {
// foreground job: block until the child is gone. The SIGCHLD handler may reap
// the child first, in which case this call returns -1.
waitpid(pid, NULL, 0);
}
}
printf("\n");
return 0;
}waitpid to halt the shell until its foreground process has exited.
waitpid call to block until that process has terminated.
The SIGCHLD handler is invoked, and its waitpid call is the one that culls the foreground process's resources.
When the SIGCHLD handler exits, normal execution resumes, and the original call to waitpid returns -1 to state that there is no trace of a process with the supplied pid.
waitpid from main just to block until the foreground process vanishes.
waitpid—i.e. invoking a system call when you know it will fail—the waitpid call is redundant and replicates functionality better managed in the SIGCHLD handler.
waitpid in one place: the SIGCHLD handler.
stsh) where multiple processes are running in the foreground as part of a pipeline (e.g. more words.txt | tee copy.txt | sort | uniq)
waitpid from only one place.
// simplesh-with-race-and-spin.c
// global, initially 0, and 0 means no foreground process.
// volatile: the SIGCHLD handler writes it while the main flow polls it in a loop,
// so every read must really go to memory instead of being hoisted out of the loop.
static volatile pid_t fgpid = 0;
static void reapProcesses(int sig) {
while (true) {
pid_t pid = waitpid(-1, NULL, WNOHANG);
if (pid <= 0) break;
if (pid == fgpid) fgpid = 0; // clear foreground process
}
}
// Marks pid as the foreground process and spins until fgpid stops matching it
// (reapProcesses sets fgpid back to 0 when it reaps that pid).
// Known flaws of this version: if the child is reaped before fgpid is assigned,
// fgpid is never cleared and the loop spins forever (a race condition); and the
// empty loop body keeps the shell on the CPU while it waits.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
while (fgpid == pid) {;}
}
// simplesh-with-race-and-spin: all reaping happens in the SIGCHLD handler; a
// foreground command is waited on through waitForForegroundProcess.
// Note: SIGCHLD is not blocked around fork in this version, so reapProcesses can
// run before waitForForegroundProcess has recorded the child's pid.
int main(int argc, char *argv[]) {
  signal(SIGCHLD, reapProcesses);
  while (true) {
    // code to initialize command, argv, and isbg omitted for brevity
    pid_t pid = fork();
    if (pid == 0) {
      execvp(argv[0], argv);
      // execvp returns only on failure. Exit here so the child does not fall
      // through and carry on as a second copy of the shell.
      printf("%s: Command not found\n", argv[0]);
      exit(0);
    }
    if (isbg) {
      // background job: print its pid and command, then prompt again right away
      printf("%d %s\n", pid, command);
    } else {
      waitForForegroundProcess(pid);
    }
  }
  printf("\n");
  return 0;
}
fgpid to hold the process id of the foreground process. When there's no foreground process, fgpid is 0.
reapProcesses, we have no choice but to make fgpid a global.
fgpid is set to hold that process's pid. The shell then blocks by spinning in place until fgpid is cleared by reapProcesses.
waitpid code to reside in the handler and nowhere else.
reapProcesses is invoked on its behalf before normal execution flow updates fgpid. If that happens, the shell will spin forever and never advance up to the shell prompt. This is a race condition, and race conditions are no-nos.
while (fgpid == pid) {;} is also a no-no. This allows the shell to spin on the CPU even when it can't do any meaningful work.
simplesh to yield the CPU and to only be considered for CPU time when there's a chance the foreground process has exited.
SIGCHLD before forking, and only lifting that block after the global fgpid has been set.
// simplesh-with-spin.c
// code for reapProcesses omitted, because it's the same as before
// Marks pid as the foreground process, lifts the SIGCHLD block, and spins until
// fgpid no longer matches pid. main blocks SIGCHLD before fork, so the handler
// cannot run before fgpid is assigned here.
// The wait is still an empty busy loop that holds the CPU.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
unblockSIGCHLD(); // lift only after fgpid has been set
while (fgpid == pid) {;}
}
// simplesh-with-spin: like the racy version, except that SIGCHLD is blocked
// before fork and only unblocked once the bookkeeping for the new child is done.
int main(int argc, char *argv[]) {
  signal(SIGCHLD, reapProcesses);
  while (true) {
    // code to initialize command, argv, and isbg omitted for brevity
    blockSIGCHLD();
    pid_t pid = fork();
    if (pid == 0) {
      // the child inherits the block; lift it before handing control to the new program
      unblockSIGCHLD();
      execvp(argv[0], argv);
      // execvp returns only on failure. Exit here so the child does not fall
      // through and carry on as a second copy of the shell.
      printf("%s: Command not found\n", argv[0]);
      exit(0);
    }
    if (isbg) {
      printf("%d %s\n", pid, command);
      // background job: unblock only after the job line has been printed
      unblockSIGCHLD();
    } else {
      // foreground job: waitForForegroundProcess lifts the block itself
      waitForForegroundProcess(pid);
    }
  }
}
// simplesh-utils.c
// includes a collection of helper functions
// Applies `how` (e.g. SIG_BLOCK or SIG_UNBLOCK) to a signal set containing only
// SIGCHLD, via sigprocmask. The previous mask is not reported back.
static void toggleSIGCHLDBlock(int how) {
  sigset_t onlySIGCHLD;
  sigemptyset(&onlySIGCHLD);
  sigaddset(&onlySIGCHLD, SIGCHLD);
  sigprocmask(how, &onlySIGCHLD, NULL);
}
// Adds SIGCHLD to the set of blocked signals.
// Declared with (void) so the definition also serves as a prototype and calls
// that pass arguments are rejected by the compiler.
void blockSIGCHLD(void) {
  toggleSIGCHLDBlock(SIG_BLOCK);
}
// Removes SIGCHLD from the set of blocked signals.
// Declared with (void) so the definition also serves as a prototype and calls
// that pass arguments are rejected by the compiler.
void unblockSIGCHLD(void) {
  toggleSIGCHLDBlock(SIG_UNBLOCK);
}
Note that we call unblockSIGCHLD in the child, before the execvp call. We do so, because the child will otherwise inherit the signal block.
blockSIGCHLD before fork, and we don't lift the block until fgpid has been set to the pid of the new foreground process.
unblockSIGCHLD in the child right before the execvp call.
fork and rely on SIGCHLD signals and signal handling.
unblockSIGCHLD, the child process inherits the SIGCHLD block across the execvp boundary. That would compromise the child's ability to work properly.
unblockSIGCHLD for background processes. We do so after bookkeeping information is printf-ed to the screen, as we did for job-list-fixed.
while (fgpid == pid) {;} while (fgpid == pid) {usleep(100000);}, as we have in this version.
usleep call will push the shell off the CPU every time it realizes it shouldn't have gotten it in the first place. But we'd really prefer to keep the shell off the CPU until the OS has some information suggesting the foreground process is done.
pause function, which forces the process to sleep until some unblocked signal arrives. This sounds promising, because we know fgpid can only be changed because a SIGCHLD signal comes in and reapProcesses is executed.
simplesh whose waitForForegroundProcess implementation relies on pause is presented below on the left.
SIGCHLD may arrive after fgpid == pid evaluates to true but before the call to pause it's committed to. That would be unfortunate, because it's possible simplesh isn't managing any other processes, which means that no other signals, much less SIGCHLD signals, will arrive to lift simplesh out of its pause call. That would leave simplesh in a state of deadlock.
// simplesh-with-pause-1.c
// Variant that sleeps in pause() instead of spinning.
// Flaw: SIGCHLD is unblocked before the loop, so it can be delivered after the
// loop test sees fgpid == pid but before pause() is entered; if no other signal
// ever arrives, pause() never returns.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
unblockSIGCHLD();
while (fgpid == pid) {
// sleep until some signal has been handled, then re-check fgpid
pause();
}
}// simplesh-with-pause-2.c
// Variant that unblocks SIGCHLD only around pause() and re-blocks it before
// testing fgpid again.
// Flaw: the signal can still arrive between unblockSIGCHLD() and pause(), which
// leaves pause() asleep with nothing left to wake it.
static void waitForForegroundProcess(pid_t pid) {
fgpid = pid;
while (fgpid == pid) {
unblockSIGCHLD();
pause();
// re-block so the handler cannot run while fgpid is being tested
blockSIGCHLD();
}
// lift the block before returning to the caller
unblockSIGCHLD();
}
waitForForegroundProcess on the prior slide is that each lifts the block on SIGCHLD before going to sleep via pause.
SIGCHLD you're relying on to notify the parent that the child has finished could very well arrive in the narrow space between lift and sleep. That would inspire deadlock.
pause called sigsuspend, which asks that the OS change the blocked set to the one provided, but only after the caller has been forced off the CPU. When some unblocked signal arrives, the process gets the CPU, the signal is handled, the original blocked set is restored, and sigsuspend returns.
farm and your Assignment 4 stsh.
// simplesh-all-better.c
// Marks pid as the foreground process and sleeps until reapProcesses has cleared
// fgpid. sigsuspend installs the empty mask (nothing blocked) and puts the
// process to sleep as a single step, and it restores the previous mask when it
// returns, so SIGCHLD cannot slip in between the fgpid test and the sleep.
// The SIGCHLD block is lifted once the foreground process is gone.
static void waitForForegroundProcess(pid_t pid) {
  sigset_t nothingBlocked;
  sigemptyset(&nothingBlocked);
  fgpid = pid;
  while (fgpid == pid) sigsuspend(&nothingBlocked);
  unblockSIGCHLD();
}
printf calls succeed, and that all calls to printf are atomic. Assume nothing about scheduling or time slice durations.static void bat(int unused) {
// Signal handler (main installs it for SIGUSR1): prints "pirate" and then ends
// the process with status 0. The signal number argument is ignored.
printf("pirate\n");
// exit does not return, so control never goes back to the interrupted code
exit(0);
}
// Quiz program: installs bat for SIGUSR1, forks, and has the parent signal the child.
int main(int argc, char *argv[]) {
// installed before fork, so the child has the same SIGUSR1 handler
signal(SIGUSR1, bat);
pid_t pid = fork();
if (pid == 0) {
// child: prints "ghost" and returns; bat runs instead of the default action
// if SIGUSR1 is delivered while the child is still alive
printf("ghost\n");
return 0;
}
// parent: send SIGUSR1 to the child, then print "ninja" (the child is not waited on)
kill(pid, SIGUSR1);
printf("ninja\n"); return 0;
}
printf calls succeed, and that all calls to printf are atomic. Assume nothing about scheduling or time slice durations.static void bat(int unused) {
// Signal handler (main installs it for SIGUSR1): prints "pirate" and then ends
// the process with status 0. The signal number argument is ignored.
printf("pirate\n");
// exit does not return, so control never goes back to the interrupted code
exit(0);
}
// Quiz program: installs bat for SIGUSR1, forks, and has the parent signal the child.
int main(int argc, char *argv[]) {
// installed before fork, so the child has the same SIGUSR1 handler
signal(SIGUSR1, bat);
pid_t pid = fork();
if (pid == 0) {
// child: prints "ghost" and returns; bat runs instead of the default action
// if SIGUSR1 is delivered while the child is still alive
printf("ghost\n");
return 0;
}
// parent: send SIGUSR1 to the child, then print "ninja" (the child is not waited on)
kill(pid, SIGUSR1);
printf("ninja\n"); return 0;
}
printf calls succeed, and that all calls to printf are atomic. Assume nothing about scheduling or time slice durations.int main(int argc, char *argv[]) {
// Quiz program: each process forks at most once; the process that forked stops
// looping and later waits for its child, so the processes form a chain.
pid_t pid;
int counter = 0;
while (counter < 2) {
pid = fork();
// fork returned a child's pid: this process is a parent and leaves the loop
if (pid > 0) break;
// otherwise (0 in the child, or -1 on failure) bump the counter and print it
counter++;
printf("%d", counter);
}
// any process whose counter was incremented prints it once more
if (counter > 0) printf("%d", counter);
if (pid > 0) {
// a parent prints counter + 5 only after its child has finished
waitpid(pid, NULL, 0);
counter += 5;
printf("%d", counter);
}
return 0;
}printf calls succeed, and that all calls to printf are atomic. Assume nothing about scheduling or time slice durations.int main(int argc, char *argv[]) {
// Quiz program (repeated): each process forks at most once; the process that
// forked stops looping and later waits for its child.
pid_t pid;
int counter = 0;
while (counter < 2) {
pid = fork();
// fork returned a child's pid: this process is a parent and leaves the loop
if (pid > 0) break;
// otherwise (0 in the child, or -1 on failure) bump the counter and print it
counter++;
printf("%d", counter);
}
// any process whose counter was incremented prints it once more
if (counter > 0) printf("%d", counter);
if (pid > 0) {
// a parent prints counter + 5 only after its child has finished
waitpid(pid, NULL, 0);
counter += 5;
printf("%d", counter);
}
return 0;
}> of the counter > 0 test is changed to a >=, then counter values of zeroes would be included in each possible output. How many different outputs are now possible? (No need to list the outputs—just present the number.)printf calls succeed, and that all calls to printf are atomic. Assume nothing about scheduling or time slice durations.int main(int argc, char *argv[]) {
// Quiz program (repeated): each process forks at most once; the process that
// forked stops looping and later waits for its child.
pid_t pid;
int counter = 0;
while (counter < 2) {
pid = fork();
// fork returned a child's pid: this process is a parent and leaves the loop
if (pid > 0) break;
// otherwise (0 in the child, or -1 on failure) bump the counter and print it
counter++;
printf("%d", counter);
}
// any process whose counter was incremented prints it once more
if (counter > 0) printf("%d", counter);
if (pid > 0) {
// a parent prints counter + 5 only after its child has finished
waitpid(pid, NULL, 0);
counter += 5;
printf("%d", counter);
}
return 0;
}> of the counter > 0 test is changed to a >=, then counter values of zeroes would be included in each possible output. How many different outputs are now possible? (No need to list the outputs—just present the number.)