Spring 2021
Instructors Roz Cyrus and Jerry Cain
PDF
/**
 * Copies the file named by argv[1] into a brand new file named by argv[2].
 *
 * The destination is opened with O_CREAT | O_EXCL, so the copy is refused
 * when the destination already exists.
 *
 * Returns 0 when every byte was copied, and 1 when an argument is missing,
 * either file cannot be opened, or a read or write fails.
 */
int main(int argc, char *argv[]) {
  if (argc < 3) return 1;

  int fdin = open(argv[1], O_RDONLY);
  if (fdin == -1) return 1;
  int fdout = open(argv[2], O_WRONLY | O_CREAT | O_EXCL, 0644);
  if (fdout == -1) {
    close(fdin);
    return 1;
  }

  int status = 0;
  char buffer[1024];
  while (status == 0) {
    ssize_t bytesRead = read(fdin, buffer, sizeof(buffer));
    if (bytesRead == 0) break;  // end of file
    if (bytesRead < 0) {        // read failed; don't treat -1 as a byte count
      status = 1;
      break;
    }
    // write may accept fewer bytes than requested, so keep going until the
    // whole chunk has been flushed to the destination.
    size_t bytesWritten = 0;
    while (bytesWritten < (size_t) bytesRead) {
      ssize_t count = write(fdout, buffer + bytesWritten, (size_t) bytesRead - bytesWritten);
      if (count < 0) {          // write failed; never add -1 to the running total
        status = 1;
        break;
      }
      bytesWritten += (size_t) count;
    }
  }

  close(fdin);
  close(fdout);
  return status;
}
The tee program that ships with Linux copies everything from standard input to standard output, making zero or more extra copies in the named files supplied as user program arguments.
one.txt, two.txt, and three.txt. If the file vowels.txt contains the five vowels and the newline character, and tee is invoked as follows, one.txt would be rewritten to contain only the English vowels.
$ cat vowels.txt | ./tee one.txt
aeiou
$ cat one.txt
aeiou
t executable, with error checking, is right here.
$ cat alphabet.txt | tee one.txt two.txt three.txt
abcdefghijklmnopqrstuvwxyz
$ cat one.txt
abcdefghijklmnopqrstuvwxyz
$ cat two.txt
abcdefghijklmnopqrstuvwxyz
$ diff one.txt two.txt
$ diff one.txt three.txt
$
/**
 * A small tee: copies standard input to standard output and to every file
 * named on the command line, truncating or creating each file as needed.
 *
 * fds[0] is standard output and fds[i] is the descriptor for argv[i], so the
 * array needs exactly argc slots.
 *
 * Returns 0 on success, and 1 when a named file cannot be opened or reading
 * standard input fails. Output still goes to every descriptor that did open.
 */
int main(int argc, char *argv[]) {
  int status = 0;
  int fds[argc];
  fds[0] = STDOUT_FILENO;
  for (int i = 1; i < argc; i++) {
    fds[i] = open(argv[i], O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fds[i] == -1) status = 1;  // remember the failure, keep serving the rest
  }

  char buffer[2048];
  while (true) {
    ssize_t numRead = read(STDIN_FILENO, buffer, sizeof(buffer));
    if (numRead == 0) break;  // end of input
    if (numRead < 0) {        // read failed; -1 must not be used as a length
      status = 1;
      break;
    }
    for (int i = 0; i < argc; i++) {
      if (fds[i] != -1) writeall(fds[i], buffer, (size_t) numRead);
    }
  }

  // fds[0] is standard output, which we did not open and so do not close.
  for (int i = 1; i < argc; i++) {
    if (fds[i] != -1) close(fds[i]);
  }
  return status;
}
/**
 * Writes all len bytes of buffer to the descriptor fd, reissuing write as
 * many times as needed because a single call may accept only part of the
 * data.
 *
 * If write reports an error, the function stops and returns, leaving the
 * remaining bytes unwritten. The error is not reported to the caller.
 */
static void writeall(int fd, const char buffer[], size_t len) {
  size_t numWritten = 0;
  while (numWritten < len) {
    ssize_t count = write(fd, buffer + numWritten, len - numWritten);
    // A -1 must never be folded into the unsigned running total: that would
    // wrap the counter or step it backwards and re-send bytes already written.
    if (count < 0) return;
    numWritten += (size_t) count;
  }
}
argc incidentally equals the number of descriptors we need to write to. That's why we declare an int array (or rather, a descriptor array) of length argc.
STDIN_FILENO is a built-in constant for the number 0, which is the descriptor normally linked to standard input. STDOUT_FILENO is a constant for the number 1, which is the default descriptor bound to standard output.
stat
and lstat are system calls that populate a struct stat with information about some named file. The prototypes of the two are:
int stat(const char *pathname, struct stat *st);
int lstat(const char *pathname, struct stat *st);
stat and lstat operate exactly the same way, except when the named file is a link, stat returns information about the file the link ultimately references, and lstat returns information about the link itself.
struct stat {
dev_t st_dev; // id of device containing file
ino_t st_ino; // id of data structure on device
mode_t st_mode; // mode of file
// many other fields (file size, create time, etc.)
};
The st_mode field—which is the only one we'll really pay much attention to—isn't so much a single value as it is a collection of bits encoding multiple pieces of information about file type and permissions. A collection of bit masks and macros can be used to extract information from this st_mode field.
poohbear@myth53$ find /usr/include -name stdio.h -print
/usr/include/x86_64-linux-gnu/bits/stdio.h
/usr/include/stdio.h
/usr/include/bsd/stdio.h
/usr/include/c++/7/tr1/stdio.h
/usr/include/c++/10/tr1/stdio.h
/usr/include/c++/8/tr1/stdio.h
/usr/include/c++/9/tr1/stdio.h
poohbear@myth53$
poohbear@myth53$ ./search /usr/include stdio.h
/usr/include/x86_64-linux-gnu/bits/stdio.h
/usr/include/stdio.h
/usr/include/bsd/stdio.h
/usr/include/c++/7/tr1/stdio.h
/usr/include/c++/10/tr1/stdio.h
/usr/include/c++/8/tr1/stdio.h
/usr/include/c++/9/tr1/stdio.h
poohbear@myth53$
/**
 * search: prints the path of every regular file named argv[2] that lives
 * anywhere beneath the directory argv[1].
 *
 * Returns 1 when an argument is missing. Returns 0 otherwise, including when
 * argv[1] cannot be examined, is not a directory, or is too long for the
 * path buffer, in which case nothing is printed.
 */
int main(int argc, char *argv[]) {
  if (argc < 3) return 1;

  const char *directory = argv[1];
  struct stat st;
  // st is only meaningful when stat succeeds, so check before reading st_mode.
  if (stat(directory, &st) != 0) return 0;
  if (!S_ISDIR(st.st_mode)) return 0;

  size_t length = strlen(directory);
  // Enforce the bound explicitly rather than relying on stat to reject
  // over-long names: path holds at most kMaxPath characters plus the '\0'.
  if (length > kMaxPath) return 0;

  const char *pattern = argv[2];
  char path[kMaxPath + 1];
  strcpy(path, directory);
  listMatches(path, length, pattern);
  return 0;
}
listMatches makes use of three library functions to iterate over all files within a directory. Let's play with those before tackling listMatches.
DIR *opendir(const char *dirname);
struct dirent *readdir(DIR *dirp);
int closedir(DIR *dirp);
/**
 * Prints the name of every entry in the directory called name, one per line,
 * each prefixed with "+ ". The entries "." and ".." are included.
 *
 * Prints nothing when name cannot be examined with stat, is not a directory,
 * or cannot be opened.
 */
static void listEntries(const char *name) {
  struct stat st;
  // st is only filled in when stat succeeds; don't inspect it otherwise.
  if (stat(name, &st) != 0) return;
  if (!S_ISDIR(st.st_mode)) return;

  DIR *dir = opendir(name);
  if (dir == NULL) return;  // it's a directory, but it could not be opened

  while (true) {
    struct dirent *de = readdir(dir);
    if (de == NULL) break;  // every entry has been surfaced
    printf("+ %s\n", de->d_name);
  }
  closedir(dir);
}
If opendir gets anything other than an accessible directory, it returns NULL.
Once de has surfaced all entries, readdir returns NULL.
struct dirent is only guaranteed to contain a d_name field, which stores the entry's name as a C string. . and .. are included in the sequence of named entries.
listMatches
. static void listMatches(char path[], size_t length, const char *name) {
DIR *dir = opendir(path);
if (dir == NULL) return; // it's a directory, but permission to open was denied
strcpy(path + length++, "/");
while (true) {
struct dirent *de = readdir(dir);
if (de == NULL) break; // we've iterated over every directory entry, so stop
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue;
if (length + strlen(de->d_name) > kMaxPath) continue;
strcpy(path + length, de->d_name);
struct stat st;
lstat(path, &st);
if (S_ISREG(st.st_mode)) {
if (strcmp(de->d_name, name) == 0) printf("%s\n", path);
} else if (S_ISDIR(st.st_mode)) {
listMatches(path, length + strlen(de->d_name), name);
}
}
closedir(dir);
}
We skip over . and .., else we're threatened with infinite recursion.
We use lstat instead of stat so we know whether an entry is a link. We ignore all links because, again, we want to avoid infinite recursion.
If the stat record identifies something as a regular file, we print the entire path if and only if the entry name matches the name of interest.
If the stat record identifies something as a directory, we recursively dip into it to see if any descendants match name.