Version: 3
System programming
Lecture 2:
Process. Work modes, memory, resources. Interrupts. Communication with the kernel. System calls.
Processes
Hardware
Time
Filesystem
IPC
Network
Users
Data structures
Virtualization
.text
.data
.stack
.heap
.stack
.stack
File descriptors
Signal queue
IPC
Memory
int
main(int argc, char **argv)
{
/* set locale to make iswXXXX function work */
if (setlocale(LC_CTYPE, "C.UTF-8") == NULL &&
setlocale(LC_CTYPE, "en_US.UTF-8") == NULL &&
setlocale(LC_CTYPE, "en_US.utf8") == NULL)
fprintf(stderr, "Failed to set locale to C.UTF-8\n");
fpconv_check();
/* Enter interactive mode after executing 'script' */
bool interactive = false;
/* Lua interpreter options, e.g. -e and -l */
int optc = 0;
char **optv = NULL;
auto guard = make_scoped_guard([=]{ if (optc) free(optv); });
static struct option longopts[] = {
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'v'},
{NULL, 0, 0, 0},
};
static const char *opts = "+hVvie:l:";
int ch;
while ((ch = getopt_long(argc, argv, opts, longopts, NULL)) != -1) {
switch (ch) {
case 'V':
case 'v':
print_version();
return 0;
Process is a virtualization mechanism
P1
P2
P1
P2
of processor
of memory
/**
* 30.09.2018
* #include/linux/sched.h
* 618 lines.
*/
/* Per-process descriptor; only a handful of its fields are shown here. */
struct task_struct {
struct thread_info thread_info;
/* Scheduling state; init_task sets it to 0 and labels that "Runnable". */
volatile long state;
/* init_task points this at init_stack -- presumably the task's kernel stack; confirm. */
void *stack;
/* NOTE(review): presumably a reference counter -- confirm. */
atomic_t usage;
/* NOTE(review): presumably the CPU the task runs on -- confirm. */
unsigned int cpu;
/* Scheduling priority. */
int prio;
/* Memory descriptor of the process (struct mm_struct). */
struct mm_struct *mm;
/* Termination bookkeeping: state, exit code and signal. */
int exit_state;
int exit_code;
int exit_signal;
/* Process identifier. */
pid_t pid;
/* Links that connect tasks into the process tree. */
struct task_struct *parent;
struct list_head children;
/* NOTE(review): clock and units of start_time are not visible here. */
u64 start_time;
/* NOTE(review): presumably the user/group identity of the task -- verify. */
const struct cred *cred;
/* Open file descriptors of the process. */
struct files_struct *files;
struct thread_struct thread;
};
Parent process
Child process
Parent code section
Parent code section
New program's code section
fork();
int execl/le/lp/v/vp/vP(const char *path, const char *arg0, ...);
pid = wait(&status);
exit();
vladislav$ ps aux
USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND
root 1 0,0 0,1 4373080 15616 ?? Ss 31aug18 90:58.65 /sbin/launchd
Mac
vladislav$ ps aux
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 1 0.0 0.2 160256 9636 ? Ss oct05 0:06 /sbin/init
Linux
/*
 * Statically initialized task descriptor. It names itself as both parent
 * and real_parent, so nothing precedes it in the process tree.
 */
struct task_struct init_task = {
.state = 0, /* Runnable. */
.stack = init_stack,
.usage = ATOMIC_INIT(2),
.flags = PF_KTHREAD, /* presumably marks a kernel thread -- confirm */
.prio = MAX_PRIO - 20,
.policy = SCHED_NORMAL,
.cpus_allowed = CPU_MASK_ALL,
.mm = NULL, /* no memory descriptor attached */
.real_parent = &init_task, /* self-referential */
.parent = &init_task, /* self-referential */
.files = &init_files,
.signal = &init_signals,
};
/* NOTE(review): presumably exposes the symbol to other kernel code/modules -- confirm. */
EXPORT_SYMBOL(init_task);
/* The two task_struct members that link tasks into the process tree. */
struct task_struct {
/* Parent task. */
struct task_struct *parent;
/* List head for this task's children. */
struct list_head children;
};
Process tree
/*
 * fork() demo: the parent allocates zeroed memory, forks and waits for the
 * child. The child overwrites its copy of the memory and exits with code
 * 100; the parent then prints its own copy of the memory and the exit code
 * it collected from the child.
 *
 * Returns 0 from the parent on success, 100 from the child, and 1 when
 * calloc(), fork() or wait() fails.
 */
int main()
{
    printf("I am process %d\n", (int) getpid());
    char *mem = (char *) calloc(1, 100);
    if (mem == NULL) {
        perror("calloc");
        return 1;
    }
    /*
     * Flush before forking: buffered stdio data would otherwise be copied
     * into the child and printed twice when stdout is not a terminal.
     */
    fflush(stdout);
    pid_t child_pid = fork();
    if (child_pid == -1) {
        perror("fork");
        free(mem);
        return 1;
    }
    pid_t my_pid = getpid();
    if (child_pid == 0) {
        printf("%d: I am child, fork returned %d\n",
               (int) my_pid, (int) child_pid);
        printf("%d: child is terminated with code 100\n",
               (int) my_pid);
        printf("%d: memory values are set to 1\n", (int) my_pid);
        memset(mem, 1, 100);
        free(mem);
        return 100;
    }
    printf("%d: I am parent, fork returned %d\n",
           (int) my_pid, (int) child_pid);
    int stat = 0;
    pid_t wait_result = wait(&stat);
    if (wait_result == -1) {
        perror("wait");
        free(mem);
        return 1;
    }
    printf("%d: wait returned %d and stat %d\n", (int) my_pid,
           (int) wait_result, stat);
    printf("%d: memory values are %d\n", (int) my_pid, (int) mem[0]);
    /* WEXITSTATUS() is only meaningful for a child that exited normally. */
    if (WIFEXITED(stat)) {
        printf("%d: returned child code was %d\n", (int) my_pid,
               WEXITSTATUS(stat));
    } else {
        printf("%d: child did not exit normally\n", (int) my_pid);
    }
    free(mem);
    return 0;
}
Now there are 2 processes
In the child it is true
/*
 * fork() demo: the parent allocates zeroed memory, forks and waits for the
 * child. The child overwrites its copy of the memory and exits with code
 * 100; the parent then prints its own copy of the memory and the exit code
 * it collected from the child.
 *
 * Returns 0 from the parent on success, 100 from the child, and 1 when
 * calloc(), fork() or wait() fails.
 */
int main()
{
    printf("I am process %d\n", (int) getpid());
    char *mem = (char *) calloc(1, 100);
    if (mem == NULL) {
        perror("calloc");
        return 1;
    }
    /*
     * Flush before forking: buffered stdio data would otherwise be copied
     * into the child and printed twice when stdout is not a terminal.
     */
    fflush(stdout);
    pid_t child_pid = fork();
    if (child_pid == -1) {
        perror("fork");
        free(mem);
        return 1;
    }
    pid_t my_pid = getpid();
    if (child_pid == 0) {
        printf("%d: I am child, fork returned %d\n",
               (int) my_pid, (int) child_pid);
        printf("%d: child is terminated with code 100\n",
               (int) my_pid);
        printf("%d: memory values are set to 1\n", (int) my_pid);
        memset(mem, 1, 100);
        free(mem);
        return 100;
    }
    printf("%d: I am parent, fork returned %d\n",
           (int) my_pid, (int) child_pid);
    int stat = 0;
    pid_t wait_result = wait(&stat);
    if (wait_result == -1) {
        perror("wait");
        free(mem);
        return 1;
    }
    printf("%d: wait returned %d and stat %d\n", (int) my_pid,
           (int) wait_result, stat);
    printf("%d: memory values are %d\n", (int) my_pid, (int) mem[0]);
    /* WEXITSTATUS() is only meaningful for a child that exited normally. */
    if (WIFEXITED(stat)) {
        printf("%d: returned child code was %d\n", (int) my_pid,
               WEXITSTATUS(stat));
    } else {
        printf("%d: child did not exit normally\n", (int) my_pid);
    }
    free(mem);
    return 0;
}
vladislav$ gcc 1_fork.c
vladislav$ ./a.out
I am process 45601
45601: I am parent, fork returned 45602
45602: I am child, fork returned 0
45602: child is terminated with code 100
45602: memory values are set to 1
45601: wait returned 45602 and stat 25600
45601: memory values are 0
45601: returned child code was 100
The child does something, prints ...
Finishes with code 100
The parent waits for the child termination
Obtains its return code - 100
vladislav$> man wait # or man 2 wait
WIFEXITED(status)
WIFSIGNALED(status)
WIFSTOPPED(status)
WEXITSTATUS(status)
WTERMSIG(status)
WCOREDUMP(status)
WSTOPSIG(status)
Why is fork() fast even in big processes?
Copy On Write - COW
1 point
Parent
Child
int a = 100;
a = 200;
a = 200;
Copy the page
int a = 100;
int a = 200;
/*
 * fork() demo: the parent allocates zeroed memory, forks and waits for the
 * child. The child overwrites its copy of the memory and exits with code
 * 100; the parent then prints its own copy of the memory and the exit code
 * it collected from the child.
 *
 * Returns 0 from the parent on success, 100 from the child, and 1 when
 * calloc(), fork() or wait() fails.
 */
int main()
{
    printf("I am process %d\n", (int) getpid());
    char *mem = (char *) calloc(1, 100);
    if (mem == NULL) {
        perror("calloc");
        return 1;
    }
    /*
     * Flush before forking: buffered stdio data would otherwise be copied
     * into the child and printed twice when stdout is not a terminal.
     */
    fflush(stdout);
    pid_t child_pid = fork();
    if (child_pid == -1) {
        perror("fork");
        free(mem);
        return 1;
    }
    pid_t my_pid = getpid();
    if (child_pid == 0) {
        printf("%d: I am child, fork returned %d\n",
               (int) my_pid, (int) child_pid);
        printf("%d: child is terminated with code 100\n",
               (int) my_pid);
        printf("%d: memory values are set to 1\n", (int) my_pid);
        memset(mem, 1, 100);
        free(mem);
        return 100;
    }
    printf("%d: I am parent, fork returned %d\n",
           (int) my_pid, (int) child_pid);
    int stat = 0;
    pid_t wait_result = wait(&stat);
    if (wait_result == -1) {
        perror("wait");
        free(mem);
        return 1;
    }
    printf("%d: wait returned %d and stat %d\n", (int) my_pid,
           (int) wait_result, stat);
    printf("%d: memory values are %d\n", (int) my_pid, (int) mem[0]);
    /* WEXITSTATUS() is only meaningful for a child that exited normally. */
    if (WIFEXITED(stat)) {
        printf("%d: returned child code was %d\n", (int) my_pid,
               WEXITSTATUS(stat));
    } else {
        printf("%d: child did not exit normally\n", (int) my_pid);
    }
    free(mem);
    return 0;
}
The memory is zeroed before fork()
/*
 * fork() demo: the parent allocates zeroed memory, forks and waits for the
 * child. The child overwrites its copy of the memory and exits with code
 * 100; the parent then prints its own copy of the memory and the exit code
 * it collected from the child.
 *
 * Returns 0 from the parent on success, 100 from the child, and 1 when
 * calloc(), fork() or wait() fails.
 */
int main()
{
    printf("I am process %d\n", (int) getpid());
    char *mem = (char *) calloc(1, 100);
    if (mem == NULL) {
        perror("calloc");
        return 1;
    }
    /*
     * Flush before forking: buffered stdio data would otherwise be copied
     * into the child and printed twice when stdout is not a terminal.
     */
    fflush(stdout);
    pid_t child_pid = fork();
    if (child_pid == -1) {
        perror("fork");
        free(mem);
        return 1;
    }
    pid_t my_pid = getpid();
    if (child_pid == 0) {
        printf("%d: I am child, fork returned %d\n",
               (int) my_pid, (int) child_pid);
        printf("%d: child is terminated with code 100\n",
               (int) my_pid);
        printf("%d: memory values are set to 1\n", (int) my_pid);
        memset(mem, 1, 100);
        free(mem);
        return 100;
    }
    printf("%d: I am parent, fork returned %d\n",
           (int) my_pid, (int) child_pid);
    int stat = 0;
    pid_t wait_result = wait(&stat);
    if (wait_result == -1) {
        perror("wait");
        free(mem);
        return 1;
    }
    printf("%d: wait returned %d and stat %d\n", (int) my_pid,
           (int) wait_result, stat);
    printf("%d: memory values are %d\n", (int) my_pid, (int) mem[0]);
    /* WEXITSTATUS() is only meaningful for a child that exited normally. */
    if (WIFEXITED(stat)) {
        printf("%d: returned child code was %d\n", (int) my_pid,
               WEXITSTATUS(stat));
    } else {
        printf("%d: child did not exit normally\n", (int) my_pid);
    }
    free(mem);
    return 0;
}
vladislav$ gcc 1_fork.c
vladislav$ ./a.out
I am process 45601
45601: I am parent, fork returned 45602
45602: I am child, fork returned 0
45602: child is terminated with code 100
45602: memory values are set to 1
45601: wait returned 45602 and stat 25600
45601: memory values are 0
45601: returned child code was 100
The child changes the value to 1
The parent is not affected
pid_t
vfork(void);
Why is wait() needed?
exit();
Zombie-process
wait();
Zombie-process
Parent
vladislav$ ps aux | grep a.out
v.shpilevoy 45758 0,0 0,0 0 0 s000 Z 2:15 0:00.00 (a.out)
/*
 * Zombie demo: the child returns immediately, while the parent sleeps for
 * 15 seconds before it calls wait(). During that window the finished child
 * has not been reaped yet.
 */
int main()
{
    pid_t child = fork();

    if (child != 0) {
        sleep(15);
        wait(NULL);
    }
    return 0;
}
/*
 * Hand-written program entry point used in place of main(): prints a
 * greeting and then terminates the process itself by calling exit().
 */
void
_start()
{
    fputs("hello, world\n", stdout);
    exit(0);
}
Linux
Mac
$> gcc -nostartfiles 1_5_exit.c
$> ./a.out
hello, world
$>
$> gcc -e __start 1_5_exit.c
$> ./a.out
hello, world
$>
/*
 * Hand-written entry point with the exit() call disabled on purpose.
 * Control returns from _start after printing; the transcript that follows
 * this snippet shows the program ending with a segmentation fault.
 */
void
_start()
{
printf("hello, world\n");
//exit(0);
}
$> gcc -nostartfiles 1_5_exit.c
$> ./a.out
hello, world
Segmentation fault (core dumped)
$>
Process' life:
Process resources:
0x0
0xffffffff
.text
.data
.bss
.heap
.stack
.env
Section .bss
Section .data
/*
 * Objects with static storage duration that all carry an explicit
 * initializer -- presumably the ".data" half of the slide's comparison.
 */
void test_f() {
/* Function-local static with an initializer. */
static int a = 100;
}
/* NOTE(review): initialized with NULL; some toolchains may place zero-initialized objects in .bss -- confirm. */
const char *b = NULL;
long c[3] = {1, 2, 3};
/* File-scope static with an initializer. */
static int d = 200;
/*
 * The same set of objects with static storage duration, this time without
 * any initializer -- presumably the ".bss" half of the slide's comparison.
 */
void test_f() {
/* Function-local static without an initializer. */
static int a;
}
const char *b;
long c[3];
/* File-scope static without an initializer. */
static int d;
void
free(void *ptr);
void *
malloc(size_t size);
void *
brk(const void *addr);
0x0
0xffffffff
32MB
32MB
16MB
16MB
16MB
16MB
8MB
8MB
8MB
8MB
8MB
8MB
8MB
8MB
Slab-allocator
Slicing of big slabs into smaller allocations and their handing out
malloc(14 * 1024 * 1024)
malloc(8 * 1024 * 1024)
brk();
.heap
.mmap
< 32Mb
>= 32Mb
malloc();
mmap();
dlopen();
/**
* 08.10.2018
* 162 lines.
*/
/* Memory descriptor of a process: describes all of its segments. */
struct mm_struct {
/* Segment list. */
struct vm_area_struct *mmap;
/* Process memory size. */
unsigned long task_size;
/* Address ranges of the individual sections: code (.text) ... */
unsigned long start_code, end_code;
/* ... data (.data) ... */
unsigned long start_data, end_data;
/* ... heap: its start and its current break ... */
unsigned long start_brk, brk;
/* ... stack ... */
unsigned long start_stack;
/* ... presumably the command-line arguments (verify) ... */
unsigned long arg_start, arg_end;
/* ... and the environment (.env). */
unsigned long env_start, env_end;
};
/**
* 08.10.2018
* 63 lines.
*/
/* Descriptor of one memory segment of a process. */
struct vm_area_struct {
/* Borders of the segment. NOTE(review): whether vm_end is inclusive is not visible here. */
unsigned long vm_start;
unsigned long vm_end;
/* Neighbouring segments. */
struct vm_area_struct *vm_next;
struct vm_area_struct *vm_prev;
/* Access flags (VM_READ, VM_WRITE, VM_EXEC, VM_SHARED). */
unsigned long vm_flags;
/* Content of the segment. */
struct file * vm_file;
void * vm_private_data;
};
Process memory - all the segments
Segment list
struct vm_area_struct *mmap;
Process memory size
unsigned long task_size;
Addresses of sections .text, .data, .heap, .stack, .env
unsigned long start_code, end_code;
unsigned long start_data, end_data;
unsigned long start_brk, brk;
unsigned long start_stack;
unsigned long arg_start, arg_end;
unsigned long env_start, env_end;
One segment
unsigned long vm_start;
unsigned long vm_end;
Borders
struct vm_area_struct *vm_next;
struct vm_area_struct *vm_prev;
Neighbours
struct file * vm_file;
void * vm_private_data;
Content
unsigned long vm_flags;
Access flags
#define VM_READ 0x00000001
#define VM_WRITE 0x00000002
#define VM_EXEC 0x00000004
#define VM_SHARED 0x00000008
0x0
0xffffffffffffffff
'Holes' in the virtual address space - no physical mapping. Access attempt = Segmentation Fault
int uninitialized;
const char *str = "const char *str";
const char str2[] = "const char str2[]";
/*
 * Prints the address of a local variable and then the value of that same
 * variable before anything has been assigned to it, which exposes whatever
 * an earlier call left in this stack slot.
 */
void test_stack(void)
{
/* Left uninitialized on purpose; the value printed below is indeterminate. */
int a;
printf("stack top in test_stack: %p\n", &a);
/* Locals of several kinds -- presumably meant to be looked up in the object file dump. */
const char *str3 = "const char *str3";
const char str4[] = "const char str4[]";
char str5[] = "char str5[]";
char b = 'x';
char c = 'x';
char d = 'x';
int e = 32;
int f = 64;
int g = 128;
printf("a = %d\n", a);
/* Leave a recognizable value behind for a later call to find. */
a = 10;
}
/* Prints the address of a local in main() and then calls test_stack() twice in a row. */
int main(void)
{
int a = 20;
printf("stack top in main: %p\n", &a);
/* Two back-to-back calls, so the second can observe what the first left behind. */
test_stack();
test_stack();
return 0;
}
vladislav$> gcc -c 2_proc_memory.c -o obj.o
vladislav$> objdump -s -d obj.o
Contents of section .text:
0000 554889e5 4883ec40 488d3dee 00000048 UH..H..@H.=....H
0010 8d75e848 8b050000 0000488b 00488945 .u.H......H..H.E
0020 f8b000e8 00000000 488d3d08 01000048 ........H.=....H
0030 8d35e400 00004889 75e0488b 35ea0000 .5....H.u.H.5...
0040 00488975 ec8b0d08 00000089 4df4c645 .H.u........M..E
0050 df78c645 de78c645 dd78c745 d8200000 .x.E.x.E.x.E. ..
0060 00c745d4 40000000 c745d080 0000008b ..E.@....E......
0070 75e88945 ccb000e8 00000000 c745e80a u..E.........E..
0080 00000048 8b3d0000 0000488b 3f488b55 ...H.=....H.?H.U
0090 f84839d7 8945c80f 85060000 004883c4 .H9..E.......H..
00a0 405dc3e8 00000000 0f1f8400 00000000 @]..............
00b0 554889e5 4883ec10 488d3d80 00000048 UH..H...H.=....H
00c0 8d75f8c7 45fc0000 0000c745 f8140000 .u..E......E....
00d0 00b000e8 00000000 8945f4e8 00000000 .........E......
00e0 e8000000 0031c048 83c4105d c3 .....1.H...].
Contents of section .cstring:
00ed 636f6e73 74206368 6172202a 73747200 const char *str.
00fd 73746163 6b20746f 7020696e 20746573 stack top in tes
010d 745f7374 61636b3a 2025700a 00636f6e t_stack: %p..con
011d 73742063 68617220 2a737472 33006368 st char *str3.ch
012d 61722073 7472355b 5d006120 3d202564 ar str5[].a = %d
013d 0a007374 61636b20 746f7020 696e206d ..stack top in m
014d 61696e3a 2025700a 00 ain: %p..
Contents of section .data:
0158 ed000000 00000000 ........
Contents of section .const:
0160 636f6e73 74206368 61722073 7472325b const char str2[
0170 5d000000 00000000 00000000 00000000 ]...............
0180 636f6e73 74206368 61722073 7472345b const char str4[
0190 5d00 ].
Mac
Linux
Contents of section .text:
0000 554889e5 4883ec50 64488b04 25280000 UH..H..PdH..%(..
0010 00488945 f831c048 8d45b848 89c6488d .H.E.1.H.E.H..H.
0020 3d000000 00b80000 0000e800 00000048 =..............H
0030 8d050000 00004889 45c848b8 636f6e73 ......H.E.H.cons
0040 74206368 48ba6172 20737472 345b4889 t chH.ar str4[H.
0050 45e04889 55e866c7 45f05d00 48b86368 E.H.U.f.E.].H.ch
0060 61722073 74724889 45d4c745 dc355b5d ar strH.E..E.5[]
0070 00c645b5 78c645b6 78c645b7 78c745bc ..E.x.E.x.E.x.E.
0080 20000000 c745c040 000000c7 45c48000 ....E.@....E...
0090 00008b45 b889c648 8d3d0000 0000b800 ...E...H.=......
00a0 000000e8 00000000 c745b80a 00000090 .........E......
00b0 488b45f8 64483304 25280000 007405e8 H.E.dH3.%(...t..
00c0 00000000 c9c35548 89e54883 ec106448 ......UH..H...dH
00d0 8b042528 00000048 8945f831 c0c745f4 ..%(...H.E.1..E.
00e0 14000000 488d45f4 4889c648 8d3d0000 ....H.E.H..H.=..
00f0 0000b800 000000e8 00000000 b8000000 ................
0100 00e80000 0000b800 000000e8 00000000 ................
0110 b8000000 00488b55 f8644833 14252800 .....H.U.dH3.%(.
0120 00007405 e8000000 00c9c3 ..t........
Contents of section .text:
UH..H..PdH..%(...H.E.1.H.E.H..H.
=..............H......H.E.H.const
chH.ar str4[H.E.H.U.f.E.].H.char
strH.E..E.5[]..E.x.E.x.E.x.E. ...
.E.@....E......E...H.=...........
....E......H.E.dH3.%(...t........
UH..H...dH..%(...H.E.1..E.....H.E
.H..H.=..........................
.............H.U.dH3.%(...t........
mov %rax,-0x2c(%rbp)
movl $0x5d5b35,-0x24(%rbp)
movb $0x78,-0x4b(%rbp)
movb $0x78,-0x4a(%rbp)
movb $0x78,-0x49(%rbp)
movl $0x20,-0x44(%rbp)
movl $0x40,-0x40(%rbp)
movl $0x80,-0x3c(%rbp)
Where are str, str2, str3? Why?
In .data, because they are all global and initialized
1 point
const char *s = "abc";
const char s[] = "abc";
char s[] = "abc";
Variable s and constant string "abc".
Constant array s.
Array s.
s = "cde";
s[0] = 'c';
Why are str2 (.data) and str4 (.text) in different sections?
Because of different visibility scopes: str2 is global and fell into .data.
const char *str = "const char *str";
const char str2[] = "const char str2[]";
/*
 * Prints the address of a local variable and then the value of that same
 * variable before anything has been assigned to it, which exposes whatever
 * an earlier call left in this stack slot.
 */
void
test_stack()
{
/* Left uninitialized on purpose; the value printed below is indeterminate. */
int a;
printf("stack top in test_stack: %p\n", &a);
/* Locals of several kinds -- presumably meant to be looked up in the object file dump. */
const char *str3 = "const char *str3";
const char str4[] = "const char str4[]";
char str5[] = "char str5[]";
char b = 'x';
char c = 'x';
char d = 'x';
int e = 32;
int f = 64;
int g = 128;
printf("a = %d\n", a);
/* Leave a recognizable value behind for a later call to find. */
a = 10;
}
1 point
Push, call
Return
LIFO
0xffffffff
0x0
int uninitialized;
const char *str = "const char *str";
const char str2[] = "const char str2[]";
/*
 * Fills a 128-byte local array with zeroes and prints its address. Called
 * between two test_stack() calls so that a different function writes to the
 * stack in the meantime.
 */
void another_function(void)
{
char array[128];
memset(array, 0, sizeof(array));
printf("called another function, stack is %p\n", array);
}
/*
 * Prints the address of a local variable and then the value of that same
 * variable before anything has been assigned to it, which exposes whatever
 * an earlier call left in this stack slot.
 */
void test_stack(void)
{
/* Left uninitialized on purpose; the value printed below is indeterminate. */
int a;
printf("stack top in test_stack: %p\n", &a);
const char *str3 = "const char *str3";
const char str4[] = "const char str4[]";
char str5[] = "char str5[]";
char b = 'x';
char c = 'x';
char d = 'x';
int e = 32;
int f = 64;
int g = 128;
printf("a = %d\n", a);
/* Leave a recognizable value behind for a later call to find. */
a = 10;
}
/*
 * Calls test_stack() twice in a row, then another_function(), then
 * test_stack() once more, printing the address of a local in main() first.
 */
int main(void)
{
int a = 20;
printf("stack top in main: %p\n", &a);
/* Back-to-back calls: the second can observe what the first left behind. */
test_stack();
test_stack();
/* A different function writes to the stack before the last test_stack() call. */
another_function();
test_stack();
return 0;
}
vladislav$> gcc -c 2_proc_memory.c -o obj.o
vladislav$> gcc obj.o
vladislav$> ./a.out
stack top in main: 0x7ffeeaa5e9f8
stack top in test_stack: 0x7ffeeaa5e9c8
a = -358225424
stack top in test_stack: 0x7ffeeaa5e9c8
a = 10
called another function, stack is 0x7ffeeaa5e950
stack top in test_stack: 0x7ffeeaa5e9c8
a = 0
Stack grows down
It is reused
But reused by all functions
/*
 * Maps the first 100 bytes of its own source file as shared read-only
 * memory and then dumps /proc/<pid>/maps, so that the new mapping shows up
 * in the printed list of memory segments.
 *
 * A failure to open or map the source file is reported on stderr and the
 * dump is still attempted. Exits with status 1 when the maps file cannot
 * be opened; returns 0 otherwise.
 */
int main()
{
    char *shared_mem = (char *) MAP_FAILED;
    int fd_me = open("3_fs_proc.c", O_RDONLY);
    if (fd_me == -1) {
        fprintf(stderr, "open 3_fs_proc.c: %s\n", strerror(errno));
    } else {
        shared_mem = (char *) mmap(NULL, 100, PROT_READ,
                                   MAP_FILE | MAP_SHARED, fd_me, 0);
        if (shared_mem == (char *) MAP_FAILED)
            fprintf(stderr, "mmap: %s\n", strerror(errno));
    }
    char buf[128];
    sprintf(buf, "/proc/%d/maps", (int) getpid());
    int fd = open(buf, O_RDONLY);
    /* Save errno right away: the printf() below is allowed to overwrite it. */
    int open_errno = errno;
    printf("print %s\n", buf);
    if (fd == -1) {
        printf("exit %s\n", strerror(open_errno));
        exit(1);
    }
    int nbyte;
    /* read() does not NUL-terminate, hence the explicit length in the format. */
    while ((nbyte = read(fd, buf, sizeof(buf))) > 0)
        printf("%.*s", nbyte, buf);
    printf("\n");
    close(fd);
    if (shared_mem != (char *) MAP_FAILED)
        munmap(shared_mem, 100);
    if (fd_me != -1)
        close(fd_me);
    return 0;
}
vladislav$> gcc 3_fs_proc.c
vladislav$> ./a.out
sprintf(buf, "/proc/%d/maps", (int) getpid());
int fd = open(buf, O_RDONLY);
vladislav$>./a.out
print /proc/816/maps
55cad3bee000-55cad3bef000 r-xp 00000000 08:01 3670028 /home/vladislav/a.out
55cad3def000-55cad3df0000 r--p 00001000 08:01 3670028 /home/vladislav/a.out
55cad3df0000-55cad3df1000 rw-p 00002000 08:01 3670028 /home/vladislav/a.out
55cad4479000-55cad449a000 rw-p 00000000 00:00 0 [heap]
7fe06c0a2000-7fe06c289000 r-xp 00000000 08:01 1315532 /lib/x86_64-linux-gnu/libc-2.27.so
7fe06c289000-7fe06c489000 ---p 001e7000 08:01 1315532 /lib/x86_64-linux-gnu/libc-2.27.so
7fe06c489000-7fe06c48d000 r--p 001e7000 08:01 1315532 /lib/x86_64-linux-gnu/libc-2.27.so
7fe06c48d000-7fe06c48f000 rw-p 001eb000 08:01 1315532 /lib/x86_64-linux-gnu/libc-2.27.so
7fe06c48f000-7fe06c493000 rw-p 00000000 00:00 0
7fe06c493000-7fe06c4ba000 r-xp 00000000 08:01 1315504 /lib/x86_64-linux-gnu/ld-2.27.so
7fe06c6a4000-7fe06c6a6000 rw-p 00000000 00:00 0
7fe06c6b9000-7fe06c6ba000 r--s 00000000 08:01 3670336 /home/vladislav/3_fs_proc.c
7fe06c6ba000-7fe06c6bb000 r--p 00027000 08:01 1315504 /lib/x86_64-linux-gnu/ld-2.27.so
7fe06c6bb000-7fe06c6bc000 rw-p 00028000 08:01 1315504 /lib/x86_64-linux-gnu/ld-2.27.so
7fe06c6bc000-7fe06c6bd000 rw-p 00000000 00:00 0
7ffcb1886000-7ffcb18a7000 rw-p 00000000 00:00 0 [stack]
7ffcb1989000-7ffcb198c000 r--p 00000000 00:00 0 [vvar]
7ffcb198c000-7ffcb198e000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
/*
 * Descriptor of one memory segment, reduced to borders, flags and content.
 * NOTE(review): presumably each line of /proc/<pid>/maps is rendered from
 * one such descriptor -- confirm.
 */
struct vm_area_struct {
/* Borders of the segment. */
unsigned long vm_start;
unsigned long vm_end;
/* Access flags. */
unsigned long vm_flags;
/* Content of the segment. */
struct file * vm_file;
void * vm_private_data;
};
.text
.data
.bss
.heap
.mmap
.stack
.env
Shared library segment
/*
 * Contrasts shared and private memory across fork(): both buffers start
 * with the value 55, the child stores 56 into both, and the parent prints
 * what it sees in each buffer once the child has finished.
 *
 * Returns 0 on success and 1 when mmap(), malloc() or fork() fails.
 */
int main()
{
    int rc = 1;
    pid_t child_pid;
    char *private_mem = NULL;
    char *shared_mem = (char *) mmap(NULL, 100, PROT_READ | PROT_WRITE,
                                     MAP_ANON | MAP_SHARED, -1, 0);
    if (shared_mem == (char *) MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    private_mem = (char *) malloc(100);
    if (private_mem == NULL) {
        perror("malloc");
        goto cleanup;
    }
    shared_mem[0] = 55;
    private_mem[0] = 55;
    child_pid = fork();
    if (child_pid == -1) {
        perror("fork");
        goto cleanup;
    }
    if (child_pid == 0) {
        shared_mem[0] = 56;
        private_mem[0] = 56;
        rc = 0;
        goto cleanup;
    }
    wait(NULL);
    printf("shared: %d, private: %d\n", (int) shared_mem[0],
           (int) private_mem[0]);
    rc = 0;
cleanup:
    /* Both the parent and the child release their resources here. */
    munmap(shared_mem, 100);
    free(private_mem);
    return rc;
}
vladislav$> gcc 4_shared_mem.c
vladislav$> ./a.out
shared: 56, private: 55
Private memory - COW
Shared memory - no COW
The child changes both, and what?
Only shared memory change is visible in the parent
/*
 * Prints the addresses of the PATH environment value, the argument vector,
 * a stack variable and a heap allocation, and then lists every environment
 * entry, one per line.
 */
int main(int argc, char **argv, char **env)
{
    /* %p expects a pointer to void, hence the explicit casts below. */
    char *path = getenv("PATH");
    printf("env: %p\n", (void *) path);
    printf("argv: %p\n", (void *) argv);
    int a;
    printf("stack: %p\n", (void *) &a);
    void *m = malloc(100);
    printf("heap: %p\n", m);
    free(m);
    /* The environment array is terminated by a NULL entry. */
    for (char **entry = env; *entry != NULL; ++entry)
        printf("%s\n", *entry);
    return 0;
}
vladislav$> gcc 5_argv_env.c
vladislav$> ./a.out
env: 0x7ffda1c1ae8f
argv: 0x7ffda1c194f8
stack: 0x7ffda1c193f0
heap: 0x555d856c5670
LC_PAPER=ru_RU.UTF-8
LC_MONETARY=ru_RU.UTF-8
XDG_MENU_PREFIX=gnome-
LANG=en_US.UTF-8
DISPLAY=:0
GNOME_SHELL_SESSION_MODE=ubuntu
PWD=/home/vladislav
HOME=/home/vladislav
...
char *
getenv(const char *name);
int
setenv(const char *name, const char *value,
int overwrite);
int
putenv(char *string);
int
unsetenv(const char *name);
"name=value"
Contents of section .debug_info:
0000 8f030000 04000000 00000801 00000000 ................
0010 0c000000 00000000 00000000 00000000 ................
0020 00110100 00000000 00000000 00020000 ................
Contents of section .debug_abbrev:
0000 01110125 0e130b03 0e1b0e11 01120710 ...%............
0010 17000002 1600030e 3a0b3b0b 49130000 ........:.;.I...
0020 0324000b 0b3e0b03 0e000004 24000b0b .$...>......$...
Contents of section .debug_aranges:
0000 2c000000 02000000 00000800 00000000 ,...............
0010 00000000 00000000 11010000 00000000 ................
0020 00000000 00000000 00000000 00000000 ................
Contents of section .debug_line:
0000 e0000000 0200b300 00000101 fb0e0d00 ................
0010 01010101 00000001 0000012f 7573722f .........../usr/
0020 6c69622f 6763632f 7838365f 36342d6c lib/gcc/x86_64-l
0030 696e7578 2d676e75 2f372f69 6e636c75 inux-gnu/7/inclu
0040 6465002f 7573722f 696e636c 7564652f de./usr/include/
0050 7838365f 36342d6c 696e7578 2d676e75 x86_64-linux-gnu
Contents of section .debug_str:
0000 5f5f6f66 665f7400 5f494f5f 72656164 __off_t._IO_read
0010 5f707472 005f6368 61696e00 73697a65 _ptr._chain.size
0020 5f74005f 73686f72 74627566 00474e55 _t._shortbuf.GNU
0030 20433131 20372e33 2e30202d 6d74756e C11 7.3.0 -mtun
0040 653d6765 6e657269 63202d6d 61726368 e=generic -march
0050 3d783836 2d363420 2d67202d 66737461 =x86-64 -g -fsta
0060 636b2d70 726f7465 63746f72 2d737472 ck-protector-str
0070 6f6e6700 5f494f5f 325f315f 73746465 ong._IO_2_1_stde
/* Excerpt of the task descriptor showing only where open files hang off it. */
struct task_struct {
/* ... */
/* The process keeps its opened file descriptors here. */
struct files_struct *files;
/* ... */
};
/* Per-process holder of the descriptor table. */
struct files_struct {
/* The table of descriptors. */
struct fdtable *fdt;
};
/* Descriptor table: an ordinary array of struct file pointers. */
struct fdtable {
/* NOTE(review): presumably the capacity of the fd array -- confirm. */
unsigned int max_fds;
/* The array itself; a user-space descriptor number is an index into it. */
struct file **fd;
};
/* Kernel-side representation of an open file descriptor. */
struct file {
/* NOTE(review): presumably the path the file was opened by -- confirm. */
struct path f_path;
/* The inode behind this open file. */
struct inode *f_inode;
/* Reference count ("count" on the slides; it is 2 after dup()). */
atomic_long_t f_count;
/* Flags ("flags = read" / "flags = write" on the slides). */
unsigned int f_flags;
/* NOTE(review): presumably the access mode -- confirm. */
fmode_t f_mode;
/* Current position in the file ("pos" on the slides). */
loff_t f_pos;
/* NOTE(review): presumably credentials of the opener -- confirm. */
const struct cred *f_cred;
};
Process keeps opened file descriptors
In a table of descriptors
The table is just an ordinary array
File descriptor in the kernel is a structure. In user space - number, an index in the descriptor array
Predefined standard descriptors
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
/*
 * Produces output on standard output in two ways: with a raw write() on
 * descriptor STDOUT_FILENO and with printf(), which also prints the
 * numeric value of STDOUT_FILENO.
 */
int main()
{
    char buf[] = "write to 1\n";
    /* sizeof(buf) counts the terminating '\0'; do not send that byte out. */
    if (write(STDOUT_FILENO, buf, sizeof(buf) - 1) == -1)
        perror("write");
    printf("stdout fileno = %d\n", STDOUT_FILENO);
    return 0;
}
vladislav$> gcc 6_stdout.c
vladislav$> ./a.out
write to 1
stdout fileno = 1
More descriptors via open().
task 1
task 2
struct file
stdout
fd1
fd1
stdout
pos = 0
count = 1
struct file
pos = 0
count = 1
struct inode
struct file: stdout
pos = 0
count = 1
struct file: stdout
pos = 0
count = 1
struct inode
struct inode
Why can struct inode stdin/out/err of different processes be different?
Because of different consoles. Each console is a 'file'.
1 point
/*
 * Opens tmp.txt for reading and writing (creating it when missing, and
 * without O_APPEND), waits for a key press and then writes its own pid
 * followed by a space through the descriptor.
 */
int main()
{
    const int open_flags = O_RDWR | O_CREAT;
    const mode_t open_mode = S_IRUSR | S_IWUSR;
    int fd = open("tmp.txt", open_flags, open_mode);
    int self = (int) getpid();

    printf("press any key to write my pid %d\n", self);
    getchar();

    dprintf(fd, "%d ", self);
    close(fd);
    return 0;
}
Problem of appending to a file
X 2
vladislav$> gcc 7_basic_append.c
vladislav$> ./a.out
press any key to write my pid 46152
vladislav$> ./a.out
press any key to write my pid 46153
vladislav$> #press enter
vladislav$> #press enter
vladislav$> cat tmp.txt
46153
open("tmp.txt", O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR);
Why doesn't lseek + write work instead of O_APPEND?
Because of interrupts. A process can be interrupted after lseek before write. Another process can write into the file here.
1 point
int
dup(int fildes);
int
dup2(int fildes, int fildes2);
task 1
struct file
fd1
fd2
pos = 0
count = 2
struct inode
fd2 = dup(fd1);
int
pipe(int fildes[2]);
vladislav$> ps aux | grep audio
_coreaudiod 37568 6048 ?? Ss пт10 48:17.63 /usr/sbin/coreaudiod
Pipe
/*
 * Opens tmp.txt, duplicates the descriptor with dup(), writes "1 " through
 * the duplicate and "2 " through the original, closing each descriptor
 * right after it has been used.
 */
int main()
{
    int original = open("tmp.txt", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    int duplicate = dup(original);

    dprintf(duplicate, "1 ");
    close(duplicate);

    dprintf(original, "2 ");
    close(original);

    return 0;
}
vladislav$> gcc 8_dup.c
vladislav$> ./a.out
vladislav$> cat tmp.txt
1 2
What happens with file descriptors after fork()?
Nothing, they are kept as is. The child and the parent literally share the same struct file, as if dup() had been called on every descriptor.
1 point
/*
 * Opens tmp.txt, duplicates the descriptor and forks. The child writes its
 * pid through the duplicate; the parent waits for the child and then writes
 * its own pid through the original descriptor. Each side closes the
 * descriptor it does not use.
 */
int main()
{
    int fd = open("tmp.txt", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    int fd2 = dup(fd);
    pid_t child = fork();

    if (child != 0) {
        /* Parent side: keeps fd, drops the duplicate. */
        close(fd2);
        wait(NULL);
        dprintf(fd, "%d ", (int) getpid());
        close(fd);
        return 0;
    }

    /* Child side: keeps the duplicate, drops fd. */
    close(fd);
    dprintf(fd2, "%d ", (int) getpid());
    close(fd2);
    return 0;
}
vladislav$> gcc 9_fork_dup.c
vladislav$> ./a.out
vladislav$> cat tmp.txt
46297 46296
task 1
task 2
struct file
fd1
fd2
pos = 0
count = 2
struct inode
int
pipe(int fildes[2]);
task 1
struct file
fd[0]
fd[1]
pos = 0
count = 1
flags = read
struct file
pos = 0
count = 1
flags = write
/*
 * Two-way conversation between parent and child over two pipes: the parent
 * sends "hello1" through to_child, the child prints it and answers with
 * "hello2" through to_parent, which the parent prints in turn.
 *
 * Returns 0 on success and 1 when pipe() or fork() fails.
 */
int main()
{
    int to_parent[2];
    int to_child[2];
    if (pipe(to_child) != 0 || pipe(to_parent) != 0) {
        perror("pipe");
        return 1;
    }
    /*
     * Zero-filled, and read() below never fills the last byte, so the
     * buffer is always a valid NUL-terminated string for "%s".
     */
    char buf[16] = {0};
    pid_t child_pid = fork();
    if (child_pid == -1) {
        perror("fork");
        return 1;
    }
    if (child_pid == 0) {
        close(to_parent[0]);
        close(to_child[1]);
        read(to_child[0], buf, sizeof(buf) - 1);
        printf("%d: read %s\n",
               (int) getpid(), buf);
        write(to_parent[1], "hello2",
              sizeof("hello2"));
        return 0;
    }
    close(to_parent[1]);
    close(to_child[0]);
    /* The size must be taken from the very literal that is being sent. */
    write(to_child[1], "hello1",
          sizeof("hello1"));
    read(to_parent[0], buf, sizeof(buf) - 1);
    printf("%d: read %s\n", (int) getpid(),
           buf);
    wait(NULL);
    return 0;
}
vladislav$> gcc 10_pipe.c
vladislav$> ./a.out
46312: read hello1
46311: read hello2
Two channels. to_child[1] -> to_child[0], to_parent[1] ->to_parent[0]
The child closes unused descriptors. Now it reads from to_child[0] and writes to to_parent[1].
The parent closes unused descriptors. Now it reads from to_parent[0] and writes to to_child[1].
parent
to_parent[0]
to_parent[0]
to_parent[1]
to_child[0]
to_child[1]
pos = 0
count = 2
flags = read
child
to_parent[0]
to_parent[1]
to_child[0]
to_child[1]
pos = 0
count = 2
flags = write
pos = 0
count = 2
flags = read
pos = 0
count = 2
flags = write
to_parent[1]
to_child[0]
to_child[1]
to_parent[1]
to_child[0]
pos = 0
count = 1
flags = read
pos = 0
count = 1
flags = write
to_parent[0]
to_child[1]
pos = 0
count = 1
flags = write
pos = 0
count = 1
flags = read
/*
 * Parent writes "100" into a pipe and waits for the child; the child reads
 * from the pipe until end of file and prints every chunk it receives.
 *
 * When the first command-line argument is "close", both processes close
 * their copy of the write end (channel[1]) once they no longer need it.
 * Without that option the child keeps its own copy of the write end open,
 * so its read() never reports end of file and the program hangs; that is
 * the behavior this example demonstrates.
 *
 * Returns 0 on success and 1 when pipe() fails.
 */
int
main(int argc, const char **argv)
{
    int channel[2];
    int need_close = argc > 1 &&
                     strcmp(argv[1], "close") == 0;
    if (pipe(channel) != 0) {
        perror("pipe");
        return 1;
    }
    if (fork() == 0) {
        char buf[16];
        ssize_t nread;
        printf("child: started\n");
        if (need_close) {
            printf("child: close output channel\n");
            close(channel[1]);
        }
        /*
         * read() does not NUL-terminate the data, so print exactly the
         * number of bytes that were received.
         */
        while ((nread = read(channel[0], buf, sizeof(buf))) > 0)
            printf("child: read %.*s\n", (int) nread, buf);
        printf("child: EOF\n");
        return 0;
    }
    write(channel[1], "100", 3);
    printf("parent: written 100\n");
    if (need_close) {
        printf("parent: close output channel\n");
        close(channel[1]);
    }
    printf("parent: waiting for child termination ...\n");
    wait(NULL);
    return 0;
}
$> gcc 10_5_pipe_close.c
$> ./a.out
parent: written 100
parent: waiting for child termination ...
child: started
child: read 100
^C
$> ./a.out close
parent: written 100
parent: close output channel
parent: waiting for child termination ...
child: started
child: close output channel
child: read 100
child: EOF
$>
Create channel between parent and child - write to channel[1], read from channel[0]
Child reads from channel[0] until end of file
Parent writes to channel[1], waits for child termination, and exits
With command line option close the program closes channel[1], when it is not needed anymore
But it hangs ...
/*
 * Redirects standard input to the read end of a pipe before forking. The
 * child then reads a number with plain scanf(), i.e. from the pipe, while
 * the parent supplies the text "100" through the write end.
 *
 * Returns 0 on success and 1 when pipe() fails or the child cannot parse
 * a number.
 */
int main()
{
    int to_child[2];
    if (pipe(to_child) != 0) {
        perror("pipe");
        return 1;
    }
    /* From here on descriptor 0 (stdin) refers to the pipe's read end. */
    dup2(to_child[0], 0);
    if (fork() == 0) {
        close(to_child[1]);
        int n = 0;
        /* Do not print n unless scanf() actually stored a value in it. */
        if (scanf("%d", &n) != 1) {
            fprintf(stderr, "%d: failed to read a number\n",
                    (int) getpid());
            return 1;
        }
        printf("%d: read %d\n", (int) getpid(), n);
        return 0;
    }
    close(to_child[0]);
    write(to_child[1], "100", sizeof("100"));
    wait(NULL);
    return 0;
}
vladislav$> gcc 11_advanced_pipe.c
vladislav$> ./a.out
46339: read 100
/*
 * Redirects standard input to the read end of a pipe, forks, and replaces
 * the child with an interactive python3. The parent feeds one command into
 * the pipe, closes the write end and waits for the child to finish.
 *
 * Returns 0 from the parent, 1 when pipe() fails, and -1 from the child
 * when python3 cannot be executed.
 */
int main()
{
    int to_child[2];
    if (pipe(to_child) != 0) {
        perror("pipe");
        return 1;
    }
    /* The new program inherits descriptor 0, which now is the pipe. */
    dup2(to_child[0], 0);
    if (fork() == 0) {
        close(to_child[1]);
        /*
         * The variadic argument list must end with a null *pointer*;
         * a bare NULL may be passed as a plain integer.
         */
        execlp("python3", "python3", "-i", (char *) NULL);
        /* Only reached when the exec itself failed. */
        perror("execlp");
        return -1;
    }
    close(to_child[0]);
    const char cmd[] = "print(100 + 200)";
    write(to_child[1], cmd, sizeof(cmd));
    close(to_child[1]);
    wait(NULL);
    return 0;
}
vladislav$> gcc 12_exec.c
vladislav$> ./a.out
Python 3.6.5 (v3.6.5:f59c0932b4, Mar 28 2018, 05:52:31)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> ...
300
>>>
int
execl(const char *path, const char *arg0, ...);
int
execle(const char *path, const char *arg0, ...);
int
execlp(const char *file, const char *arg0, ...);
int
execv(const char *path, char *const argv[]);
int
execvp(const char *file, char *const argv[]);
int
execvP(const char *file, const char *search_path, char *const argv[]);
Fork() copies:
Fork() does not copy:
Mutexes, semaphores
Message queues
Shared memory
A process works with these IPC objects but does not own them. They can outlive their creator.
int
mkfifo(const char *path, mode_t mode);
int
shmget(key_t key, size_t size, int shmflg);
int
semget(key_t key, int nsems, int semflg);
int
msgget(key_t key, int msgflg);
Stack overflow, COW
Handle exception
Foreign or not existing memory, 0 division
Kill process
int main() /* Crashes on purpose so that a core dump can be inspected in a debugger. */
{
int a = 1; /* examined in the debugger session afterwards */
int b = 2; /* examined in the debugger session afterwards */
char buffer[100];
buffer[0] = 3;
buffer[50] = 4;
buffer[10000] = 5; /* deliberately far outside the 100-byte array: this is the crashing store */
return 0;
}
vladislav$> ulimit -c unlimited
vladislav$> gcc -g 13_core_dump.c
vladislav$> ./a.out
Segmentation fault: 11 (core dumped)
vladislav$> lldb --core /cores/core.46461
(lldb) target create --core "/cores/core.46461"
Core file '/cores/core.46461' (x86_64) was loaded.
(lldb) bt
* thread #1, stop reason = signal SIGSTOP
* frame #0: 0x0000000102a6ef66 a.out`main at 13_core_dump.c:8
frame #1: 0x00007fff5531f015 libdyld.dylib`start + 1
(lldb) f 0
frame #0: 0x0000000102a6ef66 a.out`main at 13_core_dump.c:8
5 char buffer[100];
6 buffer[0] = 3;
7 buffer[50] = 4;
-> 8 buffer[10000] = 5;
9 return 0;
10 }
(lldb) p a
(int) $0 = 1
(lldb) p b
(int) $1 = 2
(lldb) p buffer[0]
(char) $2 = '\x03'
System call - execution of code in the kernel context
Standards do not describe the 'system call' concept. They describe only the API. "Syscallability" depends on the hardware, OS and kernel.
.long sys_ni_syscall /* old ulimit syscall holder */
.long sys_ni_syscall /* sys_olduname */
.long sys_umask /* 60 */
.long sys_chroot
.long sys_ustat
.long sys_dup2
.long sys_getppid
.long sys_getpgrp /* 65 */
.long sys_setsid
.long sys_sigaction
.long sys_sgetmask
.long sys_ssetmask
.long sys_setreuid16 /* 70 */
.long sys_setregid16
.long sys_sigsuspend
.long sys_sigpending
A part of system call table in the kernel
asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd);
SYSCALL_DEFINE2(dup2, unsigned int, oldfd,
unsigned int, newfd)
{
/* ... implementation. */
}
How does syscall enter the kernel context?
1) Via registers and a special interrupt;
2) Via a special processor instruction;
2 points
int 0x80
sysenter/sysexit syscall/sysret
Process
int 0x80
Handler address?
0x0...0x80...
IDT - Interrupt Descriptor Table
Process
syscall sysenter
%eax ( %ebx , %ecx , %edx , %esi , %edi )
System call number
<= 5 arguments
Pointer to the stack, when > 5 arguments
0xffffffff
0x0
Process puts arguments and programmatically calls a special interrupt - the kernel catches it and reads the arguments
vladislav$>./a.out
print /proc/816/maps
...
7fe06c6b9000-7fe06c6ba000 r--s 00000000 08:01 3670336 /home/vladislav/3_fs_proc.c
7fe06c6ba000-7fe06c6bb000 r--p 00027000 08:01 1315504 /lib/x86_64-linux-gnu/ld-2.27.so
7fe06c6bb000-7fe06c6bc000 rw-p 00028000 08:01 1315504 /lib/x86_64-linux-gnu/ld-2.27.so
7fe06c6bc000-7fe06c6bd000 rw-p 00000000 00:00 0
7ffcb1886000-7ffcb18a7000 rw-p 00000000 00:00 0 [stack]
7ffcb1989000-7ffcb198c000 r--p 00000000 00:00 0 [vvar]
7ffcb198c000-7ffcb198e000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
User space subsystem, application, service processes
Public and documented DLL libraries
Windows kernel
User space
Kernel space
System calls
Function calls
CreateProcess()
CreateProcessInternalW()
NtCreateUserProcess()
New processes are always cloned from other processes. There is the single common ancestor for all of them - init-process.
Process = resource virtualization. It hides from us things like physical memory, system calls, interrupts, number of CPU cores, etc.
A process has multiple memory sections for different purposes, such as the main stack, heap, dynamic libraries, global variables, command line, etc. You can see them in /proc/<pid>/maps
File descriptors are not only for files. They can also be used to communicate with other processes, e.g. via pipe() + dup() + dup2(). Be careful about how FDs are inherited on fork().
Process communicates with the world via the kernel. To the kernel it talks via the system calls. They work like calling certain functions inside the kernel to use files, network, time, to read certain system-wide info
Lectures: slides.com/gerold103/decks/sysprog_eng
Memory. Virtual and physical. Cache levels and cache lines. User and kernel memory. False sharing.
Next time:
Press the heart if you liked the lecture