(Kernel) Task Switching in Rust

Jayden Qi

Task Switching?

On the CPU

Core 1

Core 4

Core 3

Core 2

Task 4

Task 3

Task 2

Task 1

On the CPU

Core 1

Core 4

Core 3

Core 2

Task 4

Task 3

Task 2

Task 1

Task 5

On the CPU

Core 1

Core 4

Core 3

Core 2

Task 4

Task 3

Task 2

Task 1

Task 5

Preempt or Yield?

Preemptive multitasking is fully controlled by a scheduler

Cooperative multitasking gives control to programs themselves

/// Example cooperative task: prints a start message, waits one second,
/// then prints a completion message.
async fn my_task() {
    let pause = Duration::from_secs(1);

    println!("Task started");
    // The `.await` is the point where this task can yield while it waits.
    sleep(pause).await;
    println!("Task completed");
}

#[tokio::main]
async fn main() {
    tokio::join!(my_task(), my_task());
}
/// Prints the integers 0 through 99, one per line.
fn my_task() {
    (0..100).for_each(|i| println!("{}", i));
}

fn main() {
    let t1 = thread::spawn(|| my_task());
    let t2 = thread::spawn(|| my_task());

    t1.join().unwrap();
    t2.join().unwrap();
}

Prelude

Everything will be on x86_64

Ground Rules

  1. Single core
  2. Simplest scheduler
  3. No user space

How does a task run?

  • CPU registers
    • General purpose registers
    • Instruction pointer
    • Stack pointer
    • ... and more
  • Stack

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

...

...

ptr

ptr

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

...

42

ptr

ptr

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

73

42

ptr

ptr

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

73

42

ptr

ptr

42

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

73

42

ptr

ptr

42

73

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

73

42

ptr

ptr

73

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

73

42

ptr

ptr

Representing a task

/// Per-task record holding what is needed to pause and later resume a task.
struct ProcessControlBlock {
    /// Kind of task. NOTE(review): `TaskType` is declared elsewhere; its
    /// variants are not visible here.
    pub task_type: TaskType,
    
    // The two fields below are the ones the task switch itself reads and
    // writes: the saved registers and the scheduling state.
    /// Saved CPU register values for this task.
    pub regs: TaskRegisters,
    /// Current scheduling state of this task.
    pub state: TaskState,
    
    /// Non-null pointer associated with this task's stack; presumably the
    /// start of its stack allocation -- TODO confirm which end it marks.
    pub stack_start: NonNull<()>,
}

When to switch?

  • CPU timer
  • Schedule interrupts

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

73

42

ptr

ptr

42

TCB list

TCB

state: running

registers: <uninitialized>

 

TCB

state: waiting

registers: <rbx:99 ...>

 

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

73

42

ptr

ptr

42

TCB list

TCB

state: running

registers: <rbx:73 ...>

 

TCB

state: waiting

registers: <rbx:99 ...>

 

73

42

ptr

ptr

The Stack

    mov     rax, 42         ; rax = 42
    mov     rbx, 73         ; rbx = 73
    push    rax             ; rsp -= 8, store 42 at the new top of stack
    push    rbx             ; rsp -= 8, store 73 at the new top of stack
    pop     rax             ; rax = 73 (last value pushed), rsp += 8
    pop     rbx             ; rbx = 42, rsp += 8 -- the two values are now swapped

Registers

RIP

RSP

RAX

RBX

42

TCB list

TCB

state: waiting

registers: <rbx:73 ...>

 

TCB

state: waiting

registers: <rbx:99 ...>

 

73

42

ptr

ptr

99

100

ptr

ptr

Registers

RIP

RSP

RAX

RBX

TCB list

TCB

state: waiting

registers: <rbx:73 ...>

 

TCB

state: waiting

registers: <rbx:99 ...>

 

ptr

99

100

ptr

other stack

; other task's
; instructions

random memory

Code

  • Getting task state
  • Putting it back on

Calling conventions

/// Interrupt entry point that switches to a task.
///
/// Pushes the interrupted task's general-purpose registers onto the current
/// stack, hands a pointer to that saved frame to `schedule_inner`, signals
/// end-of-interrupt, then pops a full register set from the frame and
/// returns with `iretq`. Because `schedule_inner` receives a mutable pointer,
/// the values popped afterwards are whatever it left in the frame.
///
/// # Safety
/// The body ends in `iretq`, so a CPU-pushed interrupt frame (RIP, CS,
/// RFLAGS, RSP, SS) must be on the stack at entry; calling this like an
/// ordinary function does not provide one. It also executes `wrmsr`, which
/// is a privileged instruction.
#[unsafe(naked)]
pub unsafe extern "x86-interrupt" fn schedule() {
    naked_asm!(
        // Save the general-purpose registers. The last register pushed (r15)
        // ends up at the lowest address, which is where rsp points afterwards.
        "push rax",
        "push rbx",
        "push rcx",
        "push rdx",
        "push rsi",
        "push rdi",
        "push rbp",
        "push r8",
        "push r9",
        "push r10",
        "push r11",
        "push r12",
        "push r13",
        "push r14",
        "push r15",
        // rdi carries the first integer argument in the System V x86-64
        // calling convention.
        "mov rdi, rsp",        // pass a pointer to the saved register frame
        "call schedule_inner", // may rewrite the frame in place
        // Signal end-of-interrupt: `wrmsr` writes edx:eax (zero here) to the
        // MSR selected by ecx.
        "xor eax, eax",
        "xor edx, edx",
        "mov ecx, 0x80B", // x2APIC EOI register MSR -- assumes the local APIC is in x2APIC mode
        "wrmsr",
        // Pop registers from the frame in reverse push order. This also
        // replaces the eax/ecx/edx values used by the EOI sequence above.
        "pop r15",
        "pop r14",
        "pop r13",
        "pop r12",
        "pop r11",
        "pop r10",
        "pop r9",
        "pop r8",
        "pop rbp",
        "pop rdi",
        "pop rsi",
        "pop rdx",
        "pop rcx",
        "pop rbx",
        "pop rax",
        // Pops RIP, CS, RFLAGS, RSP and SS and resumes execution there.
        "iretq",
    );
}

/// Inner function to switch tasks.
///
/// Takes the task at the front of the scheduler's task list. Unless that
/// task is `Terminated`, its registers are saved from `current_task_context`,
/// it is marked `Ready`, and it is re-queued at the back. The task that is
/// then at the front is marked `Running` and its saved registers are written
/// back through `current_task_context`.
///
/// # Safety
/// `current_task_context` must be non-null, properly aligned, and valid for
/// both reads and writes of one `TaskRegisters` for the duration of the call.
///
/// # Panics
/// Panics if the task list is empty on entry, or if it is empty after a
/// terminated task has been removed.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    let mut scheduler = TASK_SCHEDULER.lock();

    // save current task context first
    let mut current_task = scheduler
        .task_list
        .pop_front()
        .expect("schedule_inner called with an empty task list");

    if current_task.state == TaskState::Terminated {
        // A terminated task is not re-queued.
        trace!("task ended at {:#X}", current_task.regs.interrupt_rsp);
    } else {
        current_task.state = TaskState::Ready;
        // SAFETY: the caller guarantees the pointer is valid for reads.
        current_task.regs = unsafe { *current_task_context };
        // Log before `push_back`: the queue takes ownership of the task, so
        // it cannot be read afterwards.
        trace!("task paused at {:#X}", current_task.regs.interrupt_rsp);
        scheduler.task_list.push_back(current_task);
    }

    // run front task
    let next_task = scheduler
        .task_list
        .front_mut()
        .expect("no task left in the task list to switch to");

    next_task.state = TaskState::Running;
    // SAFETY: the caller guarantees the pointer is valid for writes.
    unsafe { *current_task_context = next_task.regs };
}

Interrupt semantics

Image courtesy of

Phil Oppermann

/// Interrupt entry point that switches to a task.
///
/// Pushes the interrupted task's general-purpose registers onto the current
/// stack, hands a pointer to that saved frame to `schedule_inner`, signals
/// end-of-interrupt, then pops a full register set from the frame and
/// returns with `iretq`. Because `schedule_inner` receives a mutable pointer,
/// the values popped afterwards are whatever it left in the frame.
///
/// # Safety
/// The body ends in `iretq`, so a CPU-pushed interrupt frame (RIP, CS,
/// RFLAGS, RSP, SS) must be on the stack at entry; calling this like an
/// ordinary function does not provide one. It also executes `wrmsr`, which
/// is a privileged instruction.
#[unsafe(naked)]
pub unsafe extern "x86-interrupt" fn schedule() {
    naked_asm!(
        // Save the general-purpose registers. The last register pushed (r15)
        // ends up at the lowest address, which is where rsp points afterwards.
        "push rax",
        "push rbx",
        "push rcx",
        "push rdx",
        "push rsi",
        "push rdi",
        "push rbp",
        "push r8",
        "push r9",
        "push r10",
        "push r11",
        "push r12",
        "push r13",
        "push r14",
        "push r15",
        // rdi carries the first integer argument in the System V x86-64
        // calling convention.
        "mov rdi, rsp",        // pass a pointer to the saved register frame
        "call schedule_inner", // may rewrite the frame in place
        // Signal end-of-interrupt: `wrmsr` writes edx:eax (zero here) to the
        // MSR selected by ecx.
        "xor eax, eax",
        "xor edx, edx",
        "mov ecx, 0x80B", // x2APIC EOI register MSR -- assumes the local APIC is in x2APIC mode
        "wrmsr",
        // Pop registers from the frame in reverse push order. This also
        // replaces the eax/ecx/edx values used by the EOI sequence above.
        "pop r15",
        "pop r14",
        "pop r13",
        "pop r12",
        "pop r11",
        "pop r10",
        "pop r9",
        "pop r8",
        "pop rbp",
        "pop rdi",
        "pop rsi",
        "pop rdx",
        "pop rcx",
        "pop rbx",
        "pop rax",
        // Pops RIP, CS, RFLAGS, RSP and SS and resumes execution there.
        "iretq",
    );
}

/// Inner function to switch tasks.
///
/// Exported unmangled with the C ABI, so it can be called by symbol name.
/// `current_task_context` is a raw, mutable pointer to a `TaskRegisters`.
///
/// # Safety
/// NOTE(review): the body is elided here; presumably the pointer must be
/// valid for reads and writes of one `TaskRegisters` -- confirm against the
/// full definition.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    // ... (body elided)
}

Visualization

/// Interrupt entry point that switches to a task.
///
/// Pushes the interrupted task's general-purpose registers onto the current
/// stack, hands a pointer to that saved frame to `schedule_inner`, signals
/// end-of-interrupt, then pops a full register set from the frame and
/// returns with `iretq`. Because `schedule_inner` receives a mutable pointer,
/// the values popped afterwards are whatever it left in the frame.
///
/// # Safety
/// The body ends in `iretq`, so a CPU-pushed interrupt frame (RIP, CS,
/// RFLAGS, RSP, SS) must be on the stack at entry; calling this like an
/// ordinary function does not provide one. It also executes `wrmsr`, which
/// is a privileged instruction.
#[unsafe(naked)]
pub unsafe extern "x86-interrupt" fn schedule() {
    naked_asm!(
        // Save the general-purpose registers. The last register pushed (r15)
        // ends up at the lowest address, which is where rsp points afterwards.
        "push rax",
        "push rbx",
        "push rcx",
        "push rdx",
        "push rsi",
        "push rdi",
        "push rbp",
        "push r8",
        "push r9",
        "push r10",
        "push r11",
        "push r12",
        "push r13",
        "push r14",
        "push r15",
        // rdi carries the first integer argument in the System V x86-64
        // calling convention.
        "mov rdi, rsp",        // pass a pointer to the saved register frame
        "call schedule_inner", // may rewrite the frame in place
        // Signal end-of-interrupt: `wrmsr` writes edx:eax (zero here) to the
        // MSR selected by ecx.
        "xor eax, eax",
        "xor edx, edx",
        "mov ecx, 0x80B", // x2APIC EOI register MSR -- assumes the local APIC is in x2APIC mode
        "wrmsr",
        // Pop registers from the frame in reverse push order. This also
        // replaces the eax/ecx/edx values used by the EOI sequence above.
        "pop r15",
        "pop r14",
        "pop r13",
        "pop r12",
        "pop r11",
        "pop r10",
        "pop r9",
        "pop r8",
        "pop rbp",
        "pop rdi",
        "pop rsi",
        "pop rdx",
        "pop rcx",
        "pop rbx",
        "pop rax",
        // Pops RIP, CS, RFLAGS, RSP and SS and resumes execution there.
        "iretq",
    );
}

/// Inner function to switch tasks.
///
/// Exported unmangled with the C ABI, so it can be called by symbol name.
/// `current_task_context` is a raw, mutable pointer to a `TaskRegisters`.
///
/// # Safety
/// NOTE(review): the body is elided here; presumably the pointer must be
/// valid for reads and writes of one `TaskRegisters` -- confirm against the
/// full definition.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    // ... (body elided)
}

SS

RSP 

RFLAGS

CS

RIP

Stack

CPU regs

RSP

RIP

...

other registers

/// Interrupt entry point that switches to a task.
///
/// Pushes the interrupted task's general-purpose registers onto the current
/// stack, hands a pointer to that saved frame to `schedule_inner`, signals
/// end-of-interrupt, then pops a full register set from the frame and
/// returns with `iretq`. Because `schedule_inner` receives a mutable pointer,
/// the values popped afterwards are whatever it left in the frame.
///
/// # Safety
/// The body ends in `iretq`, so a CPU-pushed interrupt frame (RIP, CS,
/// RFLAGS, RSP, SS) must be on the stack at entry; calling this like an
/// ordinary function does not provide one. It also executes `wrmsr`, which
/// is a privileged instruction.
#[unsafe(naked)]
pub unsafe extern "x86-interrupt" fn schedule() {
    naked_asm!(
        // Save the general-purpose registers. The last register pushed (r15)
        // ends up at the lowest address, which is where rsp points afterwards.
        "push rax",
        "push rbx",
        "push rcx",
        "push rdx",
        "push rsi",
        "push rdi",
        "push rbp",
        "push r8",
        "push r9",
        "push r10",
        "push r11",
        "push r12",
        "push r13",
        "push r14",
        "push r15",
        // rdi carries the first integer argument in the System V x86-64
        // calling convention.
        "mov rdi, rsp",        // pass a pointer to the saved register frame
        "call schedule_inner", // may rewrite the frame in place
        // Signal end-of-interrupt: `wrmsr` writes edx:eax (zero here) to the
        // MSR selected by ecx.
        "xor eax, eax",
        "xor edx, edx",
        "mov ecx, 0x80B", // x2APIC EOI register MSR -- assumes the local APIC is in x2APIC mode
        "wrmsr",
        // Pop registers from the frame in reverse push order. This also
        // replaces the eax/ecx/edx values used by the EOI sequence above.
        "pop r15",
        "pop r14",
        "pop r13",
        "pop r12",
        "pop r11",
        "pop r10",
        "pop r9",
        "pop r8",
        "pop rbp",
        "pop rdi",
        "pop rsi",
        "pop rdx",
        "pop rcx",
        "pop rbx",
        "pop rax",
        // Pops RIP, CS, RFLAGS, RSP and SS and resumes execution there.
        "iretq",
    );
}

/// Inner function to switch tasks.
///
/// Exported unmangled with the C ABI, so it can be called by symbol name.
/// `current_task_context` is a raw, mutable pointer to a `TaskRegisters`.
///
/// # Safety
/// NOTE(review): the body is elided here; presumably the pointer must be
/// valid for reads and writes of one `TaskRegisters` -- confirm against the
/// full definition.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    // ... (body elided)
}

SS

RSP 

RFLAGS

CS

RIP

Stack

CPU regs

RSP

RIP

...

other registers

RAX

...

R15

...

other registers

RSP

/// Inner function to switch tasks.
///
/// Takes the task at the front of the scheduler's task list. Unless that
/// task is `Terminated`, its registers are saved from `current_task_context`,
/// it is marked `Ready`, and it is re-queued at the back. The task that is
/// then at the front is marked `Running` and its saved registers are written
/// back through `current_task_context`.
///
/// # Safety
/// `current_task_context` must be non-null, properly aligned, and valid for
/// both reads and writes of one `TaskRegisters` for the duration of the call.
///
/// # Panics
/// Panics if the task list is empty on entry, or if it is empty after a
/// terminated task has been removed.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    let mut scheduler = TASK_SCHEDULER.lock();

    // save current task context first
    let mut current_task = scheduler
        .task_list
        .pop_front()
        .expect("schedule_inner called with an empty task list");

    if current_task.state == TaskState::Terminated {
        // A terminated task is not re-queued.
        trace!("task ended at {:#X}", current_task.regs.interrupt_rsp);
    } else {
        current_task.state = TaskState::Ready;
        // SAFETY: the caller guarantees the pointer is valid for reads.
        current_task.regs = unsafe { *current_task_context };
        // Log before `push_back`: the queue takes ownership of the task, so
        // it cannot be read afterwards.
        trace!("task paused at {:#X}", current_task.regs.interrupt_rsp);
        scheduler.task_list.push_back(current_task);
    }

    // run front task
    let next_task = scheduler
        .task_list
        .front_mut()
        .expect("no task left in the task list to switch to");

    next_task.state = TaskState::Running;
    // SAFETY: the caller guarantees the pointer is valid for writes.
    unsafe { *current_task_context = next_task.regs };
}
/// Saved CPU register values for one task.
///
/// `#[repr(C)]` keeps the fields in declaration order. They are listed in the
/// reverse of the order in which the registers are pushed onto the stack, so
/// the register pushed last (`r15`) comes first. Do not reorder the fields:
/// the struct is read and written through a raw pointer to the stack.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(C)]
struct TaskRegisters {
    // General-purpose registers, in reverse order of stack push.
    r15: u64,
    r14: u64,
    r13: u64,
    r12: u64,
    r11: u64,
    r10: u64,
    r9: u64,
    r8: u64,
    rbp: u64,
    rdi: u64,
    rsi: u64,
    rdx: u64,
    rcx: u64,
    rbx: u64,
    rax: u64,

    // Interrupt stack frame pushed by the CPU when the interrupt is taken,
    // before any handler code runs; `iretq` pops these five values.
    interrupt_rip: u64,
    interrupt_cs: u64,
    interrupt_rflags: u64,
    interrupt_rsp: u64,
    interrupt_ss: u64,
}

SS

RSP 

RFLAGS

CS

RIP

Current Stack

RAX

...

R15

/// Saved CPU register values for one task.
///
/// `#[repr(C)]` keeps the fields in declaration order. They are listed in the
/// reverse of the order in which the registers are pushed onto the stack, so
/// the register pushed last (`r15`) comes first. Do not reorder the fields:
/// the struct is read and written through a raw pointer to the stack.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(C)]
struct TaskRegisters {
    // General-purpose registers, in reverse order of stack push.
    r15: u64,
    r14: u64,
    r13: u64,
    r12: u64,
    r11: u64,
    r10: u64,
    r9: u64,
    r8: u64,
    rbp: u64,
    rdi: u64,
    rsi: u64,
    rdx: u64,
    rcx: u64,
    rbx: u64,
    rax: u64,

    // Interrupt stack frame pushed by the CPU when the interrupt is taken,
    // before any handler code runs; `iretq` pops these five values.
    interrupt_rip: u64,
    interrupt_cs: u64,
    interrupt_rflags: u64,
    interrupt_rsp: u64,
    interrupt_ss: u64,
}

SS

RSP 

RFLAGS

CS

RIP

Current Stack

RAX

...

R15

/// Inner function to switch tasks.
///
/// Takes the task at the front of the scheduler's task list. Unless that
/// task is `Terminated`, its registers are saved from `current_task_context`,
/// it is marked `Ready`, and it is re-queued at the back. The task that is
/// then at the front is marked `Running` and its saved registers are written
/// back through `current_task_context`.
///
/// # Safety
/// `current_task_context` must be non-null, properly aligned, and valid for
/// both reads and writes of one `TaskRegisters` for the duration of the call.
///
/// # Panics
/// Panics if the task list is empty on entry, or if it is empty after a
/// terminated task has been removed.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    let mut scheduler = TASK_SCHEDULER.lock();

    // save current task context first
    let mut current_task = scheduler
        .task_list
        .pop_front()
        .expect("schedule_inner called with an empty task list");

    if current_task.state == TaskState::Terminated {
        // A terminated task is not re-queued.
        trace!("task ended at {:#X}", current_task.regs.interrupt_rsp);
    } else {
        current_task.state = TaskState::Ready;
        // SAFETY: the caller guarantees the pointer is valid for reads.
        current_task.regs = unsafe { *current_task_context };
        // Log before `push_back`: the queue takes ownership of the task, so
        // it cannot be read afterwards.
        trace!("task paused at {:#X}", current_task.regs.interrupt_rsp);
        scheduler.task_list.push_back(current_task);
    }

    // run front task
    let next_task = scheduler
        .task_list
        .front_mut()
        .expect("no task left in the task list to switch to");

    next_task.state = TaskState::Running;
    // SAFETY: the caller guarantees the pointer is valid for writes.
    unsafe { *current_task_context = next_task.regs };
}
/// Interrupt entry point that switches to a task.
///
/// Pushes the interrupted task's general-purpose registers onto the current
/// stack, hands a pointer to that saved frame to `schedule_inner`, signals
/// end-of-interrupt, then pops a full register set from the frame and
/// returns with `iretq`. Because `schedule_inner` receives a mutable pointer,
/// the values popped afterwards are whatever it left in the frame.
///
/// # Safety
/// The body ends in `iretq`, so a CPU-pushed interrupt frame (RIP, CS,
/// RFLAGS, RSP, SS) must be on the stack at entry; calling this like an
/// ordinary function does not provide one. It also executes `wrmsr`, which
/// is a privileged instruction.
#[unsafe(naked)]
pub unsafe extern "x86-interrupt" fn schedule() {
    naked_asm!(
        // Save the general-purpose registers. The last register pushed (r15)
        // ends up at the lowest address, which is where rsp points afterwards.
        "push rax",
        "push rbx",
        "push rcx",
        "push rdx",
        "push rsi",
        "push rdi",
        "push rbp",
        "push r8",
        "push r9",
        "push r10",
        "push r11",
        "push r12",
        "push r13",
        "push r14",
        "push r15",
        // rdi carries the first integer argument in the System V x86-64
        // calling convention.
        "mov rdi, rsp",        // pass a pointer to the saved register frame
        "call schedule_inner", // may rewrite the frame in place
        // Signal end-of-interrupt: `wrmsr` writes edx:eax (zero here) to the
        // MSR selected by ecx.
        "xor eax, eax",
        "xor edx, edx",
        "mov ecx, 0x80B", // x2APIC EOI register MSR -- assumes the local APIC is in x2APIC mode
        "wrmsr",
        // Pop registers from the frame in reverse push order. This also
        // replaces the eax/ecx/edx values used by the EOI sequence above.
        "pop r15",
        "pop r14",
        "pop r13",
        "pop r12",
        "pop r11",
        "pop r10",
        "pop r9",
        "pop r8",
        "pop rbp",
        "pop rdi",
        "pop rsi",
        "pop rdx",
        "pop rcx",
        "pop rbx",
        "pop rax",
        // Pops RIP, CS, RFLAGS, RSP and SS and resumes execution there.
        "iretq",
    );
}

/// Inner function to switch tasks.
///
/// Exported unmangled with the C ABI, so it can be called by symbol name.
/// `current_task_context` is a raw, mutable pointer to a `TaskRegisters`.
///
/// # Safety
/// NOTE(review): the body is elided here; presumably the pointer must be
/// valid for reads and writes of one `TaskRegisters` -- confirm against the
/// full definition.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    // ... (body elided)
}

new SS

new RSP 

new RFLAGS

new CS

new RIP

Stack

CPU regs

RSP

RIP

...

other registers

new RAX

...

new R15

...

other registers

RSP

/// Interrupt entry point that switches to a task (abbreviated listing).
///
/// Hands a pointer to the register frame saved on the current stack to
/// `schedule_inner`, signals end-of-interrupt, then pops a full register set
/// from the frame and returns with `iretq`.
///
/// # Safety
/// The body ends in `iretq`, so a CPU-pushed interrupt frame (RIP, CS,
/// RFLAGS, RSP, SS) must be on the stack at entry; calling this like an
/// ordinary function does not provide one. It also executes `wrmsr`, which
/// is a privileged instruction.
#[unsafe(naked)]
pub unsafe extern "x86-interrupt" fn schedule() {
    naked_asm!(
        // ... (presumably the pushes of the other general-purpose registers,
        // elided here; the pops below restore fifteen registers)
        "push r15",
        // rdi carries the first integer argument in the System V x86-64
        // calling convention.
        "mov rdi, rsp",        // pass a pointer to the saved register frame
        "call schedule_inner", // may rewrite the frame in place
        // Signal end-of-interrupt: `wrmsr` writes edx:eax (zero here) to the
        // MSR selected by ecx.
        "xor eax, eax",
        "xor edx, edx",
        "mov ecx, 0x80B", // x2APIC EOI register MSR -- assumes the local APIC is in x2APIC mode
        "wrmsr",
        // Pop registers from the frame. This also replaces the eax/ecx/edx
        // values used by the EOI sequence above.
        "pop r15",
        "pop r14",
        "pop r13",
        "pop r12",
        "pop r11",
        "pop r10",
        "pop r9",
        "pop r8",
        "pop rbp",
        "pop rdi",
        "pop rsi",
        "pop rdx",
        "pop rcx",
        "pop rbx",
        "pop rax",
        // Pops RIP, CS, RFLAGS, RSP and SS and resumes execution there.
        "iretq",
    );
}

/// Inner function to switch tasks.
///
/// Exported unmangled with the C ABI, so it can be called by symbol name.
/// `current_task_context` is a raw, mutable pointer to a `TaskRegisters`.
///
/// # Safety
/// NOTE(review): the body is elided here; presumably the pointer must be
/// valid for reads and writes of one `TaskRegisters` -- confirm against the
/// full definition.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    // ... (body elided)
}

new SS

new RSP 

new RFLAGS

new CS

new RIP

Stack

CPU regs

RSP

RIP

...

other registers

new RAX

...

new R15

...

other registers

RSP

new RAX

...

new R15

/// Interrupt entry point that switches to a task (abbreviated listing).
///
/// Hands a pointer to the register frame saved on the current stack to
/// `schedule_inner`, signals end-of-interrupt, then pops a full register set
/// from the frame and returns with `iretq`.
///
/// # Safety
/// The body ends in `iretq`, so a CPU-pushed interrupt frame (RIP, CS,
/// RFLAGS, RSP, SS) must be on the stack at entry; calling this like an
/// ordinary function does not provide one. It also executes `wrmsr`, which
/// is a privileged instruction.
#[unsafe(naked)]
pub unsafe extern "x86-interrupt" fn schedule() {
    naked_asm!(
        // ... (presumably the pushes of the other general-purpose registers,
        // elided here; the pops below restore fifteen registers)
        "push r15",
        // rdi carries the first integer argument in the System V x86-64
        // calling convention.
        "mov rdi, rsp",        // pass a pointer to the saved register frame
        "call schedule_inner", // may rewrite the frame in place
        // Signal end-of-interrupt: `wrmsr` writes edx:eax (zero here) to the
        // MSR selected by ecx.
        "xor eax, eax",
        "xor edx, edx",
        "mov ecx, 0x80B", // x2APIC EOI register MSR -- assumes the local APIC is in x2APIC mode
        "wrmsr",
        // Pop registers from the frame. This also replaces the eax/ecx/edx
        // values used by the EOI sequence above.
        "pop r15",
        "pop r14",
        "pop r13",
        "pop r12",
        "pop r11",
        "pop r10",
        "pop r9",
        "pop r8",
        "pop rbp",
        "pop rdi",
        "pop rsi",
        "pop rdx",
        "pop rcx",
        "pop rbx",
        "pop rax",
        // Pops RIP, CS, RFLAGS, RSP and SS and resumes execution there.
        "iretq",
    );
}

/// Inner function to switch tasks.
///
/// Exported unmangled with the C ABI, so it can be called by symbol name.
/// `current_task_context` is a raw, mutable pointer to a `TaskRegisters`.
///
/// # Safety
/// NOTE(review): the body is elided here; presumably the pointer must be
/// valid for reads and writes of one `TaskRegisters` -- confirm against the
/// full definition.
#[unsafe(no_mangle)]
unsafe extern "C" fn schedule_inner(current_task_context: *mut TaskRegisters) {
    // ... (body elided)
}

new SS

new RSP 

new RFLAGS

new CS

new RIP

Stack

CPU regs

RSP

RIP

new RAX

...

new R15

...

other registers

RSP

Stack

; new task's instructions

new SS

new RSP 

new RFLAGS

new CS

new RIP

New task's Stack

CPU regs

RSP

RIP

new RAX

...

new R15

...

other registers

RSP

Stack

Thanks for listening

https://github.com/JayAndJef

Task Switching in Rust

By Jayden Q

Task Switching in Rust

  • 94