Piotr Gankiewicz
# whoami
+10 YOE
# intro
# intro
# iggy
# iggy
# iggy
# iggy
# iggy
# iggy
# stream
# stream
# build
# build
/// An append-only stream of messages backed by a file on disk.
struct Stream {
    /// Numeric identifier of the stream.
    id: u32,
    /// Offset counter; `append` advances it and stamps the new value on each message.
    offset: u64,
    /// Path of the file that `append` opens and writes to.
    path: String,
}
/// A single record stored in a stream.
struct Message {
    /// Offset assigned to the message when it is appended to a stream.
    offset: u64,
    /// Raw message body.
    payload: Vec<u8>,
}
# build
impl Stream {
    /// Appends `message` to the stream.
    ///
    /// # Panics
    /// Always panics for now: persistence is not implemented yet.
    fn append(&mut self, message: Message) {
        // TODO: Persist the append-only data
        todo!("persist the append-only data")
    }

    /// Returns up to `count` messages starting at `offset`.
    ///
    /// # Panics
    /// Always panics for now: loading is not implemented yet. `todo!()` also
    /// lets the stub type-check against the `Vec<Message>` return type.
    fn poll(&self, offset: u64, count: u64) -> Vec<Message> {
        // TODO: Load the persisted data by offset
        todo!("load the persisted data by offset")
    }
}
# build
impl Message {
fn as_bytes(&self) -> Vec<u8> {
let mut bytes = vec![];
bytes.put_u64(self.offset);
bytes.put_u32(self.payload.len());
bytes.put(&self.payload);
bytes
}
}
# build
impl Message {
fn from_bytes(bytes: &[u8]) -> Message {
let offset = bytes[0..8].into();
let length = bytes[8..12].into();
let payload = bytes[12..12 + length].to_vec();
Message {
offset,
payload
}
}
}
# build
|................|
|....hello.......|
|.............wor|
|ld|
00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00
05 00 00 00 68 65 6c 6c 6f 01 00 00 00 00 00 00
00 02 00 00 00 00 00 00 00 05 00 00 00 77 6f 72
6c 64
# build
impl Stream {
    /// Assigns the next offset to `message`, serializes it and writes it to
    /// the file at `self.path`.
    ///
    /// The parameter is bound as `mut` because the offset is stamped onto the
    /// message before serialization.
    async fn append(&mut self, mut message: Message) {
        // Advance the stream offset first so the message carries the new value.
        self.offset += 1;
        message.offset = self.offset;
        let bytes = message.as_bytes();
        let mut file = file::open(&self.path).await;
        // NOTE(review): whatever `write_all` reports is discarded here and the
        // signature has no way to surface a failure — confirm this is intended.
        file.write_all(&bytes).await;
    }
}
# build
impl Stream {
    /// Assigns the next offset to `message`, writes it to the file at
    /// `self.path` and then calls `sync_all` on the file before returning.
    ///
    /// The parameter is bound as `mut` because the offset is stamped onto the
    /// message before serialization.
    async fn append(&mut self, mut message: Message) {
        // Advance the stream offset first so the message carries the new value.
        self.offset += 1;
        message.offset = self.offset;
        let bytes = message.as_bytes();
        let mut file = file::open(&self.path).await;
        // NOTE(review): the outcomes of `write_all` and `sync_all` are discarded
        // and the signature cannot surface a failure — confirm this is intended.
        file.write_all(&bytes).await;
        file.sync_all().await;
    }
}
# build
# build
# build
/// Buffers `messages` and, once at least 1000 messages are pending, writes all
/// of them to the file in a single call, syncs, and empties the buffer.
async fn append(&mut self, messages: Vec<Message>) {
    self.unsaved_messages.extend(messages);
    // Below the batch threshold nothing is written yet.
    if self.unsaved_messages.len() < 1000 {
        return;
    }
    let mut bytes = vec![];
    // Borrow the buffer: it is still needed for `clear()` below.
    for message in &self.unsaved_messages {
        bytes.extend_from_slice(&message.as_bytes());
    }
    // NOTE(review): `file` is not defined in this snippet — presumably the
    // stream's open file handle; confirm where it comes from.
    file.write_all(&bytes).await;
    file.sync_all().await; // fsync() here?
    self.unsaved_messages.clear();
}
# build
spawn(async move {
let mut interval = interval(Duration::from_secs(5));
loop {
interval.tick().await;
stream.persist_unsaved_messages().await; // fsync()
}
}
# build
# build
# build
# build
// Index bookkeeping added to `append`: advances the running byte position by
// the size of the serialized message and writes that position to the index file.
async fn append(&mut self, message: Message) {
// ... previous stuff
// `bytes` comes from the elided code above (presumably `message.as_bytes()` — TODO confirm).
self.position += bytes.len(); // Message as bytes
// NOTE(review): `position` has already been advanced past this message when it
// is written, so the stored value is the end of the message rather than its
// start — confirm this is what `poll` expects to seek to.
let mut file = file::open(&self.index_path).await;
file.write_u32(self.position).await;
}
# build
// Looks up the byte position for `offset` in the index file, then seeks to
// that position in the stream file.
// NOTE(review): the snippet stops after the seek; loading `count` messages and
// returning them is elided.
async fn poll(&self, offset: u64, count: u64) -> Vec<Message> {
let file = file::open(&self.index_path).await;
// Index entries are read as 4-byte values, so entry `offset` starts at byte `4 * offset`.
file.seek(SeekFrom::Start(4 * offset)).await;
let position = file.read_u32().await;
// The name `file` is reused for the stream file, which is positioned at the indexed byte.
let file = file::open(&self.stream_path).await;
file.seek(SeekFrom::Start(position)).await;
// Load N messages based on the count
}
# network
# network
/// Server state: the streams it holds and the clients it tracks.
struct Server {
    /// Streams keyed by a `u32` (presumably the stream id — TODO confirm).
    streams: HashMap<u32, Stream>,
    /// Clients keyed by a `u32` (presumably the client id — TODO confirm).
    clients: HashMap<u32, Client>,
}
impl Server {
    /// Forwards `message` to the stream returned by `get_stream(stream_id)`.
    async fn append(&self, stream_id: u32, message: Message) {
        self.get_stream(stream_id).append(message).await
    }

    /// Polls the stream returned by `get_stream(stream_id)` with the given
    /// `offset` and `count`, and returns its result.
    async fn poll(&self, stream_id: u32, offset: u64, count: u64) -> Vec<Message> {
        self.get_stream(stream_id).poll(offset, count).await
    }
}
# network
/// Server state in which every stream sits behind its own shared read/write lock.
struct Server {
    /// Streams keyed by a `u32` (presumably the stream id — TODO confirm),
    /// each wrapped in `Arc<RwLock<_>>`.
    streams: HashMap<u32, Arc<RwLock<Stream>>>,
    /// Clients keyed by a `u32` (presumably the client id — TODO confirm).
    clients: HashMap<u32, Client>,
}
impl Server {
    /// Appends `message` to the stream returned by `get_stream(stream_id)`
    /// while holding that stream's write lock.
    async fn append(&self, stream_id: u32, message: Message) {
        let lock = self.get_stream(stream_id);
        // Acquire write lock. The guard is bound as `mut` because
        // `Stream::append` takes `&mut self`.
        let mut stream = lock.write().await;
        stream.append(message).await
    }

    /// Polls the stream returned by `get_stream(stream_id)` while holding
    /// that stream's read lock.
    async fn poll(&self, stream_id: u32, offset: u64, count: u64) -> Vec<Message> {
        let lock = self.get_stream(stream_id);
        // Acquire read lock
        let stream = lock.read().await;
        stream.poll(offset, count).await
    }
}
# network
# network
/// A topic: an id, a path and a set of partitions.
struct Topic {
    /// Numeric identifier of the topic.
    id: u32,
    /// Path associated with the topic (presumably its directory on disk — TODO confirm).
    path: String,
    /// Partitions keyed by a `u32` (presumably the partition id — TODO confirm).
    partitions: HashMap<u32, Partition>,
}
/// A partition: an id, an offset counter and a path.
struct Partition {
    /// Numeric identifier of the partition.
    // NOTE(review): this is `u64`, while `Topic::partitions` is keyed by `u32` —
    // confirm which width is intended.
    id: u64,
    /// Offset counter of the partition.
    offset: u64,
    /// Path associated with the partition (presumably its file on disk — TODO confirm).
    path: String,
}
# network
/// Server state in which every topic sits behind its own shared read/write lock.
struct Server {
    /// Topics keyed by a `u32` (presumably the topic id — TODO confirm),
    /// each wrapped in `Arc<RwLock<_>>`.
    topics: HashMap<u32, Arc<RwLock<Topic>>>,
    /// Clients keyed by a `u32` (presumably the client id — TODO confirm).
    clients: HashMap<u32, Client>,
}
/// A topic whose partitions are stored directly, without a lock of their own.
struct Topic {
    /// Numeric identifier of the topic.
    id: u32,
    /// Path associated with the topic (presumably its directory on disk — TODO confirm).
    path: String,
    /// Partitions keyed by a `u32` (presumably the partition id — TODO confirm).
    partitions: HashMap<u32, Partition>,
}
# network
/// Server state in which topics are stored directly, without a lock of their own.
struct Server {
    /// Topics keyed by a `u32` (presumably the topic id — TODO confirm).
    topics: HashMap<u32, Topic>,
    /// Clients keyed by a `u32` (presumably the client id — TODO confirm).
    clients: HashMap<u32, Client>,
}
/// A topic in which every partition sits behind its own shared read/write lock.
struct Topic {
    /// Numeric identifier of the topic.
    id: u32,
    /// Path associated with the topic (presumably its directory on disk — TODO confirm).
    path: String,
    /// Partitions keyed by a `u32` (presumably the partition id — TODO confirm),
    /// each wrapped in `Arc<RwLock<_>>`.
    partitions: HashMap<u32, Arc<RwLock<Partition>>>,
}
# performance
# performance
/// Copies the `N` bytes of `value` into `slice`, starting at index `position`.
///
/// # Panics
/// Panics if the range `position..position + N` does not lie within `slice`.
fn write_value_at<const N: usize>(slice: &mut [u8], value: [u8; N], position: usize) {
    // The range index bounds-checks the destination, and `copy_from_slice` on
    // `Copy` data compiles down to a `memcpy`, so no `unsafe` pointer copy is
    // needed to get the same behavior.
    slice[position..position + N].copy_from_slice(&value);
}
# performance
# performance
impl Stream {
    /// Assigns the next offset to `message` and writes it to the file at
    /// `self.path`, opening the file anew on every call.
    ///
    /// The parameter is bound as `mut` because the offset is stamped onto the
    /// message before serialization.
    async fn append(&mut self, mut message: Message) {
        self.offset += 1;
        message.offset = self.offset;
        let bytes = message.as_bytes();
        // 1. Open file
        let mut file = file::open(&self.path).await;
        // 2. Write to file
        file.write_all(&bytes).await;
    } // 3. Close file (the handle is dropped when `file` goes out of scope)
}
# performance
# performance
# performance
# performance
// Read a 4-byte little-endian `u32` from the start of the file at `self.path`.
let file = file::open(&self.path).await?;
let mut position = 0;
// The buffer is handed to the read call and returned alongside its result.
let (result, buffer) = file
    .read_exact_at(Vec::with_capacity(4), position)
    .await;
// Any read failure is reported as `Error::InvalidOffset`.
result.map_err(|_| Error::InvalidOffset)?;
let offset = u32::from_le_bytes(buffer.try_into()?);
// Advance past the four bytes just consumed.
position += 4;
# performance
# performance
const O_DIRECT = 0x4000;
const O_DSYNC = 0x4096;
let file = std::fs::File::options()
.read(true)
.write(true)
.custom_flags(O_DIRECT | O_DSYNC)
.open(self.file_path);
# performance
# performance
impl Stream {
    /// Appends `message` to the partition returned by
    /// `get_partition(partition_id)` while holding that partition's write lock.
    async fn append(&self, partition_id: u32, message: Message) {
        let lock = self.get_partition(partition_id);
        // Maybe a context switch if lock is contended
        let guard = lock.write().await;
        // Context switch due to async
        guard.append(message).await
    }
}
# performance
# performance
https://tokio.rs/blog/2019-10-scheduler
# performance
# performance
# performance
# performance
# thanks