Piotr Gankiewicz
# whoami
+10 YOE
# intro
# intro
# iggy
# iggy
# iggy
# iggy
# iggy
# iggy
# stream
# stream
# build
# build
/// A single stream of messages.
struct Stream {
    /// Numeric identifier of the stream.
    id: u32,
    /// Offset counter of the stream.
    /// NOTE(review): presumably the offset given to the most recently
    /// appended message — confirm against the `append` implementation.
    offset: u64,
    /// Path of the stream's backing storage, kept as a plain string.
    path: String,
}
/// A single message: an offset plus an opaque byte payload.
struct Message {
    /// Offset of the message.
    offset: u64,
    /// Raw message body.
    payload: Vec<u8>,
}
# build
impl Stream {
    /// Appends `message` to the stream.
    ///
    /// Not implemented yet: calling it panics via `todo!()`.
    fn append(&mut self, message: Message) {
        // TODO: Persist the append-only data
        todo!("persist the append-only data")
    }

    /// Returns messages selected by `offset` and `count`.
    ///
    /// Not implemented yet: calling it panics via `todo!()`. The macro also
    /// lets the stub type-check against the `Vec<Message>` return type,
    /// which a comment-only body cannot do.
    fn poll(&self, offset: u64, count: u64) -> Vec<Message> {
        // TODO: Load the persisted data by offset
        todo!("load the persisted data by offset")
    }
}
# build
impl Message {
fn as_bytes(&self) -> Vec<u8> {
let mut bytes = vec![];
bytes.put_u64(self.offset);
bytes.put_u32(self.payload.len());
bytes.put(&self.payload);
bytes
}
}
# build
impl Message {
    /// Decodes one message from the start of `bytes`.
    ///
    /// Expected layout (all integers little-endian): an 8-byte offset, a
    /// 4-byte payload length, then that many payload bytes. Any bytes after
    /// the payload are ignored.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is shorter than the 12-byte header, or shorter than
    /// the header plus the payload length declared in it.
    fn from_bytes(bytes: &[u8]) -> Message {
        let mut offset_bytes = [0u8; 8];
        offset_bytes.copy_from_slice(&bytes[0..8]);
        let offset = u64::from_le_bytes(offset_bytes);

        let mut length_bytes = [0u8; 4];
        length_bytes.copy_from_slice(&bytes[8..12]);
        // u32 -> usize only widens on 32- and 64-bit targets.
        let length = u32::from_le_bytes(length_bytes) as usize;

        let payload = bytes[12..12 + length].to_vec();
        Message { offset, payload }
    }
}
# build
|................|
|....hello.......|
|.............wor|
|ld|
00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00
05 00 00 00 68 65 6c 6c 6f 01 00 00 00 00 00 00
00 02 00 00 00 00 00 00 00 05 00 00 00 77 6f 72
6c 64
# build
impl Stream {
    /// Assigns the next offset to `message` and writes its serialized bytes
    /// to the file at `self.path`.
    ///
    /// This version only writes; it does not call `sync_all` afterwards.
    async fn append(&mut self, mut message: Message) {
        // The stream offset is advanced first, so the message receives the
        // incremented value. `message` must be a `mut` binding for this
        // assignment to compile.
        self.offset += 1;
        message.offset = self.offset;
        let bytes = message.as_bytes();
        // NOTE(review): the outcomes of `open` and `write_all` are not
        // inspected here — confirm how the `file` helper reports errors.
        let mut file = file::open(&self.path).await;
        file.write_all(&bytes).await;
    }
}
# build
impl Stream {
    /// Assigns the next offset to `message`, writes its serialized bytes to
    /// the file at `self.path`, and then calls `sync_all` on that file before
    /// returning.
    async fn append(&mut self, mut message: Message) {
        // The stream offset is advanced first, so the message receives the
        // incremented value. `message` must be a `mut` binding for this
        // assignment to compile.
        self.offset += 1;
        message.offset = self.offset;
        let bytes = message.as_bytes();
        // NOTE(review): the outcomes of `open`, `write_all` and `sync_all`
        // are not inspected here — confirm how the `file` helper reports
        // errors.
        let mut file = file::open(&self.path).await;
        file.write_all(&bytes).await;
        // Sync after every single message.
        file.sync_all().await;
    }
}
# build
# build
# build
# build
/// Buffers `messages` in memory and flushes the whole buffer in one write
/// once it holds at least 1000 messages.
///
/// Below that threshold the call returns without touching the file.
async fn append(&mut self, messages: Vec<Message>) {
    self.unsaved_messages.extend(messages);
    if self.unsaved_messages.len() < 1000 {
        return;
    }
    // Serialize every buffered message into one contiguous byte buffer so
    // the batch goes out in a single write. The buffer is iterated by
    // reference: it lives behind `&mut self` and is cleared below.
    let mut bytes = vec![];
    for message in &self.unsaved_messages {
        bytes.extend_from_slice(&message.as_bytes());
    }
    // NOTE(review): `file` is not defined inside this snippet — presumably
    // the stream's open file handle; confirm where it comes from.
    file.write_all(&bytes).await;
    file.sync_all().await; // fsync() here?
    self.unsaved_messages.clear();
}
# build
spawn(async move {
let mut interval = interval(Duration::from_secs(5));
loop {
interval.tick().await;
stream.persist_unsaved_messages().await; // fsync()
}
}
# build
# build
async fn poll(start_offset: u64) -> Vec<Message> {
let mut messages = vec![];
let file = file::open(&self.path).await;
loop {
let Ok(offset) = file.read_u64().await else {
break; // EOF
}
let length = file.read_u32().await;
let mut payload = vec![0; length];
file.read_exact(&mut payload).await;
if offset >= start_offset {
messages.push(Message {offset, payload});
}
}
messages
}
# build
# build
/// Index-maintaining part of `append`: after the elided message handling,
/// advances the running byte position and writes it to the index file.
async fn append(&mut self, message: Message) {
// ... previous stuff
// `bytes` comes from the elided code above; the position grows by its length.
self.position += bytes.len(); // Message as bytes
let mut file = file::open(&self.index_path).await;
// Each index entry is written as a single u32.
// NOTE(review): the value written is the position *after* this message was
// added (the increment happens first) — confirm readers expect an end
// position rather than a start position.
file.write_u32(self.position).await;
}
# build
/// Reads a byte position for `offset` from the index file, then seeks the
/// stream file to that position.
///
/// NOTE(review): the message-loading step is elided, so the body as written
/// does not yet produce the `Vec<Message>` it declares.
async fn poll(&self, offset: u64, count: u64) -> Vec<Message> {
let file = file::open(&self.index_path).await;
// Index entries are read as u32 (4 bytes each), hence the seek to `4 * offset`.
file.seek(SeekFrom::Start(4 * offset)).await;
let position = file.read_u32().await;
// Reopen on the stream file and jump to the position taken from the index.
let file = file::open(&self.stream_path).await;
file.seek(SeekFrom::Start(position)).await;
// Load N messages based on the count
}
# build
# network
# network
// Bind the TCP listener; failing to bind is treated as fatal (panic).
let listener = TcpListener::bind("127.0.0.1:5000")
    .await
    .expect("Unable to start TCP server.");
// Accept connections forever. Each accepted connection is handled on its own
// spawned task. A failed accept is logged and the loop keeps going.
loop {
    match listener.accept().await {
        Ok((tcp_stream, address)) => {
            info!("Accepted new connection: {address}");
            spawn(async move {
                handle_connection(address, tcp_stream).await
            });
        }
        Err(err) => error!("Can't handle connection: {err}"),
    }
}
# network
+-----------------------------------------------------------+
| | | |
| CODE | LENGTH | PAYLOAD |
| | | |
+-----------------------------------------------------------+
| 4 bytes | 4 bytes | N bytes |
# network
/// Server state: the streams and the clients, each held in a map with `u32`
/// keys.
struct Server {
    /// Streams by `u32` key (presumably the stream id — confirm).
    streams: HashMap<u32, Stream>,
    /// Clients by `u32` key (presumably the client id — confirm).
    clients: HashMap<u32, Client>,
}
impl Server {
    /// Forwards `message` to the `append` of the stream found for
    /// `stream_id`.
    async fn append(&self, stream_id: u32, message: Message) {
        self.get_stream(stream_id).append(message).await
    }

    /// Forwards the `offset` / `count` query to the `poll` of the stream
    /// found for `stream_id` and returns its result.
    async fn poll(&self, stream_id: u32, offset: u64, count: u64) -> Vec<Message> {
        self.get_stream(stream_id).poll(offset, count).await
    }
}
# network
/// Server state in which every stream sits behind its own shared
/// reader-writer lock.
struct Server {
    /// Streams by `u32` key (presumably the stream id — confirm), each
    /// wrapped in `Arc<RwLock<_>>`.
    streams: HashMap<u32, Arc<RwLock<Stream>>>,
    /// Clients by `u32` key (presumably the client id — confirm).
    clients: HashMap<u32, Client>,
}
impl Server {
    /// Takes the write lock of the stream found for `stream_id` and appends
    /// `message` to it. The lock is held until the append completes.
    async fn append(&self, stream_id: u32, message: Message) {
        let stream = self.get_stream(stream_id);
        // The guard must be a `mut` binding: the stream's `append` takes
        // `&mut self`.
        let mut stream = stream.write().await; // Acquire write lock
        stream.append(message).await
    }

    /// Takes the read lock of the stream found for `stream_id` and polls it
    /// with `offset` and `count`. The lock is held until the poll completes.
    async fn poll(&self, stream_id: u32, offset: u64, count: u64) -> Vec<Message> {
        let stream = self.get_stream(stream_id);
        let stream = stream.read().await; // Acquire read lock
        stream.poll(offset, count).await
    }
}
# network
# network
/// A topic: an identified, path-backed collection of partitions.
struct Topic {
    /// Numeric identifier of the topic.
    id: u32,
    /// Path associated with the topic, kept as a plain string.
    path: String,
    /// Partitions by `u32` key (presumably the partition id — confirm).
    partitions: HashMap<u32, Partition>,
}
/// A partition: an identifier, an offset counter and a path.
struct Partition {
    /// Numeric identifier of the partition.
    /// NOTE(review): this is `u64`, while other ids in these snippets are
    /// `u32` — confirm which width is intended.
    id: u64,
    /// Offset counter of the partition.
    offset: u64,
    /// Path associated with the partition, kept as a plain string.
    path: String,
}
# network
/// Server state in which every topic sits behind its own shared
/// reader-writer lock.
struct Server {
    /// Topics by `u32` key (presumably the topic id — confirm), each wrapped
    /// in `Arc<RwLock<_>>`.
    topics: HashMap<u32, Arc<RwLock<Topic>>>,
    /// Clients by `u32` key (presumably the client id — confirm).
    clients: HashMap<u32, Client>,
}
/// A topic that owns its partitions directly, with no per-partition lock.
struct Topic {
    /// Numeric identifier of the topic.
    id: u32,
    /// Path associated with the topic, kept as a plain string.
    path: String,
    /// Partitions by `u32` key (presumably the partition id — confirm).
    partitions: HashMap<u32, Partition>,
}
# network
/// Server state that owns its topics directly, with no lock at this level.
struct Server {
    /// Topics by `u32` key (presumably the topic id — confirm).
    topics: HashMap<u32, Topic>,
    /// Clients by `u32` key (presumably the client id — confirm).
    clients: HashMap<u32, Client>,
}
/// A topic in which every partition sits behind its own shared
/// reader-writer lock.
struct Topic {
    /// Numeric identifier of the topic.
    id: u32,
    /// Path associated with the topic, kept as a plain string.
    path: String,
    /// Partitions by `u32` key (presumably the partition id — confirm), each
    /// wrapped in `Arc<RwLock<_>>`.
    partitions: HashMap<u32, Arc<RwLock<Partition>>>,
}
# performance
# performance
/// Copies the `N` bytes of `value` into `slice`, starting at `position`.
///
/// Bytes outside `position..position + N` are left untouched.
///
/// # Panics
///
/// Panics if `position..position + N` is not a valid range within `slice`.
fn write_value_at<const N: usize>(slice: &mut [u8], value: [u8; N], position: usize) {
    // The range index already bounds-checks the destination, so a raw-pointer
    // copy saves nothing. `copy_from_slice` does the same copy in safe code;
    // the lengths always match because the destination is exactly N bytes.
    slice[position..position + N].copy_from_slice(&value);
}
# performance
# performance
impl Stream {
    /// Assigns the next offset to `message` and writes its serialized bytes
    /// to the file at `self.path`.
    ///
    /// The file is opened inside the call and the handle goes out of scope
    /// when the function returns, so every append pays for an open and a
    /// close.
    async fn append(&mut self, mut message: Message) {
        // `message` must be a `mut` binding for the offset assignment to
        // compile.
        self.offset += 1;
        message.offset = self.offset;
        let bytes = message.as_bytes();
        // 1. Open file
        let mut file = file::open(&self.path).await;
        // 2. Write to file
        file.write_all(&bytes).await;
    } // 3. Close file
}
# performance
# performance
# performance
const O_DIRECT = 0x4000;
const O_DSYNC = 0x4096;
let file = std::fs::File::options()
.read(true)
.write(true)
.custom_flags(O_DIRECT | O_DSYNC)
.open(self.file_path)
.unwrap();
# performance
/// Allocates an uninitialized buffer of `size` bytes aligned to 512 bytes.
///
/// NOTE(review): 512 is presumably the block alignment required for direct
/// I/O on the target device — confirm.
///
/// # Panics
///
/// Panics if `size` is zero, if `size` and the alignment do not form a valid
/// layout, or if the allocator returns a null pointer.
fn new(size: usize) -> Self {
    // Requesting a zero-sized allocation from the global allocator is
    // undefined behavior, so reject it before reaching the unsafe call.
    assert!(size > 0, "DMA buffer size must be non-zero");
    let layout = Layout::from_size_align(size, 512)
        .expect("Fail to create layout for DMA Buffer");
    // SAFETY: `layout` has a non-zero size (asserted above) and an alignment
    // already validated by `Layout::from_size_align`.
    let data_ptr = unsafe { alloc::alloc(layout) };
    // A null pointer signals allocation failure.
    let data = ptr::NonNull::new(data_ptr)
        .expect("Not null pointer");
    Self { data, layout, size }
}
# performance
# performance
# performance
// Read one little-endian u32 from the file at the current position, then
// advance the position by 4 bytes.
let file = file::open(&self.path).await?;
let mut position = 0;
// The buffer is passed to the read call by value and handed back with the result.
// NOTE(review): the buffer has capacity 4 but length 0 — presumably the read
// API fills up to capacity; confirm against its documentation.
let buffer = Vec::with_capacity(4);
let (result, buffer) = file.read_exact_at(buffer, position).await;
// Any read failure is reported as `Error::InvalidOffset`.
if result.is_err() {
return Err(Error::InvalidOffset);
}
let offset = u32::from_le_bytes(buffer.try_into()?);
position += 4;
# performance
# performance
# performance
impl Stream {
    /// Takes the write lock of the partition found for `partition_id` and
    /// appends `message` to it. The lock is held until the append completes.
    async fn append(
        &self,
        partition_id: u32,
        message: Message
    ) {
        let partition = self.get_partition(partition_id);
        // Maybe a context switch if lock is contended
        // The guard is bound as `mut` so it can hand out the mutable access
        // an append needs.
        let mut partition = partition.write().await;
        // Context switch due to async
        partition.append(message).await
    }
}
# performance
# performance
# performance
let cores = 0..available_parallelism()?.into();
let connections = cores
.into_iter()
.map(|core| ShardConnector::new(core, cores))
.collect::<Vec<_>>();
let handles = cores
.map(|core| { std::thread::Builder::new()
.name(format!("Thread #{core}"))
.spawn(move || {
let connections = connections.clone();
monoio::utils::bind_to_cpu_set(Some(cpu))
.expect("Failed to set thread affinity");
let mut rt = RuntimeBuilder::<IoUringDriver>::new()
.build()
.expect("Failed to build monoio runtime");
// Init the shard
});
# performance
# performance
# thanks