Demystifying AsyncIO

Building Your Own Event Loop in Python

whoami

Open-source:

  • ODMantic (async "ORM" for MongoDB)
  • CPython typo contributor

 

Building

@art049

Arthur Pastel

Software Engineer

Ever heard of?

  • AsyncIO
  • Event Loop
  • Futures
  • callbacks
  • coroutines
  • File descriptors
  • Selectors

What?

import asyncio

async def greet():
    """Print "Hello", wait one second on the event loop, then print "World"."""
    print("Hello")
    # Suspends this coroutine only; control goes back to the event loop.
    await asyncio.sleep(1)
    print("World")

if __name__ == "__main__":
    asyncio.run(greet())

What?

import asyncio

async def greet():
    """Print "Hello", wait one second on the event loop, then print "World"."""
    print("Hello")
    # Suspends this coroutine only; control goes back to the event loop.
    await asyncio.sleep(1)
    print("World")

if __name__ == "__main__":
    asyncio.run(greet())
Hello

What?

import asyncio

async def greet():
    """Print "Hello", wait one second on the event loop, then print "World"."""
    print("Hello")
    # Suspends this coroutine only; control goes back to the event loop.
    await asyncio.sleep(1)
    print("World")

if __name__ == "__main__":
    asyncio.run(greet())
Hello
World

An actual example

import requests
import random

POKE_URL = "https://pokeapi.co/api/v2"
POKE_COUNT = 1000

def encounter_a_pokemon(timeout: float = 10.0):
    """Fetch one random Pokemon from the API and announce it on stdout.

    Args:
        timeout: Seconds to wait for the server. Without an explicit
            timeout ``requests`` may block indefinitely.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    pokemon_id = random.randint(1, POKE_COUNT)
    resp = requests.get(f"{POKE_URL}/pokemon/{pokemon_id}", timeout=timeout)
    # Surface HTTP failures here rather than as a confusing JSON/KeyError below.
    resp.raise_for_status()
    pokemon = resp.json()["name"].title()
    print(f"A Wild {pokemon} Appears!")

def encounter_multiple_pokemons(n: int):
    """Run ``n`` encounters one after another; each blocks on its request."""
    for _ in range(n):
        encounter_a_pokemon()

if __name__ == "__main__":
    encounter_multiple_pokemons(5)

An actual example

GIL is free

The GIL (Global Interpreter Lock) is the lock that allows only one thread at a time to execute Python bytecode in the interpreter

The point of AsyncIO

The point of AsyncIO

import asyncio
import httpx
import random

POKE_URL = "https://pokeapi.co/api/v2/"
client = httpx.AsyncClient(base_url=POKE_URL)
POKE_COUNT = 1000

async def encounter_a_pokemon():
    """Fetch one random Pokemon through the shared client and announce it.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    pokemon_id = random.randint(1, POKE_COUNT)
    resp = await client.get(f"/pokemon/{pokemon_id}")
    # Surface HTTP failures here rather than as a confusing JSON/KeyError below.
    resp.raise_for_status()
    pokemon = resp.json()["name"].title()
    print(f"A Wild {pokemon} Appears!")

async def encounter_multiple_pokemons(n: int):
    """Start ``n`` encounters concurrently and wait for all of them."""
    tasks = (encounter_a_pokemon() for _ in range(n))
    await asyncio.gather(*tasks)

async def main():
    """Run the demo, then release the shared client's connections."""
    try:
        await encounter_multiple_pokemons(5)
    finally:
        # The module-level client owns a connection pool; close it explicitly.
        await client.aclose()

if __name__ == "__main__":
    asyncio.run(main())

The truth behind

asyncio.run(greet())

asyncio.run

loop = asyncio.new_event_loop()
try:
    # Wrap the coroutine in a task and drive the loop until it finishes.
    task = loop.create_task(greet())
    loop.run_until_complete(task)
finally:
    # asyncio.run() also closes the loop it created; do the same here so
    # the loop's resources are released.
    loop.close()

Creating our event loop

From CPython's `AbstractEventLoop` specification

The goal

from fastapi import FastAPI
from hypercorn import Config
from hypercorn.asyncio import serve

from loop import MyEventLoop

# Target demo: serve a FastAPI application with Hypercorn, driven by the
# custom event loop instead of asyncio's default one.
app = FastAPI()


@app.get("/")
async def root():
    """Handle GET / with a static JSON greeting."""
    return {"message": "Hello World"}


loop = MyEventLoop()
# Config() is created without arguments, i.e. with Hypercorn's defaults.
config = Config()
# serve() returns a coroutine; the custom loop is responsible for running it.
loop.run_until_complete(serve(app, config))

The beginning

Scheduling Callbacks

def foo():
    """Print the marker "foo"."""
    print("foo")


def bar():
    """Print the marker "bar"."""
    print("bar")


# Queue both callbacks, then hand control over to the loop.
loop = MyEventLoop()
loop.call_soon(foo)
loop.call_soon(bar)
loop.run_forever()

call_soon

Scheduling Callbacks

call_soon

Scheduling Callbacks

def foo():
    """Print the marker "foo"."""
    print("foo")


def bar():
    """Print the marker "bar"."""
    print("bar")


# Queue both callbacks, then hand control over to the loop.
loop = MyEventLoop()
loop.call_soon(foo)
loop.call_soon(bar)
loop.run_forever()

class MyEventLoop(AbstractEventLoop):
    """Minimal event loop that runs callbacks queued with ``call_soon``.

    Callbacks run in FIFO order. ``run_forever`` keeps iterating until
    ``stop`` is called; note that it spins without sleeping while the
    ready queue is empty.
    """

    def __init__(self):
        # FIFO queue of handles waiting to be executed.
        self._ready_handles: deque[Handle] = deque()
        # Set by stop(); checked by run_forever() between iterations.
        self._should_stop = False

    def get_debug(self) -> bool:
        """Report that debug mode is off.

        ``asyncio.Handle`` queries this from its constructor and from
        ``cancel()``; the ``AbstractEventLoop`` default raises
        ``NotImplementedError``, which would make ``call_soon`` unusable.
        """
        return False

    def call_soon(
        self, callback: Callable, *args: Any, context=None
    ) -> Handle:
        """Queue ``callback(*args)`` for the next loop iteration.

        Args:
            callback: The callable to run.
            *args: Positional arguments passed to ``callback``.
            context: Optional ``contextvars.Context`` to run the callback
                in; asyncio futures pass this keyword when scheduling
                their done-callbacks.

        Returns:
            The ``Handle`` wrapping the callback; it can be cancelled.
        """
        handle = Handle(callback, args, self, context)
        self._ready_handles.append(handle)
        return handle

    def _run_once(self):
        """Run the handles that were ready when this iteration started."""
        # Only drain what is queued right now: a callback that reschedules
        # itself is picked up by the next iteration, so run_forever() gets
        # a chance to observe stop().
        for _ in range(len(self._ready_handles)):
            handle = self._ready_handles.popleft()
            if handle.cancelled():
                # cancel() clears the callback; running it would fail.
                continue
            print(f"Running handle {handle}")
            handle._run()

    def stop(self):
        """Ask ``run_forever`` to return after the current iteration."""
        self._should_stop = True

    def run_forever(self):
        """Run loop iterations until ``stop`` is called."""
        try:
            while not self._should_stop:
                self._run_once()
        finally:
            # Reset so the loop can be started again later.
            self._should_stop = False

call_soon

Scheduling Callbacks

loop = MyEventLoop()

def foo():
    """Print the first half of the greeting."""
    print("Hello")

loop.call_soon(foo)

def baz():
    """Print the second half of the greeting."""
    print("Wooorld")

# call_at() takes an absolute timestamp on the loop's own clock, so the
# deadline is built from loop.time() rather than from the wall clock.
loop.call_at(loop.time() + 5, baz)
loop.run_forever()

call_at

Scheduling Callbacks

call_at

Scheduling Callbacks

loop = MyEventLoop()

def foo():
    """Print the first half of the greeting."""
    print("Hello")

loop.call_soon(foo)

def baz():
    """Print the second half of the greeting."""
    print("Wooorld")

# call_at() takes an absolute timestamp on the loop's own clock, so the
# deadline is built from loop.time() rather than from the wall clock.
loop.call_at(loop.time() + 5, baz)
loop.run_forever()
class MyEventLoop(AbstractEventLoop):
    """Event-loop fragment: timer scheduling on top of the ready queue."""

    # ...
    def call_at(
        self, when: float, callback: Callable, *args: Any
    ) -> TimerHandle:
        """Schedule ``callback(*args)`` to run at the absolute time ``when``.

        Args:
            when: Deadline, expressed on the same clock as ``self.time()``.
            callback: The callable to run once the deadline has passed.
            *args: Positional arguments passed to ``callback``.

        Returns:
            The ``TimerHandle`` that was inserted into the timer queue.
        """
        timer_handle = TimerHandle(
            when=when, callback=callback, args=args, loop=self
        )
        print(f"Created timer handle {timer_handle}")
        # Sorted insert keeps the earliest deadline at the front.
        bisect.insort(self._scheduled_handles, timer_handle)
        timer_handle._scheduled = True
        return timer_handle

    def _run_once(self):
        """Move every timer whose deadline has passed to the ready queue."""
        # Read the loop's own clock: call_later() builds its deadlines from
        # self.time(), so comparing against another clock could fire timers
        # at the wrong moment.
        now = self.time()
        while (
            self._scheduled_handles
            and self._scheduled_handles[0].when() <= now
        ):
            self._ready_handles.append(self._scheduled_handles.popleft())
        #... previous ready handle execution logic

call_at

Scheduling Callbacks

loop = MyEventLoop()
# Queued for immediate execution.
loop.call_soon(lambda: print("Hello"))
# Queued with a delay; the first argument is in seconds.
loop.call_later(5, lambda: print("World"))
loop.run_forever()
class MyEventLoop(AbstractEventLoop):
    # ...

    def call_later(
        self, delay: float, callback: Callable, *args: Any
    ) -> TimerHandle:
        """Schedule ``callback(*args)`` to run ``delay`` seconds from now.

        The relative delay is turned into an absolute deadline on the
        loop's clock and handed to ``call_at``, whose handle is returned.
        """
        deadline = self.time() + delay
        return self.call_at(deadline, callback, *args)

    # ...

call_later

Futures

Futures

# A future is a placeholder for a result that is not available yet.
loop = MyEventLoop()
future = loop.create_future()

# No result has been set yet; the slide shows "<Future pending>" here.
print(future)

# Setting the result completes the future.
future.set_result(42)
# The slide shows "<Future finished result=42>" for this second print.
print(future)

<Future pending>

<Future finished result=42>

Futures

from loop import MyEventLoop

def on_future_done(future):
    """Report the result of the completed future on stdout."""
    print(f"Future is done! Result: {future.result()}")


loop = MyEventLoop()
future = loop.create_future()
future.add_done_callback(on_future_done)
# A timer completes the future with 42 after one second.
loop.call_later(1, future.set_result, 42)

# Keep the loop running until the future has been completed.
loop.run_until_complete(future)
Future is done! Result: 42

Awaiting Futures

class MyEventLoop(AbstractEventLoop):
    # ...

    def run_until_complete(self, f: Future):
        """Run loop iterations until ``f`` is done, then return its result.

        Args:
            f: The future to wait for. It must already be a future object
                exposing ``done()`` and ``result()``.

        Returns:
            The value produced by ``f.result()``.

        Raises:
            Whatever ``f.result()`` raises, e.g. the exception stored in
            the future.
        """
        while not f.done():
            self._run_once()
        return f.result()

Building some
AsyncIO Helpers

Sleeping

def sleep(
    seconds: float, loop: AbstractEventLoop
):
    """Return a future that ``loop`` completes with ``None`` after a delay.

    Args:
        seconds: Delay before the future is completed.
        loop: Event loop used to create the future and schedule the timer.

    Returns:
        The pending future created by ``loop.create_future()``.
    """
    future = loop.create_future()

    def wake_up():
        # The future may have been cancelled (or otherwise completed) while
        # the timer was pending; set_result() would then raise
        # InvalidStateError inside the loop.
        if not future.done():
            future.set_result(None)

    loop.call_later(seconds, wake_up)
    return future

Waiting on multiple futures

def gather(*futures: Future, loop: AbstractEventLoop):
    """Return a future that completes once every given future has completed.

    Args:
        *futures: The futures to wait for.
        loop: Event loop used to create the aggregate future.

    Returns:
        A future whose result is the list of the individual results, in
        argument order. With no futures it is completed immediately with
        an empty list. If one of the futures fails, the aggregate future
        fails with the same exception; if one is cancelled, the aggregate
        future is cancelled.
    """
    gathered = loop.create_future()
    if not futures:
        # No callback would ever fire, so complete right away.
        gathered.set_result([])
        return gathered

    remaining = len(futures)

    def on_done(future):
        nonlocal remaining
        if gathered.done():
            # The aggregate already failed or was cancelled. Still read a
            # late exception so it is not reported as "never retrieved".
            if not future.cancelled():
                future.exception()
            return
        if future.cancelled():
            gathered.cancel()
            return
        error = future.exception()
        if error is not None:
            gathered.set_exception(error)
            return
        remaining -= 1
        if remaining == 0:
            gathered.set_result([item.result() for item in futures])

    for future in futures:
        future.add_done_callback(on_done)

    return gathered

What about I/Os ?

File descriptors

The selector

The file descriptor table

open("/home/user/foo.txt", O_WRONLY | O_CREAT, 0644);

System Call (here in C):

The file descriptor table

open("/home/user/foo.txt", O_WRONLY | O_CREAT, 0644);

System Call (here in C):

The implementation

select(rlist, wlist, xlist, timeout)

iterable of file descriptors

Return ready file descriptors

Actually handling I/Os

class MyEventLoop(AbstractEventLoop):
    # ...
    # File-descriptor watching API. The method bodies are elided here.
    def add_reader(self, fd, callback, *args):
        """Register ``callback`` for ``fd``.

        NOTE(review): presumably ``callback(*args)`` runs when ``fd``
        becomes readable, as in asyncio's ``add_reader`` - confirm.
        """
        ...
    def remove_reader(self, fd):
        """Counterpart of ``add_reader`` for ``fd`` (body not shown)."""
        ...
    def add_writer(self, fd, callback, *args):
        """Register ``callback`` for ``fd``.

        NOTE(review): presumably ``callback(*args)`` runs when ``fd``
        becomes writable, as in asyncio's ``add_writer`` - confirm.
        """
        ...
    def remove_writer(self, fd):
        """Counterpart of ``add_writer`` for ``fd`` (body not shown)."""
        ...

✅ IOs

coroutines

# Generator-style coroutine: delegates to read_data(db) with `yield from`.
# `data` is bound but not returned in this snippet.
@types.coroutine
def process_data(db):
    data = yield from read_data(db)
# async/await coroutine: the same delegation spelled with `await`.
# It reuses the name process_data, so it rebinds the definition above.
async def process_data(db):
    data = await read_data(db)

Async Context Managers

Async Iterators

Generator style coroutines

async/await coroutines

The truth behind

asyncio.run(greet())

asyncio.run

loop = asyncio.new_event_loop()
try:
    # Wrap the coroutine in a task and drive the loop until it finishes.
    task = loop.create_task(greet())
    loop.run_until_complete(task)
finally:
    # asyncio.run() also closes the loop it created; do the same here so
    # the loop's resources are released.
    loop.close()

Tasks

async def fetch_pokemon(name: str):
    """Fetch the record for ``name`` and return its ``"identity"`` entry.

    Args:
        name: Name used to build the request path.

    Returns:
        The value stored under ``"identity"`` in the response data.
    """
    # The return value was bound to a local that was never read, so the
    # call is kept for its effect only.
    # NOTE(review): if validate_name() returns a normalized name, it should
    # probably be used to build the request path - confirm.
    validate_name(name)
    data = await client.get(f"/{name}")
    return data["identity"]
  
loop = asyncio.new_event_loop()
try:
    # The task wraps the coroutine; the loop advances it step by step.
    task = loop.create_task(fetch_pokemon("Pikachu"))
    loop.run_until_complete(task)
finally:
    # Release the loop's resources once the task has finished.
    loop.close()
  1. Schedule the first step
  2. Open the connection
  3. Add the writer to the selector
  4. WAIT for the write
  5. Add the reader to the selector
  6. WAIT for the read
  7. Schedule the next step

Recap

  • AsyncIO ✅
  • Event Loop ✅
  • Futures ✅
  • callbacks ✅
  • I/Os ✅
  • coroutines ✅
  • Tasks ✅
  • Selectors ✅

It can run a FastAPI server.

After some more network implementation...

It can run a FastAPI server.

What about performance?

Comparing the default event loop with ours

import pytest

@pytest.mark.benchmark
def test_sleep_future(loop):
    """Benchmark completing a zero-delay sleep future through the loop."""
    loop.run_until_complete(sleep(0, loop))

@pytest.mark.benchmark
def test_schedule_soon(loop):
    """Benchmark queueing one callback with ``call_soon``."""
    loop.call_soon(lambda: None)

@pytest.mark.benchmark
def test_schedule_later(loop):
    """Benchmark queueing one zero-delay timer with ``call_later``."""
    # The marker already benchmarks the whole test, as in the sibling
    # tests, so the never-called ``benchmark`` fixture is not requested.
    loop.call_later(0, lambda: None)

Writing performance tests

Writing performance tests

@pytest.fixture(scope="function")
def loop():
    """Provide a fresh asyncio selector event loop for each test.

    The loop is closed after the test so it does not leak.
    """
    event_loop = _UnixSelectorEventLoop()
    yield event_loop
    event_loop.close()

Writing performance tests

@pytest.fixture(scope="function")
def loop():
    """Provide a brand-new ``MyEventLoop`` instance for each test function."""
    fresh_loop = MyEventLoop()
    return fresh_loop

uvloop

+

uvloop makes asyncio 2-4x faster.

Writing performance tests

@pytest.fixture(scope="function")
def loop():
    """Provide a fresh uvloop event loop for each test.

    The loop is closed after the test so it does not leak.
    """
    event_loop = uvloop.Loop()
    yield event_loop
    event_loop.close()
jobs:
  benchmarks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Pin the interpreter so benchmark runs stay comparable over time.
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - run: pip install -r requirements.lock -r requirements-dev.lock

      # The CodSpeed action runs the given command with instrumentation.
      - name: Run benchmarks
        uses: CodSpeedHQ/action@v2
        with:
          run: pytest --codspeed

Integrating in the CI environment

  • Consistent performance measurement
     
  • Run on CI without noise
     
  • GitHub and Pull Request integration
     
  • Deterministic and differential profiling on every commit
     
  • Free for Open-Source

Thank you!

@art049

arthurpastel

Demystifying AsyncIO: Building Your Own Event Loop in Python

By Arthur Pastel

Demystifying AsyncIO: Building Your Own Event Loop in Python

  • 344