Programação Assíncrona com Twisted

Tonny Dourado (@tonnydourado)

Desenvolvedor Python, no Instituto de Pesquisas Eldorado, em Porto Alegre

Membro do PyTchê

(www.meetup.com/PyTche/)

Buh Buh Buh!

(twitter.com/tonnydourado)

Twisted

Mas e o asyncio?

Twisted

O Reactor

C10k Problem

Como suportar dez mil conexões simultâneas?

Threads

import socket
from multiprocessing.dummy import Pool
from threading import current_thread

def dispatcher(host, port, pool_size=5):
    """Accept TCP connections forever and hand each one to a worker thread.

    Args:
        host: Address the listening socket binds to.
        port: TCP port the listening socket binds to.
        pool_size: Number of worker threads in the pool. Defaults to 5,
            the value that used to be hard-coded.

    The accept loop never finishes on its own; it only stops when an
    exception (e.g. ``KeyboardInterrupt`` or an ``OSError`` from
    ``accept``) propagates, and the listening socket is closed on the way
    out.
    """
    print(f"Thread: {current_thread()}, Host: '{host}', port: '{port}'")
    thread_pool = Pool(pool_size)

    # The context manager releases the listening socket even when the
    # accept loop is interrupted by an exception.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind((host, port))
        sock.listen(5)

        while True:
            client, address = sock.accept()
            # Applies to the blocking recv/send calls done by the worker.
            client.settimeout(60)
            thread_pool.apply_async(process_request, (client, address))

def process_request(client, address, size=1024):
    """Echo everything received on ``client`` back to it, then close it.

    Args:
        client: Connected socket for this client.
        address: Peer address, used only for logging.
        size: Maximum number of bytes read per ``recv`` call.

    The socket is always closed before returning, including when ``recv``
    or ``sendall`` raises (for example on a socket timeout); such
    exceptions propagate to the caller.
    """
    print(f"Thread: {current_thread()}, Client: {client}, "
          f"Address: {address}")

    try:
        while True:
            data = client.recv(size)
            if not data:
                # Empty read: the peer closed its side of the connection.
                break
            print(f"Data: {data}")
            # sendall keeps writing until the whole chunk has been sent;
            # a plain send() may write only part of it.
            client.sendall(data)
    finally:
        client.close()

# Usage: <script> HOST PORT
if __name__ == '__main__':
    import sys
    dispatcher(host=sys.argv[1], port=int(sys.argv[2]))

Prós:

  • Código de processar requests é mais simples
  • Abstrações familiares
  • Scheduling é responsabilidade do kernel

Contras:

  • Overhead de:
    • memória: cada thread tem uma stack
    • processamento: context switches
  • Deadlocks, starvation, race conditions

"Must be this tall to write multi-threaded code."

Event Loop

import select
import socket
from queue import Queue, Empty


class EventLoop(object):
    """Single-threaded TCP echo server driven by ``select.select``.

    Every chunk received from a client is queued and written back to that
    same client once its socket is reported writable. A connection stays
    open until the peer closes it or an error is reported for it.

    Args:
        host: Address the listening socket binds to.
        port: TCP port the listening socket binds to.
    """

    def __init__(self, host="localhost", port=5000):
        self.host = host
        self.port = port
        self._server = None
        # Sockets polled for readability/errors and for writability.
        self._inputs, self._outputs = [], []
        # Per-connection queue of chunks waiting to be echoed back.
        self._queues = {}

    def __call__(self):
        """Start listening and dispatch socket events until no inputs remain.

        Raises:
            Exception: If this instance has already been started.
        """
        if self._server is not None:
            raise Exception("Can't start server twice!")
        self._start_listening()
        self._inputs.append(self._server)

        while self._inputs:
            readable, writable, errors = select.select(
                self._inputs,
                self._outputs,
                self._inputs
            )
            # A socket may show up in more than one of these lists; the
            # handlers tolerate sockets that an earlier handler in the same
            # pass has already closed.
            for sock in readable:
                self._handle_readable(sock)
            for sock in writable:
                self._handle_writable(sock)
            for sock in errors:
                self._handle_error(sock)

    def _start_listening(self):
        """Create the non-blocking listening socket and bind it."""
        self._server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Same option the threaded server sets on its listening socket.
        self._server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self._server.setblocking(0)
        self._server.bind((self.host, self.port))
        self._server.listen(5)

    def _handle_readable(self, sock):
        """Accept a new client, or read one chunk from an existing client."""
        # When the server socket is ready to be read, we have a new client.
        if sock is self._server:
            try:
                conn, _addr = sock.accept()
            except BlockingIOError:
                # The pending connection went away before we accepted it.
                return
            conn.setblocking(0)
            self._inputs.append(conn)
            self._queues[conn] = Queue()
            return

        if sock not in self._queues:
            # Already closed earlier in this select pass.
            return

        try:
            data = sock.recv(1024)
        except BlockingIOError:
            # Nothing to read after all; wait for the next select pass.
            return
        except ConnectionError:
            # Treat a reset/aborted connection like an orderly close.
            data = b""

        if not data:
            # No data: the peer is gone, so the connection can be closed.
            self._close(sock)
            return

        # Queue what was read; it is sent once the socket is writable.
        self._queues[sock].put(data)
        # Start monitoring the socket for writes if we are not already.
        if sock not in self._outputs:
            self._outputs.append(sock)

    def _handle_writable(self, sock):
        """Send the next queued chunk, if any, to ``sock``."""
        pending = self._queues.get(sock)
        if pending is None:
            # Already closed earlier in this select pass.
            return

        try:
            next_msg = pending.get_nowait()
        except Empty:
            # Nothing left to echo: stop polling for writability, but keep
            # the connection so the client can send more data later.
            if sock in self._outputs:
                self._outputs.remove(sock)
            return

        try:
            # NOTE: a non-blocking send() may write only part of next_msg;
            # the remainder is not re-queued here.
            sock.send(next_msg)
        except ConnectionError:
            self._close(sock)

    def _handle_error(self, sock):
        """Drop a socket that select reported as being in an error state."""
        self._close(sock)

    def _close(self, sock):
        """Forget ``sock`` everywhere and close it; safe to call twice."""
        if sock in self._inputs:
            self._inputs.remove(sock)
        if sock in self._outputs:
            self._outputs.remove(sock)
        # The listening socket has no queue, hence pop() with a default.
        self._queues.pop(sock, None)
        sock.close()


# Usage: <script> HOST PORT -- builds the event loop and runs it.
if __name__ == '__main__':
    import sys
    loop = EventLoop(host=sys.argv[1], port=int(sys.argv[2]))
    loop()

Prós:

  • Sem overhead de troca de contexto
  • Algoritmos mais simples
  • Sem deadlocks, starvation, race conditions

Contras:

  • Scheduling é responsabilidade da aplicação
  • Abstrações menos familiares
  • Mais difícil de debugar

Reactor

  • Event loop independente de plataforma
  • Suporta timed events, filesystem, subprocessos, além de I/O
  • Múltiplas implementações: select, epoll, IOCP, GTK, Tkinter, asyncio, etc.

Deferreds

Deferreds

  • Representa um resultado no futuro
  • Estabelece ordem de execução
  • Futures (asyncio) / Promises (ES6) / Tasks (C#)
import json


def connectionError(failure):
    """Errback that logs a connection error and returns an empty list.

    NOTE(review): ``failure`` is presumably a twisted ``Failure`` whose
    ``trap`` re-raises anything that is not a ``ConnectionError`` -- confirm.
    ``log`` is not defined in this snippet; presumably a
    ``twisted.logger.Logger`` -- confirm.
    """
    # Must run first: the lines below are only meant for connection errors.
    failure.trap(ConnectionError)
    log.failure("makeRequest failed due to connection error", failure)
    return []


def getUsers():
    """Request ``/users`` and return the Deferred-like result object.

    ``json.loads`` is registered as callback and ``connectionError`` as
    errback, in that order, before the object is returned.
    """
    request = makeRequest("GET", "/users")
    request.addCallback(json.loads)
    request.addErrback(connectionError)
    return request
from twisted.internet import reactor, defer


def _triggerDeferred(d, x):
    if x % 2 == 0:
        d.callback(x * 3)
    else:
        d.errback(ValueError("You used an odd number!"))


def getDeferred(x):
    """Return a Deferred that ``_triggerDeferred`` fires two seconds from now.

    ``firstCallback`` is already attached to the returned Deferred.
    """
    pending = defer.Deferred()
    # Pretend the work took a while to run.
    reactor.callLater(2, _triggerDeferred, pending, x)
    return pending.addCallback(firstCallback)


def firstCallback(result):
    """Announce the callback and return ``result`` wrapped in a message."""
    print("First callback!")
    message = f"Result: {result}"
    return message


def cbPrintData(result):
    """Print a success marker followed by ``result``, each on its own line."""
    print("Yay!", result, sep="\n")


def ebPrintError(failure):
    """Print an error marker followed by ``repr(failure)``."""
    for line in ("Oh, no!", repr(failure)):
        print(line)


def main():
    """Run the demo: one failing and one succeeding Deferred, then stop.

    The reactor is stopped four seconds after it starts.
    """
    # 3 is odd, so the ValueError message ends up in ebPrintError;
    # 4 is even, so cbPrintData prints "Result: 12".
    for value in (3, 4):
        d = getDeferred(value)
        d.addCallback(cbPrintData)
        d.addErrback(ebPrintError)

    reactor.callLater(4, reactor.stop)
    reactor.run()


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Dados

Deferred

Result ou

Failure

Result

Failure

Callbacks

Errbacks

import json


def connectionError(failure):
    """Errback that logs a connection error and returns an empty list.

    NOTE(review): ``failure`` is presumably a twisted ``Failure`` whose
    ``trap`` re-raises anything that is not a ``ConnectionError`` -- confirm.
    ``log`` is not defined in this snippet; presumably a
    ``twisted.logger.Logger`` -- confirm.
    """
    # Must run first: the lines below are only meant for connection errors.
    failure.trap(ConnectionError)
    log.failure("makeRequest failed due to connection error", failure)
    return []


def getUsers():
    """Request ``/users`` and return the Deferred-like result object.

    ``json.loads`` is registered as callback and ``connectionError`` as
    errback, in that order, before the object is returned.
    """
    request = makeRequest("GET", "/users")
    request.addCallback(json.loads)
    request.addErrback(connectionError)
    return request

inlineCallbacks
&
async/await

inlineCallbacks

import json


def connectionError(failure):
    """Errback that logs a connection error and returns an empty list.

    NOTE(review): ``failure`` is presumably a twisted ``Failure`` whose
    ``trap`` re-raises anything that is not a ``ConnectionError`` -- confirm.
    ``log`` is not defined in this snippet; presumably a
    ``twisted.logger.Logger`` -- confirm.
    """
    # Must run first: the lines below are only meant for connection errors.
    failure.trap(ConnectionError)
    # Pass the failure explicitly: an errback does not run inside an
    # ``except`` block, so there is no active exception for the logger to
    # pick up on its own.
    log.failure("makeRequest failed due to "
                "connection error", failure)
    return []


def getUsers():
    """Request ``/users`` and return the Deferred-like result object.

    ``json.loads`` is registered as callback and ``connectionError`` as
    errback, in that order, before the object is returned.
    """
    request = makeRequest("GET", "/users")
    request.addCallback(json.loads)
    request.addErrback(connectionError)
    return request
import json
from twisted.internet.defer import (
    inlineCallbacks, returnValue
)


@inlineCallbacks
def getUsers(self):
    """Fetch ``/users`` and produce the decoded JSON body.

    A ``ConnectionError`` raised while waiting for the request is logged
    and an empty list is produced instead.
    """
    try:
        response = yield makeRequest("GET", "/users")
    except ConnectionError:
        log.failure("makeRequest failed due to "
                    "connection error")
        returnValue([])
    else:
        returnValue(json.loads(response))

 Generators

e

Corotinas

Generators

  • Funções "interrompíveis"
  • yield é usado para retornar valores
  • "Retornar" um valor não termina a função
def my_range(n):
    """Yield 0, 1, 2, ... for as long as the value is smaller than ``n``."""
    current = 0
    while True:
        if not current < n:
            return
        yield current
        current += 1

# Drive the generator by hand, one value per next() call.
g = my_range(3)
print(next(g))  # Output: 0
print(next(g))  # Output: 1
print(next(g))  # Output: 2
print(next(g))  # Raises StopIteration: the generator is exhausted

# NOTE: run as a single script, the uncaught StopIteration above stops
# execution before this loop is reached.
for i in my_range(10):
    print(i, end=" ")
print()
# Output: 0 1 2 3 4 5 6 7 8 9
def hello():
    """Generator that receives a name via ``send`` and yields a greeting.

    The first ``yield`` produces ``None`` and waits for the name; the
    second one yields the formatted greeting.
    """
    person = yield
    yield f"Hello, {person}!!!"


def say_hello(name):
    """Print the greeting that the ``hello`` generator builds for ``name``."""
    greeter = hello()
    # Advance to the first yield so the generator can receive a value.
    greeter.send(None)
    print(greeter.send(name))


say_hello("Tonny")
# Output: Hello, Tonny!!!
import json
from twisted.internet.defer import (
    inlineCallbacks, returnValue
)


@inlineCallbacks
def getUsers(self):
    """Fetch ``/users`` and produce the decoded JSON body.

    A ``ConnectionError`` raised while waiting for the request is logged
    and an empty list is produced instead.
    """
    try:
        response = yield makeRequest("GET", "/users")
    except ConnectionError:
        log.failure("makeRequest failed due to "
                    "connection error")
        returnValue([])
    else:
        returnValue(json.loads(response))

async/await

import json
from twisted.internet.defer import ensureDeferred
from twisted.logger import Logger
log = Logger()

async def getUsers():
    """Fetch ``/users`` and return the decoded JSON body.

    A ``ConnectionError`` raised while awaiting the request is logged and
    an empty list is returned instead.
    """
    try:
        response = await makeRequest("GET", "/users")
    except ConnectionError:
        log.failure("makeRequest failed due to"
                    " connection error")
        return []
    return json.loads(response)

def do():
    """Run ``getUsers`` as a Deferred and print whatever it resolves to."""
    return ensureDeferred(getUsers()).addCallback(print)

async/await

asyncio

  • async/await:

    • __await__, send, throw, close

  • asyncio:

    • Pluggable event loop

    • Transports, protocols

    • Futures

async/await

  • Deferred.fromFuture
  • Deferred.asFuture

asyncio + Twisted

import asyncio
from datetime import datetime

from twisted.internet.task import react, LoopingCall
from twisted.internet.defer import ensureDeferred, Deferred
from twisted.internet import asyncioreactor

# Make Twisted run on top of the asyncio event loop.
# NOTE(review): Twisted presumably needs this to happen before
# twisted.internet.reactor is first imported anywhere -- confirm.
asyncioreactor.install(asyncio.get_event_loop())


def sleep(secs):
    """Wrap ``asyncio.sleep(secs)`` in a Deferred built from its future."""
    return Deferred.fromFuture(asyncio.ensure_future(asyncio.sleep(secs)))


async def start_request():
    """Pretend to download three pages, one after another.

    Each "download" is just a one-second ``sleep``; progress is printed
    before and after it.
    """
    for url in (
        'https://example.com/page/1',
        'https://example.com/page/2',
        'https://example.com/page/3',
    ):
        print(f"Downloading page '{url}'", end="...\n")
        await sleep(1)
        print(f"Page '{url}' download finished")


async def after_request():
    """Wait two seconds, then announce that every page was downloaded."""
    await sleep(2)
    print("Downloaded all pages!")


def main(reactor):
    """Entry point for ``react``: print a clock while the requests run.

    Returns the Deferred that covers ``start_request`` followed by
    ``after_request``; errors from either are printed.
    """
    def tick():
        print(datetime.now())

    def run_after(_result):
        return ensureDeferred(after_request())

    ticker = LoopingCall(tick)
    ticker.start(0.5)

    finished = ensureDeferred(start_request())
    finished.addCallback(run_after)
    finished.addErrback(print)
    return finished


# Hand main() to react(), which passes it the reactor as first argument.
if __name__ == "__main__":
    react(main)

2018-04-04 17:05:05.060931
Downloading page 'https://example.com/page/1'...
2018-04-04 17:05:05.561611
2018-04-04 17:05:06.062427
Page 'https://example.com/page/1' download finished
Downloading page 'https://example.com/page/2'...
2018-04-04 17:05:06.562397
2018-04-04 17:05:07.062301
Page 'https://example.com/page/2' download finished
Downloading page 'https://example.com/page/3'...
2018-04-04 17:05:07.561850
2018-04-04 17:05:08.061805
Page 'https://example.com/page/3' download finished
2018-04-04 17:05:08.562057
2018-04-04 17:05:09.061871
2018-04-04 17:05:09.561685
2018-04-04 17:05:10.062463
Downloaded all pages!

Tem mais?

  • Claro que tem!
    • Scheduling: LoopingCall, reactor.callLater
    • Threads: deferToThread, callFromThread
    • Processos: ProcessProtocol, getProcessOutput
    • Protocolos: Factories, Transports e Protocols
    • Arquitetura: Services e Applications
    • Testes: trial, pytest-twisted

Twisted

vs

asyncio

vs

Django/Flask/etc

Links e Referências

Programação Assíncrona com Twisted

By Tonny Dourado

Programação Assíncrona com Twisted

Introdução de alguns conceitos básicos do Twisted para programação assíncrona. Código: https://github.com/tonnydourado/twisted-talk

  • 368