AppNexus YASS
https://github.com/SimplyAhmazing/BatTorrent
file.txt.torrent
BitTorrent
Client
Done!
Write a program that can download a file when given file.torrent as input
$ python client.py file.txt.torrent
file.txt has finished downloading
Download connection
HTTP GET: Who has this file?
Resp: List of N Peers
Tracker
Peer 1
Peer N
Peer 2
.
.
.
Solving for:
from collections import deque

# Toy round-robin event loop: every task gets one turn, and unfinished
# tasks go to the back of the line until all of them are done.
# A deque is used because popping from the left of a list is O(n).
queue = deque([
    Task(some_coroutine),
    Task(another_coroutine),
])

while queue:
    task = queue.popleft()
    # Presumably advances the coroutine to its next suspension point.
    task.execute_coroutine()
    if task.is_coroutine_done():
        print('Finished a task')
    else:
        # Not finished yet: reschedule behind the other pending tasks.
        queue.append(task)

print('Event loop finished tasks')
async def some_coroutine():
    """Compute ``1 + 2``, suspend for one second, then return the sum.

    The ``await`` is where this coroutine hands control back to the
    event loop so that other tasks can run while the sleep is pending.
    """
    a = 1 + 2
    await asyncio.sleep(1)
    return a
Creating a coroutine
use "async def" to create a coroutine
import asyncio

loop = asyncio.get_event_loop()
# run_until_complete() needs a coroutine *object* (or another awaitable),
# so the coroutine function has to be called here.
loop.run_until_complete(some_coroutine())
Running a coroutine
wrapped in Task
import asyncio
async def do_stuff():
    """Run five ``some_coroutine()`` calls concurrently, then print 'Done'.

    ``asyncio.gather`` schedules all five coroutines and only resumes
    this one once every one of them has finished.
    """
    await asyncio.gather(*(some_coroutine() for _ in range(5)))
    print('Done')
loop = asyncio.get_event_loop()
# Call do_stuff() so the loop receives a coroutine object, not the function.
loop.run_until_complete(do_stuff())
Run multiple coroutines "simultaneously"
Task 1
Task 2
active
inactive
execution time
import asyncio, sys
async def download(torrent_file):
    """Download the file described by ``torrent_file`` from all known peers.

    Args:
        torrent_file: Path of the ".torrent" file, passed to ``read_torrent``.

    Returns once every peer's ``download()`` coroutine has finished.
    """
    # Read and parse ".torrent" file
    torrent = read_torrent(torrent_file)

    # Get peers list from tracker in ".torrent" file
    peer_addresses = await get_peers(torrent)

    # Objects to track peer communication/state
    peers = [Peer(addr) for addr in peer_addresses]

    # Wait for all download coroutines to finish
    await asyncio.gather(
        *(peer.download() for peer in peers)  # Producers
    )
if __name__ == '__main__':
    # Script entry point: the first CLI argument is the ".torrent" path.
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(download(sys.argv[1]))
    finally:
        # Release the loop's resources even if the download raised.
        loop.close()
import asyncio, sys
async def download(torrent_file):
    """Download the torrent's file, writing pieces to disk as they arrive.

    Peers act as producers that push downloaded pieces onto a shared
    ``asyncio.Queue``; a single ``FileSaver`` consumes that queue.

    Args:
        torrent_file: Path of the ".torrent" file, passed to ``read_torrent``.
    """
    # Read and parse ".torrent" file
    torrent = read_torrent(torrent_file)

    # Get peers list from tracker in ".torrent" file
    peer_addresses = await get_peers(torrent)

    # Queue for storing downloaded file pieces
    file_pieces_queue = asyncio.Queue()

    # Object to coordinate writing file to disk
    file_saver = FileSaver(file_pieces_queue)

    # Objects to track peer communication/state
    peers = [Peer(addr, file_pieces_queue) for addr in peer_addresses]

    # NOTE(review): gather() only returns once file_saver.start() has
    # returned too; presumably something enqueues a terminating sentinel
    # when the download is complete -- confirm.
    await asyncio.gather(
        *(peer.download() for peer in peers),  # Producers
        file_saver.start(),                    # Consumer
    )
Peer 1
Peer 2
Peer 3
File Saver
Active
Inactive
execution time
(Metainfo file)
$ cat hello.txt.torrent
d8:announce31:http://10.0.16.36:6969/announce10:created by25:
Transmission/2.84 (14307)13:creation datei1504559097e8:encoding5
:UTF-84:infod6:lengthi35e4:name9 :hello.txt12:piece length
i32768e6:pieces20:�3�b0gŵ�{ *�$K7:privatei0eee%
Data type | Format | Example |
---|---|---|
Byte string | <len>:<str> | 4:spam => "spam" |
Integer | i<num>e | i-55e => -55 |
list | l<item>e | l2:hie => ["hi"] |
dictionary | d<key><val>e | d2:hi3:byee => {"hi": "bye"} |
$ cat hello.txt.torrent
d8:announce31:http://10.0.16.36:6969/announce10:created by25:Transmission/2.84
(14307)13:creation datei1504559097e8:encoding5:UTF-84:infod6:lengthi35e4:name9
:hello.txt12:piece lengthi32768e6:pieces20:�3�b0gŵ�{ *�$K7:privatei0eee%
{b'announce': b'http://10.0.16.36:6969/announce',
b'created by': b'Transmission/2.84 (14307)',
b'creation date': 1504559097,
b'encoding': b'UTF-8',
b'info': {b'length': 35,
b'name': b'hello.txt',
b'piece length': 32768,
b'pieces': b'\xad3\xd8b0g\xc5\xb5\x17\xaa{\x1a\t\x02*\xed\xbd$K\x13',
b'private': 0}}
bdecode( )
The Tracker is an HTTP(S) server that responds to HTTP GET requests.
It keeps track of who in the swarm has a torrent.
HTTP GET: Who has this file?
RESP: List of N Peers
Tracker
Params | Description |
---|---|
info_hash | URL-encoded 20-byte SHA-1 hash of the bencoded info dict |
peer_id | URL encoded 20-byte string ID that identifies our client |
port | Port our client is listening on |
event | {started, stopped, completed} |
uploaded | Total amount uploaded (Bytes) since 'started' event |
downloaded | Total amount downloaded (Bytes) since 'started' event |
left | Number of bytes the client still has to download |
compact | Bool. Denotes if client accepts compact list of peers |
no_peer_id | Bool. Indicates if peer can omit peer_id in peers dict response |
URL: http://my-tracker.com/announce?params=params
import aiohttp
torrent = get_torrent_file()
async def request_peers(self):
    """Ask the tracker for peers and return the bdecoded response.

    ``tracker_url`` and ``params`` are taken from the enclosing scope,
    which is not shown here.

    Returns:
        Whatever ``bencoder.decode`` yields for the tracker's response body.
    """
    async with aiohttp.ClientSession() as session:
        # Using the request as a context manager releases the connection
        # back to the session even if reading the body fails.
        async with session.get(tracker_url, params=params) as resp:
            resp_data = await resp.read()
    return bencoder.decode(resp_data)
b'd8:completei1e10:downloadedi0e10:incomplete
i1e8:intervali1921e12:min intervali960e5:peers
12:\xc0\xa8c\x01\xe90\xc0\xa8cd\xc8\xd5e'
{
b'complete': 1,
b'downloaded': 0,
b'incomplete': 1,
b'interval': 1921,
b'min interval': 960,
b'peers': b'\xc0\xa8c\x01\xe90\xc0\xa8cd\xc8\xd5'
}
['192.168.99.1:59696', '192.168.99.100:51413']
Initial State
Cooperative State
class Peer(object):
    """Connection state for a single remote peer."""

    def __init__(self, host, port, file_queue):
        """Store the peer's address and the queue for downloaded pieces.

        Args:
            host: Address of the peer.
            port: Port of the peer.
            file_queue: Queue on which downloaded pieces are placed.
        """
        self.host = host
        self.port = port
        self.file_queue = file_queue

        # Denotes if peer is choking us
        self.peer_choking = True

        # Denotes if we've informed our peer we're interested
        self.am_interested = False

    async def download(self):
        """Placeholder for the peer download coroutine; does nothing yet."""
        pass
TCP connection
Unchoke
Interested
Have/Bitfield
Request
Piece
.
.
.
Client
Peer
Client Handshake
Peer Handshake
class Peer(object):
    async def download(self):
        """Connect to the peer and exchange BitTorrent handshakes.

        ``info_hash`` and ``PEER_ID`` come from the enclosing scope,
        which is not shown here.

        Raises:
            asyncio.IncompleteReadError: If the peer closes the connection
                before sending a full 68-byte handshake.
        """
        reader, writer = await asyncio.open_connection(
            self.host, self.port
        )

        handshake = b''.join([
            bytes([19]),              # Length of the protocol string
            b'BitTorrent protocol',
            bytes(8),                 # Reserved bytes, all zero
            info_hash,
            PEER_ID.encode(),
        ])

        # Send Handshake
        writer.write(handshake)
        await writer.drain()

        # Read and validate response. A handshake is 1 + 19 + 8 + 20 + 20
        # = 68 bytes; read() may return fewer bytes than requested, so
        # readexactly() is used to wait for the whole handshake.
        peer_handshake = await reader.readexactly(68)
        self.validate(peer_handshake)

        # Start exchanging messages...
Handshake format |
---|
Protocol string length |
"BitTorrent protocol" |
Reserved bytes |
info_hash |
peer id |
All remaining messages:
<message length> <id> <payload>
Message | Format |
---|---|
Keep Alive | <len=0000> |
Choke | <len=0001><id=0> |
Unchoke | <len=0001><id=1> |
Interested | <len=0001><id=2> |
Not Interested | <len=0001><id=3> |
Have | <len=0005><id=4><piece index> |
Bitfield | <len=0001+X><id=5><bitfield> |
Request | <len=0013><id=6><index><begin><length> |
Piece | <len=0009+X><id=7><index><begin><block> |
class Peer(object):
    async def download(self):
        """Read from the peer and split the byte stream into messages.

        ``reader`` and ``REQUEST_SIZE`` come from code that is not shown
        here. Assumes ``get_message_message_length`` returns the value of
        the 4-byte length prefix at the start of ``buf`` -- TODO confirm.
        """
        # Start exchanging messages
        buf = b''  # Holds data read from peer
        while True:
            resp = await reader.read(REQUEST_SIZE)
            if not resp:
                # read() returns b'' at EOF: the peer closed the connection.
                break
            buf += resp

            # Consume every complete message currently buffered.
            while len(buf) >= 4:
                msg_message_length = self.get_message_message_length(buf)

                if msg_message_length == 0:
                    # Handle Keep Alive: it is only the length prefix, so
                    # drop those 4 bytes or this loop would never advance.
                    buf = buf[4:]
                    continue

                if len(buf) < 4 + msg_message_length:
                    # The rest of this message has not arrived yet.
                    break

                # unpack() always returns a tuple, even for one value.
                (msg_id,) = struct.unpack('>b', buf[4:5])

                # ... handle the message identified by msg_id ...

                # Drop the handled message so the next one is at the front.
                buf = buf[4 + msg_message_length:]
class Peer(object):
    async def download(self):
        """Dispatch on the ID of each message received from the peer.

        Sketch only: reading from the peer and extracting each message
        (``buf``, ``msg_message_length``, ``piece_data``) is elided.
        """
        # ... Looping through Peer messages
        while True:
            # ... (reading the next message into buf is elided)

            if msg_message_length == 0:
                # Handle Keep Alive
                continue

            # 5th byte is the ID. unpack() always returns a tuple, so it is
            # unpacked here; comparing the tuple itself to an int is
            # always False.
            (msg_id,) = struct.unpack('>b', buf[4:5])

            if msg_id == 0:
                # Handle Choke...
                pass
            elif msg_id == 1:
                # Handle Unchoke...
                await self.send_interested_message()
            elif msg_id == 2:
                # Handle Interested...
                pass
            elif msg_id == 3:
                # Handle Not interested...
                pass
            elif msg_id == 4:
                # Handle Have...
                pass
            elif msg_id == 5:
                # Handle Bitfield...
                pass
            elif msg_id == 7:
                # Handle Piece...
                # asyncio.Queue exposes put()/put_nowait(), not enqueue().
                await self.file_queue.put(piece_data)
                await self.request_a_piece()
class FileSaver(object):
    """Consumer that takes downloaded pieces off a queue and saves them."""

    def __init__(self, file_queue):
        """Remember the queue that pieces are read from."""
        self.file_queue = file_queue

    async def start(self):
        """Save pieces from the queue until a falsy "poison pill" arrives.

        Waits on the queue, so other coroutines run while it is empty.
        """
        while True:
            piece = await self.file_queue.get()

            if not piece:  # Poison pill
                return

            await self.save(piece)
import asyncio, sys
async def download(torrent_file):
    """Download the torrent's file, writing pieces to disk as they arrive.

    Peers act as producers that push downloaded pieces onto a shared
    ``asyncio.Queue``; a single ``FileSaver`` consumes that queue.

    Args:
        torrent_file: Path of the ".torrent" file, passed to ``read_torrent``.
    """
    # Read and parse ".torrent" file
    torrent = read_torrent(torrent_file)

    # Get peers list from tracker in ".torrent" file
    peer_addresses = await get_peers(torrent)

    # Queue for storing downloaded file pieces
    file_pieces_queue = asyncio.Queue()

    # Object to coordinate writing file to disk
    file_saver = FileSaver(file_pieces_queue)

    # Objects to track peer communication/state
    peers = [Peer(addr, file_pieces_queue) for addr in peer_addresses]

    # NOTE(review): gather() only returns once file_saver.start() has
    # returned too; presumably something enqueues a terminating sentinel
    # when the download is complete -- confirm.
    await asyncio.gather(
        *(peer.download() for peer in peers),  # Producers
        file_saver.start(),                    # Consumer
    )
500 Lines or Less
A Web Crawler With asyncio Coroutines
by: Jesse Jiryu Davis & Guido van Rossum
Home Assistant
github.com/home-assistant/home-assistant