Escalando e Distribuindo com Python
André Claudino
claudino@d2x.com.br
https://www.linkedin.com/in/andreclaudino/
https://github.com/andreclaudino
t.me/IABrasil
http://t.me/aclaudino
PhD. Física Computacional, Inteligência Artificial e Sistemas Distribuídos
Blz?
- Processos demorados
- Processos que podem matar um nó pelo consumo de recursos
- Processos que não precisam retornar
- Jobs
- Processos escaláveis
- Sistemas de IA distribuídos (claro)
E o Python?
import logging
import uuid
from pymesos.interface import Scheduler
# Configure the root logger at INFO so the scheduler's logging.info() messages are emitted.
logging.basicConfig(level=logging.INFO)
class CustomScheduler(Scheduler):
    """Empty scheduler skeleton: nothing is overridden, everything comes from ``Scheduler``."""
custom_scheduler.py
class CustomScheduler(Scheduler):
    """Outline of the scheduler: every callback is declared but does nothing yet."""

    def __init__(self, executor):
        """Placeholder constructor; ``executor`` is accepted and ignored."""

    def registered(self, driver, frameworkId, masterInfo):
        """Placeholder callback; takes no action and returns ``None``."""

    def resourceOffers(self, driver, offers):
        """Placeholder callback; takes no action and returns ``None``."""

    def getResource(self, res, name):
        """Placeholder helper; takes no action and returns ``None``."""

    def statusUpdate(self, driver, update):
        """Placeholder callback; takes no action and returns ``None``."""
def __init__(self, executor):
    """Keep the executor description on the instance as ``self.executor``.

    Args:
        executor: Executor description to remember for later use.
    """
    self.executor = executor
def registered(self, driver, frameworkId, masterInfo):
    """Log the framework id and the hostname of the master.

    Args:
        driver: Unused here.
        frameworkId: Value interpolated into the log message as the framework id.
        masterInfo: Object whose ``hostname`` attribute is logged.
    """
    # The two literals are joined with a trailing space on the first one so the
    # hostname does not run into the words "master host".
    logging.info(
        f"Registered with framework id {frameworkId} "
        f"on master host {masterInfo.hostname}"
    )
def statusUpdate(self, driver, update):
    """Write the task id and state carried by ``update`` to the debug log.

    Args:
        driver: Unused here.
        update: Object providing ``task_id.value`` and ``state``.
    """
    task_id = update.task_id.value
    state = update.state
    logging.debug('Status update TID %s %s', task_id, state)
def getResource(self, res, name):
    """Return the scalar value of the resource called ``name``.

    Args:
        res: Iterable of resource objects exposing ``name`` and ``scalar.value``.
        name: Resource name to look for.

    Returns:
        The ``scalar.value`` of the first matching resource, or ``0.0`` when no
        resource has that name.
    """
    # A numeric fallback (instead of an implicit None) keeps comparisons such
    # as ``getResource(...) < needed`` from raising when a resource is absent.
    return next((r.scalar.value for r in res if r.name == name), 0.0)
def resourceOffers(self, driver, offers):
    """Launch one task built by ``new_task`` on every offer in ``offers``.

    NOTE(review): ``new_task`` is called with the executor and the offer only;
    confirm that its ``mem``/``cpu`` parameters have defaults.

    Args:
        driver: Object whose ``launchTasks`` is called once per offer.
        offers: Iterable of offers; ``id`` and ``id.value`` are read from each.
    """
    refusal = {'refuse_seconds': 5}
    for current in offers:
        logging.info(f"Recieved resource offers: {current.id.value}")
        to_launch = [new_task(self.executor, current)]
        driver.launchTasks(current.id, to_launch, refusal)
def new_task(executor, offer, mem=32, cpu=0.1):
    """Build the description of a task to launch on ``offer``.

    Built-in ``dict`` objects do not support attribute assignment, so the
    description is assembled as nested dict literals.

    Args:
        executor: Executor description stored under the ``executor`` key.
        offer: Resource offer; only ``offer.agent_id.value`` is read.
        mem: Value of the ``mem`` scalar resource requested for the task.
        cpu: Value of the ``cpus`` scalar resource requested for the task.
            Both have small defaults so the two-argument call
            ``new_task(executor, offer)`` works; tune them to the workload.

    Returns:
        A nested ``dict`` with the keys ``task_id``, ``agent_id``, ``name``,
        ``executor``, ``data`` and ``resources``.
    """
    # Generate the id once so the name and the payload refer to the same task.
    task_id = str(uuid.uuid4())
    return {
        'task_id': {'value': task_id},
        'agent_id': {'value': offer.agent_id.value},
        'name': f'task {task_id}',
        'executor': executor,
        # NOTE(review): pymesos ships encode_data() for task payloads; confirm
        # whether ``data`` must be encoded before it is sent.
        'data': f'Hello from task {task_id}!',
        'resources': [
            dict(name='cpus', type='SCALAR', scalar={'value': cpu}),
            dict(name='mem', type='SCALAR', scalar={'value': mem}),
        ],
    }
import logging
import uuid
from pymesos.interface import Scheduler
# Configure the root logger at INFO so the scheduler's logging.info() messages are emitted.
logging.basicConfig(level=logging.INFO)
class CustomScheduler(Scheduler):
    """Scheduler that launches one ``new_task`` task on every offer it receives."""

    def __init__(self, executor):
        """Store the executor description attached to every launched task.

        Args:
            executor: Executor description handed to ``new_task`` per offer.
        """
        self.executor = executor

    def registered(self, driver, frameworkId, masterInfo):
        """Log the framework id and the hostname of the master."""
        logging.info(f"Registered with framework id {frameworkId} on master host {masterInfo.hostname}")

    def resourceOffers(self, driver, offers):
        """Launch one task on every offer in ``offers``.

        NOTE(review): ``new_task`` is called with the executor and the offer
        only; confirm that its ``mem``/``cpu`` parameters have defaults.

        Args:
            driver: Object whose ``launchTasks`` is called once per offer.
            offers: Iterable of offers; ``id`` and ``id.value`` are read.
        """
        filters = {'refuse_seconds': 5}
        for offer in offers:
            logging.info(f"Recieved resource offers: {offer.id.value}")
            task = new_task(self.executor, offer)
            driver.launchTasks(offer.id, [task], filters)

    def getResource(self, res, name):
        """Return the scalar value of the resource called ``name``.

        Args:
            res: Iterable of resource objects exposing ``name`` and
                ``scalar.value``.
            name: Resource name to look for.

        Returns:
            The ``scalar.value`` of the first match, or ``0.0`` when no
            resource has that name.
        """
        for r in res:
            if r.name == name:
                return r.scalar.value
        # Numeric fallback instead of an implicit None, so callers can compare
        # the result against a required amount without a TypeError.
        return 0.0

    def statusUpdate(self, driver, update):
        """Write the task id and state carried by ``update`` to the debug log."""
        logging.debug('Status update TID %s %s',
                      update.task_id.value,
                      update.state)
def new_task(executor, offer, mem=32, cpu=0.1):
    """Build the description of a task to launch on ``offer``.

    Built-in ``dict`` objects do not support attribute assignment, so the
    description is assembled as nested dict literals.

    Args:
        executor: Executor description stored under the ``executor`` key.
        offer: Resource offer; only ``offer.agent_id.value`` is read.
        mem: Value of the ``mem`` scalar resource requested for the task.
        cpu: Value of the ``cpus`` scalar resource requested for the task.
            Both have small defaults so the two-argument call
            ``new_task(executor, offer)`` works; tune them to the workload.

    Returns:
        A nested ``dict`` with the keys ``task_id``, ``agent_id``, ``name``,
        ``executor``, ``data`` and ``resources``.
    """
    # Generate the id once so the name and the payload refer to the same task.
    task_id = str(uuid.uuid4())
    return {
        'task_id': {'value': task_id},
        'agent_id': {'value': offer.agent_id.value},
        'name': f'task {task_id}',
        'executor': executor,
        # NOTE(review): pymesos ships encode_data() for task payloads; confirm
        # whether ``data`` must be encoded before it is sent.
        'data': f'Hello from task {task_id}!',
        'resources': [
            dict(name='cpus', type='SCALAR', scalar={'value': cpu}),
            dict(name='mem', type='SCALAR', scalar={'value': mem}),
        ],
    }
from addict import Dict
import getpass
import socket
def new_executor(memory, cpus, command="pass"):
    """Describe an executor that runs a Python snippet via ``python3 -c``.

    Args:
        memory: Value of the executor's ``mem`` scalar resource.
        cpus: Value of the executor's ``cpus`` scalar resource.
        command: Python source passed to ``python3 -c``. The default is a
            no-op placeholder that only exists so two-argument calls
            ``new_executor(memory, cpus)`` work; supply the real executor
            code in practice.

    Returns:
        An addict ``Dict`` with ``executor_id``, ``name``, ``command`` and
        ``resources`` filled in.
    """
    # Local import keeps the block self-contained; shlex is standard library.
    import shlex

    executor = Dict()
    executor.executor_id.value = 'Example Executor'
    executor.name = executor.executor_id.value
    # Quote the snippet as one shell word: wrapping it in raw double quotes
    # breaks as soon as the snippet itself contains ", $ or backticks.
    # (Assumes the command string is interpreted by a shell — TODO confirm.)
    executor.command.value = f'python3 -c {shlex.quote(command)}'
    executor.resources = [
        dict(name='mem', type='SCALAR', scalar={'value': memory}),
        dict(name='cpus', type='SCALAR', scalar={'value': cpus}),
    ]
    return executor
def build_framework():
    """Describe this framework: current user, a fixed name and the local hostname.

    Returns:
        An addict ``Dict`` with ``user``, ``name`` and ``hostname`` set.
    """
    current_user = getpass.getuser()
    info = Dict()
    info.user = current_user
    info.name = "Example Framework"
    info.hostname = socket.gethostname()
    return info
utils.py
def startup(master):
    """Start a ``CustomScheduler`` driver for ``master`` and block until it stops.

    The driver runs on its own thread, SIGINT is routed to ``driver.stop()``,
    and the calling thread polls once per second until the driver thread ends.

    NOTE(review): ``new_executor`` is called with memory and cpus only —
    confirm that its ``command`` parameter is optional.

    Args:
        master: Master address handed to ``MesosSchedulerDriver``.
    """
    executor_info = new_executor(10, .1)
    framework_info = build_framework()
    driver = MesosSchedulerDriver(
        CustomScheduler(executor_info),
        framework_info,
        master,
        use_addict=True,
    )

    def stop_on_interrupt(signum, frame):
        driver.stop()

    worker = Thread(target=driver.run)
    worker.start()
    print('Scheduler running, Ctrl+C to quit.')
    signal.signal(signal.SIGINT, stop_on_interrupt)
    while worker.is_alive():
        time.sleep(1)
startup.py
if __name__ == '__main__':
    # Script entry point: exactly one CLI argument selects the master address;
    # any other argument count falls back to the default.
    import logging
    logging.basicConfig(level=logging.DEBUG)
    if len(sys.argv) == 2:
        master = sys.argv[1]
    else:
        print("Using default mesos master (0.0.0.0:5050)")
        master = "0.0.0.0:5050"
    startup(master)
from utils import *
from pymesos import MesosSchedulerDriver
from custom_scheduler import CustomScheduler
from threading import Thread
import signal
import time
import sys
def startup(master):
    """Start a ``CustomScheduler`` driver for ``master`` and block until it stops.

    The driver runs on its own thread, SIGINT is routed to ``driver.stop()``,
    and the calling thread polls once per second until the driver thread ends.

    NOTE(review): ``new_executor`` is called with memory and cpus only —
    confirm that its ``command`` parameter is optional.

    Args:
        master: Master address handed to ``MesosSchedulerDriver``.
    """
    executor_info = new_executor(10, .1)
    framework_info = build_framework()
    driver = MesosSchedulerDriver(
        CustomScheduler(executor_info),
        framework_info,
        master,
        use_addict=True,
    )

    def stop_on_interrupt(signum, frame):
        driver.stop()

    worker = Thread(target=driver.run)
    worker.start()
    print('Scheduler running, Ctrl+C to quit.')
    signal.signal(signal.SIGINT, stop_on_interrupt)
    while worker.is_alive():
        time.sleep(1)
if __name__ == '__main__':
    # Script entry point: exactly one CLI argument selects the master address;
    # any other argument count falls back to the default.
    import logging
    logging.basicConfig(level=logging.DEBUG)
    # Single source of truth, so the address announced to the user is always
    # the one actually passed to startup().
    default_master = "0.0.0.0:5050"
    if len(sys.argv) != 2:
        print(f"Using default mesos master ({default_master})")
        master = default_master
    else:
        master = sys.argv[1]
    startup(master)
# Local Mesos stack: ZooKeeper, one Mesos master and one "slave" (agent) service.
# Every service uses the host's network stack (network_mode: host).
version: '2'
services:
  zookeeper:
    network_mode: host
    image: garland/zookeeper
    # NOTE(review): Docker documents that published ports are ignored under
    # host networking, so these mappings are probably informational — confirm.
    ports:
      - "2181:2181"
      - "2888:2888"
      - "3888:3888"
  master:
    image: mesosphere/mesos-master:1.5.0
    network_mode: host
    ports:
      - "5050:5050"
    environment:
      MESOS_HOSTNAME: 0.0.0.0
      MESOS_IP: 0.0.0.0
      MESOS_ZK: zk://0.0.0.0:2181/mesos
      MESOS_PORT: 5050
      MESOS_LOG_DIR: /var/log/mesos
      MESOS_QUORUM: 1
      MESOS_REGISTRY: in_memory
      MESOS_WORK_DIR: /var/lib/mesos
  slave:
    network_mode: host
    image: mesosphere/mesos-slave:1.5.0
    environment:
      MESOS_MASTER: zk://0.0.0.0:2181/mesos
      MESOS_LOGGING_LEVEL: INFO
Vlw!
deck
By André Claudino
deck
- 133