Celery basics
Celery
- Async task queue & task runner
- Distributed message passing
- Implemented in Python
- Plays well with Django
- Pluggable brokers & storages
Basic Concepts
Basic Concepts
Concept #1: Broker
- Creation of task queues
- Dispatching tasks to task queues
- Delivering tasks to workers.
Concept #2: Consumer
One or multiple Celery workers executing the tasks
Concept #3:
Result Backend
Used for storing the results
Keep in mind...
Rule #1:
adding a task to a queue should be faster than performing the task itself
Rule #2:
you should consume tasks faster than you produce them.
If not --> add more workers!
StyleSage meets Celery
Use Case #1:
Product Loader
Use Case #2:
Images
http://www.zappos.com/images/z/3...
http://images.neimanmarcus.com/ca/1/pro...
Use Case #3:
Mappings
Use Case #4:
Scores
First steps
Application
from celery import Celery

# Application instance for the ``tasks`` module: messages go through the
# AMQP broker URL and results are stored in the Redis backend URL.
app = Celery(
    'tasks',
    broker='pyamqp://guest@localhost//',
    backend='redis://localhost',
)


@app.task
def add(x, y):
    """Return the sum of ``x`` and ``y``."""
    return x + y
Running the Celery worker server
>> celery -A tasks worker --loglevel=info
Calling the task
from tasks import add

### apply_async(args[, kwargs[, …]])
# Sends a task message. The positional arguments go in as ONE sequence.
add.apply_async((4, 4))

### delay(*args, **kwargs)
# Shortcut to send a task message,
# but doesn’t support execution options.
add.delay(4, 4)

### calling (__call__)
# The task will not be executed by a worker,
# but in the current process instead.
add(4, 4)
Keeping Results
>>> result = add.delay(4, 4)
>>> result.ready()
False
>>> result.get(timeout=1)
8
Best practices
#1 Passing Big Objects to Tasks => Memory Leak
@task()
def process_file(file_content):
    """Print the file content that arrived as the task argument."""
    print(file_content)


def run():
    """Queue one task per file, sending each file's full content.

    Anti-pattern kept on purpose: the whole file content becomes the task
    argument, so every big file travels inside a task message.
    """
    for i in range(1000):
        # Close each file right after reading instead of leaving 1000
        # handles open.
        with open('big_file_%s.text' % i, 'r') as big_file:
            file_content = big_file.read()
        process_file.delay(file_content)
#1 Passing Big Objects to Tasks => Memory Leak
@task()
def process_file(file_name):
    """Read the file called ``file_name`` inside the task and print it."""
    # Open the path that was passed in; the loop variable ``i`` exists only
    # inside ``run``.
    with open(file_name, 'r') as big_file:
        file_content = big_file.read()
    print(file_content)


def run():
    """Queue one task per file, passing only the file name."""
    for i in range(1000):
        process_file.delay('big_file_%s.text' % i)
#2 Passing Database/ORM Objects => Race Condition
from celery import task
@task()
def update_user_picture(user_object, picture):
    """Upload ``picture`` and save its URL on the given ``user_object``.

    Runs in the background because the upload takes a while. The object
    saved here is the one handed over as a task argument, i.e. the copy
    that was loaded before the task was queued.
    """
    picture_url = upload_picture(picture)
    user_object.profile_picture = picture_url
    user_object.save()


def update_username(request, user_id, username):
    """Load the user by id, set the new username and save."""
    user = db.user.get(user_id=user_id)
    user.username = username
    user.save()


# NOTE(review): this view has the same name as the task above; presumably
# the two live in separate modules (tasks vs. views) - confirm.
def update_user_picture(request, user_id, picture):
    """Load the user by id and queue the picture update with that object."""
    user = db.user.get(user_id=user_id)
    update_user_picture.delay(user, picture)
#2 Passing Database/ORM Objects => Race Condition
@task()
def update_user_picture(user_id, picture):
    """Upload ``picture`` and save its URL on the user with ``user_id``."""
    picture_url = upload_picture(picture)
    # The upload takes time, so the user is loaded only afterwards and the
    # save works on a fresh object.
    user = db.user.get(user_id=user_id)
    user.profile_picture = picture_url
    user.save()


def update_username(user_id, username):
    """Load the user by id, set the new username and save."""
    user = db.user.get(user_id=user_id)
    user.username = username
    user.save()


# NOTE(review): this view has the same name as the task above; presumably
# the two live in separate modules (tasks vs. views) - confirm.
def update_user_picture(request, user_id, picture):
    """Queue the picture update, passing only the user id."""
    update_user_picture.delay(user_id, picture)
#3 Route Tasks to Their Own Queues
@task()
def log_event():
    """Placeholder task: does nothing."""


@task()
def update_username():
    """Placeholder task: does nothing."""


@task()
def update_password():
    """Placeholder task: does nothing."""


@task()
def update_user_picture():
    """Placeholder task: does nothing."""


def run():
    """Queue each of the four tasks once, in declaration order."""
    queued_in_order = (
        log_event,
        update_username,
        update_password,
        update_user_picture,
    )
    for queued_task in queued_in_order:
        queued_task.delay()
# Task name -> routing options. ``log_event`` gets its own queue; the three
# profile updates share the ``update_user_profile`` queue.
CELERY_ROUTES = {
    'default.log_event': {'queue': 'log_event'},
    'default.update_username': {'queue': 'update_user_profile'},
    'default.update_password': {'queue': 'update_user_profile'},
    'default.update_user_picture': {'queue': 'update_user_profile'},
}
#4 Retry & Idempotent
from celery import task
@task()
def log_event(event):
    """Store ``event`` in the database unconditionally.

    Not idempotent: if the task is retried, the same event is saved again.
    """
    db.save(event)
- Celery Tasks may fail or be interrupted.
- Never assume the current state of the system when a task begins.
- Change as little external state as possible.
#4 Retry & Idempotent
from celery import task
@task(default_retry_delay=10, max_retries=3)
def log_event(event):
    """Store ``event`` unless a matching one has already been saved.

    Events are matched on their ``ip`` and ``user_agent``.
    """
    already_saved = db.event.find_one(ip=event.ip, user_agent=event.user_agent)
    if already_saved:
        return
    db.save(event)
- Celery Tasks may fail or be interrupted.
- Never assume the current state of the system when a task begins.
- Change as little external state as possible.
#5 Property Caching
from celery import Task
class DatabaseTask(Task):
    """Abstract task base that opens its database connection lazily."""

    abstract = True
    _db = None  # filled in on first access to ``db``

    @property
    def db(self):
        """Return the cached connection, connecting on first use."""
        if self._db is not None:
            return self._db
        self._db = Database.connect()
        return self._db

    def run(self, user_id, username):
        """Look up the user by ``user_id`` and save the new ``username``."""
        record = self.db.user.find_one(user_id=user_id)
        record["username"] = username
        self.db.user.save(record)
#6 Class Method as Task
from celery import task
class UserProfileUpdater(object):
    """Holds the picture-update task as a static method."""

    @staticmethod
    @task()  # ``task`` is the name this snippet imports from celery
    def update_user_picture(user_id, picture):
        """Upload ``picture`` and store its URL on the user ``user_id``."""
        new_profile_picture_url = upload_picture(picture)
        user = db.user.find_one(user_id=user_id)
        user.profile_picture = new_profile_picture_url
        user.save()
Designing Work-flows
6 tools
#1 Partials
# Any arguments added will be prepended
# to the args in the signature:
>>> partial = add.s(2) # incomplete signature
>>> partial.delay(4) # 4 + 2
>>> partial.apply_async((4,)) # same
# Any keyword arguments added will be merged
# with the kwargs in the signature
>>> s = add.s(2, 2)
>>> s.delay(debug=True) # -> add(2, 2, debug=True)
>>> s.apply_async(kwargs={'debug': True}) # same
>>> from celery import chain
>>> # 2 + 2 + 4 + 8
>>> res = chain(add.s(2, 2),
add.s(4), add.s(8))()
>>> res.get()
16
# This can also be written using pipes:
(add.s(2, 2) | add.s(4) | add.s(8))().get()
#2 Chains
# Two ways to do it
>>> add.signature((2, 2), immutable=True)
>>> add.si(2, 2)
# A chain of independent tasks
>>> res = (add.si(2, 2) | add.si(4, 4) | add.s(8, 8))()
>>> res.get()
16
>>> res.parent.get()
8
>>> res.parent.parent.get()
4
#3 Immutable signatures
# Group of tasks to execute in parallel
>>> from celery import group
>>> res = group(add.s(i, i)
for i in xrange(10))()
>>> res.get(timeout=1)
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
#4 Groups
>>> from celery import chord
>>> res = chord((add.s(i, i) for i in xrange(10)),
xsum.s())()
>>> res.get()
90
#5 chord (group + chain)
>>> from proj.tasks import add
>>> res = add.chunks(zip(range(100), range(100)), 10)()
>>> res.get()
[[0, 2, 4, 6, 8, 10, 12, 14, 16, 18],
[20, 22, 24, 26, 28, 30, 32, 34, 36, 38],
[40, 42, 44, 46, 48, 50, 52, 54, 56, 58],
[60, 62, 64, 66, 68, 70, 72, 74, 76, 78],
[80, 82, 84, 86, 88, 90, 92, 94, 96, 98],
[100, 102, 104, 106, 108, 110, 112, 114, 116, 118],
[120, 122, 124, 126, 128, 130, 132, 134, 136, 138],
[140, 142, 144, 146, 148, 150, 152, 154, 156, 158],
[160, 162, 164, 166, 168, 170, 172, 174, 176, 178],
[180, 182, 184, 186, 188, 190, 192, 194, 196, 198]]
#6 Chunks
Periodic tasks
Periodical tasks
from celery import Celery
app = Celery()


@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register the periodic calls of ``test`` once the app is configured."""
    # (interval in seconds, argument handed to ``test``)
    schedule = (
        (10.0, 'hello'),
        (30.0, 'world'),
    )
    for interval, word in schedule:
        sender.add_periodic_task(interval, test.s(word))


@app.task
def test(arg):
    """Print ``arg``."""
    print(arg)
Crontab schedules
from celery import Celery
from celery.schedules import crontab
app = Celery()


@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Schedule the Monday-morning greeting once the app is configured."""
    # Every Monday morning at 7:30 a.m.
    monday_morning = crontab(hour=7, minute=30, day_of_week=1)
    sender.add_periodic_task(monday_morning, test.s('Happy Mondays!'))


@app.task
def test(arg):
    """Print ``arg``."""
    print(arg)
Solar schedules
from celery import Celery
from celery.schedules import solar  # the schedule type used below

app = Celery()


@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Schedule the sunset greeting once the app is configured."""
    # Executes at sunset in Melbourne (latitude, longitude).
    sender.add_periodic_task(
        solar('sunset', -37.81753, 144.96715),
        test.s('Good night Melbourne!'),
    )


@app.task
def test(arg):
    """Print ``arg``."""
    print(arg)
Starting the Scheduler
$ celery -A proj beat
Django testing
"""
* No need for running RabbitMQ
* Don't do async (always eager)
* Propagate exceptions
"""
# In-memory broker backend, so no RabbitMQ needs to be running for tests.
CELERY_BROKER_BACKEND = 'memory'
# Always eager: tasks are not run asynchronously.
CELERY_ALWAYS_EAGER = True
# Propagate exceptions raised by eagerly executed tasks.
CELERY_EAGER_PROPAGATES_EXCEPTIONS = True
Django testing
Monitoring
Flower
Thank you!
Celery
By aliciapj
Celery
- 1,632