after AOZ
celery tasks
what should we have in mind when creating tasks?
celery tasks
@task
def update_external_system(model_instance: models.Model):
pass
is there a problem ?
celery tasks
#don't
@task
def update_external_system(model_instance):
pass
#do
@task
def update_external_system(model_instance_id):
pass
celery tasks
@task
def update_external_system(model_instance_id):
requests.post('https://api.com/ready')
is there a problem ?
celery tasks
#don't
@task
def update_external_system(model_instance_id):
requests.post('https://api.com/ready')
#do
@task
def update_external_system(model_instance_id):
requests.post('https://api.com/ready', timeout=30)
celery tasks
@task
def update_external_system(model_instance_id):
instance = Model.objects.get(id=model_instance_id)
if instance.status == 'ready':
requests.post('https://api.com/ready')
def on_order_update(order):
update_external_system.delay(order.id)
is there a problem ?
celery tasks
#don't
@task
def update_external_system(model_instance_id):
instance = Model.objects.get(id=model_instance_id)
if instance.status == 'ready':
requests.post('https://api.com/ready')
def on_order_update(order):
update_external_system.delay(order.id)
#do
@task
def update_external_system(model_instance_id):
instance = Model.objects.get(id=model_instance_id)
requests.post('https://api.com/ready')
def on_order_update(order):
if order.status == 'ready':
update_external_system.delay(order.id)
celery tasks
how to handle failure ?
- acks_late = True
- task has to be idempotent
- autoretry_for decorator
- set retry_backoff = True
- retry_backoff_max = N seconds
- retry_jitter = True
- use RequestException with the requests library
- monitor state in db
celery tasks
how to handle failure ?
There is always the possibility that the task will retry a maximum number of times and fail. For those scenarios it is important that the user of the system is informed of that fact. Relying only on logging is not enough, due to the fact that the information may be lost among other logs. Consider creating alerts for those cases in order to react accordingly. Another useful technique is to keep track of the status of the operation in the db. There could be periodical tasks that inform users about possible failures.
celery tasks
- they introduce asynchronicity
- they introduce parallelism
- the parameters must be (json) serializable
- they can fail
- they can run infinitely
- queues and workers can be blocked
classes (in python)
When to use one ?
classes (in python)
#Don't
class OrderUpdateManager():
def __init__(self, order):
self.order = order
def update(self):
pass
order_update_manager = OrderUpdateManager(order)
order_update_manager.update()
#Do
def update_order(order):
pass
When the need to preserve state between function calls is not obvious, refrain from creating classes. If you need namespacing use a separate module.
type hinting
def select_courier_company(dispatch_order, logistic_company):
"""Return a courier company given a dispatch order and a logistic company
based on the distribution rules"""
pass
vs
def select_courier_company(
dispatch_order: DispatchOrder,
logistic_company: str) -> Optional[Courier]:
"""Return a courier company given a dispatch order and a logistic company
based on the distribution rules"""
pass
orders_by_id = {o.id: o for o in Order.objects.filter(id__in=order_ids)}
vs
orders_by_id: Dict[int, Order] = {o.id: o for o in Order.objects.filter(id__in=order_ids)}
- for type hinting inside your ide
- for self documentation
- for code clarity
- for static checking using mypy
type hinting
def select_courier_company(dispatch_order, logistic_company):
"""Return a courier company given a dispatch order and a logistic company
based on the distribution rules"""
pass
vs
def select_courier_company(
dispatch_order: DispatchOrder,
logistic_company: str) -> Optional[Courier]:
"""Return a courier company given a dispatch order and a logistic company
based on the distribution rules"""
pass
orders_by_id = {o.id: o for o in Order.objects.filter(id__in=order_ids)}
vs
orders_by_id: Dict[int, Order] = {o.id: o for o in Order.objects.filter(id__in=order_ids)}
- for type hinting inside your ide
- for self documentation
- for code clarity
- for static checking using mypy
transactions
when to use one ?
transactions
when to use one ?
Transactions are not meant for avoiding race conditions (at least not in the default read committed isolation mode) and they don't prevent deadlocks.
Use transactions to ensure data integrity.
A rule of thumb is when you have more than one insert/update inside your request/view
transactions
when to use one ?
#don't
def order_view():
Order.objects.update()
OrderHistory.objects.create()
#do
@transaction.atomic
def order_view():
Order.objects.update()
OrderHistory.objects.create()
transactions
Use on_commit when calling celery tasks inside transactions
@receiver(post_save, sender=models.Order)
def new_order_callback(sender, instance, created, **kwargs):
""" Automatically triggers processing of a new Order. """
if created:
transaction.on_commit(lambda:
tasks.process_new_order.delay(instance.pk))
race conditions
they are there even if you don't think so:
- simultaneous requests
- celery tasks
#don't
def my_view():
account = Account.objects.get()
account.amount -= 100
account.save()
#do (using db locking)
def my_view():
with transaction.atomic():
account = Account.objects.select_for_update().get(pk=1)
account.amount -= 100
account.save()
#don't
def my_view(request):
user = request.user
user.visit_count += 1
user.save()
#do (using F expressions)
def my_view(request):
user = request.user
user.visit_count = F('visit_count') + 1
user.save()
race conditions
less obvious example
# susceptible to race condition
def my_view():
transaction = Transaction.objects.get(pk=1)
if transaction.status == 'sent':
send_email(transaction)
race conditions
less obvious example
Foo.objects.get_or_create(defaults__exact='bar', defaults={'defaults': 'baz'})
This method is atomic assuming correct usage, correct database configuration, and correct behavior of the underlying database. However, if uniqueness is not enforced at the database level for the kwargs used in a get_or_create call (see unique or unique_together), this method is prone to a race condition which can result in multiple rows with the same parameters being inserted simultaneously.
race conditions
optimistic locking
def update_objects(self):
"""Update all awaiting objects in COD database"""
query = UpdateInCOD.objects.filter(content_type=self.content_type, status=UpdateInCOD.WAITING).order_by('-id')
updated_objects_ids = []
for object_to_update in query.iterator():
try:
self.update_object_in_cod(object_to_update.object_id)
status = UpdateInCOD.UPDATED
except (MySQLdb.DataError, MySQLdb.IntegrityError, MySQLdb.Warning):
status = UpdateInCOD.FAILED
UpdateInCOD.objects.filter(
content_type=self.content_type,
id=object_to_update.id,
version=object_to_update.version
).update(status=status)
updated_objects_ids.append(object_to_update.object_id)
@receiver(post_save, sender=Parcel)
def importer_parcel_cod_update(sender, instance, **kwargs): # pylint: disable=unused-argument
content_type = ContentType.objects.get_for_model(Parcel)
parcel_to_update, _ = UpdateInCOD.objects.get_or_create(content_type=content_type, object_id=instance.id)
parcel_to_update.version = F('version') + 1
parcel_to_update.status = UpdateInCOD.WAITING
parcel_to_update.save()
optimization
premature optimization is the root of all evil
true but don't exaggerate
optimization
class CustomerSerializer(serializers.ModelSerializer):
orders = OrderSerializer(many=True, read_only=True)
class CustomerViewSet(ViewSet):
serializer_class = CustomerSerializer
class CustomerViewSet(ViewSet):
serializer_class = CustomerSerializer
def get_queryset(self):
# avoid N+1 selects
return Customers.objects.all().prefetch_related('orders')
optimization
def create_order(request):
products = request.products
purchase = Purchase.objects.create()
for product in products:
PurchaseProduct.objects.create(purchase=purchase, product=product)
def create_order(request):
products = request.products
purchase = Purchase.objects.create()
PurchaseProduct.objects.bulk_create([PurchaseProduct(purchase=purchase, product=product) for product in products])
optimization
def my_view(request):
for order in Orders.objects.filter():
customer = order.customer
products = order.products
delivery = Delivery.objects.get(delivery_id=order.delivery_id)
def create_order(request):
deliveries_by_delivery_id = Delivery.objects.filter().in_bulk(field_name='delivery_id')
for order in Orders.objects.filter().select_related('customer').prefetch_related('products'):
customer = order.customer
products = order.products
delivery = deliveries_by_delivery_id[order.delivery_id]
namedtuple
HistoricalChange = namedtuple('HistoricalChange',
['field', 'date', 'user', 'old_value', 'new_value']
)
class HistoryModelMixin(object): # pylint: disable=too-few-public-methods
def _get_changes_between_histories(self, old, new, fields=None):
"""
Returns a list of HistoricalChange based on the difference between two historical model instances
fields: The list of fields to compare - all fields if not passed
"""
changes = []
for field in self._meta.fields:
if fields is not None and field.name not in fields:
continue
old_value = getattr(old, field.name, '')
new_value = getattr(new, field.name, '')
if old_value != new_value:
change = HistoricalChange(
field=field.name,
date=new.history_date,
user=new.history_user,
old_value=old_value,
new_value=new_value
)
changes.append(change)
return changes
data class
@dataclass
class InventoryItem:
'''Class for keeping track of an item in inventory.'''
name: str
unit_price: float
quantity_on_hand: int = 0
def total_cost(self) -> float:
return self.unit_price * self.quantity_on_hand
@dataclass
class Point:
x: int
y: int
p = Point(10, 20)
assert asdict(p) == {'x': 10, 'y': 20}
ABC
class BaseManager():
def send(self):
raise NotImplementedError
from abc import ABC, abstractmethod
class BaseManager(ABC):
@abstractmethod
def send(self):
pass
Thank You
deck
By zqzak
deck
- 274