Data Science Summit 2020
n+1
window
aggregates
annotate
constraints
function
postgres
admin
json
fulltext
queryset
explain
lookups
index
Expressions
Q
F
Arrays
range
explain/analyze
django-debug-tool
managers
search
Product.objects.filter(name__startswith="A")
SQL Representation
Explain
# Get a list of categories
Category.objects.all()
# Get an iterator of categories
Category.objects.iterator()
# Get product with categories filled in
Product.objects.select_related("category").all()
# Get all dates stocks were in
ProductItem.objects.dates("stock_in_date", "day")
# Find all products that end with "phone"
Product.objects.filter(name__endswith="phone")
# Find all product items
ProductItems.objects.count()
Product.objects\
.select_related("category")\
.values("category__name")\
.annotate(count=Count("pk")
def purchases_by_month():
return ProductItem.objects\
.annotate(month=ExtractMonth("stock_in_date"))\
.values("month")\
.annotate(sales=Avg("product__price"))
def total_sale():
return ProductItem.objects\
.select_related("product")\
.aggregate(total=Sum('product__price'))
Category.objects.annotate(
total=Subquery(
Product.objects.filter(
category=OuterRef("pk")).values("category").annotate(
total=Count("pk")).values("total")),
product_names=Subquery(
Product.objects.filter(
category=OuterRef("pk")).values("category").annotate(
names=StringAgg("name", "||")).values("names")),
average_price=Subquery(
Product.objects.filter(
category=OuterRef("pk")).values("category").annotate(
avg_price=Avg("price")).values("avg_price")))
def average_price():
return Product.objects.annotate(
average_by_category=Window(
expression=Avg('price'),
partition_by=[F('category')],
order_by=F('category').asc()
)
).values(
'name', 'category__name', 'average_by_category')
class Message(models.Model):
when = models.DateTimeField(auto_now=True, db_index=True)
sender = models.CharField(max_length=127)
recipient = models.CharField(max_length=127)
objects = MessageManager()
def __str__(self):
return f'[{self.when.ctime()}] {self.sender} -> {self.recipient}'
Find last messages between two individuals
class MessageManager(models.Manager):
def latest_distinct(self):
return super().get_queryset().annotate(
user_1=Case(
When(sender__gt=F('recipient'), then=F('recipient')),
default=F('sender')
),
user_2=Case(
When(sender__gt=F('recipient'), then=F('sender')),
default=F('recipient')
)
).order_by('user_1', 'user_2', '-when').distinct('user_1', 'user_2')
WHY?
from django.db import models
from django.contrib.postgres.fields import CITextField, CICharField, JSONField
from phonenumber_field.modelfields import PhoneNumberField
from common.models import TimeStampedModel
class Customer(TimeStampedModel):
name = models.CharField(max_length=127)
email = models.EmailField(unique=True)
phone_number = PhoneNumberField(unique=True)
address = CITextField(blank=True)
bio = CICharField(blank=True, max_length=31)
preferences = JSONField(null=True)
def __str__(self):
return self.name
Customer.objects.filter(preferences__send_email=True)
Customer.objects.filter(preferences__languages__contains="python")
Customer.objects.filter(bio__contains="pYThOn")
This one will be fun!!!
class Event(TimeStampedModel):
user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
time_range = DateTimeRangeField(db_index=True, default=default_time_range)
remarks = models.TextField(blank=True)
objects = EventManager()
def __str__(self):
return f'{self.user.username}: ({format_time_range(self.time_range)})'
class Meta:
ordering = '-time_range',
# Uncomment in the event of Python 3.0!!!
# constraints = [
# ExclusionConstraint(
# name='exclude_overlapping_reservations',
# expressions=[
# ('time_range', RangeOperators.OVERLAPS),
# ('user', RangeOperators.EQUAL),
# ]
# ),
# ]
def create_event(user, start_time=None, end_time=None) -> Event:
return Event.objects.create(
user=user, time_range=DateTimeTZRange(lower=start_time or now(), upper=end_time))
def update_event(pk, start_time, end_time) -> Event:
return Event.objects.get(id=pk).update(
time_range=DateTimeTZRange(lower=start_time, upper=end_time))
class EventManager(models.Manager):
def get_queryset(self):
return super().get_queryset().annotate(
start_time=Lower('time_range'),
end_time=Upper('time_range'),
is_current=Case(
When(
end_time__isnull=True,
then=True),
default=False,
output_field=BooleanField()),
duration=Case(
When(is_current=True,
then=ExpressionWrapper(Now() - F('start_time'),
output_field=DurationField())),
default=ExpressionWrapper(F('end_time') - F('start_time'),
output_field=DurationField()))
)
def find_in_category_or_product(query):
return Product.objects.select_related('category').annotate(
category_name=F('category__name'),
).annotate(
search=SearchVector('name') + SearchVector('category_name')
).filter(search=query)
# FIND Pixel Phones that does not have TAB in it
services.find_in_category_or_product(
SearchQuery('pixel') & ~SearchQuery('tab'))
# FIND ANYTHING WHERE 'phone' IS IN 'name' or 'category name'
services.find_in_category_or_product('phone')
def fuzzy_name_finder(term, model=Product):
return model.objects.annotate(
similarity=TrigramSimilarity('name', term)
).filter(similarity__gt=0.1).order_by('-similarity')
# SHOULD GIVE YOU PIXEL
services.fuzzy_name_finder("pijsel")