Advanced Django queries optimization
Bulgaria
Capital city: Sofia
Population: ~7 000 000
Who am I ?
Ivo Donchev
- 23 years (today)
- Student in Sofia University
- Django and React Web Developer - since 2016
- Vim user
HackSoft Academy
Let's talk about the Django ORM
My aim during this talk:
#3 types of problems
2 assumptions:
- ORM is an abstraction over the DB interaction
- Python is not the new SQL
#1 Problem
Too much SQL queries
.prefetch_related()
.select_related()
&
class SharedAccount(Model):
pass
class User(Model):
shared_account = ForeignKey(
SharedAccount,
related_name='users'
)
SELECT *
FROM "user"
ORM expression
User.objects.all()
SQL query
SELECT *
FROM "user"
INNER JOIN "shared_account"
ON ("user"."shared_account_id" = "shared_account"."id")
ORM expression
User.objects.select_related('shared_account')
SQL query
class SharedAccount(Model):
pass
class User(Model):
shared_account = ForeignKey(
SharedAccount,
related_name='users'
)
SELECT *
FROM "shared_account"
ORM expression
SharedAccount.objects.all()
SQL query
SELECT *
FROM "shared_account"
ORM expression
SharedAccount.objects.prefetch_related('users')
SQL query
SELECT *
FROM "users"
WHERE "user"."shared_account_id" IN (...);
wait for it....
Tip 1:
you can use
.select_related() and .prefetch_related()
for simple JOIN-s
#2 Problem
Too much data
class User(Model):
name = CharField(max_length=255)
class Playlist(Model):
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
Music Service Web Application
class User(Model):
name = CharField(max_length=255)
class Playlist(Model):
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
total_length = 0
for song in self.songs.all():
total_length += song.length
return total_length
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
class User(Model):
name = CharField(max_length=255)
class Playlist(Model):
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
return sum([song.length for song in self.songs.all()])
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
class User(Model):
name = CharField(max_length=255)
@property
def playlists_total_length(self):
return sum([p.songs_total_length for p in self.playlists.all()])
class Playlist(Model):
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
return sum([song.length for song in self.songs.all()])
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
Task: Get all users' total length of their playlists
- 10 Users
- 10 Playlists per user
- 10 songs per playlist
Task: Get all users' total length of their playlists
Total objects to fetch = 10 (Users) +\
10 * 10 (Playlists) +\
10 * 10 * 10 (Songs)
1110 DB rows in 3 different tables
for just 10 users
GROUP BY with aggregation(SUM)
class User(Model):
name = CharField(max_length=255)
@property
def playlists_total_length(self):
return sum([p.songs_total_length for p in self.playlists.all()])
class Playlist(Model):
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
return sum([song.length for song in self.songs.all()])
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
Playlists with total songs length
SELECT "playlist"."id",
"playlist"."user_id",
"playlist"."name",
(
SELECT SUM("song"."length")
FROM "song"
WHERE "song"."playlist_id" = ("playlist"."id")
GROUP BY "song"."playlist_id" LIMIT 1
) AS "songs_total_length"
FROM "playlist";
class Playlist(Model):
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
@classmethod
def songs_total_length(cls):
return <sum aggregation over songs grouped by the playlist ???>
def collect(self):
return self.annotate(_songs_total_length=self.songs_total_length())
class Playlist(Model):
objects = PlaylistQuerySet.as_manager()
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
if hasattr(self, 'songs_total_length'):
return self._songs_total_length
return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
@classmethod
def songs_total_length(cls):
return Subquery(
query=...,
output_field=...
)
def collect(self):
return self.annotate(_songs_total_length=self.songs_total_length())
class Playlist(Model):
objects = PlaylistQuerySet.as_manager()
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
if hasattr(self, 'songs_total_length'):
return self._songs_total_length
return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
@classmethod
def songs_total_length(cls):
return Subquery(
queryset=...,
output_field=models.IntegerField()
)
def collect(self):
return self.annotate(_songs_total_length=self.songs_total_length())
class Playlist(Model):
objects = PlaylistQuerySet.as_manager()
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
if hasattr(self, 'songs_total_length'):
return self._songs_total_length
return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
@classmethod
def songs_total_length(cls):
queryset = Song.objects \
.values('playlist__id') \ # This is the GROUP BY field
. # ¯\_(ツ)_/¯
return Subquery(
queryset=queryset,
output_field=models.IntegerField()
)
def collect(self):
return self.annotate(_songs_total_length=self.songs_total_length())
class Playlist(Model):
objects = PlaylistQuerySet.as_manager()
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
if hasattr(self, 'songs_total_length'):
return self._songs_total_length
return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
@classmethod
def songs_total_length(cls):
queryset = Song.objects \
.values('playlist__id') \ # This is the GROUP BY field
.filter(playlist__id=OuterRef('id')) \
. # ¯\_(ツ)_/¯
return Subquery(
queryset=queryset,
output_field=models.IntegerField()
)
def collect(self):
return self.annotate(_songs_total_length=self.songs_total_length())
class Playlist(Model):
objects = PlaylistQuerySet.as_manager()
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
if hasattr(self, 'songs_total_length'):
return self._songs_total_length
return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
@classmethod
def songs_total_length(cls):
queryset = Song.objects \
.values('playlist__id') \ # This is the GROUP BY field
.filter(playlist__id=OuterRef('id')) \
.values_list(Sum('length')) # The SUM aggregation
return Subquery(
queryset=queryset,
output_field=models.IntegerField()
)
def collect(self):
return self.annotate(_songs_total_length=self.songs_total_length())
class Playlist(Model):
objects = PlaylistQuerySet.as_manager()
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
if hasattr(self, 'songs_total_length'):
return self._songs_total_length
return sum([song.length for song in self.songs.all()])
Playlist.objects.collect()
SELECT "playlist"."id",
"playlist"."user_id",
"playlist"."name",
(
SELECT SUM("song"."length")
FROM "song"
WHERE "song"."playlist_id" = ("playlist"."id")
GROUP BY "song"."playlist_id" LIMIT 1
) AS "songs_total_length"
FROM "playlist";
...is the Django ORM equivalent for
class PlaylistQuerySet(QuerySet):
@classmethod
def playlists_total_length(cls):
pass
def collect(self):
return self.annotate(_playlists_total_length=self.playlists_total_length())
class User(Model):
objects = UsersQuerySet.as_manager()
name = CharField(max_length=255)
@property
def playlists_total_length(self):
if hasattr(self, '_playlists_total_length'):
return self._playlists_total_length
playlists_length = [
playlist.songs_total_length
for playlist in self.playlists.all()
]
return sum(playlist_length)
class UserQuerySet(QuerySet):
@classmethod
def playlists_total_length(cls):
playlist_annotation = PlaylistQueryset.songs_total_length()
queryset = Playlist.objects \
.values('user__id') \
.filter(user__id=OuterRef('id')) \
.values_list(Sum(playlist_annotation))
return Subquery(
queryset=queryset,
output_field=IntegerField()
)
def collect(self):
return self.annotate(_playlists_total_length=self.playlists_total_length())
class User(Model):
objects = UserQuerySet.as_manager()
name = CharField(max_length=255)
@property
def playlists_total_length(self):
if hasattr(self, '_playlists_total_length'):
return self._playlists_total_length
return sum([p.songs_total_length for p in self.playlists.all()])
SELECT "user"."id",
"user"."name",
(SELECT SUM((
SELECT SUM(U0."length")
FROM "song" U0
WHERE U0."playlist_id" = (V0."id")
GROUP BY U0."playlist_id"
LIMIT 1
))
FROM "playlist" V0
WHERE V0."user_id" = ("user"."id")
GROUP BY V0."user_id") AS "_playlists_total_length"
FROM "user"
Tip 2:
Subquery + OuterRef for GROUP BY
#3 Problem
Too much queries
+
Too much data
class User(Model):
name = CharField(max_length=255)
@property
def playlists_total_length(self):
return sum([p.songs_total_length for p in self.playlists.all()])
class Playlist(Model):
user = ForeignKey(User, related_name='playlists')
name = CharField(max_length=255)
@property
def songs_total_length(self):
return sum([song.real_length for song in self.songs.all()])
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
def real_length(self):
return self.length * 0.8
class SongQuerySet(QuerySet):
@classmethod
def real_length(cls):
return ExpressionWrapper(
expression=...,
output_field=IntegerField()
)
def collect(self):
return self.annotate(_real_length=self.real_length())
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
def real_length(self):
if hasattr(self, '_real_length'):
return self._real_length
return self.length * 0.8
class SongQuerySet(QuerySet):
@classmethod
def real_length(cls):
real_length = F('length') * Value(0.8)
return ExpressionWrapper(
expression=real_length,
output_field=IntegerField()
)
def collect(self):
return self.annotate(_real_length=self.real_length())
class Song(Model):
playlist = ForeignKey(Playlist, related_name='songs')
title = CharField(max_length=255)
length = PositiveIntegerField()
def real_length(self):
if hasattr(self, '_real_length'):
return self._real_length
return self.length * 0.8
Modify the tests
# test_models.py
class SongTests(TestCase):
def test_song_length(self):
length = 120
real_length = 0.8 * length
song = Song.objects.create(title='Europython', length=120)
self.assertEqual(real_length, song.real_length)
Modify the tests
# test_models.py
class SongTests(TestCase):
def test_song_length(self):
length = 120
real_length = 0.8 * length
song = Song.objects.create(title='Europython', length=120)
self.assertEqual(real_length, song.real_length)
song_collected = Song.objects.collect().get(id=song.id)
self.assertEqual(real_length, song_collected.real_length)
Thank you!!!
- https://github.com/Ivo-Donchev
- https://twitter.com/DonchevIvaylo
Django queries optimization
By Ivaylo Donchev
Django queries optimization
- 1,734