Advanced Django queries optimization

Bulgaria

Capital city: Sofia

Population: ~7 000 000

Who am I ?

Ivo Donchev

  • 23 years (today)
  • Student in Sofia University
  • Django and React Web Developer - since 2016
  • Vim user

HackSoft Academy

Let's talk about the Django ORM

My aim during this talk:

#3 types of problems

2 assumptions:

  • ORM is an abstraction over the DB interaction  
  • Python is not the new SQL

#1 Problem

Too much SQL queries

.prefetch_related()

.select_related()

&

class SharedAccount(Model):
    pass



class User(Model):
    shared_account = ForeignKey(
        SharedAccount,
        related_name='users'
    )
SELECT *
    FROM "user"

ORM expression

User.objects.all()

SQL query

SELECT *
    FROM "user"
    INNER JOIN "shared_account"
        ON ("user"."shared_account_id" = "shared_account"."id")

ORM expression

User.objects.select_related('shared_account')

SQL query

class SharedAccount(Model):
    pass



class User(Model):
    shared_account = ForeignKey(
        SharedAccount,
        related_name='users'
    )
SELECT *
    FROM "shared_account"

ORM expression

SharedAccount.objects.all()

SQL query

SELECT *
    FROM "shared_account"

ORM expression

SharedAccount.objects.prefetch_related('users')

SQL query

SELECT *
    FROM "users"
    WHERE "user"."shared_account_id" IN (...);

wait for it....

Tip 1:

you can use

.select_related() and .prefetch_related()

for simple JOIN-s

#2 Problem

Too much data

class User(Model):
    name = CharField(max_length=255)


class Playlist(Model):
    user = ForeignKey(User, related_name='playlists')

    name = CharField(max_length=255)


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')

    title = CharField(max_length=255)
    length = PositiveIntegerField()

Music Service Web Application

class User(Model):
    name = CharField(max_length=255)


class Playlist(Model):
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        total_length = 0
        
        for song in self.songs.all():
            total_length += song.length
   
        return total_length


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')

    title = CharField(max_length=255)
    length = PositiveIntegerField()
class User(Model):
    name = CharField(max_length=255)


class Playlist(Model):
    user = ForeignKey(User, related_name='playlists')

    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        return sum([song.length for song in self.songs.all()])


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')

    title = CharField(max_length=255)
    length = PositiveIntegerField()
class User(Model):
    name = CharField(max_length=255)

    @property
    def playlists_total_length(self):
        return sum([p.songs_total_length for p in self.playlists.all()])


class Playlist(Model):
    user = ForeignKey(User, related_name='playlists')

    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        return sum([song.length for song in self.songs.all()])


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')

    title = CharField(max_length=255)
    length = PositiveIntegerField()

Task: Get all users' total length of their playlists

  • 10 Users
  • 10 Playlists per user
  • 10 songs per playlist

Task: Get all users' total length of their playlists

Total objects to fetch = 10 (Users) +\

                                          10 * 10 (Playlists) +\

                                          10 * 10 * 10 (Songs)

1110 DB rows in 3 different tables

for just 10 users

GROUP BY with aggregation(SUM)

class User(Model):
    name = CharField(max_length=255)

    @property
    def playlists_total_length(self):
        return sum([p.songs_total_length for p in self.playlists.all()])


class Playlist(Model):
    user = ForeignKey(User, related_name='playlists')

    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        return sum([song.length for song in self.songs.all()])


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')

    title = CharField(max_length=255)
    length = PositiveIntegerField()

Playlists with total songs length 

SELECT "playlist"."id",
       "playlist"."user_id",
       "playlist"."name",
       (
           SELECT SUM("song"."length")
               FROM "song"
               WHERE "song"."playlist_id" = ("playlist"."id")
               GROUP BY "song"."playlist_id" LIMIT 1
        ) AS "songs_total_length"
    FROM "playlist";
class Playlist(Model):
    user = ForeignKey(User, related_name='playlists')

    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
    @classmethod
    def songs_total_length(cls):
        return <sum aggregation over songs grouped by the playlist ???>

    def collect(self):
        return self.annotate(_songs_total_length=self.songs_total_length())


class Playlist(Model):
    objects = PlaylistQuerySet.as_manager()
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        if hasattr(self, 'songs_total_length'):
            return self._songs_total_length

        return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
    @classmethod
    def songs_total_length(cls):
        return Subquery(
            query=...,
            output_field=...
        )

    def collect(self):
        return self.annotate(_songs_total_length=self.songs_total_length())


class Playlist(Model):
    objects = PlaylistQuerySet.as_manager()
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        if hasattr(self, 'songs_total_length'):
            return self._songs_total_length

        return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
    @classmethod
    def songs_total_length(cls):
        return Subquery(
            queryset=...,
            output_field=models.IntegerField()
        )

    def collect(self):
        return self.annotate(_songs_total_length=self.songs_total_length())


class Playlist(Model):
    objects = PlaylistQuerySet.as_manager()
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        if hasattr(self, 'songs_total_length'):
            return self._songs_total_length

        return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
    @classmethod
    def songs_total_length(cls):
        queryset = Song.objects \
            .values('playlist__id') \  # This is the GROUP BY field
            .  # ¯\_(ツ)_/¯

        return Subquery(
            queryset=queryset,
            output_field=models.IntegerField()
        )

    def collect(self):
        return self.annotate(_songs_total_length=self.songs_total_length())


class Playlist(Model):
    objects = PlaylistQuerySet.as_manager()
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        if hasattr(self, 'songs_total_length'):
            return self._songs_total_length

        return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
    @classmethod
    def songs_total_length(cls):
        queryset = Song.objects \
            .values('playlist__id') \  # This is the GROUP BY field
            .filter(playlist__id=OuterRef('id')) \
            .  # ¯\_(ツ)_/¯

        return Subquery(
            queryset=queryset,
            output_field=models.IntegerField()
        )

    def collect(self):
        return self.annotate(_songs_total_length=self.songs_total_length())


class Playlist(Model):
    objects = PlaylistQuerySet.as_manager()
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        if hasattr(self, 'songs_total_length'):
            return self._songs_total_length

        return sum([song.length for song in self.songs.all()])
class PlaylistQuerySet(QuerySet):
    @classmethod
    def songs_total_length(cls):
        queryset = Song.objects \
            .values('playlist__id') \  # This is the GROUP BY field
            .filter(playlist__id=OuterRef('id')) \
            .values_list(Sum('length'))  # The SUM aggregation

        return Subquery(
            queryset=queryset,
            output_field=models.IntegerField()
        )

    def collect(self):
        return self.annotate(_songs_total_length=self.songs_total_length())


class Playlist(Model):
    objects = PlaylistQuerySet.as_manager()
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        if hasattr(self, 'songs_total_length'):
            return self._songs_total_length

        return sum([song.length for song in self.songs.all()])
Playlist.objects.collect()

SELECT "playlist"."id",
       "playlist"."user_id",
       "playlist"."name",
       (
           SELECT SUM("song"."length")
               FROM "song"
               WHERE "song"."playlist_id" = ("playlist"."id")
               GROUP BY "song"."playlist_id" LIMIT 1
        ) AS "songs_total_length"
    FROM "playlist";

...is the Django ORM equivalent for

class PlaylistQuerySet(QuerySet):
    @classmethod
    def playlists_total_length(cls):
        pass

    def collect(self):
        return self.annotate(_playlists_total_length=self.playlists_total_length())


class User(Model):
    objects = UsersQuerySet.as_manager()

    name = CharField(max_length=255)

    @property
    def playlists_total_length(self):
        if hasattr(self, '_playlists_total_length'):
            return self._playlists_total_length

        playlists_length = [
            playlist.songs_total_length
            for playlist in self.playlists.all()
        ]

        return sum(playlist_length)
class UserQuerySet(QuerySet):
    @classmethod
    def playlists_total_length(cls):
        playlist_annotation = PlaylistQueryset.songs_total_length()

        queryset = Playlist.objects \
            .values('user__id') \
            .filter(user__id=OuterRef('id')) \
            .values_list(Sum(playlist_annotation))

        return Subquery(
            queryset=queryset,
            output_field=IntegerField()
        )

    def collect(self):
        return self.annotate(_playlists_total_length=self.playlists_total_length())


class User(Model):
    objects = UserQuerySet.as_manager()

    name = CharField(max_length=255)

    @property
    def playlists_total_length(self):
        if hasattr(self, '_playlists_total_length'):
            return self._playlists_total_length

        return sum([p.songs_total_length for p in self.playlists.all()])
SELECT "user"."id",
       "user"."name",
       (SELECT SUM((
            SELECT SUM(U0."length")
                FROM "song" U0
                WHERE U0."playlist_id" = (V0."id")
                GROUP BY U0."playlist_id"
                LIMIT 1
       ))
           FROM "playlist" V0
           WHERE V0."user_id" = ("user"."id")
           GROUP BY V0."user_id") AS "_playlists_total_length"
FROM "user"

Tip 2:

Subquery + OuterRef for GROUP BY

#3 Problem

Too much queries

+

Too much data

class User(Model):
    name = CharField(max_length=255)

    @property
    def playlists_total_length(self):
        return sum([p.songs_total_length for p in self.playlists.all()])


class Playlist(Model):
    user = ForeignKey(User, related_name='playlists')
    name = CharField(max_length=255)

    @property
    def songs_total_length(self):
        return sum([song.real_length for song in self.songs.all()])


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')
    title = CharField(max_length=255)
    length = PositiveIntegerField()
    
    def real_length(self):
        return self.length * 0.8
class SongQuerySet(QuerySet):
    @classmethod
    def real_length(cls):
        return ExpressionWrapper(
            expression=...,
            output_field=IntegerField()
        )

    def collect(self):
        return self.annotate(_real_length=self.real_length())


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')
    title = CharField(max_length=255)
    length = PositiveIntegerField()
    
    def real_length(self):
        if hasattr(self, '_real_length'):
            return self._real_length

        return self.length * 0.8
class SongQuerySet(QuerySet):
    @classmethod
    def real_length(cls):
        real_length = F('length') * Value(0.8)

        return ExpressionWrapper(
            expression=real_length,
            output_field=IntegerField()
        )

    def collect(self):
        return self.annotate(_real_length=self.real_length())


class Song(Model):
    playlist = ForeignKey(Playlist, related_name='songs')
    title = CharField(max_length=255)
    length = PositiveIntegerField()
    
    def real_length(self):
        if hasattr(self, '_real_length'):
            return self._real_length

        return self.length * 0.8

Modify the tests

# test_models.py

class SongTests(TestCase):
    def test_song_length(self):
        length = 120
        real_length = 0.8 * length
        song = Song.objects.create(title='Europython', length=120)
        
        self.assertEqual(real_length, song.real_length)

Modify the tests

# test_models.py

class SongTests(TestCase):
    def test_song_length(self):
        length = 120
        real_length = 0.8 * length
        song = Song.objects.create(title='Europython', length=120)
        
        self.assertEqual(real_length, song.real_length)

        song_collected = Song.objects.collect().get(id=song.id)

        self.assertEqual(real_length, song_collected.real_length)

Thank you!!!

  •  https://github.com/Ivo-Donchev
  • https://twitter.com/DonchevIvaylo

Django queries optimization

By Ivaylo Donchev

Django queries optimization

  • 1,548