Пишем приложения на SQLAlchemy

Денис Катаев

Tinkoff.ru

In the previous episode

SQLAlchemy:

raw SQL vs Python

  • Введение в sqlalchemy
  • Разницу между core & orm
  • Как написать любой сложный запрос
  • Объектно-реляционный разрыв

Объектно-реляционное несовпадение потенциалов

OOP

SQL

SQLAlchemy

Orator

Active record ORM

from orator import Model
from orator.orm import has_many, belongs_to


class Dialog(Model):
    @has_many
    def messages(self):
        return Message


class Message(Model):
    @belongs_to
    def dialog(self):
        return Dialog
message = Message()
message.text

Traceback (most recent call last):
...
AttributeError: 'Message' object has no attribute 'text'

Table fields

None is not None

message = Message()

...

if message.dialog is not None:
    print(message.dialog.user_id)

Traceback (most recent call last):
    print(messsage.dialog.user_id)
AttributeError: 'NoneType' object has no attribute 'user_id'
message = Message()

...

if message.dialog is not None:
    print(message.dialog.user_id)

Traceback (most recent call last):
    print(messsage.dialog.user_id)
AttributeError: 'NoneType' object has no attribute 'user_id'

type(message.dialog)
'orator.orm.relations.wrapper.Wrapper'
def fresh_timestamp(self):
    """
    Get a fresh timestamp for the model.

    :return: pendulum.Pendulum
    """
    return pendulum.utcnow()

Magic created_at

m1 = Message.create(text='test')

...

m2 = Message.first_or_create(text='hello')



# django
m2, created = Message.objects.get_or_create(...)
if created:
    print('created')

Created or not?

ORM это не .save()

Sample project

База для чат бота

Упрощенная

pycon_russia_2k19_sqlalchemy/
├── __init__.py
├── __main__.py
├── db
│   ├── __init__.py
│   ├── base.py
│   ├── dialog.py
│   └── message.py
└── README.txt

Подопытный проект

# ./db/base.py
import os

from sqlalchemy import MetaData, create_engine
from sqlalchemy.ext.declarative import as_declarative

engine = create_engine(os.environ['DB_URL'])
metadata = MetaData(bind=engine)


@as_declarative(metadata=metadata)
class Base:
    pass
# ./db/dialog.py
import sqlalchemy as sa

from .base import Base

class Dialog(Base):
    __tablename__ = 'dialog'

    id = sa.Column('dialog_id', sa.Integer, primary_key=True)

    created_at = sa.Column(
        sa.DateTime(), nullable=False, 
        server_default=sa.func.now()
    )
    user_id = sa.Column(sa.String, nullable=False, index=True)

    def __init__(self, user_id):
        self.user_id = user_id
# ./db/message.py
import sqlalchemy as sa
from sqlalchemy import orm as so

from .base import Base
from .dialog import Dialog

class Message(Base):
    __tablename__ = "message"

    id = sa.Column('message_id', sa.Integer, primary_key=True)
    dialog_id = sa.Column(
        Dialog.id.type, sa.ForeignKey(Dialog.id), nullable=False
    )
    client_message_id = sa.Column(
        sa.Integer, sa.ForeignKey(id), nullable=True,
    )
    text = sa.Column(sa.Text, nullable=False)
    meta = sa.Column(sa.JSON, nullable=False, default={})
# ./db/message.py
import sqlalchemy as sa
from sqlalchemy import orm as so

from .base import Base
from .dialog import Dialog

class Message(Base):
    ...
    client_message = so.relationship(
        lambda: Message, backref=so.backref("bot_messages"),
        remote_side=[id],

    )
    dialog = so.relationship(
        Dialog, lazy="joined", backref=so.backref("client_messages")
    )
    def __init__(self, dialog, text):
        self.dialog = dialog
        self.text = text
# ./db/__init__.py
from .dialog import Dialog
from .message import Message


# ./other_code.py

# use this in other code
#    ↓↓↓
from .db import Dialog, Message



# instead of this
#    ↓↓↓↓↓↓↓↓↓↓
from .db.dialog import Dialog # don't use it
from .db.message import Message # avoid this line

Session

# ./db/base.py
from sqlalchemy.orm import sessionmaker

# no bind, pass bind into engine ONLY
Session = sessionmaker()

Session - unit of work

# ./db/base.py
from contextlib import contextmanager
import typing

@contextmanager
def session(**kwargs) -> typing.ContextManager[Session]:
    """Provide a transactional scope"""
    new_session = Session(**kwargs)
    try:
        yield new_session
        new_session.commit()
    except Exception:
        new_session.rollback()
        raise
    finally:
        new_session.close()

Session

# some_code.py
from .db import session, Dialog, ClientMessage

dialog = Dialog('some_user')
message = ClientMessage(dialog, 'Привет Олег')

with session() as s:
    s.add(dialog)
    s.add(message)
with session() as s:
    some_business_logic(s)
    other_function(s)
    ...
    stop_plz_god_no(s)

def stop_plz_god_no(session):
    ...
    kill_me_plz(s)
    ...

def kill_me_plz(s)
    ...

Не всегда удобно

Contextual Session

Thread-local session

# ./db/base.py
from sqlalchemy.orm import sessionmaker, scoped_session

Session = sessionmaker()
current_session = scoped_session(Session)

...



# ./db/__init__.py
from .base import session, current_session
from .dialog import Dialog
from .message import ClientMessage, BotMessage


New!

from threading import local

class SessionRegistry(local):
    session = None
registry = SessionRegistry()

class Middleware:
    def on_request_start(self, request):
        registry.session = Session()

    def on_request_error(self, error):
        registry.session.close()
        registry.session = None

    def on_response(self, response):
        registry.session.commit()
        registry.session.close()
        registry.session = None
from .db import current_session, Dialog, ClientMessage


def some_handler():
    dialog = Dialog('some_user_id')
    message = ClientMessage(dialog, 'Привет Олег')

    current_session.add(dialog)
    current_session.add(message)

    assert message.text

Пример

State management

  1. Transient
  2. Pending
  3. Persistent
  4. Deleted
  5. Detached

Expire on commit

# some_code.py
from .db import session, Dialog, ClientMessage

dialog = Dialog('some_user')
message = ClientMessage(dialog, 'Привет Олег')

with session() as s:
    s.add(dialog)
    s.add(message)

message.text 
Traceback (most recent call last):
sqlalchemy.orm.exc.DetachedInstanceError: Instance <ClientMessage at 0x10c6d48d0> is not bound to a Session
# Transient
dialog = Dialog('some_user_id')

with session() as s:
    s.add(dialog)  # Pending
    # Persistent
# Detached
@contextmanager
def session():
    new_session = Session()
    try:
        yield new_session
        new_session.commit()
    except Exception:
        new_session.rollback()
        raise
    finally:
        new_session.close()





BEGIN
INSERT INTO dialogs ...
COMMIT
Detach
Expire

State management

State management

dialog = Dialog('some_user_id')

with session() as s:
    s.add(dialog)





dialog.id  # ???
dialog.user_id  # ???
BEGIN
INSERT INTO dialogs 
    VALUES (...)
COMMIT
dialog = Dialog('some_user_id')

with session() as s:
    s.add(dialog)











dialog.id  # ???
dialog.user_id  # ???
BEGIN
INSERT INTO dialogs 
    VALUES (...)
COMMIT
BEGIN
UPDATE dialogs 
    SET user_id = 'pycon'
COMMIT
dialog = Dialog('some_user_id')

with session(expire_on_commit=False) as s:
    s.add(dialog)

dialog.id  # 1
dialog.user_id  # 'some_user_id'
  • NOT expired
  • Detached
  • And very dangerous

Tests only!

s.merge(dialog)?

>>> from sqlalchemy import inspect

>>> insp = inspect(dialog)
>>> insp.persistent
True

>>> dialog.__dict__
{
    '_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x107ada7b8>, 
    'user_id': 'some_user_id'
}
# Transient
dialog = Dialog('some_user_id')

with session() as s:
    # Pending
    s.add(dialog)  
    # Persistent
# Detached

with session() as s:
    # Persistent
    dialog = s.query(Dialog).first()

    # Deleted
    s.delete(dialog) 
# Detached

?

__init__

from sqlalchemy import orm as so

class Dialog(Base):
    __tablename__ = "dialog"

    created_at = sa.Column(
        sa.DateTime(), nullable=False, server_default=sa.func.now()
    )

    id = sa.Column(sa.Integer, primary_key=True)
    user_id = sa.Column(sa.String, nullable=False, index=True)

    def __init__(self, user_id):
        """Call only in your code"""
        self.user_id = user_id

    @so.reconstructor
    def init_on_load(self):
        """Call on load from db"""
        self.some_instance_only_attr = []

Reconstructor hook

s.add() vs .save()

Just insert a row

with session() as s:
    dialog = Dialog('some_user_id')
    s.add(dialog)

    first = s.query(Dialog).first()




INSERT INTO dialog 
    VALUES (...);
SELECT * FROM dialog
    LIMIT 1;
COMMIT;

Autoflush

Manual flush

with session() as s:
    dialog = Dialog('some_user_id')
    s.add(dialog)
    s.flush()
    print(dialog.id)

    first = s.query(Dialog).first()



INSERT INTO dialog 
    VALUES (...);

SELECT * FROM dialog
    LIMIT 1;
COMMIT;

No autoflush

with session() as s:
    dialog = Dialog('some_user_id')
    s.add(dialog)
    s.flush()
    print(dialog.id)


    first = s.query(Dialog).first()


    with s.no_autoflush:
        s.add(some)
        s.query(...)



INSERT INTO dialog 
    VALUES (...);


SELECT * FROM dialog
    LIMIT 1;



SELECT * FROM ...
INSERT INTO ...
COMMIT;

Server generated values

dialog = Dialog('some_user_id')

with session() as s:
    s.add(dialog)
    s.flush()
    print(dialog.id) # 1




INSERT INTO dialog 
    VALUES (...)

?

Откуда знаем какой id?

Server generated values

dialog = Dialog('some_user_id')

with session() as s:
    s.add(dialog)
    s.flush()
    print(dialog.id) # 1




INSERT INTO dialog 
    VALUES (...)
    RETURNING (id)

?

Откуда знаем какой id?

Отсюда

Server generated values

class Dialog(Base):
    __tablename__ = "dialog"

    created_at = sa.Column(
        sa.DateTime(), nullable=False, 
        server_default=sa.func.now()
    )
    ...

dialog = Dialog('some_user_id')

with session() as s:
    s.add(dialog)
    s.flush()
    assert dialog.created_at

INSERT INTO dialog VALUES (...)
    RETUNING id;

SELECT created_at FROM dialog 
    WHERE id = %s;

Eager defaults

class Dialog(Base):
    __tablename__ = "dialog"

    id = sa.Column('dialog_id', sa.Integer, primary_key=True)

    created_at = sa.Column(
        sa.DateTime(), nullable=False, server_default=sa.func.now()
    )
    user_id = sa.Column(sa.String, nullable=False, index=True)

    __mapper_args__ = {"eager_defaults": True}

    def __init__(self, user_id):
        self.user_id = user_id
INSERT INTO dialog VALUES (...) RETUNING id, created_at;

Просто добавь строку?

Query

# context manager session
with session() as s:
    dialogs = s.query(Dialog).all()


# contextual session
dialogs = current_session.query(Dialog).all()

Get object from base

# ./db/base.py
...
Session = sessionmaker()
current_session = scoped_session(Session)

class Base(object):
    query = current_session.query_property()


# ./views.py

def some_handler():
    dialogs = Dialog.query.all()

Query property

def some_handler1():
    dialogs = Dialog.query.all()


def some_handler2():
    dialogs = current_session.query(Dialog).all()

def some_handler3():
    with session() as s:
        dialogs = s.query(Dialog).all()

def some_handler4():
    with session() as s:
        for (d, m) in s.query(Dialog, ClientMessage).all():
            print(d, m)

Query property

What is query?

>>> with session() as s:
...     query = s.query(Dialog)
...     type(query)
<class 'sqlalchemy.orm.query.Query'>
...     query.get(1)
<Dialog ...>

Chaining

>>> s.query(Dialog).limit(10).count()
5

>>> s.query(Dialog).filter_by(user_id='hello').all()
[]

Filtering

>>> s.query(Dialog).filter_by(user_id='some_value')
<Query ...>


>>> s.query(Dialog).filter(Dialog.user_id == 'some_value')
<Query ...>


>>> condition = Dialog.user_id == 'some_value'
dialog.user_id = :user_id_1
>>> type(condition)
<class 'sqlalchemy.sql.elements.BinaryExpression'>

>>> condition &= Dialog.id == ClientMessage.dialog_id

Find last message of dialog

Last message

# ./db/message.py
class MessageQuery(so.Query):
    def last_message(self, dialog):
        max_id = self.session.query(
            sa.func.max(Message.id)
        ).filter(Message.dialog == dialog).as_scalar()

        return self.filter(Message.id == max_id).one()

def query_example():
    data = Dialog('id'), Message(d, '1'), Message(d, '2')

    with session() as s:
        s.add_all(data)
        assert s.query(Message).last_message(d) == data[-1]

Search query class

def _get_query_cls(mapper, session):
    if mapper:
        m = mapper
        if isinstance(m, tuple):
            m = mapper[0]
        if isinstance(m, Mapper):
            m = m.entity

        try:
            return m.__query_cls__(mapper, session)
        except AttributeError:
            pass

    return Query(mapper, session)

Session = sessionmaker(query_cls=_get_query_cls)

Custom query class

s.query(Message).some_method()




Message.__query_cls__(Message, session=s).some_method()

Расширяйте Query

Скажите нет import sqlalchemy вне модуля db

Relationship

loading = (
    "select",
    "joined",
    "subquery",
    "selectin",
    "noload",
)

class Message(Base):
    ...

    dialog = so.relationship(Dialog, lazy=loading[???])

Loading techniques

Ответ на вопрос:

При загрузке Message когда грузить Dialog?

Lazy

Не грузить вообще

По требованию

Сразу

Как грузить?

JOIN

SELECT IN

SELECT JOIN

SELECT * FROM ...

RAISE

NOLOAD

SELECT d.*, m.* FROM ... JOIN ...

SELECT * FROM d ...

SELECT * FROM m WHERE m.id in (...)

SELECT * FROM d ...

SELECT * FROM m JOIN d ...

N+1 problem

# ONE query
messages = current_session.query(Message).all() 


for m in messages:
    # query per N messages
    print(m.dialog)
# ONE query
messages = current_session.query(Message)\
    .options(joinedload(Message.dialog)).all()

Smart relationship with custom query

with session() as s:
    d = s.query(Dialog).get(...)
    last_message = s.query(Message).last_message(d)




with session() as s:
    d = s.query(Dialog).get(...)
    d.last_message
class Dialog(Base):
  ...

  @declared_attr
  def last_message(cls):
      def condition():
        from sqlalchemy.orm import foreign, remote
        from .message import Message

        message = sa.alias(Message, 'msg')
        subquery = sa.select(
            [sa.func.max(remote(message.c.message_id))],
            whereclause=remote(message.c.dialog_id) == foreign(Dialog.id)
        )
        dialog_condition = remote(Message.dialog_id) == foreign(Dialog.id)
        message_condition = remote(Message.id) == subquery
        return dialog_condition & message_condition

    return so.relationship('Message', primaryjoin=condition, 
                            uselist=False, viewonly=True)

Smart relationship with custom query without N+1

with session() as s:
    dialogs = s.query(Dialog).options(
        joinedload(Dialog.last_message)
    ).all()

    for d in dialogs:
        d.last_message

Dynamic & custom query class

class Message(Base):
    ...

    dialog = so.relationship(
        Dialog, lazy="joined", 
        backref=so.backref("messages", 
                           lazy='dynamic', 
                           query_cls=MessageQuery)
    )


with session() as s:
    dialog = s.query(Dialog).get(...)
    dialog.message.some_cool_method()

Хорошие отношения в каждый проект!

SQLalchemy & asyncio

Threadpool and ok

async def example():
    d = Dialog('some_user_id')
    data = d, Message(d, 'hello'), Message(d, 'again')

    @run_in_threadpool
    def query():
        current_session.add_all(data)
        current_session.flush()

    await query()

    assert d.created_at

current_session with contextvar

from contextvars import ContextVar

_scope = ContextVar('session_scope')


def scopefunc():
    try:
        return _scope.get()
    except LookupError as e:
        raise RuntimeError('Use current_session with request context')

current_session = scoped_session(Session, scopefunc=scopefunc)

Warning of silent block and undefer

async def loop_block():
    @run_in_threadpool
    def query():
        return current_session.query(Dialog).get(...)
    dialog = await query()
    assert dialog.message

Loop block

RaiseLoad & JoinedLoad

async def loop_block():
    @run_in_threadpool
    def query():
        return current_session.query(Dialog).get(...)
    dialog = await query()
    assert dialog.message

async def raise_instead_lock():
    @run_in_threadpool
    def query():
        return current_session.query(Dialog)
            .options(
                joinedload(Dialog.message),
                raiseload('*')
            ).get(...)
    dialog = await query()
    assert dialog.message


Loop block

Final words

Спасибо

kataev

mr_apt

denis.a.kataev@gmail.com

Пишем приложения на SQLAlchemy

By Denis Kataev

Пишем приложения на SQLAlchemy

  • 1,417