Database handling with DuckDB and SQLModel

Relational database

Structure:

  • a collection of tables (i.e. relations)
  • each table has at least one column
  • each row represents an entity of the relation described by the table
  • columns can point to entities in other tables

 

Operations:

  • JOIN tables via corresponding columns
  • SELECT rows by specifying constraints for column values

DuckDB

  • columnar database
  • particularly fast and parallelized
  • server free, single file
  • Python API and compatibility with generic DB layers, in particular SQLModel

SQLModel

Idea:

Interact with DB using Python types and Python logic

from sqlmodel import Field, SQLModel, Session, create_engine, select

class Contact(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    email_address: str
    name: Optional[str] = None


engine = create_engine("...")
SQLModel.metadata.create_all(engine)

me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")

with Session(engine) as session:
    session.add(me)
    session.commit()
    
    print(session.exec(select(Contact).where(Contact.name == "Johannes Köster")).first())

SQLModel with DuckDB

Packages:

  • sqlmodel
  • duckdb
  • duckdb-engine

SQLModel with DuckDB

from sqlmodel import Field, SQLModel, Session, create_engine, select

def id_field(table_name: str):
    sequence = sqlalchemy.Sequence(f"{table_name}_id_seq")
    return Field(
        default=None,
        primary_key=True,
        sa_column_args=[sequence],
        sa_column_kwargs={"server_default": sequence.next_value()},
    )

class Contact(SQLModel, table=True):
    id: Optional[int] = id_field("contact")
    email_address: str
    name: Optional[str] = None


engine = create_engine("duckdb:///test.db")
SQLModel.metadata.create_all(engine)

me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")

with Session(engine) as session:
    session.add(me)
    session.commit()
    
    print(session.exec(select(Contact).where(Contact.name == "Johannes Köster")).first())

Adding emails

class Contact(SQLModel, table=True):
    id: Optional[int] = id_field("contact")
    email_address: str
    name: Optional[str] = None
    
    sent_emails: List["Email"] = Relationship(back_populates="sender")


class Email(SQLModel, table=True):
    id: Optional[int] = id_field("email")
    sender_id: int = Field(foreign_key="contact.id")
    subject: str
    body: str
    
    sender: Contact = Relationship(back_populates="sent_emails")
Contact
id
email_address
name
Email
id
sender_id
subject
body

1

n

Adding attachments

class Attachment(SQLModel, table=True):
    id: Optional[int] = id_field("attachment")
    filename: str
    email_id: int = Field(default=None, foreign_key="email.id")

    email: "Email" = Relationship(back_populates="attachments")
 

class Email(SQLModel, table=True):
    id: Optional[int] = id_field("email")
    sender_id: int = Field(foreign_key="contact.id")
    subject: str
    body: str
    
    sender: Contact = Relationship(back_populates="sent_emails")
    attachments: List[Attachment] = Relationship(back_populates="email")
Attachment
id
filename
email_id
Email
id
sender_id
subject
body

1

n

Adding recipients

class EmailReception(SQLModel, table=True):
    email_id: int = Field(foreign_key="email.id", primary_key=True)
    contact_id: int = Field(foreign_key="contact.id", primary_key=True)


    email: "Email" = Relationship(back_populates="recipients")
    contact: "Contact" = Relationship(back_populates="receptions")
 

class Email(SQLModel, table=True):
    id: Optional[int] = id_field("email")
    sender_id: int = Field(foreign_key="contact.id")
    subject: str
    body: str
    
    sender: Contact = Relationship(back_populates="sent_emails")
    attachments: List[Attachment] = Relationship(back_populates="email")
    recipients: List[EmailReception] = Relationship(back_populates="email")
EmailReception
email_id
contact_id
Email
id
sender_id
subject
body

1

n

Contact
id
email_address
name

1

n

Adding recipient kinds

class RecipientKind(Enum):
    to = "to"
    cc = "cc"
    bcc = "bcc"


class EmailReception(SQLModel, table=True):
    email_id: int = Field(foreign_key="email.id", primary_key=True)
    contact_id: int = Field(foreign_key="contact.id", primary_key=True)
    kind: RecipientKind

    email: "Email" = Relationship(back_populates="recipients")
    contact: "Contact" = Relationship(back_populates="receptions")
EmailReception
email_id
contact_id
kind

Filling the DB

me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
john_doe = Contact(name="John Doe", email_address="john.doe@uni-due.de")

some_email = Email(
    sender=john_doe,
    subject="Hello",
    body="World",
    recipients=[EmailReception(contact=me, kind=RecipientKind.to)],
)

with Session(engine) as session:
    session.add(me)
    session.add(john_doe)
    session.add(some_email)
    session.commit()

Querying

me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
john_doe = Contact(name="John Doe", email_address="john.doe@uni-due.de")

some_email = Email(
    sender=john_doe,
    subject="Hello",
    body="World",
    recipients=[EmailReception(contact=me, kind=RecipientKind.to)],
)

with Session(engine) as session:
    session.add(me)
    session.add(john_doe)
    session.add(some_email)
    session.commit()

    stmt = select(Email).where(Email.sender == john_doe)
    print(session.exec(stmt).first())

    print(
        session.exec(
            select(EmailReception)
            .join(Email)
            .where(Email.subject == "Hello", EmailReception.kind == RecipientKind.to)
        ).all()[0].contact
    )

Modifying

me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
john_doe = Contact(name="John Doe", email_address="john.doe@uni-due.de")

some_email = Email(
    sender=john_doe,
    subject="Hello",
    body="World",
    recipients=[EmailReception(contact=me, kind=RecipientKind.to)],
)

with Session(engine) as session:
    session.add(me)
    session.add(john_doe)
    session.add(some_email)
    session.commit()

    stmt = select(Email).where(Email.sender == john_doe)
    print(session.exec(stmt).first())

    session.exec(
        select(Contact).where(Contact.name == "Johannes Köster")
    ).first().name = "Johannes Simon Köster"
    
    session.commit()
    
    print(
        session.exec(
            select(Contact).where(Contact.name == "Johannes Simon Köster")
        ).first()
    )