Database handling with DuckDB and SQLModel
Relational database
Structure:
- a collection of tables (i.e. relations)
- each table has at least one column
- each row represents an entity of the relation described by the table
- columns can point to entities in other tables
Operations:
- JOIN tables via corresponding columns
- SELECT rows by specifying constraints for column values
DuckDB
- columnar database
- particularly fast and parallelized
- server free, single file
- Python API and compatibility with generic DB layers, in particular SQLModel
SQLModel
Idea:
Interact with DB using Python types and Python logic
from sqlmodel import Field, SQLModel, Session, create_engine, select
class Contact(SQLModel, table=True):
id: Optional[int] = Field(default=None, primary_key=True)
email_address: str
name: Optional[str] = None
engine = create_engine("...")
SQLModel.metadata.create_all(engine)
me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
with Session(engine) as session:
session.add(me)
session.commit()
print(session.exec(select(Contact).where(Contact.name == "Johannes Köster")).first())
SQLModel with DuckDB
Packages:
- sqlmodel
- duckdb
- duckdb-engine
SQLModel with DuckDB
from sqlmodel import Field, SQLModel, Session, create_engine, select
def id_field(table_name: str):
sequence = sqlalchemy.Sequence(f"{table_name}_id_seq")
return Field(
default=None,
primary_key=True,
sa_column_args=[sequence],
sa_column_kwargs={"server_default": sequence.next_value()},
)
class Contact(SQLModel, table=True):
id: Optional[int] = id_field("contact")
email_address: str
name: Optional[str] = None
engine = create_engine("duckdb:///test.db")
SQLModel.metadata.create_all(engine)
me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
with Session(engine) as session:
session.add(me)
session.commit()
print(session.exec(select(Contact).where(Contact.name == "Johannes Köster")).first())
Adding emails
class Contact(SQLModel, table=True):
id: Optional[int] = id_field("contact")
email_address: str
name: Optional[str] = None
sent_emails: List["Email"] = Relationship(back_populates="sender")
class Email(SQLModel, table=True):
id: Optional[int] = id_field("email")
sender_id: int = Field(foreign_key="contact.id")
subject: str
body: str
sender: Contact = Relationship(back_populates="sent_emails")
Contact |
---|
id |
email_address |
name |
id |
sender_id |
subject |
body |
1
n
Adding attachments
class Attachment(SQLModel, table=True):
id: Optional[int] = id_field("attachment")
filename: str
email_id: int = Field(default=None, foreign_key="email.id")
email: "Email" = Relationship(back_populates="attachments")
class Email(SQLModel, table=True):
id: Optional[int] = id_field("email")
sender_id: int = Field(foreign_key="contact.id")
subject: str
body: str
sender: Contact = Relationship(back_populates="sent_emails")
attachments: List[Attachment] = Relationship(back_populates="email")
Attachment |
---|
id |
filename |
email_id |
id |
sender_id |
subject |
body |
1
n
Adding recipients
class EmailReception(SQLModel, table=True):
email_id: int = Field(foreign_key="email.id", primary_key=True)
contact_id: int = Field(foreign_key="contact.id", primary_key=True)
email: "Email" = Relationship(back_populates="recipients")
contact: "Contact" = Relationship(back_populates="receptions")
class Email(SQLModel, table=True):
id: Optional[int] = id_field("email")
sender_id: int = Field(foreign_key="contact.id")
subject: str
body: str
sender: Contact = Relationship(back_populates="sent_emails")
attachments: List[Attachment] = Relationship(back_populates="email")
recipients: List[EmailReception] = Relationship(back_populates="email")
EmailReception |
---|
email_id |
contact_id |
id |
sender_id |
subject |
body |
1
n
Contact |
---|
id |
email_address |
name |
1
n
Adding recipient kinds
class RecipientKind(Enum):
to = "to"
cc = "cc"
bcc = "bcc"
class EmailReception(SQLModel, table=True):
email_id: int = Field(foreign_key="email.id", primary_key=True)
contact_id: int = Field(foreign_key="contact.id", primary_key=True)
kind: RecipientKind
email: "Email" = Relationship(back_populates="recipients")
contact: "Contact" = Relationship(back_populates="receptions")
EmailReception |
---|
email_id |
contact_id |
kind |
Filling the DB
me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
john_doe = Contact(name="John Doe", email_address="john.doe@uni-due.de")
some_email = Email(
sender=john_doe,
subject="Hello",
body="World",
recipients=[EmailReception(contact=me, kind=RecipientKind.to)],
)
with Session(engine) as session:
session.add(me)
session.add(john_doe)
session.add(some_email)
session.commit()
Querying
me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
john_doe = Contact(name="John Doe", email_address="john.doe@uni-due.de")
some_email = Email(
sender=john_doe,
subject="Hello",
body="World",
recipients=[EmailReception(contact=me, kind=RecipientKind.to)],
)
with Session(engine) as session:
session.add(me)
session.add(john_doe)
session.add(some_email)
session.commit()
stmt = select(Email).where(Email.sender == john_doe)
print(session.exec(stmt).first())
print(
session.exec(
select(EmailReception)
.join(Email)
.where(Email.subject == "Hello", EmailReception.kind == RecipientKind.to)
).all()[0].contact
)
Modifying
me = Contact(name="Johannes Köster", email_address="johannes.koester@uni-due.de")
john_doe = Contact(name="John Doe", email_address="john.doe@uni-due.de")
some_email = Email(
sender=john_doe,
subject="Hello",
body="World",
recipients=[EmailReception(contact=me, kind=RecipientKind.to)],
)
with Session(engine) as session:
session.add(me)
session.add(john_doe)
session.add(some_email)
session.commit()
stmt = select(Email).where(Email.sender == john_doe)
print(session.exec(stmt).first())
session.exec(
select(Contact).where(Contact.name == "Johannes Köster")
).first().name = "Johannes Simon Köster"
session.commit()
print(
session.exec(
select(Contact).where(Contact.name == "Johannes Simon Köster")
).first()
)
Database handling with DuckDB and SQLModel
By Johannes Köster
Database handling with DuckDB and SQLModel
- 80