Why and how Pydantic uses Rust
from datetime import datetime

from pydantic import BaseModel


class Delivery(BaseModel):
    # Fields are declared with plain type hints.
    timestamp: datetime
    dimensions: tuple[int, int]


# The inputs are a string and a list of strings; the printed results below
# show the values after validation.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)
from datetime import datetime

from pydantic import BaseModel


class Delivery(BaseModel):
    # Fields are declared with plain type hints.
    timestamp: datetime
    dimensions: tuple[int, int]


# The inputs are a string and a list of strings; the printed results below
# show the values after validation.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)
from datetime import datetime

from pydantic import BaseModel


class Delivery(BaseModel):
    # Fields are declared with plain type hints.
    timestamp: datetime
    dimensions: tuple[int, int]


# The inputs are a string and a list of strings; the printed results below
# show the values after validation.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)
from datetime import datetime

from pydantic import BaseModel


class Delivery(BaseModel):
    # Fields are declared with plain type hints.
    timestamp: datetime
    dimensions: tuple[int, int]


# The inputs are a string and a list of strings; the printed results below
# show the values after validation.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)
from datetime import datetime

from pydantic import BaseModel


class Delivery(BaseModel):
    # Fields are declared with plain type hints.
    timestamp: datetime
    dimensions: tuple[int, int]


# The inputs are a string and a list of strings; the printed results below
# show the values after validation.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)
The obvious...
(Maybe) less obvious advantages:
Nested modular structures
from pydantic import BaseModel


class Qualification(BaseModel):
    # Leaf model: used as the item type of Student.qualifications.
    name: str
    description: str
    required: bool
    value: int


class Student(BaseModel):
    # Outer model: nests a list of Qualification models.
    id: int
    name: str
    qualifications: list[Qualification]
    friends: list[int]
[
...,
...,
...,
...,
...,
...,
...,
...,
...,
...,
...,
...,
]
continued...
What does that tree look like?
class Talk(BaseModel):
    # str constrained through Annotated metadata.
    title: Annotated[
        str,
        Maxlen(100)
    ]
    attendance: PosInt
    # Optional field; defaults to None when omitted.
    when: datetime | None = None
    # List of (timedelta, str) pairs.
    mistakes: list[
        tuple[timedelta, str]
    ]
ModelValidator {
cls: Talk,
validator: TypeDictValidator [
Field {
key: "title",
validator: StrValidator { max_len: 100 },
},
Field {
key: "attendance",
validator: IntValidator { min: 0 },
},
Field {
key: "when",
validator: UnionValidator [
DateTimeValidator {},
NoneValidator {},
],
default: None,
},
Field {
key: "mistakes",
validator: ListValidator {
item_validator: TupleValidator [
TimedeltaValidator {},
StrValidator {},
],
},
},
],
}
Disadvantages:
RecursionError is bad, but no RecursionError is worse!
Also no multiple ownership.
continued...
// `main` calls itself unconditionally, so the recursion has no base case.
fn main() { main(); }
from __future__ import annotations

from pydantic import BaseModel


class Foo(BaseModel):
    a: int
    # Self-referential field: each item is itself a Foo.
    f: list[Foo]


# Build an input dict whose 'f' list contains the dict itself (a cycle),
# then hand it to the model for validation.
f = {'a': 1, 'f': []}
f['f'].append(f)
Foo(**f)
Read type hints
construct a "core schema"
(pure python)
(binary + stubs + core-schema)
process core schema
return SchemaValidator
Receive input data
call .validate_python(data)
run validators
return the result of validation
class Talk(BaseModel):
    # str constrained through Annotated metadata.
    title: Annotated[
        str,
        Maxlen(100)
    ]
    attendance: PosInt
    # Optional field; defaults to None when omitted.
    when: datetime | None = None
    # List of (timedelta, str) pairs.
    mistakes: list[
        tuple[timedelta, str]
    ]
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
from pydantic_core import SchemaValidator


class Talk:
    ...


# Hand-written core schema describing the Talk fields, passed straight to
# SchemaValidator.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})

# Raw input: every value below is given as a string (or tuples of strings).
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}

talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'),
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
    (datetime.timedelta(seconds=1500), 'Too short'),
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
import timeit

from pydantic import BaseModel, __version__


class Model(BaseModel):
    name: str
    age: int
    friends: list[int]
    settings: dict[str, float]


# Benchmark payload: 200 ints in 'friends' and 50 float entries in 'settings'.
data = {
    'name': 'John',
    'age': 42,
    'friends': list(range(200)),
    'settings': {f'v_{i}': i / 2.0 for i in range(50)}
}

t = timeit.timeit(
    'Model(**data)',
    globals={'data': data, 'Model': Model},
    number=10_000,
)
# t is the total seconds for 10,000 calls, so t / 10_000 * 1e6 == t * 100
# is the time per call in microseconds.
print(f'version={__version__} time taken {t * 100:.2f}us')
import timeit

from pydantic import BaseModel, __version__


class Model(BaseModel):
    name: str
    age: int
    friends: list[int]
    settings: dict[str, float]


# Benchmark payload: 200 ints in 'friends' and 50 float entries in 'settings'.
data = {
    'name': 'John',
    'age': 42,
    'friends': list(range(200)),
    'settings': {f'v_{i}': i / 2.0 for i in range(50)}
}

t = timeit.timeit(
    'Model(**data)',
    globals={'data': data, 'Model': Model},
    number=10_000,
)
# t is the total seconds for 10,000 calls, so t / 10_000 * 1e6 == t * 100
# is the time per call in microseconds.
print(f'version={__version__} time taken {t * 100:.2f}us')
version=1.10.18 time taken 195.8us
version=2.9.2 time taken 4.08us
48.0x speedup
But rather: Python as the user* interface for Rust.
(* by user, I mean "application developer")
I'd love to see a generation of libraries for Python (and other high level languages) built in Rust.
TLS
Routing
HTTP parsing
Validation
DB query
Serializing
Rust/C
Python
Application Logic
HTTPS request lifecycle:
100% of Developer time
=
1% of CPU cycles
...
We've launched Pydantic Logfire - pydantic.dev/logfire