Samuel Colvin
from datetime import datetime
from pydantic import BaseModel
# Delivery record; the example below feeds it strings and a list on purpose.
class Delivery(BaseModel):
    timestamp: datetime
    dimensions: tuple[int, int]


# Loosely-typed input: an ISO-8601 string and a list of numeric strings.
raw_delivery = {
    'timestamp': '2020-01-02T03:04:05Z',
    'dimensions': ['10', '20'],
}
m = Delivery(**raw_delivery)

# The printed values show the input converted to the annotated types.
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)
The obvious advantages...
(Maybe) less obvious advantages:
Disadvantages:
Read type hints
construct a "core schema"
(pure python)
(binary + stubs + core-schema)
process core schema
return SchemaValidator
Receive data
call schema_validator(data)
run validator
return the result of validation
import timeit
from pydantic import BaseModel, __version__
# Benchmark target: a model mixing scalar fields with a list and a dict.
class Model(BaseModel):
    name: str
    age: int
    friends: list[int]
    settings: dict[str, float]


# Input with 200 list items and 50 dict entries to validate on every call.
data = {
    'name': 'John',
    'age': 42,
    'friends': list(range(200)),
    'settings': {f'v_{i}': i / 2.0 for i in range(50)},
}

# Number of timed validations. The per-call figure below is derived from this
# value, so changing it cannot silently skew the reported microseconds.
number = 10_000
t = timeit.timeit(
    'Model(**data)',
    globals={'data': data, 'Model': Model},
    number=number,
)
# timeit returns the total seconds for all runs; convert to microseconds per call.
us_per_call = t / number * 1_000_000
print(f'version={__version__} time taken {us_per_call:.2f}us')
version=1.10.4 time taken 179.81us
version=2.30   time taken   7.99us
22.5x speedup
from pydantic import BaseModel, ConfigDict, ValidationError
# Strict mode is switched on for the whole model through its config.
class Model(BaseModel):
    model_config = ConfigDict(strict=True)

    age: int
    friends: tuple[int, int]


# A str where an int is declared, and a list where a tuple is declared.
lax_input = {'age': '42', 'friends': [1, 2]}
try:
    Model(**lax_input)
except ValidationError as exc:
    print(exc)
    """
    2 validation errors for Model
    age
      Input should be a valid integer ... input_value='42'
    friends
      Input should be a valid tuple ... input_value=[1, 2]
    """

# Input that already has the declared types is accepted.
exact_input = {'age': 42, 'friends': (1, 2)}
print(Model(**exact_input))
#> age=42 friends=(1, 2)
AKA Pedant mode.
from pydantic import BaseModel, ConfigDict
# Same strict model as before, this time fed from a JSON document.
class Model(BaseModel):
    model_config = ConfigDict(strict=True)

    age: int
    friends: tuple[int, int]


# The "friends" value is a JSON array; the printed result shows it accepted
# for the tuple field even though strict mode is on.
json_payload = '{"age": 1, "friends": [1, 2]}'
parsed = Model.model_validate_json(json_payload)
print(parsed)
#> age=1 friends=(1, 2)
If you're going to be a pedant, you better be right.
Also gives us:
from pydantic import BaseModel, field_validator
class Model(BaseModel):
    x: int

    @field_validator('x', mode='wrap')
    @classmethod
    def validate_x(cls, v, handler):
        """Wrap the validation of ``x``: run code before, after, and on error.

        Args:
            v: The raw input value for ``x``.
            handler: Callable that runs the inner validation on a value.

        Returns:
            ``1`` for the special input ``'one'``; ``-999`` if the inner
            validation raises ``ValueError``; otherwise the validated value
            plus one.
        """
        # "Before": short-circuit without calling the inner validator at all.
        if v == 'one':
            return 1
        try:
            x = handler(v)
        except ValueError:
            # "On error": replace the failure with a fallback value.
            return -999
        else:
            # "After": post-process the successfully validated value.
            return x + 1


print(Model(x='one'))
#> x=1
print(Model(x=2))
#> x=3
print(Model(x='three'))
#> x=-999
AKA "The Onion"
Before
On Error
After
from pydantic import BaseModel, Field, AliasPath, AliasChoices
# `a` is read through a nested path ('foo' -> index 1 -> 'bar');
# `b` is read from either of the alternative keys 'x' or 'y'.
class MyModel(BaseModel):
    a: int = Field(validation_alias=AliasPath('foo', 1, 'bar'))
    b: str = Field(validation_alias=AliasChoices('x', 'y'))


# Nested input: the second element of 'foo' carries the value for `a`,
# and only the 'y' alternative is present for `b`.
payload = {
    'foo': [{'bar': 0}, {'bar': 1}],
    'y': 'Y',
}
m = MyModel.model_validate(payload)
print(m)
#> a=1 b='Y'
Somewhat similar to serde's "flatten".
PyO3 provides the magic that allows Pydantic (and many other libraries) to call Rust from Python.
While Rust is very fast:
Is slower than it could be.
But much of this is fixable...
Let's look at an example...
def dict_not_none(**kwargs: Any) -> Any:
    return {k: v for k, v in kwargs.items() if v is not None}
| Implementation | Measurement |
|---|---|
| Python | 281ns | 
| PyO3 Today | 350ns | 
| Baremetal FFI | 54ns | 
| PyO3 Next | 235ns | 
We currently parse JSON completely, store it in a heap of Maps and Arrays, then validate.
We can do much better...
What we have now:
def validate_json_today(model_type: ModelType, json_data: str):
    """Pseudo-code for the current approach: parse everything, then validate.

    The whole JSON document is first materialised into an intermediate
    ``JsonObject``; only afterwards is each model field looked up in that
    object and validated.

    Args:
        model_type: The model being built; its ``fields`` are iterated and it
            is called with the validated data on success.
        json_data: The raw JSON text.

    Returns:
        ``model_type(model_data)`` when every field validated.

    Raises:
        ValidationError: If any field failed, carrying the failed fields.
    """
    # Phase 1: parse the complete document into an intermediate object.
    json_object = JsonObject()
    for chunk in JsonParser(json_data).chunks():
        json_object.add(chunk.to_object())

    # Phase 2: validate field by field against the fully parsed object.
    model_data = {}
    errors = []
    for f in model_type.fields:
        try:
            model_data[f.name] = f.validate(json_object[f.key])
        except Error:
            # Record the field that failed; the loop variable here is `f`.
            errors.append(f)

    if errors:
        raise ValidationError(errors)
    return model_type(model_data)
Warning: Python as pseudo code for Rust 😱
What we might have in future:
def validate_json_future(model_type: ModelType, json_data: str):
    """Pseudo-code for a possible future approach: validate while parsing.

    Each ``(key, chunk)`` pair from the parser is matched to a model field
    and validated immediately, with results stored by field index instead of
    in an intermediate JSON object.

    Args:
        model_type: The model being built; provides ``fields`` and
            ``get_by_index``.
        json_data: The raw JSON text.
    """
    # One slot per model field; None marks "no value stored yet".
    slots: list[Any | None] = [None] * len(model_type.fields)
    errors = []

    for key, chunk in iter_json_parser(json_data):
        field = model_type.fields.get(key)
        if not field:
            # Keys without a matching field are skipped without being parsed.
            continue
        try:
            slots[field.index] = field.validate(chunk.parse())
        except Error:
            errors.append(field)

    # Collect the stored values in index order; an empty slot is an error.
    # NOTE(review): a field whose validation failed above also leaves its slot
    # as None, so it is appended to `errors` a second time here.
    model_data = []
    for index, value in enumerate(slots):
        if value is None:
            errors.append(model_type.get_by_index(index))
        else:
            model_data.append(value)
    ...
Warning: Python as pseudo code for Rust 😱
Pydantic will start a closed beta of our Observability tool later this year!
Come and find me for beta access, or scan: