$whoami

Python and Rust developer
Created Pydantic in 2017
Started a Company around Pydantic last year

What is Pydantic?

Data validation & more using Python type hints
Top 30 package on PyPI, >280M downloads / month

 from datetime import datetime
from pydantic import BaseModel
 
# Minimal model: the field types come straight from the annotations.
class Delivery(BaseModel):
    timestamp: datetime
    dimensions: tuple[int, int]

# The inputs are an ISO 8601 string and a list of numeric strings; the `#>`
# lines record the printed results: a tz-aware datetime and a tuple of ints.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)

from datetime import datetime
from pydantic import BaseModel
 
# Minimal model: the field types come straight from the annotations.
class Delivery(BaseModel):
    timestamp: datetime
    dimensions: tuple[int, int]

# The inputs are an ISO 8601 string and a list of numeric strings; the `#>`
# lines record the printed results: a tz-aware datetime and a tuple of ints.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)

from datetime import datetime
from pydantic import BaseModel
 
# Minimal model: the field types come straight from the annotations.
class Delivery(BaseModel):
    timestamp: datetime
    dimensions: tuple[int, int]

# The inputs are an ISO 8601 string and a list of numeric strings; the `#>`
# lines record the printed results: a tz-aware datetime and a tuple of ints.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)

from datetime import datetime
from pydantic import BaseModel
 
# Minimal model: the field types come straight from the annotations.
class Delivery(BaseModel):
    timestamp: datetime
    dimensions: tuple[int, int]
 
# The inputs are an ISO 8601 string and a list of numeric strings; the `#>`
# lines record the printed results: a tz-aware datetime and a tuple of ints.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)

Pydantic V2

Complete rewrite of Pydantic, with the core written in Rust
Released in June 2023
5 - 50x faster than Pydantic V1
More correct, more extensible

 from datetime import datetime
from pydantic import BaseModel
 
# Minimal model: the field types come straight from the annotations.
class Delivery(BaseModel):
    timestamp: datetime
    dimensions: tuple[int, int]
 
# The inputs are an ISO 8601 string and a list of numeric strings; the `#>`
# lines record the printed results: a tz-aware datetime and a tuple of ints.
m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
print(repr(m.timestamp))
#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
print(m.dimensions)
#> (10, 20)

Rust Advantages

The obvious...

Performance
Reusing high-quality Rust libraries
More explicit error handling

(Maybe) less obvious advantages:

Virtually zero cost customisation, even in hot code
Arguably easier to maintain - the compiler picks up more mistakes
Private means private

Rust Advantages

Nested modular structures

 from pydantic import BaseModel
 
# Leaf model with four scalar fields; the Student model that follows holds a
# list of these.
class Qualification(BaseModel):
    name: str
    description: str
    required: bool
    value: int
 
 
# Composite model: `qualifications` is a list of Qualification models, which
# gives the nested structure this slide is about.
class Student(BaseModel):
    id: int
    name: str
    qualifications: list[Qualification]
    friends: list[int]

[
    ...,
    ...,
    ...,
    ...,
    ...,
    ...,
    ...,
    ...,
    ...,
    ...,
    ...,
    ...,
]

continued...

What does that tree look like?

 # Example model whose validator tree is printed right after it on this slide.
 # NOTE(review): `Maxlen` and `PosInt` look like slide shorthand (compare
 # annotated_types.MaxLen and pydantic.PositiveInt); no imports are shown - confirm.
 class Talk(BaseModel):
    title: Annotated[
        str,
        Maxlen(100)
    ]
    attendance: PosInt
    when: datetime | None = None
    mistakes: list[
        tuple[timedelta, str]
    ]

ModelValidator {
  cls: Talk,
  validator: TypeDictValidator [
    Field {
      key: "title",
      validator: StrValidator { max_len: 100 },
    },
    Field {
      key: "attendance",
      validator: IntValidator { min: 0 },
    },
    Field {
      key: "when",
      validator: UnionValidator [
        DateTimeValidator {},
        NoneValidator {},
      ],
      default: None,
    },
    Field {
      key: "mistakes",
      validator: ListValidator {
        item_validator: TupleValidator [
          TimedeltaValidator {},
          StrValidator {},
        ],
      },
    },
  ],
}

Rust Disadvantages

Disadvantages:

Slower to develop
Fewer people can help you
Have to distribute binaries, or leave users to compile it
Refactoring hell!

Rust Disadvantages

RecursionError is bad, but no RecursionError is worse!

Also no multiple ownership.

continued...

 // main() calls itself unconditionally, so the recursion never ends on its own.
 // NOTE(review): illustrates the slide's point that Rust has no
 // RecursionError-style guard; presumably this ends in a stack overflow - confirm.
 fn main() {
    main();
}

 from __future__ import annotations
from pydantic import BaseModel
 
 
# Self-referential model: `f` is a list of further Foo instances.
class Foo(BaseModel):
    a: int
    f: list[Foo]
 
 
# Build input data that contains itself: after the append, f['f'][0] is f.
f = {'a': 1, 'f': []}
f['f'].append(f)
# Validating cyclic data would recurse without end if followed naively.
# NOTE(review): pydantic is expected to reject this with a recursion-loop
# validation error rather than overflow the stack - confirm.
Foo(**f)

Pydantic V2 Architecture

Read type hints

construct a "core schema"

pydantic

(pure python)

pydantic-core

(binary + stubs + core-schema)

process core schema

return SchemaValidator

Receive input data

call .validate_python(data)

run validators

return the result of validation

Again for SchemaSerializer

Python Interface to Rust

# Model form of the talk record; the hand-written core schema that follows on
# this slide describes the same four fields.
# NOTE(review): `Maxlen` and `PosInt` look like slide shorthand (compare
# annotated_types.MaxLen and pydantic.PositiveInt); no imports are shown - confirm.
class Talk(BaseModel):
    title: Annotated[
        str,
        Maxlen(100)
    ]
    attendance: PosInt
    when: datetime | None = None
    mistakes: list[
        tuple[timedelta, str]
    ]

 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""
 from pydantic_core import SchemaValidator
 
 
# Bare placeholder class: the 'model' schema below names it as 'cls', and the
# validated result is read through plain attribute access (talk.mistakes).
class Talk:
    ...
 
# Core schema written out by hand: the dict form of the Talk model shown
# earlier in the deck, one entry under 'fields' per model field.
talk_validator = SchemaValidator({
    'type': 'model',
    'cls': Talk,
    'schema': {
        'type': 'model-fields',
        'fields': {
            'title': {'schema': {'type': 'str', 'max_length': 100}},
            'attendance': {'schema': {'type': 'int', 'ge': 0}},
            # Counterpart of `when: datetime | None = None`: a 'default' schema
            # wrapping a 'nullable' schema wrapping 'datetime'.
            'when': {
                'schema': {
                    'type': 'default',
                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
                    'default': None,
                }
            },
            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
            'mistakes': {
                'schema': {
                    'type': 'list',
                    'items_schema': {
                        'type': 'tuple',
                        'mode': 'positional',
                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
                    }
                }
            },
        },
    }
})
 
# Raw input: attendance, when and the mistake offsets are all given as strings.
some_data = {
    'title': "How Pydantic V2 leverages Rust's Superpowers",
    'attendance': '100',
    'when': '2024-10-22T19:15:00',
    'mistakes': [
        ('00:00:00', 'Screen mirroring confusion'),
        ('00:00:30', 'Forgot to turn on the mic'),
        ('00:25:00', 'Too short'),
        ('00:40:00', 'Too long!'),
    ],
}
# The expected output that follows shows the offsets converted to timedelta objects.
talk = talk_validator.validate_python(some_data)
print(talk.mistakes)
"""
[
    (datetime.timedelta(0), 'Screen mirroring confusion'), 
    (datetime.timedelta(seconds=30), 'Forgot to turn on the mic'), 
    (datetime.timedelta(seconds=1500), 'Too short'), 
    (datetime.timedelta(seconds=2400), 'Too long!')
]
"""

Not Rust vs. Python

But rather: Python as the user* interface for Rust.

(* by user, I mean "application developer")

I'd love to see a generation of libraries for Python (and other high level languages) built in Rust.

TLS

Routing

HTTP parsing

Validation

DB query

Serializing

Rust/C

Python

Application Logic

HTTPS request lifecycle:

100% of Developer time

=

1% of CPU cycles

...

Thank you

pydantic.dev / github.com/pydantic / x.com/pydantic

Alert!

We've launched Pydantic Logfire - pydantic.dev/logfire

	import timeit
	from pydantic import BaseModel, __version__

	# Benchmark model: two scalar fields plus a list and a dict to validate.
	class Model(BaseModel):
	    name: str
	    age: int
	    friends: list[int]
	    settings: dict[str, float]

	# Input sized to make validation measurable: 200 list items, 50 dict entries.
	data = {
	    'name': 'John',
	    'age': 42,
	    'friends': list(range(200)),
	    'settings': {f'v_{i}': i / 2.0 for i in range(50)}
	}
	t = timeit.timeit(
	    'Model(**data)',
	    globals={'data': data, 'Model': Model},
	    number=10_000,
	)
	# t is the total seconds for 10_000 runs, so t * 100 is microseconds per run.
	print(f'version={__version__} time taken {t * 100:.2f}us')

	import timeit
	from pydantic import BaseModel, __version__

	# Benchmark model: two scalar fields plus a list and a dict to validate.
	class Model(BaseModel):
	    name: str
	    age: int
	    friends: list[int]
	    settings: dict[str, float]

	# Input sized to make validation measurable: 200 list items, 50 dict entries.
	data = {
	    'name': 'John',
	    'age': 42,
	    'friends': list(range(200)),
	    'settings': {f'v_{i}': i / 2.0 for i in range(50)}
	}
	t = timeit.timeit(
	    'Model(**data)',
	    globals={'data': data, 'Model': Model},
	    number=10_000,
	)
	# t is the total seconds for 10_000 runs, so t * 100 is microseconds per run.
	print(f'version={__version__} time taken {t * 100:.2f}us')

	from datetime import datetime
	from pydantic import BaseModel

	# Minimal model: the field types come straight from the annotations.
	class Delivery(BaseModel):
	    timestamp: datetime
	    dimensions: tuple[int, int]

	# The inputs are an ISO 8601 string and a list of numeric strings; the `#>`
	# lines record the printed results: a tz-aware datetime and a tuple of ints.
	m = Delivery(timestamp='2020-01-02T03:04:05Z', dimensions=['10', '20'])
	print(repr(m.timestamp))
	#> datetime.datetime(2020, 1, 2, 3, 4, 5, tzinfo=TzInfo(UTC))
	print(m.dimensions)
	#> (10, 20)

	from pydantic import BaseModel

	# Leaf model with four scalar fields; the Student model that follows holds a
	# list of these.
	class Qualification(BaseModel):
	    name: str
	    description: str
	    required: bool
	    value: int


	# Composite model: `qualifications` is a list of Qualification models.
	class Student(BaseModel):
	    id: int
	    name: str
	    qualifications: list[Qualification]
	    friends: list[int]

	# Model form of the talk record used throughout the deck.
	# NOTE(review): `Maxlen` and `PosInt` look like slide shorthand (compare
	# annotated_types.MaxLen and pydantic.PositiveInt); no imports are shown - confirm.
	class Talk(BaseModel):
	    title: Annotated[
	        str,
	        Maxlen(100)
	    ]
	    attendance: PosInt
	    # The union was written as `\|` (a markdown table escape), which is not valid Python.
	    when: datetime | None = None
	    mistakes: list[
	        tuple[timedelta, str]
	    ]

	from __future__ import annotations
	from pydantic import BaseModel


	# Self-referential model: `f` is a list of further Foo instances.
	class Foo(BaseModel):
	    a: int
	    f: list[Foo]


	# Build input data that contains itself: after the append, f['f'][0] is f.
	f = {'a': 1, 'f': []}
	f['f'].append(f)
	# Validating cyclic data would recurse without end if followed naively.
	# NOTE(review): pydantic is expected to reject this with a recursion-loop
	# validation error rather than overflow the stack - confirm.
	Foo(**f)

	from pydantic_core import SchemaValidator


	# Bare placeholder class: the 'model' schema below names it as 'cls', and the
	# validated result is read through plain attribute access (talk.mistakes).
	class Talk:
	    ...

	# Core schema written out by hand: the dict form of the Talk model shown
	# earlier in the deck, one entry under 'fields' per model field.
	talk_validator = SchemaValidator({
	    'type': 'model',
	    'cls': Talk,
	    'schema': {
	        'type': 'model-fields',
	        'fields': {
	            'title': {'schema': {'type': 'str', 'max_length': 100}},
	            'attendance': {'schema': {'type': 'int', 'ge': 0}},
	            # Counterpart of `when: datetime | None = None`: a 'default' schema
	            # wrapping a 'nullable' schema wrapping 'datetime'.
	            'when': {
	                'schema': {
	                    'type': 'default',
	                    'schema': {'type': 'nullable', 'schema': {'type': 'datetime'}},
	                    'default': None,
	                }
	            },
	            # Counterpart of `mistakes: list[tuple[timedelta, str]]`.
	            'mistakes': {
	                'schema': {
	                    'type': 'list',
	                    'items_schema': {
	                        'type': 'tuple',
	                        'mode': 'positional',
	                        'items_schema': [{'type': 'timedelta'}, {'type': 'str'}]
	                    }
	                }
	            },
	        },
	    }
	})

	# Raw input: attendance, when and the mistake offsets are all given as strings.
	some_data = {
	    'title': "How Pydantic V2 leverages Rust's Superpowers",
	    'attendance': '100',
	    'when': '2024-10-22T19:15:00',
	    'mistakes': [
	        ('00:00:00', 'Screen mirroring confusion'),
	        ('00:00:30', 'Forgot to turn on the mic'),
	        ('00:25:00', 'Too short'),
	        ('00:40:00', 'Too long!'),
	    ],
	}
	# The expected output that follows shows the offsets converted to timedelta objects.
	talk = talk_validator.validate_python(some_data)
	print(talk.mistakes)
	"""
	[
	(datetime.timedelta(0), 'Screen mirroring confusion'),
	(datetime.timedelta(seconds=30), 'Forgot to turn on the mic'),
	(datetime.timedelta(seconds=1500), 'Too short'),
	(datetime.timedelta(seconds=2400), 'Too long!')
	]
	"""

Pydantic & Rust