21st April 2026
FLOW MATCHING FOR GENERATIVE MODELING
Autonomous Driving
Audio
Image/Video
Protein Structure
Generation




Introduction

Introduction


Introduction

Preliminaries
\begin{aligned}
&v_t: \textbf{Vector Field}\\
&\phi_t: \textbf{Diffeomorphic map}\\
\\
&v_t \text{ can be used to construct a time-dependent diffeomorphic map,} \\
&\phi_t \text{ using ODE:} \\
\\
&\frac{d}{dt}\phi_t(x) = v_t(\phi_t(x))\\
&\phi_0(x) = x \\
\\
&\textbf{push-forward equation}\\
&p_t = [\phi_t]_*p_0\\
&[\phi_t]_*p_0(x) = p_0(\phi_t^{-1}(x)) \det\left(\frac{\partial \phi_t^{-1}}{\partial x}(x)\right)
\end{aligned}

Preliminaries
\begin{aligned}
&\textbf{Continuity Equation}\\
&\text{If } v_t \text{ generates } p_t \text{, then } p_t \text{ and } v_t \text{ satisfy:}\\
&\boxed{\frac{\partial}{\partial t} p_t(x) + \nabla \cdot \big( v_t(x) \, p_t(x) \big) = 0}\\
&\text{when it satisfies: } p_t = [\phi_t]_* \, p_0
\end{aligned}

Let's Go...
Data
p_0 = p ; \text{ Simple Distribution} \\
p_1 = q ; \text{ Data Distribution} \\

p can be any simple distribution, e.g.:
Path Design
\begin{aligned}
&p_0 = p ; \text{ Simple Distribution} \\
&p_1 = q ; \text{ Data Distribution} \\
\end{aligned}
p_t \text{ probability path s.t. } 0 \le t \le 1
p = \mathcal{N}(x|0, I)

Training
\begin{aligned}
&\boxed{\mathcal{L}_{\text{FM}}(\theta) = \mathbb{E}_{t, p_t(x)} \|v_t(x) - u_t(x)\|^2}\\
\\
&\text{where,} \\
&\theta \text{ learnable param of the CNF vector field } v_t \\
&t \sim \mathcal{U}[0,1] \\
&x \sim p_t(x)
\end{aligned}

Training
\begin{aligned}
&\boxed{\mathcal{L}_{\text{FM}}(\theta) = \mathbb{E}_{t, p_t(x)} \|v_t(x) - u_t(x)\|^2}\\
\\
&\text{where,} \\
&\theta \text{ learnable param of the CNF vector field } v_t \\
&t \sim \mathcal{U}[0,1] \\
&x \sim p_t(x)
\end{aligned}

\begin{aligned}
&\textbf{\color{red}FM Loss is Intractable} \\
&\text{Target vector field: } u_t(x) = \int u_t(x \mid x_1) \frac{p_t(x \mid x_1)\, q(x_1)}{p_t(x)}\, dx_1\\
&\text{Reason:}\\
&1. q(x_1): \text{true data distribution is \textbf{unknown}}\\
&2. p_t(x) = \int p_t(x \mid x_1) q(x_1) \, dx_1: \text{ the marginal is \textbf{intractable}}\\
\end{aligned}
Training
\begin{aligned}
&\boxed{\mathcal{L}_{\text{FM}}(\theta) = \mathbb{E}_{t, p_t(x)} \|v_t(x) - u_t(x)\|^2}\\
\\
&\text{where,} \\
&\theta \text{ learnable param of the CNF vector field } v_t \\
&t \sim \mathcal{U}[0,1] \\
&x \sim p_t(x)
\end{aligned}

\begin{aligned}
&\textbf{FM Loss is Intractable} \\
&\text{Target vector field: } u_t(x) = \int u_t(x \mid x_1) \frac{p_t(x \mid x_1)\, q(x_1)}{p_t(x)}\, dx_1\\
&\text{Reason:}\\
&1. q(x_1): \text{true data distribution is \textbf{unknown}}\\
&2. p_t(x) = \int p_t(x \mid x_1) q(x_1) \, dx_1: \text{ the marginal is \textbf{intractable}}\\
\end{aligned}
\text{\color{green}Solution: Conditional Flow Matching}
Idea
Mix all the easy conditional paths together to get the hard marginal path for free.
Constructing p_t, u_t
\begin{aligned}
&x_1 \sim q(x); \quad \text{sample from data distribution} \\
&p_0(x|x_1) = p(x) \\
&p_1(x|x_1) = \mathcal{N}(x|x_1, \sigma^{2} I) \quad \text{small } \sigma > 0\\
\end{aligned}

Constructing p_t, u_t
\begin{aligned}
&x_1 \sim q(x); \quad \text{sample from data distribution} \\
&p_0(x|x_1) = p(x) \\
&p_1(x|x_1) = \mathcal{N}(x|x_1, \sigma^{2} I) \quad \text{small } \sigma > 0\\
\end{aligned}

\begin{aligned}
&\textbf{From Marginalization:} \\
&p_t(x) = \int p_t(x|x_1) q(x_1) dx_1 \\
&p_1(x) = \int p_1(x|x_1) q(x_1) dx_1 \approx q(x)\\
\end{aligned}
Constructing p_t, u_t
\begin{aligned}
&x_1 \sim q(x); \quad \text{sample from data distribution} \\
&p_0(x|x_1) = p(x) \\
&p_1(x|x_1) = \mathcal{N}(x|x_1, \sigma^{2} I) \quad \text{small } \sigma > 0\\
\end{aligned}

\begin{aligned}
&\textbf{From Marginalization:} \\
&\boxed{p_t(x) = \int p_t(x|x_1) q(x_1) dx_1}\\
&p_1(x) = \int p_1(x|x_1) q(x_1) dx_1 \approx q(x)\\
\end{aligned}
\begin{aligned}
&\textbf{On solving the Continuity Equation:}\\
&\frac{\partial p_t(x)}{\partial t} + \nabla \cdot \left( p_t(x) \, u_t(x) \right) = 0 \text{ we get,} \\
&\boxed{u_t(x) = \int u_t(x|x_1) \frac{p_t(x|x_1) q(x_1)}{p_t(x)} \, dx_1} \quad \\
\end{aligned}
Theorem 1


Conditional Flow Matching
\boxed{\mathcal{L}_{\text{CFM}}(\theta) = \mathbb{E}_{t, q(x_1), p_t(x|x_1)} \left\| v_t(x) - u_t(x|x_1) \right\|^2}
\begin{aligned}
&\text{where,} \\
&t \sim \mathcal{U}[0, 1] \\
&x_1 \sim q(x_1) \\
&x \sim p_t(x|x_1)
\end{aligned}

Conditional Flow Matching
\boxed{\mathcal{L}_{\text{CFM}}(\theta) = \mathbb{E}_{t, q(x_1), p_t(x|x_1)} \left\| v_t(x) - u_t(x|x_1) \right\|^2}
\boxed{\nabla_\theta \mathcal{L}_{\text{FM}}(\theta) = \nabla_\theta \mathcal{L}_{\text{CFM}}(\theta)}
Theorem 2
\begin{aligned}
&\text{where,} \\
&t \sim \mathcal{U}[0, 1] \\
&x_1 \sim q(x_1) \\
&x \sim p_t(x|x_1)
\end{aligned}


Conditional Probability Path & Vector Fields
\begin{aligned}
&p_t(x|x_1) = \mathcal{N}(x \mid \mu_t(x_1), \sigma_t(x_1)^2 I) \\
&\text{where,} \\
&\mu_0{(x_1)} = 0, \quad \sigma_0{(x_1)} = 1 \rightarrow p_0(x|x_1) = \mathcal{N}{(0, I)} \\
&\mu_1{(x_1)} = x_1, \quad \sigma_1{(x_1)} = \sigma_{\min} \approx 0 \rightarrow p_1(x|x_1) = \mathcal{N}(x_1, \sigma_{\min}^2 I)\\
\\
&\text{Sample from } p_t(x|x_1) \text{ can be written as:}\\
&\psi_t(x_0) = \sigma_t(x_1) \, x_0 + \mu_t(x_1); \text{\color{blue}Flow Map}\\
&\text{Flow moves particles from their starting position to where they are at time } t \\
&\text{where } x_0 \sim \mathcal{N}(0, I)
\end{aligned}
\psi_t \text{ pushes the noise distribution } p_0(x|x_1) = p(x) \text{ to } p_t(x|x_1) \\
[\psi_t]_*p(x) = p_t(x|x_1)
Conditional Flow Matching works with any conditional probability path
Conditional Probability Path & Vector Fields
\begin{aligned}
&p_t(x|x_1) = \mathcal{N}(x \mid \mu_t(x_1), \sigma_t(x_1)^2 I) \\
&\text{where,} \\
&\mu_0{(x_1)} = 0, \quad \sigma_0{(x_1)} = 1 \rightarrow p_0(x|x_1) = \mathcal{N}{(0, I)} \\
&\mu_1{(x_1)} = x_1, \quad \sigma_1{(x_1)} = \sigma_{\min} \approx 0 \rightarrow p_1(x|x_1) = \mathcal{N}(x_1, \sigma_{\min}^2 I)\\
\end{aligned}
Conditional Flow Matching works with any conditional probability path

Conditional Probability Path & Vector Fields
\begin{aligned}
&p_t(x|x_1) = \mathcal{N}(x \mid \mu_t(x_1), \sigma_t(x_1)^2 I) \\
&\text{where,} \\
&\mu_0{(x_1)} = 0, \quad \sigma_0{(x_1)} = 1 \rightarrow p_0(x|x_1) = \mathcal{N}{(0, I)} \\
&\mu_1{(x_1)} = x_1, \quad \sigma_1{(x_1)} = \sigma_{\min} \approx 0 \rightarrow p_1(x|x_1) = \mathcal{N}(x_1, \sigma_{\min}^2 I)\\
\end{aligned}
Conditional Flow Matching works with any conditional probability path

\begin{aligned}
&\text{Sample from } p_t(x|x_1) \text{ can be written as:}\\
&\boxed{\psi_t(x_0) = \sigma_t(x_1) \, x_0 + \mu_t(x_1)}; \text{\color{blue}Flow Map}\\\\
\end{aligned}
Conditional Probability Path & Vector Fields
\begin{aligned}
&\frac{d}{dt}\psi_t(x) = u_t(\psi_t(x)|x_1)\\
\\
&\text{On substituting values in } \mathcal{L}_{CFM} \\
&\boxed{\mathcal{L}_{\text{CFM}}(\theta) = \mathbb{E}_{t, q(x_1), p(x_0)} \left\| v_t(\psi_t(x_0)) - \frac{d}{dt}\psi_t(x_0) \right\|^2}
\end{aligned}
Conditional Probability Path & Vector Fields
\begin{aligned}
&\frac{d}{dt}\psi_t(x) = u_t(\psi_t(x)|x_1)\\
\\
&\text{On substituting values in } \mathcal{L}_{CFM} \\
&\boxed{\mathcal{L}_{\text{CFM}}(\theta) = \mathbb{E}_{t, q(x_1), p(x_0)} \left\| v_t(\psi_t(x_0)) - \frac{d}{dt}\psi_t(x_0) \right\|^2}
\end{aligned}
Theorem 3
\begin{aligned}
&\text{On differentiating } \psi_t(x_0) = \sigma_t(x_1) \, x_0 + \mu_t(x_1) \\
&\text{ and substituting the value } \frac{d}{dt}\psi_t(x) = u_t(\psi_t(x)|x_1) \\
&\boxed{u_t(x|x_1) = \frac{\sigma_t'(x_1)}{\sigma_t(x_1)}(x - \mu_t(x_1)) + \mu_t'(x_1)} \\
\end{aligned}

Special Instances
\begin{aligned}
&\textbf{Variance Exploding Path:}\\
&p_t(x|x_1) = \mathcal{N}(x \mid x_1, \sigma_{1-t}^2 I) \\
&u_t(x|x_1) = -\frac{\sigma_{1-t}'}{\sigma_{1-t}}(x - x_1) \\
\\
&\textbf{Variance Preserving Path:}\\
&p_t(x|x_1) = \mathcal{N}\left(x \mid \alpha_{1-t} x_1, \left(1 - \alpha_{1-t}^2\right) I\right), \\
&\text{where } \alpha_t = e^{-\frac{1}{2}T(t)}, \quad T(t) = \int_0^t \beta(s) \, ds \\
&u_t(x|x_1) = \frac{\alpha_{1-t}'}{1 - \alpha_{1-t}^2}(\alpha_{1-t} x - x_1) \\
&= -\frac{T'(1-t)}{2}\left[\frac{e^{-T(1-t)}x - e^{-\frac{1}{2}T(1-t)}x_1}{1 - e^{-T(1-t)}}\right] \\
\end{aligned}
\begin{aligned}
&\textbf{Optimal Transport Path:}\\
&\mu_t(x_1) = t \, x_1, \quad \sigma_t(x_1) = 1 - (1 - \sigma_{\min})t \\
\\
&\text{Conditional probability path:} \\
&p_t(x|x_1) = \mathcal{N}\left(x \mid t \, x_1, \, \left(1 - (1-\sigma_{\min})t\right)^2 I\right) \\
\\
&\text{Conditional vector field (from Theorem 3):} \\
&u_t(x|x_1) = \frac{x_1 - (1 - \sigma_{\min})x}{1 - (1 - \sigma_{\min})t} \\
\end{aligned}
Diffusion Conditional
Optimal Transport
Comparison

Comparison



Comparison



Optimal Transport
Diffusion Conditional
Results

DEMO
Train
# Conditional Flow Matching training loop (slide pseudo-code).
# sigma_t, mu_t, d_sigma_t and d_mu_t are placeholders that are not defined in
# this snippet; presumably the chosen path's scale/mean coefficients and their
# time derivatives evaluated at t -- confirm against the selected p_t.
for x1 in dataloader:
    # Clear gradients from the previous iteration; without this, backward()
    # keeps accumulating into the same .grad buffers on every step.
    optimizer.zero_grad()
    x0 = torch.randn_like(x1)  # sample noise
    # One time value per sample; sized from the batch actually drawn so a
    # smaller final batch still lines up (assumes dim 0 is the batch axis).
    t = torch.rand(x1.shape[0], 1)  # sample time
    xt = sigma_t * x0 + mu_t * x1  # depends on p_t choice
    ut = d_mu_t * x1 + d_sigma_t * x0  # corresponding target velocity
    loss = ((model(t, xt) - ut) ** 2).mean()  # MSE loss
    loss.backward()
    optimizer.step()
Inference
@torch.no_grad()
def generate(model, shape, steps=100):
    """Draw samples by integrating the learned velocity field from t=0 to t=1.

    Args:
        model: callable invoked as ``model(t, x)`` that returns the predicted
            velocity for state ``x`` at time ``t``.
        shape: shape of the sample tensor; ``shape[0]`` is used as the number
            of rows of the time tensor.
        steps: number of integration steps; the step size is ``1.0 / steps``.

    Returns:
        The state after ``steps`` solver updates starting from Gaussian noise.
    """
    x = torch.randn(shape)  # x0 ~ N(0, I)
    dt = 1.0 / steps
    for i in range(steps):
        # Same time value for every sample, laid out as a (shape[0], 1) column.
        t = torch.full((shape[0], 1), i * dt)
        v = model(t, x)  # predict velocity
        # ODEStep is not defined in this snippet; it stands for one update of
        # whichever ODE solver is chosen.
        x = ODEStep(x, v, t, dt)  # depends on ODE solver
    return x  # x1 ~ q
Quick References

End
Prerequisites
Probability
\begin{aligned}
&P(X,Y) \quad \text{Joint Probability} \\
&P(X|Y) = \frac{P(X,Y)}{P(Y)} \quad \text{Conditional Probability} \\
&P(X) = \sum_Y P(X,Y) \quad \text{Marginal Probability} \\
&P(X) = \sum_y P(X|Y=y)\, P(Y=y) \\
&P(X|Y) = \frac{P(Y|X)\, P(X)}{P(Y)} \quad \text{Bayes' Theorem} \\
&P(X|Y) = P(X) \quad \text{Independence}
\end{aligned}
\begin{aligned}
&P(X \mid Y=y) = P(X, Y=y) / P(Y=y) \\
&P(X, Y) = P(X|Y) P(Y) \\
&P(X) = \int P(X, Y=y) \, dy \\
&P(X) = \int P(X|Y=y)P(Y=y) \, dy \\
&P(X) = E_Y[P(X|Y)] \\
&p(x) = E_Y[p(x|Y)] \\
&E[X] = \int x p(x) dx = \int x E_Y[p(x|Y)] dx = E_Y[\int x p(x|Y) dx] = E_Y[E[X|Y]]\\
\end{aligned}
Derivation
\begin{aligned}
&\frac{\partial p_t(x|x_1)}{\partial t} + \nabla \cdot \left( p_t(x|x_1) \, u_t(x|x_1) \right) = 0 \\
&\frac{\partial p_t(x)}{\partial t} + \nabla \cdot \left( p_t(x) \, u_t(x) \right) = 0 \qquad \text{(B)} \\
&p_t(x) = \int p_t(x|x_1) \, q(x_1) \, dx_1 \\
&\frac{\partial p_t(x)}{\partial t} = \int \frac{\partial p_t(x|x_1)}{\partial t} \, q(x_1) \, dx_1 \\
&= -\int \nabla \cdot \left( p_t(x|x_1) \, u_t(x|x_1) \right) q(x_1) \, dx_1 \\
&= -\nabla \cdot \left( \int u_t(x|x_1) \, p_t(x|x_1) \, q(x_1) \, dx_1 \right) \\
&p_t(x) \, u_t(x) = \int u_t(x|x_1) \, p_t(x|x_1) \, q(x_1) \, dx_1 \\
&\boxed{u_t(x) = \int u_t(x|x_1) \, \frac{p_t(x|x_1) \, q(x_1)}{p_t(x)} \, dx_1} \\
&\frac{p_t(x|x_1) \, q(x_1)}{p_t(x)} = p_t(x_1|x) \\
&u_t(x) = \int u_t(x|x_1) \, p_t(x_1|x) \, dx_1 \\
&u_t(x) = \mathbb{E}_{x_1 \sim p_t(x_1|x)} \left[ u_t(x|x_1) \right] \\
\end{aligned}
All equations
\frac{d}{dt}\phi_t(x) = v_t(\phi_t(x))
\phi_0(x) = x
p_t = [\phi_t]_*p_0
[\phi_t]_*p_0(x) = p_0(\phi_t^{-1}(x)) \det\left(\frac{\partial \phi_t^{-1}}{\partial x}(x)\right)
\mathcal{L}_{\text{FM}}(\theta) = \mathbb{E}_{t, p_t(x)} \|v_t(x) - u_t(x)\|^2
All equations
p_t(x) = \int p_t(x|x_1) q(x_1) \, dx_1
p_1(x) = \int p_1(x|x_1) q(x_1) \, dx_1 \approx q(x)
u_t(x) = \int u_t(x|x_1) \frac{p_t(x|x_1) q(x_1)}{p_t(x)} \, dx_1
\mathcal{L}_{\text{CFM}}(\theta) = \mathbb{E}_{t, q(x_1), p_t(x|x_1)} \|v_t(x) - u_t(x|x_1)\|^2
p_t(x|x_1) = \mathcal{N}(x \mid \mu_t(x_1), \sigma_t(x_1)^2 I)
All equations
p_t(x|x_1) = \mathcal{N}(x \mid \mu_t(x_1), \sigma_t(x_1)^2 I)
\psi_t(x) = \sigma_t(x_1) x + \mu_t(x_1)
[\psi_t]_*p(x) = p_t(x|x_1)
\frac{d}{dt}\psi_t(x) = u_t(\psi_t(x)|x_1)
\mathcal{L}_{\text{CFM}}(\theta) = \mathbb{E}_{t, q(x_1), p(x_0)} \left\| v_t(\psi_t(x_0)) - \frac{d}{dt}\psi_t(x_0) \right\|^2
All equations
u_t(x|x_1) = \frac{\sigma_t'(x_1)}{\sigma_t(x_1)}(x - \mu_t(x_1)) + \mu_t'(x_1)
p_t(x|x_1) = \mathcal{N}(x \mid x_1, \sigma_{1-t}^2 I)
u_t(x|x_1) = -\frac{\sigma_{1-t}'}{\sigma_{1-t}}(x - x_1)
p_t(x|x_1) = \mathcal{N}\left(x \mid \alpha_{1-t} x_1, \left(1 - \alpha_{1-t}^2\right) I\right),\text{ where } \alpha_t = e^{-\frac{1}{2}T(t)}, \quad T(t) = \int_0^t \beta(s) \, ds
u_t(x|x_1) = \frac{\alpha_{1-t}'}{1 - \alpha_{1-t}^2}(\alpha_{1-t} x - x_1) = -\frac{T'(1-t)}{2}\left[\frac{e^{-T(1-t)}x - e^{-\frac{1}{2}T(1-t)}x_1}{1 - e^{-T(1-t)}}\right]
All equations
\mu_t(x_1) = t x_1, \quad \sigma_t(x_1) = 1 - (1 - \sigma_{\min})t
u_t(x|x_1) = \frac{x_1 - (1 - \sigma_{\min})x}{1 - (1 - \sigma_{\min})t}
\psi_t(x) = (1 - (1 - \sigma_{\min})t)x + t x_1
\mathcal{L}_{\text{CFM}}(\theta) = \mathbb{E}_{t, q(x_1), p(x_0)} \left\| v_t(\psi_t(x_0)) - \left(x_1 - (1 - \sigma_{\min})x_0\right) \right\|^2
p_t = [(1-t)\text{id} + t\psi]_*p_0
\frac{d}{dt}p_t(x) + \operatorname{div}(p_t(x) v_t(x)) = 0
deck
By Rohit Kumar
deck
- 0