\text{An Investigation of Unsupervised Cell Tracking }
\text{and Interactive Fine-Tuning }
\text{Bio-Image Computing Workshop, ICCV 2025}
\text{HHMI Janelia Research Campus}
\text{Manan Lalit}
\text{Jan Funke}
\text{A typical cell tracking workflow}
\text{(e.g., use a pre-trained instance segmentation model)}
\text{(reserve some data for training and testing}
\text{a tracking model)}
\text{Train Subset}
\text{Test Subset}
\text{(annotate links on the train subset,}
\text{write as an association matrix)}
\text{Train Subset}
\text{Manually-prepared Association Matrix}
\text{infer soft links (association matrices)}
\text{for the test subset}
\text{re-ID images}
\text{(e.g., using a globally constrained}
\text{optimization)}
\text{scroll through the movie,}
\text{scan for errors, and correct the IDs.}
\text{Instance Segment Cells}
\text{Infer and Re-Id on dataset}
\text{Scroll and Scan for Errors}
\text{Fix Errors}
\text{Prepare GT links for}
\text{training a tracker model}
\text{Incorporate new GT links}
\text{when fine-tuning the tracker model}
\text{We present ...}
\text{1. Acquiring link annotations is time-consuming!}
\text{$\rightarrow$ Fully unsupervised \textit{Attrackt} loss.}
\text{2. Strategies that present ranked, ambiguous locations are needed!}
\text{$\rightarrow$ Active-learning scheme that identifies cells for proof-reading.}
\text{3. How do we best incorporate new annotations when fine-tuning the model?}
\text{$\rightarrow$ Fine-tuning strategy that prevents over-fitting to few labels.}
\text{Attrackt Loss}
\text{Step One: Train an auto-encoder with reconstruction loss.}
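\text{A minimal sketch of Step One (our illustration in PyTorch; the crop size, layer sizes, and}
\text{MSE choice are assumptions, not the authors' configuration). $m_{E}$ and $m_{D}$ name the}
\text{encoder and decoder, matching the notation used on the next slides.}
\begin{verbatim}
import torch
import torch.nn as nn

class AutoEncoder(nn.Module):
    """Illustrative auto-encoder: m_E embeds a cell crop, m_D reconstructs it."""
    def __init__(self, dim=32):
        super().__init__()
        self.m_E = nn.Sequential(  # encoder
            nn.Conv2d(1, 16, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Flatten(), nn.Linear(32 * 8 * 8, dim),
        )
        self.m_D = nn.Sequential(  # decoder
            nn.Linear(dim, 32 * 8 * 8), nn.Unflatten(1, (32, 8, 8)),
            nn.ConvTranspose2d(32, 16, 4, stride=2, padding=1), nn.ReLU(),
            nn.ConvTranspose2d(16, 1, 4, stride=2, padding=1),
        )

    def forward(self, x):          # x: (B, 1, 32, 32) cell crops
        return self.m_D(self.m_E(x))

model = AutoEncoder()
x = torch.randn(8, 1, 32, 32)      # dummy batch of cell crops
loss = nn.MSELoss()(model(x), x)   # reconstruction loss
\end{verbatim}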
\text{Attrackt Loss}
\text{(Supervised formulation)}
\text{The goal is to learn a forward function that transforms the cell embeddings at $t$}
\text{to become the corresponding cell embeddings at $t{+}k$.}
\text{If this were done in a supervised manner, i.e., if the ground-truth association matrix $\mathcal{A}^{GT}$ were available, then the goal is to learn $m_{E}$ and $m_{D}$.}
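\text{One plausible way to write such a supervised objective (a sketch under our own assumptions,}
\text{not necessarily the authors' exact formulation: $f$ is the forward function, $Z_{t}$ stacks the}
\text{cell embeddings at time $t$, and each row of $\mathcal{A}^{GT}$ selects a cell's true successor):}
\[
\mathcal{L}_{\text{sup.}} = \left\lVert f\!\left(m_{E}(Z_{t})\right) - \mathcal{A}^{GT}\, m_{E}(Z_{t+k}) \right\rVert_{2}^{2}.
\]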
\text{Attrackt Loss}
\text{(Fully Unsupervised)}
\text{If there are predictable changes in cell appearance over time,}
\text{the model will learn to treat the true predecessor of a cell as the signal,}
\text{consider the other unrelated cells as noise,}
\text{and this preference will be reflected in the association matrix.}
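\text{A minimal sketch of this intuition (illustrative only, not the official \textit{Attrackt}}
\text{implementation; it assumes per-cell embeddings have already been computed by the encoder):}
\begin{verbatim}
import torch
import torch.nn.functional as F

def unsupervised_association_loss(z_t, z_tk):
    """z_t: (n, d) cell embeddings at time t; z_tk: (m, d) embeddings at t+k."""
    # Soft association matrix: each cell at t+k distributes attention
    # over candidate predecessors at t.
    logits = z_tk @ z_t.T              # (m, n) pairwise similarities
    assoc = F.softmax(logits, dim=1)   # rows sum to 1
    # Reconstruct each t+k embedding from the cells at t it is attracted to.
    recon = assoc @ z_t                # (m, d)
    # The true predecessor is the most predictive signal; unrelated cells act
    # as noise, so minimizing MSE concentrates association mass on true links.
    return F.mse_loss(recon, z_tk)

# e.g.: loss = unsupervised_association_loss(torch.randn(5, 32), torch.randn(6, 32))
\end{verbatim}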
\text{Attrackt Loss}
\text{(Fully Unsupervised)}
\text{The trainable tracker follows the \textit{Trackastra} model architecture, which maps}
\text{segmentations in a spatio-temporal window to an association matrix.}
\text{``Trackastra: Transformer-based cell tracking for live-cell microscopy", Gallusser and Weigert, 2024.}
\text{Globally constrained optimization}
\text{1. Build a \textit{candidate graph} by connecting each segmentation at $t$}
\text{to its $k$ nearest spatial neighbors at $t{-}1$ (e.g., $k{=}3$).}
\text{2. Solve for } \hat{X}_{\mathrm{a}}, \hat{X}_{\mathrm{d}}, \hat{X}_{\mathrm{e}} = \operatorname*{arg\,min}_{X}\; C_{\mathrm{a}} X_{\mathrm{a}} + C_{\mathrm{d}} X_{\mathrm{d}} + C_{\mathrm{e}} X_{\mathrm{e}},
\text{where } X_{\mathrm{a}}, X_{\mathrm{d}}, X_{\mathrm{e}} \in \{0,1\},
\text{and } C_{\mathrm{e}} = w_{1} \left\|d_{1}\right\|_{2} + w_{2} \left\|d_{2}\right\|_{2} + b.
\text{Multi-Object Tracker using Integer-Linear Equations}
github.com/funkelab/motile
\text{$C_a$ : Cost of appearance of a new tracklet}
\text{$C_d$ : Cost of disappearance of an existing tracklet}
\text{$C_e$ : Cost of an edge}
\text{``Automated reconstruction of whole-embryo cell lineages}
\text{by learning from sparse annotations", Malin-Mayor \textit{et al.}, 2023.}
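\text{As a concrete illustration of step 1, a hedged sketch of candidate-graph construction}
\text{(the networkx/scipy choices and function names are ours, not necessarily the authors'}
\text{pipeline); the stored distances can feed the edge cost $C_{\mathrm{e}}$, and the resulting ILP}
\text{can then be solved with a library such as \textit{motile} (linked above).}
\begin{verbatim}
import networkx as nx
import numpy as np
from scipy.spatial import cKDTree

def build_candidate_graph(centroids, k=3):
    """centroids: dict mapping frame t -> (n_t, D) array of cell centers.
    Assumes consecutive integer frame indices."""
    graph = nx.DiGraph()
    for t, points in centroids.items():
        for i, p in enumerate(points):
            graph.add_node((t, i), position=p)
    for t in sorted(centroids)[1:]:
        prev = centroids[t - 1]
        tree = cKDTree(prev)                              # neighbors in frame t-1
        dists, idxs = tree.query(centroids[t], k=min(k, len(prev)))
        dists = dists.reshape(len(centroids[t]), -1)      # handle k == 1
        idxs = idxs.reshape(len(centroids[t]), -1)
        for i in range(len(centroids[t])):
            for d, j in zip(dists[i], idxs[i]):
                # candidate link from t-1 to t; distance feeds the edge cost C_e
                graph.add_edge((t - 1, int(j)), (t, i), distance=float(d))
    return graph
\end{verbatim}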
\text{Results: Unsupervised Training}
\text{[Plots: false-positive edges (FP$_{\text{e}}$) and false-negative edges (FN$_{\text{e}}$).]}
\text{Visualizations created using:}
github.com/bentaculum/divisualisation
\text{Active-Learning Scheme}
\text{Identify ambiguous nodes!}
\text{(Optimal Sampling) Rank nodes by the difference between G.T. and the ILP solution:}
\text{score}(v) = \left| \sum \mathcal{A}_{\text{GT}}(v) - \sum \mathcal{A}_{\text{solver}}(v) \right| .
\text{(Confidence-based Sampling) In reality, one doesn't have access to $\mathcal{A}_{\text{GT}}(v)$; instead use}
\text{score}(v) = \left| \sum \mathcal{A}(v) - \sum \mathcal{A}_{\text{solver}}(v) \right| .
\text{(Random Sampling) Compare against randomly selecting nodes.}
\text{$\sum \mathcal{A}_{\text{GT}} (v)$: number of outgoing edges from node $v$ in G.T.}
\text{$\sum \mathcal{A}_{\text{solver}} (v)$: number of outgoing edges from node $v$ in the ILP solution.}
\text{$\sum \mathcal{A} (v)$: sum of outgoing associations for node $v$ predicted by the unsupervised tracker.}
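\text{A hedged sketch of the confidence-based score (array shapes and names are our assumptions):}
\begin{verbatim}
import numpy as np

def confidence_scores(assoc_soft, assoc_solver):
    """assoc_soft: (n, m) soft associations from the unsupervised tracker;
    assoc_solver: (n, m) binary associations chosen by the ILP solver.
    Returns one ambiguity score per node at time t."""
    return np.abs(assoc_soft.sum(axis=1) - assoc_solver.sum(axis=1))

# Highest-scoring nodes are the most ambiguous: proof-read these first.
# ranked = np.argsort(-confidence_scores(assoc_soft, assoc_solver))
\end{verbatim}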
\text{Interactive Fine-Tuning}
\text{1. Use LoRA to train a fraction of the weights.}
\text{2. Fine-tuning objective: $\mathcal{L}_{\text{fine-tuning}} = \mathcal{L}_{\text{unsup.}} + \mathcal{L}_{\text{sup.}} + \mathcal{L}_{\text{pseudo-sup.}}$}
\text{$\mathcal{L}_{\text{sup.}}$: compare predicted associations with the G.T. associations}
\text{available for the top $k$ ambiguous nodes.}
\text{(Simulate proof-reading by selecting the top $k$ ambiguous nodes}
\text{and using the outgoing G.T. links for these nodes.)}
\text{$\mathcal{L}_{\text{pseudo-sup.}}$: compare predicted associations with the associations}
\text{inferred by the solver for the bottom $10\%$ of ambiguous nodes.}
\text{``LoRA: Low-Rank Adaptation of Large Language Models", Hu \textit{et al.}, 2021.}
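\text{A minimal LoRA sketch (our illustration in PyTorch; the rank and initialization choices}
\text{are assumptions, not the authors' configuration): the pre-trained weights stay frozen}
\text{and only the low-rank factors $A$ and $B$ are trained.}
\begin{verbatim}
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """Wraps a pre-trained nn.Linear with a trainable low-rank update."""
    def __init__(self, linear: nn.Linear, rank: int = 4, alpha: float = 1.0):
        super().__init__()
        self.linear = linear
        for p in self.linear.parameters():   # freeze pre-trained weights
            p.requires_grad_(False)
        self.A = nn.Parameter(torch.randn(rank, linear.in_features) * 0.01)
        self.B = nn.Parameter(torch.zeros(linear.out_features, rank))
        self.scale = alpha / rank

    def forward(self, x):
        # frozen path + scaled low-rank update (B @ A) applied to x
        return self.linear(x) + self.scale * ((x @ self.A.T) @ self.B.T)
\end{verbatim}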
\text{Results: Interactive Fine-Tuning}
\text{Confidence-based sampling outperforms Random sampling.}
\text{Optimal sampling leads to faster reduction of the AOGM (Acyclic Oriented Graph Matching) error.}
\text{Results: Unsupervised Pre-Training and Fine-Tuning vs Training from Scratch}
\text{In the low annotation regime, unsupervised pre-training provides a clear advantage.}
\text{Acknowledgements}
\text{Attrackt Code:}
\text{github.com/funkelab/attrackt}
\text{Attrackt Experiments:}
\text{github.com/funkelab/attrackt\_experiments}
\text{Diane Adjavon}
\text{Ben Gallusser}
\text{Jan Funke}
\text{Caroline Malin-Mayor}
\text{Morgan Schwartz}
\text{Check out our \textit{napari} plugin:}
\text{github.com/funkelab/motile\_tracker}