Neven Miculinić
Assoc. Prof. Mile Šikić, PhD
The University of Zagreb,
Faculty of Electrical Engineering and Computing
CTC loss
CTC loss
CTC loss
Decoding
Greedy search
Beam search
Training dataset:
Jared Simpson's R9.4 E. coli
Test dataset:
Ryan Wick's R9.4 Klebsiella pneumoniae
// Protocol Buffers schema written in proto3 syntax.
syntax = "proto3";
// The declarations below belong to the `dataset` package.
package dataset;
// Symbol alphabet: the four nucleotide letters plus a BLANK symbol.
// NOTE(review): BLANK is presumably the CTC blank label — confirm against the training code.
enum BasePair {
A = 0; // zero value, so this is also the proto3 default for an unset BasePair field
C = 1;
G = 2;
T = 3;
BLANK = 4; // not a nucleotide; see the note above
}
// Alignment operation categories. Several CIGAR operations are folded into a
// single value, as listed on each entry.
enum Cigar {
MATCH = 0; // zero value, so this is also the proto3 default for an unset Cigar field
MISMATCH = 1;
INSERTION = 2; // Insertion, soft clip, hard clip
DELETION = 3; // Deletion, N, P (NOTE(review): presumably SAM's "skipped region" and "padding" ops — confirm)
}
// One dataset record: a signal, the bases called from it, and the labels used for training.
message DataPoint {
// A labelled interval: a BasePair together with lower/upper bounds.
// NOTE(review): the bounds are presumably positions in `signal`; whether `upper`
// is inclusive is not stated in this schema — confirm with the producer.
message BPConfidenceInterval {
uint64 lower = 1; // lower bound of the interval
uint64 upper = 2; // upper bound of the interval
BasePair pair = 3; // symbol assigned to the interval
}
repeated float signal = 1; // NOTE(review): presumably raw sequencer signal samples; units/normalisation not specified here
repeated BasePair basecalled = 2; // What we basecalled
repeated BPConfidenceInterval labels = 3; // labels describe corrected basecalled signal for training
}
basecaller | identity rate (%) |
---|---|
minion_b0 | 99.9671 |
minion_b50 | 99.9604 |
chiron_v0.3 | 99.9957 |
albacore_v2.2.7 | 99.9904 |
guppy_v0.5.1 | 99.9907 |