Костров Евгений
Чтение по губам — процесс преобразования речи в текст с использованием только визуальной информации о движении губ.
*Multi-task Cascaded Convolutional Networks
*The Face Alignment Network (FAN)
3D-2D-CNN BLSTM with character CTC
LipReading with 3D-2D-CNN BLSTM-HMM and word-CTC models - https://arxiv.org/abs/1906.12170
*Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks
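Character Error Rate: $\mathrm{CER} = \frac{S + D + I}{N}$, where $S$, $D$ and $I$ are the numbers of character substitutions, deletions and insertions needed to turn the prediction into the reference, and $N$ is the number of characters in the reference.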
# Convolutional front-end, appended to `model` in order:
#   * an input of shape (None, 112, 112, 1) followed by batch normalization;
#   * two Conv3D -> ReLU -> BatchNormalization -> MaxPooling3D stages
#     (128 then 64 filters, kernel (10, 5, 5), stride 2 on the last two axes);
#   * two Conv2D -> ReLU -> BatchNormalization stages (256 then 128 filters).
# Every convolution uses "same" padding and a He-normal initializer seeded
# with SEED.
# NOTE(review): the leading None is presumably a variable number of frames and
# the trailing 1 a single grey channel — confirm against the data pipeline.
_frontend_layers = [
    Input(shape=(None, 112, 112, 1), name="input"),
    BatchNormalization(name="batch_norm_input"),
    # 3-D stage 1
    Conv3D(
        128,
        (10, 5, 5),
        strides=(1, 2, 2),
        padding="same",
        name="conv_3d_1",
        kernel_initializer=he_normal(seed=SEED),
    ),
    ReLU(name="relu_1"),
    BatchNormalization(name="batch_norm_1"),
    MaxPooling3D(
        pool_size=(1, 2, 2),
        strides=(1, 2, 2),
        padding="valid",
        name="max_pool_3d_1",
    ),
    # 3-D stage 2
    Conv3D(
        64,
        (10, 5, 5),
        strides=(1, 2, 2),
        padding="same",
        name="conv_3d_2",
        kernel_initializer=he_normal(seed=SEED),
    ),
    ReLU(name="relu_2"),
    BatchNormalization(name="batch_norm_2"),
    MaxPooling3D(
        pool_size=(1, 2, 2),
        strides=(1, 2, 2),
        padding="valid",
        name="max_pool_3d_2",
    ),
    # 2-D stage 1
    Conv2D(
        256,
        (5, 5),
        strides=(1, 1),
        padding="same",
        name="conv_2d_1",
        kernel_initializer=he_normal(seed=SEED),
    ),
    ReLU(name="relu_3"),
    BatchNormalization(name="batch_norm_3"),
    # 2-D stage 2
    Conv2D(
        128,
        (3, 3),
        strides=(1, 1),
        padding="same",
        name="conv_2d_2",
        kernel_initializer=he_normal(seed=SEED),
    ),
    ReLU(name="relu_4"),
    BatchNormalization(name="batch_norm_4"),
]
for _frontend_layer in _frontend_layers:
    model.add(_frontend_layer)
# 1-D convolution stage: Conv1D (400 filters, kernel size 7, LeCun-normal
# init) -> Dropout(0.2) -> BatchNormalization without the scale term ->
# SELU activation -> AlphaDropout(0.1).
channels = 7
_conv1d_filters = 400  # shared by the Conv1D width and the dropout mask shape

# NOTE(review): the dropout mask shape is (channels, 1, 400); Keras reads
# noise_shape in the same axis order as the layer input, so confirm that the
# first axis really has size `channels` here.
for _stage_layer in (
    Conv1D(
        filters=_conv1d_filters,
        kernel_size=7,
        strides=1,
        name="conv1d_1",
        kernel_initializer='lecun_normal',
    ),
    Dropout(0.2, noise_shape=(channels, 1, _conv1d_filters)),
    BatchNormalization(scale=False),
    Activation('selu'),
    AlphaDropout(0.1),
):
    model.add(_stage_layer)
# Output head: merge output dimensions 2, 3 and 4 of the model built so far
# into a single feature axis, then apply three Dense layers
# (100 relu -> 400 sigmoid -> 36 with no activation argument).
# The merged size is read from `model.output_shape`, so this must run after
# the preceding layers have been added.
_flat_features = (
    model.output_shape[2] * model.output_shape[3] * model.output_shape[4]
)
model.add(Reshape((-1, _flat_features), name="reshape"))
model.add(Dense(100, activation='relu', use_bias=False, kernel_initializer='he_normal'))
model.add(Dense(400, activation='sigmoid', use_bias=False, kernel_initializer='he_normal'))
model.add(Dense(36, name="last"))
# Optimizer note: Adam, lr=1e3
# NOTE(review): 1e3 is presumably a garbled 1e-3 (lost minus sign) — confirm
# before passing it to an optimizer.