# Image Clustering
The task is to separate data that has been mixed together into its underlying classes.
# Detect whether a GPU is available.
use_gpu = torch.cuda.is_available()
autoencoder = Autoencoder()

# Load data and normalize to [-1, 1].
trainX = np.load('release/trainX.npy')
trainX = np.transpose(trainX, (0, 3, 1, 2)) / 255. * 2 - 1
trainX = torch.Tensor(trainX)

# If a GPU is available, move the model and data onto it.
if use_gpu:
    autoencoder.cuda()
    trainX = trainX.cuda()

train_dataloader = DataLoader(trainX, batch_size=32, shuffle=True)
test_dataloader = DataLoader(trainX, batch_size=32, shuffle=False)
Note that we scale the images to [-1, 1] here. When the target range is [-1, 1], the decoder usually ends with a tanh; if the range is [0, 1], a sigmoid is used instead.
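As a minimal sketch of this pairing (here `raw` is a hypothetical uint8 image array in [0, 255], not a variable from the code above):

# Illustrative sketch: match the output activation to the target range.
x_tanh    = raw / 255. * 2 - 1   # scale to [-1, 1]; pair the decoder with nn.Tanh()
x_sigmoid = raw / 255.           # scale to [0, 1];  pair the decoder with nn.Sigmoid()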
# Criterion: L1 loss (i.e., mean absolute error, MAE).
criterion = nn.L1Loss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)
Here we use L1 loss as the reconstruction criterion and Adam as the optimizer; the autoencoder model itself is described in detail below.
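For reference, MSE is the other common reconstruction loss; this comparison is an illustrative sketch, not part of the pipeline above:

# L1 (MAE) vs. MSE as reconstruction criteria (illustrative only).
# MSE penalizes large per-pixel errors more heavily and often yields
# slightly blurrier reconstructions than L1.
criterion_mae = nn.L1Loss()
criterion_mse = nn.MSELoss()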
| Layer           | Parameters       | Output Shape |
|-----------------|------------------|--------------|
| Encoder Input   | -                | (3, 32, 32)  |
| Conv2d          | (3, 8, 3, 2, 1)  | (8, 16, 16)  |
| Conv2d          | (8, 16, 3, 2, 1) | (16, 8, 8)   |
| Encoder Output  | -                | (16, 8, 8)   |
| Decoder Input   | -                | (16, 8, 8)   |
| ConvTranspose2d | (16, 8, 2, 2)    | (8, 16, 16)  |
| ConvTranspose2d | (8, 3, 2, 2)     | (3, 32, 32)  |
| Decoder Output  | -                | (3, 32, 32)  |
Conv / deconv parameters are listed as (input channels, output channels, kernel size, stride, padding); a worked shape check follows below.
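As a worked check of the table (standard convolution arithmetic; the numbers come from the first encoder row):

# Conv2d output size: floor((in + 2*padding - kernel) / stride) + 1.
# First encoder layer, Conv2d(3, 8, 3, 2, 1), on a 32x32 input:
out = (32 + 2 * 1 - 3) // 2 + 1
print(out)  # 16 -> matches the (8, 16, 16) row
# For ConvTranspose2d with kernel = stride = 2 and no padding, the
# spatial size simply doubles each layer: 8 -> 16 -> 32.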
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder: two stride-2 convolutions, (3, 32, 32) -> (16, 8, 8).
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 8, 3, 2, 1),
            nn.Conv2d(8, 16, 3, 2, 1),
        )
        # Decoder: two stride-2 transposed convolutions back to (3, 32, 32);
        # Tanh keeps the output in [-1, 1] to match the input normalization.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(16, 8, 2, 2),
            nn.ConvTranspose2d(8, 3, 2, 2),
            nn.Tanh(),
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        # Return both the latent code and the reconstruction.
        return encoded, decoded
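A quick sanity check of the shapes in the table, assuming the class above and the imports from the full script further down:

# Feed a dummy batch through the autoencoder and check the shapes.
model = Autoencoder()
x = torch.randn(4, 3, 32, 32)   # 4 dummy RGB 32x32 images
latent, reconstruct = model(x)
print(latent.shape)             # torch.Size([4, 16, 8, 8])
print(reconstruct.shape)        # torch.Size([4, 3, 32, 32])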
for epoch in range(20):
    for x in train_dataloader:
        latent, reconstruct = autoencoder(x)
        loss = criterion(reconstruct, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
latents = []
for x in test_dataloader:
    latent, reconstruct = autoencoder(x)
    latents.append(latent.cpu().detach().numpy())
latents = np.concatenate(latents, axis=0)
latents = latents.reshape([9000, -1])   # 9000 images, each latent flattened to a vector
latents_mean = np.mean(latents, axis=0)
latents_std = np.std(latents, axis=0)
latents = (latents - latents_mean) / latents_std
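The same per-dimension standardization can also be done with scikit-learn; this is a sketch of an equivalent alternative, not the original code:

# Equivalent standardization using sklearn's StandardScaler.
from sklearn.preprocessing import StandardScaler
latents = StandardScaler().fit_transform(latents)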
Dimensionality reduction: PCA / SVD / t-SNE, etc.
from sklearn.decomposition import PCA
latents = PCA(n_components=32).fit_transform(latents)
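If t-SNE is preferred (mainly for 2-D visualization), swapping it in is straightforward, though it is much slower than PCA on 9000 points; a sketch:

# Alternative: t-SNE instead of PCA (slow; mainly for visualization).
from sklearn.manifold import TSNE
latents_2d = TSNE(n_components=2).fit_transform(latents)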
Clustering: k-means / kNN, etc.
from sklearn.cluster import KMeans
result = KMeans(n_clusters=2).fit(latents).labels_
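A quick, optional way to inspect how balanced the two clusters are (not part of the original pipeline):

# Count how many images were assigned to cluster 0 and cluster 1.
print(np.bincount(result))

Putting all of the pieces together, the complete script follows.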
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from torch import optim
from torch.utils.data import DataLoader, Dataset
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder: two stride-2 convolutions, (3, 32, 32) -> (16, 8, 8).
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 8, 3, 2, 1),
            nn.Conv2d(8, 16, 3, 2, 1),
        )
        # Decoder: two stride-2 transposed convolutions back to (3, 32, 32);
        # Tanh keeps the output in [-1, 1] to match the input normalization.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(16, 8, 2, 2),
            nn.ConvTranspose2d(8, 3, 2, 2),
            nn.Tanh(),
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        # Return both the latent code and the reconstruction.
        return encoded, decoded
if __name__ == '__main__':
    # Detect whether a GPU is available.
    use_gpu = torch.cuda.is_available()

    autoencoder = Autoencoder()

    # Load data and normalize to [-1, 1].
    trainX = np.load('release/trainX.npy')
    trainX = np.transpose(trainX, (0, 3, 1, 2)) / 255. * 2 - 1
    trainX = torch.Tensor(trainX)

    # If a GPU is available, move the model and data onto it.
    if use_gpu:
        autoencoder.cuda()
        trainX = trainX.cuda()

    # Dataloaders: shuffle the training loader only.
    train_dataloader = DataLoader(trainX, batch_size=32, shuffle=True)
    test_dataloader = DataLoader(trainX, batch_size=32, shuffle=False)

    # Criterion: L1 loss (mean absolute error, MAE).
    criterion = nn.L1Loss()
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

    # Train for 20 epochs.
    for epoch in range(20):
        cumulate_loss = 0
        for x in train_dataloader:
            latent, reconstruct = autoencoder(x)
            loss = criterion(reconstruct, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cumulate_loss += loss.item() * x.shape[0]

        print(f'Epoch {epoch:03d}: Loss: {cumulate_loss / trainX.shape[0]:.5f}')

    # Collect the latents and standardize them.
    latents = []
    reconstructs = []
    for x in test_dataloader:
        latent, reconstruct = autoencoder(x)
        latents.append(latent.cpu().detach().numpy())
        reconstructs.append(reconstruct.cpu().detach().numpy())
    latents = np.concatenate(latents, axis=0).reshape([9000, -1])
    latents = (latents - np.mean(latents, axis=0)) / np.std(latents, axis=0)

    # Use PCA to reduce the latent dimension, then cluster with k-means.
    latents = PCA(n_components=32).fit_transform(latents)
    result = KMeans(n_clusters=2).fit(latents).labels_

    # The first 5 labels are known to be zeros; use them to decide whether
    # the predicted cluster ids need to be flipped.
    if np.sum(result[:5]) >= 3:
        result = 1 - result

    # Generate the submission file.
    df = pd.DataFrame({'id': np.arange(0, len(result)), 'label': result})
    df.to_csv('haha_submission.csv', index=False)