EXPERIMENT-1
AIM:
To implement a basic autoencoder using TensorFlow or PyTorch and train it on a dataset like
MNIST for image reconstruction.
DESCRIPTION:
In this experiment, a basic autoencoder was implemented with fully connected layers on
the MNIST handwritten-digits dataset. The autoencoder compresses each 28×28-pixel
input image into a 32-dimensional latent space and then reconstructs it. The model was
trained for 5 epochs using the Adam optimizer with a learning rate of 0.001 and mean
squared error (MSE) as the reconstruction loss. After training, the autoencoder
reconstructed digits with reasonable clarity, preserving their overall shape while losing
some fine pixel detail.
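For reference, the reconstruction objective minimized below (nn.MSELoss with its default
mean reduction, which averages over every pixel of every image in a batch of N images)
can be written as

\mathcal{L}_{\text{MSE}} = \frac{1}{N \cdot 784} \sum_{i=1}^{N} \lVert x_i - \hat{x}_i \rVert_2^2, \qquad \hat{x}_i = f_{\text{dec}}\big(f_{\text{enc}}(x_i)\big),

where f_enc maps the flattened 784-pixel image to the 32-dimensional code and f_dec maps
the code back to pixel space.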
CODE:
# STEP 1: Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# STEP 2: Load MNIST dataset
transform = transforms.ToTensor()
train_dataset = torchvision.datasets.MNIST(root="./data", train=True, transform=transform,
download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataset = torchvision.datasets.MNIST(root="./data", train=False, transform=transform,
download=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)
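# Note (added for clarity): ToTensor scales pixel values to [0, 1], which
# matches the Sigmoid output range of the decoder defined below.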
# STEP 3: Define Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(28*28, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32)  # compressed representation
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 28*28),
            nn.Sigmoid()  # output in range [0,1]
        )

    def forward(self, x):
        x = x.view(-1, 28*28)                  # flatten image
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        decoded = decoded.view(-1, 1, 28, 28)  # reshape back to image
        return decoded
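# Optional sanity check (not part of the original run): a forward pass should
# return reconstructions with the same shape as the input batch.
_probe = Autoencoder()
assert _probe(torch.zeros(2, 1, 28, 28)).shape == (2, 1, 28, 28)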
# STEP 4: Initialize model, loss, optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# STEP 5: Train the model
epochs = 5
for epoch in range(epochs):
    for images, _ in train_loader:
        images = images.to(device)
        outputs = model(images)
        loss = criterion(outputs, images)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")
# STEP 6: Test and visualize reconstructed images
model.eval()
with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        break  # take only first batch
# Plot original vs reconstructed
n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # Original
    ax = plt.subplot(2, n, i+1)
    plt.imshow(images[i].cpu().squeeze(), cmap="gray")
    plt.axis("off")
    # Reconstructed
    ax = plt.subplot(2, n, i+1+n)
    plt.imshow(outputs[i].cpu().squeeze(), cmap="gray")
    plt.axis("off")
plt.show()
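# Optional extra (not in the original run): average reconstruction MSE over the
# full test set, as a single quantitative summary of reconstruction quality.
test_mse = 0.0
with torch.no_grad():
    for imgs, _ in test_loader:
        imgs = imgs.to(device)
        test_mse += criterion(model(imgs), imgs).item()
print(f"Mean test MSE: {test_mse / len(test_loader):.4f}")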
OUTPUT:
100%|██████████| 9.91M/9.91M [00:01<00:00, 5.00MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 131kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.07MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.39MB/s]
Epoch [1/5], Loss: 0.0359
Epoch [2/5], Loss: 0.0274
Epoch [3/5], Loss: 0.0223
Epoch [4/5], Loss: 0.0208
Epoch [5/5], Loss: 0.0177
EXPERIMENT-2
AIM:
To explore different regularization techniques such as L1/L2 regularization or dropout and
compare their effects on the autoencoder's performance.
DESCRIPTION:
L2 regularization (weight decay): Penalizes large weights, encouraging smoother
reconstructions and reducing overfitting.
L1 regularization: Promotes sparsity in the model's weights, leading to more compressed
representations but slightly faded reconstructions.
Dropout: Randomly deactivates neurons during training, making the model more robust but
often producing slightly blurrier outputs.
The corresponding objectives are sketched below.
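As a sketch of the objectives (assuming the penalties enter exactly as in the code below),
with \mathcal{L}_{\text{MSE}} the reconstruction loss and w ranging over all model parameters:

\mathcal{L}_{\text{L1}} = \mathcal{L}_{\text{MSE}} + \lambda_1 \sum_{w} |w|, \qquad \mathcal{L}_{\text{L2}} = \mathcal{L}_{\text{MSE}} + \frac{\lambda_2}{2} \sum_{w} w^2,

with \lambda_1 = \lambda_2 = 10^{-5} in the runs below. The L1 term is added to the loss
explicitly; the L2 term is applied implicitly through Adam's weight_decay argument, which
adds \lambda_2 w to each parameter's gradient. Dropout changes the architecture rather
than the objective.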
CODE:
# =======================
# Autoencoder Regularization Experiment on MNIST
# =======================
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# =======================
# STEP 1: Load Dataset
# =======================
transform = transforms.ToTensor()
train_dataset = torchvision.datasets.MNIST(root="./data", train=True, transform=transform,
download=True)
test_dataset = torchvision.datasets.MNIST(root="./data", train=False, transform=transform,
download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# =======================
# STEP 2: Define Models
# =======================
class Autoencoder(nn.Module):
    def __init__(self, use_dropout=False):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28*28, 128),
            nn.ReLU(),
            nn.Dropout(0.2) if use_dropout else nn.Identity(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32)
        )
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Dropout(0.2) if use_dropout else nn.Identity(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 28*28),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = x.view(-1, 28*28)
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        decoded = decoded.view(-1, 1, 28, 28)
        return decoded
# =======================
# STEP 3: Training Function
# =======================
def train_model(reg_type="baseline", epochs=5, l1_lambda=1e-5, weight_decay=0):
    model = Autoencoder(use_dropout=(reg_type == "dropout")).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=weight_decay)
    train_losses, test_losses = [], []
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for images, _ in train_loader:
            images = images.to(device)
            outputs = model(images)
            # Base loss
            loss = criterion(outputs, images)
            # Add L1 if chosen
            if reg_type == "l1":
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                loss = loss + l1_lambda * l1_norm
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_losses.append(running_loss / len(train_loader))
        # Evaluate on test set
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for images, _ in test_loader:
                images = images.to(device)
                outputs = model(images)
                test_loss += criterion(outputs, images).item()
        test_losses.append(test_loss / len(test_loader))
        print(f"[{reg_type.upper()}] Epoch {epoch+1}/{epochs}, "
              f"Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}")
    return model, train_losses, test_losses
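# Note (added for clarity): the "l2" configuration relies entirely on Adam's
# weight_decay argument, which adds weight_decay * w to every parameter's
# gradient (the classic, non-decoupled L2 penalty), so no explicit term is
# added to the loss, unlike the "l1" case above.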
# =======================
# STEP 4: Run Experiments
# =======================
epochs = 5
results = {}
# Baseline
results["baseline"] = train_model("baseline", epochs=epochs)
# L2
results["l2"] = train_model("l2", epochs=epochs, weight_decay=1e-5)
# L1
results["l1"] = train_model("l1", epochs=epochs, l1_lambda=1e-5)
# Dropout
results["dropout"] = train_model("dropout", epochs=epochs)
# =======================
# STEP 5: Compare Loss Curves
# =======================
plt.figure(figsize=(10,5))
for key, (_, train_losses, test_losses) in results.items():
    plt.plot(train_losses, label=f"{key}-train")
    plt.plot(test_losses, '--', label=f"{key}-test")
plt.title("Train vs Test Loss")
plt.xlabel("Epochs")
plt.ylabel("MSE Loss")
plt.legend()
plt.show()
# =======================
# STEP 6: Visual Comparison of Reconstructions
# =======================
def show_reconstructions(models_dict):
    test_iter = iter(test_loader)
    images, _ = next(test_iter)
    images = images[:10].to(device)  # take 10 test images
    plt.figure(figsize=(20, 8))
    n_models = len(models_dict)
    for idx, (name, (model, _, _)) in enumerate(models_dict.items()):
        model.eval()
        with torch.no_grad():
            outputs = model(images)
        for i in range(10):
            ax = plt.subplot(n_models+1, 10, i+1)  # original row
            if idx == 0:
                plt.imshow(images[i].cpu().squeeze(), cmap="gray")
                if i == 0: ax.set_ylabel("Original")
            plt.axis("off")
            ax = plt.subplot(n_models+1, 10, (idx+1)*10 + i + 1)
            plt.imshow(outputs[i].cpu().squeeze(), cmap="gray")
            if i == 0: ax.set_ylabel(name.upper())
            plt.axis("off")
    plt.show()

show_reconstructions(results)
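# Optional summary (not in the original run): final-epoch test loss per
# configuration, ranked best to worst.
for name, (_, _, test_losses) in sorted(results.items(), key=lambda kv: kv[1][2][-1]):
    print(f"{name:>8}: final test MSE = {test_losses[-1]:.4f}")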
OUTPUT:
[BASELINE] Epoch 1/5, Train Loss: 0.0619, Test Loss: 0.0380
[BASELINE] Epoch 2/5, Train Loss: 0.0333, Test Loss: 0.0290
[BASELINE] Epoch 3/5, Train Loss: 0.0267, Test Loss: 0.0238
[BASELINE] Epoch 4/5, Train Loss: 0.0224, Test Loss: 0.0205
[BASELINE] Epoch 5/5, Train Loss: 0.0198, Test Loss: 0.0184
[L2] Epoch 1/5, Train Loss: 0.0619, Test Loss: 0.0399
[L2] Epoch 2/5, Train Loss: 0.0351, Test Loss: 0.0315
[L2] Epoch 3/5, Train Loss: 0.0299, Test Loss: 0.0273
[L2] Epoch 4/5, Train Loss: 0.0263, Test Loss: 0.0248
[L2] Epoch 5/5, Train Loss: 0.0239, Test Loss: 0.0224
[L1] Epoch 1/5, Train Loss: 0.0908, Test Loss: 0.0646
[L1] Epoch 2/5, Train Loss: 0.0696, Test Loss: 0.0628
[L1] Epoch 3/5, Train Loss: 0.0664, Test Loss: 0.0620
[L1] Epoch 4/5, Train Loss: 0.0648, Test Loss: 0.0617
[L1] Epoch 5/5, Train Loss: 0.0641, Test Loss: 0.0616
[DROPOUT] Epoch 1/5, Train Loss: 0.0630, Test Loss: 0.0393
[DROPOUT] Epoch 2/5, Train Loss: 0.0367, Test Loss: 0.0294
[DROPOUT] Epoch 3/5, Train Loss: 0.0320, Test Loss: 0.0262
[DROPOUT] Epoch 4/5, Train Loss: 0.0298, Test Loss: 0.0245
[DROPOUT] Epoch 5/5, Train Loss: 0.0284, Test Loss: 0.0229
EXPERIMENT-3
AIM:
To implement a variational autoencoder (VAE) and train it on a dataset like FashionMNIST
to generate new images.
DESCRIPTION:
A variational autoencoder (VAE) was implemented and trained on the FashionMNIST
dataset to go beyond reconstruction and enable generation of new images. Unlike the
basic autoencoder, the VAE learns a probabilistic latent space in which each input is
encoded as a distribution (a mean and a variance) rather than a single point. The
reparameterization trick was used to sample latent vectors during training. The
architecture uses convolutional layers for both the encoder and the decoder.
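The objective implemented in elbo_loss below is the negative ELBO per image. With the
diagonal-Gaussian encoder q(z \mid x) = \mathcal{N}(\mu, \mathrm{diag}(\sigma^2)) and the
standard-normal prior, the KL term has the closed form used in the code:

\mathcal{L} = \mathrm{BCE}(x, \hat{x}) + \beta \, D_{\mathrm{KL}}\big(q(z \mid x) \,\Vert\, \mathcal{N}(0, I)\big), \qquad D_{\mathrm{KL}} = -\frac{1}{2} \sum_{j=1}^{16} \big(1 + \log \sigma_j^2 - \mu_j^2 - \sigma_j^2\big),

and latent samples are drawn with the reparameterization trick z = \mu + \sigma \odot \epsilon,
\epsilon \sim \mathcal{N}(0, I), which keeps the sampling step differentiable with respect
to \mu and \sigma.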
CODE:
# ===========================
# FashionMNIST Variational Autoencoder (PyTorch)
# ===========================
import os
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, utils
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from datetime import datetime
torch.manual_seed(42)
# ---------------------------
# Config
# ---------------------------
batch_size = 128
epochs = 10
lr = 2e-3
latent_dim = 16
beta = 1.0 # set >1.0 for β-VAE
kl_warmup_epochs = 3 # linearly ramp KL from 0 → beta during first N epochs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
save_dir = "./vae_fmnist_runs"
os.makedirs(save_dir, exist_ok=True)
# ---------------------------
# Data
# ---------------------------
transform = transforms.ToTensor() # outputs in [0,1], good for BCE
train_ds = datasets.FashionMNIST(root="./data", train=True, download=True,
transform=transform)
test_ds = datasets.FashionMNIST(root="./data", train=False, download=True,
transform=transform)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2,
pin_memory=True)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=2,
pin_memory=True)
# ---------------------------
# Model (Conv VAE)
# ---------------------------
class VAE(nn.Module):
    def __init__(self, z_dim=16):
        super().__init__()
        # Encoder: 1x28x28 -> (features)
        self.enc = nn.Sequential(
            nn.Conv2d(1, 32, 4, stride=2, padding=1),   # 32x14x14
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, 4, stride=2, padding=1),  # 64x7x7
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, 3, stride=1, padding=1), # 128x7x7
            nn.ReLU(inplace=True)
        )
        self.enc_flat = nn.Flatten()  # 128*7*7 = 6272
        self.fc_mu = nn.Linear(6272, z_dim)
        self.fc_logvar = nn.Linear(6272, z_dim)
        # Decoder: z -> 1x28x28
        self.fc_dec = nn.Linear(z_dim, 128*7*7)
        self.dec = nn.Sequential(
            nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),  # 64x14x14
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, 4, stride=2, padding=1),   # 32x28x28
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 1, 3, stride=1, padding=1),             # 1x28x28
            nn.Sigmoid()  # output in [0,1] for BCE
        )

    def encode(self, x):
        h = self.enc(x)
        h = self.enc_flat(h)
        mu, logvar = self.fc_mu(h), self.fc_logvar(h)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h = self.fc_dec(z)
        h = h.view(-1, 128, 7, 7)
        return self.dec(h)

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        x_hat = self.decode(z)
        return x_hat, mu, logvar
model = VAE(z_dim=latent_dim).to(device)
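# Optional sanity check (not part of the original run): encoder outputs should
# have the expected shapes for the 16-dim latent space.
with torch.no_grad():
    _mu, _logvar = model.encode(torch.zeros(2, 1, 28, 28, device=device))
    assert _mu.shape == (2, latent_dim) and _logvar.shape == (2, latent_dim)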
# ---------------------------
# Loss (ELBO: BCE + KL)
# ---------------------------
def elbo_loss(x_hat, x, mu, logvar, kl_weight=1.0):
    # Reconstruction: sum over pixels then mean over batch for stable KL scale
    # (You can use reduction='sum' and divide by batch if you prefer.)
    bce = F.binary_cross_entropy(x_hat, x, reduction='sum') / x.size(0)
    # KL divergence (closed-form): D_KL(q(z|x) || p(z)) where p(z)=N(0,I)
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / x.size(0)
    return bce + kl_weight * kl, bce, kl
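# Note (added for clarity): summing BCE over all 784 pixels and averaging only
# over the batch keeps the reconstruction term on the same scale as the summed
# KL term; reduction='mean' would shrink BCE by a factor of 784 and behave
# like a very large effective beta.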
# ---------------------------
# Optimizer
# ---------------------------
opt = optim.Adam(model.parameters(), lr=lr)
# ---------------------------
# Training / Evaluation
# ---------------------------
def evaluate(dataloader, kl_weight):
    model.eval()
    total, total_bce, total_kl = 0.0, 0.0, 0.0
    with torch.no_grad():
        for x, _ in dataloader:
            x = x.to(device)
            x_hat, mu, logvar = model(x)
            loss, bce, kl = elbo_loss(x_hat, x, mu, logvar, kl_weight)
            total += loss.item()
            total_bce += bce.item()
            total_kl += kl.item()
    n = len(dataloader)
    return total / n, total_bce / n, total_kl / n
train_hist, test_hist = [], []
for epoch in range(1, epochs+1):
    # KL warmup (0 → beta over kl_warmup_epochs)
    if kl_warmup_epochs > 0:
        kl_weight = beta * min(1.0, epoch / kl_warmup_epochs)
    else:
        kl_weight = beta
    model.train()
    running, running_bce, running_kl = 0.0, 0.0, 0.0
    for x, _ in train_loader:
        x = x.to(device)
        x_hat, mu, logvar = model(x)
        loss, bce, kl = elbo_loss(x_hat, x, mu, logvar, kl_weight)
        opt.zero_grad()
        loss.backward()
        opt.step()
        running += loss.item()
        running_bce += bce.item()
        running_kl += kl.item()
    tr_loss = running / len(train_loader)
    tr_bce = running_bce / len(train_loader)
    tr_kl = running_kl / len(train_loader)
    te_loss, te_bce, te_kl = evaluate(test_loader, kl_weight)
    train_hist.append((tr_loss, tr_bce, tr_kl, kl_weight))
    test_hist.append((te_loss, te_bce, te_kl, kl_weight))
    print(f"Epoch {epoch:02d}/{epochs} | "
          f"KLw={kl_weight:.3f} | "
          f"Train: loss={tr_loss:.3f} (bce={tr_bce:.3f}, kl={tr_kl:.3f}) | "
          f"Test: loss={te_loss:.3f} (bce={te_bce:.3f}, kl={te_kl:.3f})")
# ---------------------------
# Reconstructions (qualitative)
# ---------------------------
model.eval()
x, _ = next(iter(test_loader))
x = x[:16].to(device)
with torch.no_grad():
    x_hat, _, _ = model(x)
grid = utils.make_grid(torch.cat([x.cpu(), x_hat.cpu()], dim=0), nrow=16, padding=2)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
recon_path = os.path.join(save_dir, f"recon_{ts}.png")
utils.save_image(grid, recon_path)
plt.figure(figsize=(12, 4))
plt.title("Top: original | Bottom: reconstruction")
plt.imshow(grid.permute(1, 2, 0), cmap="gray")
plt.axis("off")
plt.show()
print(f"Saved reconstructions → {recon_path}")
# ---------------------------
# Sampling new images from prior z~N(0,I)
# ---------------------------
with torch.no_grad():
    z = torch.randn(64, latent_dim).to(device)
    samples = model.decode(z).cpu()
sample_grid = utils.make_grid(samples, nrow=8, padding=2)
sample_path = os.path.join(save_dir, f"samples_{ts}.png")
utils.save_image(sample_grid, sample_path)
plt.figure(figsize=(8, 8))
plt.title("Generated samples from prior")
plt.imshow(sample_grid.permute(1, 2, 0), cmap="gray")
plt.axis("off")
plt.show()
print(f"Saved samples → {sample_path}")
# ---------------------------
# Latent Interpolation (fun extra)
# ---------------------------
def interpolate(z1, z2, steps=10):
    alphas = torch.linspace(0, 1, steps, device=z1.device)
    return torch.stack([(1-a)*z1 + a*z2 for a in alphas], dim=0)

with torch.no_grad():
    x_pair, _ = next(iter(test_loader))
    a, b = x_pair[0:1].to(device), x_pair[1:2].to(device)
    mu1, logvar1 = model.encode(a)
    mu2, logvar2 = model.encode(b)
    z1 = model.reparameterize(mu1, logvar1)
    z2 = model.reparameterize(mu2, logvar2)
    z_path = interpolate(z1, z2, steps=12).view(-1, latent_dim)
    interp_imgs = model.decode(z_path).cpu()
interp_grid = utils.make_grid(interp_imgs, nrow=12, padding=2)
interp_path = os.path.join(save_dir, f"interpolation_{ts}.png")
utils.save_image(interp_grid, interp_path)
plt.figure(figsize=(14, 2))
plt.title("Latent interpolation")
plt.imshow(interp_grid.permute(1, 2, 0), cmap="gray")
plt.axis("off")
plt.show()
print(f"Saved interpolation → {interp_path}")
OUTPUT:
100%|██████████| 26.4M/26.4M [00:01<00:00, 20.8MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 341kB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 6.20MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 13.0MB/s]
Epoch 01/10 | KLw=0.333 | Train: loss=261.640 (bce=252.647, kl=26.980) | Test: loss=235.369 (bce=225.651, kl=29.152)
Epoch 02/10 | KLw=0.667 | Train: loss=239.617 (bce=224.292, kl=22.988) | Test: loss=238.817 (bce=223.652, kl=22.747)
Epoch 03/10 | KLw=1.000 | Train: loss=243.470 (bce=224.881, kl=18.589) | Test: loss=244.064 (bce=227.098, kl=16.966)
Epoch 04/10 | KLw=1.000 | Train: loss=241.623 (bce=223.703, kl=17.920) | Test: loss=242.497 (bce=225.012, kl=17.485)
Epoch 05/10 | KLw=1.000 | Train: loss=240.609 (bce=222.907, kl=17.702) | Test: loss=241.773 (bce=224.204, kl=17.569)
Epoch 06/10 | KLw=1.000 | Train: loss=239.785 (bce=222.283, kl=17.502) | Test: loss=241.349 (bce=223.788, kl=17.561)
Epoch 07/10 | KLw=1.000 | Train: loss=239.252 (bce=221.841, kl=17.410) | Test: loss=240.661 (bce=223.559, kl=17.102)
Epoch 08/10 | KLw=1.000 | Train: loss=238.795 (bce=221.429, kl=17.366) | Test: loss=240.623 (bce=222.759, kl=17.865)
Epoch 09/10 | KLw=1.000 | Train: loss=238.494 (bce=221.173, kl=17.322) | Test: loss=239.935 (bce=222.524, kl=17.411)
Epoch 10/10 | KLw=1.000 | Train: loss=238.147 (bce=220.908, kl=17.239) | Test: loss=239.664 (bce=222.118, kl=17.546)