PyTorch Autoencoder
An autoencoder (AE) is an unsupervised neural network that learns to compress input data into a low-dimensional latent space and then reconstruct the original data from that compressed representation.
Autoencoders are widely used in data dimensionality reduction, feature extraction, anomaly detection, image denoising, generative models, and other scenarios.
* * *
## 1. Basic Principles of Autoencoders
The basic structure of an autoencoder contains three parts:
* **Encoder**: Maps input data \(x\) to a low-dimensional latent representation \(z\)
* **Latent Space**: The low-dimensional vector output by the encoder, also called the bottleneck layer
* **Decoder**: Reconstructs the latent representation \(z\) into output \(\hat{x}\)
### 1.1 Network Structure
The goal of the autoencoder is to make the output \(\hat{x}\) as close as possible to the input \(x\):
$$
\min_{\theta, \phi} \; \frac{1}{n} \sum_{i=1}^{n} \left\| x_{i} - D_{\phi}\left(E_{\theta}(x_{i})\right) \right\|^{2}
$$
Where \(\theta\) are the encoder parameters, and \(\phi\) are the decoder parameters.
### 1.2 Dimensionality Reduction Effect
Autoencoders learn compressed representations of data by forcing data through a bottleneck layer with smaller dimensions than the input. This compression preserves the main information of the data.
> Compared with Principal Component Analysis (PCA), autoencoders can learn nonlinear dimensionality reduction and capture more complex data structures.
* * *
## 2. Basic Autoencoder Implementation
### 2.1 Simple Autoencoder
## Example
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# -- Autoencoder Model --------------------------------------------
class Autoencoder(nn.Module):
    """Fully connected autoencoder with mirrored encoder and decoder stacks.

    The encoder maps ``input_dim`` features through two ``hidden_dim`` layers
    down to ``latent_dim``; the decoder walks the same widths in reverse. The
    last decoder layer is linear, so reconstructions are unbounded.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()

        # Compression path: input -> hidden -> hidden -> latent (bottleneck).
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Reconstruction path: latent -> hidden -> hidden -> input.
        decoder_layers = [
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of ``x`` after passing it through the bottleneck."""
        return self.decoder(self.encoder(x))

    def encode(self, x):
        """Return the latent representation produced by the encoder for ``x``."""
        latent = self.encoder(x)
        return latent

    def decode(self, z):
        """Return the reconstruction produced by the decoder for latent ``z``."""
        reconstruction = self.decoder(z)
        return reconstruction
# -- Usage Example -------------------------------------------------
# Layer widths for the demo model.
INPUT_DIM = 784  # e.g. a flattened MNIST image
HIDDEN_DIM = 256
LATENT_DIM = 32  # bottleneck width, much smaller than the input width

model = Autoencoder(
    input_dim=INPUT_DIM,
    hidden_dim=HIDDEN_DIM,
    latent_dim=LATENT_DIM,
)

# Report how strongly the bottleneck compresses the input.
print(f"Input dimension: {INPUT_DIM}")
print(f"Latent dimension: {LATENT_DIM}")
print(f"Compression ratio: {INPUT_DIM / LATENT_DIM:.1f}x")

# Count every element of every parameter tensor in the model.
total_params = sum(map(torch.numel, model.parameters()))
print(f"Total parameters: {total_params:,}")
### 2.2 Convolutional Autoencoder
For image data, autoencoders using convolutional layers perform better:
## Example
import torch
import torch.nn as nn
class ConvAutoencoder(nn.Module):
    """Convolutional autoencoder for image batches.

    The encoder applies four stride-2 convolutions (each one halves height and
    width, ending with 256 channels), global-average-pools the feature map and
    projects it to a ``latent_dim``-dimensional vector. The decoder projects
    that vector back to 256 channels, tiles it over a small spatial grid and
    doubles the resolution four times.

    Args:
        channels: Number of channels of the input images and reconstructions.
        latent_dim: Length of the latent vector returned by :meth:`encode`.

    Note:
        ``forward`` reproduces the input resolution exactly only when the
        input height and width are multiples of 16 (four halvings followed by
        four doublings).
    """

    def __init__(self, channels=3, latent_dim=128):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder: shrink the spatial size, grow the channel count.
        # For a (batch, channels, 64, 64) input the feature map ends at 4x4.
        self.encoder = nn.Sequential(
            # H -> H/2 (64 -> 32)
            nn.Conv2d(channels, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # H/2 -> H/4 (32 -> 16)
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # H/4 -> H/8 (16 -> 8)
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            # H/8 -> H/16 (8 -> 4)
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        )

        # Latent space mapping: pool each of the 256 channels to one value,
        # then project to the requested latent width (and back for decoding).
        self.to_latent = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_latent = nn.Linear(256, latent_dim)
        self.fc_expand = nn.Linear(latent_dim, 256)

        # Decoder: every Upsample doubles height and width.
        self.from_latent = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(32, channels, kernel_size=3, padding=1),
            nn.Sigmoid(),  # Output in [0, 1]
        )

    def _features_to_latent(self, features):
        """Pool an encoder feature map and project it to ``(batch, latent_dim)``."""
        pooled = self.to_latent(features)
        pooled = pooled.view(pooled.size(0), -1)  # (batch, 256)
        return self.fc_latent(pooled)

    def forward(self, x):
        """Encode ``x`` and reconstruct it at the resolution of the input.

        The decoder is seeded with the spatial size of the encoder feature
        map, so the four upsampling steps undo the four stride-2 convolutions.
        """
        features = self.encoder(x)
        z = self._features_to_latent(features)
        height, width = features.shape[-2:]
        return self.decode(z, spatial_size=(height, width))

    def encode(self, x):
        """Return the latent vectors for ``x`` with shape ``(batch, latent_dim)``."""
        return self._features_to_latent(self.encoder(x))

    def decode(self, z, spatial_size=(2, 2)):
        """Reconstruct images from latent vectors ``z``.

        Args:
            z: Latent vectors of shape ``(batch, latent_dim)``.
            spatial_size: ``(height, width)`` of the grid the latent vector is
                tiled over before upsampling; the output is 16 times larger
                along each axis. The default ``(2, 2)`` yields 32x32 images.

        Returns:
            Tensor of shape ``(batch, channels, 16 * height, 16 * width)``
            with values in ``[0, 1]``.
        """
        height, width = spatial_size
        batch_size = z.size(0)
        # Reshape the vector to a 1x1 feature map and tile it spatially.
        seed = self.fc_expand(z).view(batch_size, 256, 1, 1)
        seed = seed.expand(-1, -1, height, width)
        return self.from_latent(seed)
# Test
# Build the model first, then a random batch of four 3-channel 64x64 images.
model = ConvAutoencoder(channels=3, latent_dim=128)
x = torch.randn(4, 3, 64, 64)

# Run one full encode/decode pass.
x_recon = model(x)

# Show the tensor shapes at the input, the output and the bottleneck.
print(f"Input shape: {x.shape}")
print(f"Output shape: {x_recon.shape}")
print(f"Latent vector shape: {model.encode(x).shape}")
### 2.3 Training and Reconstruction
## Example
import torch
import torch.nn as nn
import torch.optim as optim
# -- Training Configuration ----------------------------------------
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Model, reconstruction loss (mean squared error) and optimizer.
model = ConvAutoencoder(channels=3, latent_dim=128)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# -- Training Loop --------------------------------------------------
def train_autoencoder(model, dataloader, criterion, optimizer, num_epochs=10):
    """Train ``model`` to reconstruct its own inputs and return it.

    Args:
        model: Autoencoder module; called as ``model(images)``.
        dataloader: Iterable of batches. Each batch is either a tensor or a
            list/tuple whose first element is the input tensor (the form
            produced by ``TensorDataset`` and most labelled datasets).
        criterion: Loss called as ``criterion(outputs, images)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        The same ``model`` instance, left in training mode.
    """
    # Use the device the model already lives on instead of a module-level
    # global, so the function works wherever the caller placed the model.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for batch in dataloader:
            # Labelled datasets yield (inputs, labels, ...); only inputs are needed.
            images = batch[0] if isinstance(batch, (list, tuple)) else batch
            images = images.to(target_device)

            # Forward propagation: the target is the input itself.
            outputs = model(images)
            loss = criterion(outputs, images)

            # Backward propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Count batches instead of calling len(): this also works for
        # iterables without __len__ and avoids dividing by zero when empty.
        avg_loss = total_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.6f}")
    return model
# The training call below is left commented out because this example does not
# define `train_loader`; uncomment it once a dataloader of image batches exists.
# train_autoencoder(model, train_loader, criterion, optimizer, num_epochs=10)
# NOTE: this message is printed unconditionally, even while the call above
# stays commented out.
print("Autoencoder training completed!")
* * *
## 3. Denoising Autoencoder (DAE)
Denoising Autoencoder (DAE) adds noise to the input during training, then learns to remove the noise and recover the original input. This enables the model to learn more robust feature representations.
### 3.1 Denoising Autoencoder Implementation
## Example
import torch
import torch.nn as nn
class DenoisingAutoencoder(nn.Module):
    """Fully connected autoencoder whose reconstructions lie in ``[0, 1]``.

    The layer layout is input -> hidden -> hidden -> latent for the encoder
    and the reverse for the decoder, which ends in a ``Sigmoid``. The module
    itself adds no noise; corrupted inputs are supplied by the caller.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()

        # Compression path down to the latent width.
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Reconstruction path; the Sigmoid squashes outputs into [0, 1].
        decoder_layers = [
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decode(self.encode(x))

    def encode(self, x):
        """Return the latent representation of ``x``."""
        latent = self.encoder(x)
        return latent

    def decode(self, z):
        """Return the reconstruction for latent ``z``."""
        reconstruction = self.decoder(z)
        return reconstruction
def add_noise(x, noise_factor=0.3):
    """Return ``x`` corrupted by Gaussian noise and clipped to ``[0, 1]``.

    Standard-normal noise with the shape of ``x`` is scaled by
    ``noise_factor`` and added to ``x``; the sum is then clamped so every
    value stays between 0.0 and 1.0.
    """
    scaled_noise = torch.randn_like(x).mul(noise_factor)
    corrupted = x + scaled_noise
    return corrupted.clamp(0.0, 1.0)
# Train denoising autoencoder
def train_dae(model, dataloader, noise_factor=0.3, lr=1e-3, num_epochs=10,
              noise_fn=None):
    """Train a denoising autoencoder and return it.

    Each batch is corrupted, fed to the model, and the reconstruction is
    compared with the clean batch using mean squared error.

    Args:
        model: Module called as ``model(noisy_images)``.
        dataloader: Iterable of batches. Each batch is either a tensor or a
            list/tuple whose first element is the input tensor.
        noise_factor: Strength passed as the second argument to ``noise_fn``.
        lr: Learning rate of the Adam optimizer created here.
        num_epochs: Number of passes over ``dataloader`` (default 10).
        noise_fn: Callable ``noise_fn(images, noise_factor)`` returning the
            corrupted batch. Defaults to ``add_noise``.

    Returns:
        The same ``model`` instance, left in training mode.
    """
    if noise_fn is None:
        noise_fn = add_noise

    criterion = nn.MSELoss()
    # Reach Adam through the torch package so the function does not rely on
    # a separate `optim` alias being imported.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # The model's device does not change during training; look it up once.
    target_device = next(model.parameters()).device

    model.train()
    for _ in range(num_epochs):
        for batch in dataloader:
            # Labelled datasets yield (inputs, labels, ...); only inputs are needed.
            images = batch[0] if isinstance(batch, (list, tuple)) else batch
            images = images.to(target_device)

            # Add noise
            noisy_images = noise_fn(images, noise_factor)

            # Forward propagation
            outputs = model(noisy_images)
            # Compare with the original image, not the noisy image.
            loss = criterion(outputs, images)

            # Backward propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    return model
### 3.2 Other Noise Types
## Example
def salt_pepper_noise(x, prob=0.1):
    """Return a copy of ``x`` with salt-and-pepper noise applied.

    One uniform draw is made per element. Elements whose draw falls below
    ``prob / 2`` are set to 0.0 (pepper); elements whose draw exceeds
    ``1 - prob / 2`` are set to 1.0 (salt). ``x`` itself is left unchanged.
    """
    draws = torch.rand_like(x)
    half_prob = prob / 2
    pepper = draws < half_prob
    salt = draws > 1 - half_prob
    # Pepper is applied first and salt second, as separate non-mutating fills.
    return x.masked_fill(pepper, 0.0).masked_fill(salt, 1.0)
def mask_noise(x, prob=0.1):
    """Return ``x`` with randomly chosen elements zeroed (masking noise).

    One uniform draw is made per element; an element is kept when its draw is
    greater than ``prob`` and multiplied by zero otherwise.
    """
    keep = torch.rand_like(x).gt(prob)
    return torch.mul(x, keep.float())
def dropout_noise(x, rate=0.2):
    """Return ``x`` with dropout noise applied.

    One uniform draw is made per element; an element is kept when its draw is
    greater than ``rate`` and zeroed otherwise. Kept elements are divided by
    ``1 - rate``.

    Args:
        x: Floating-point input tensor.
        rate: Drop probability in ``[0, 1]``.

    Returns:
        The noisy tensor. With ``rate == 1`` every element is dropped and the
        result is all zeros.

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    if not 0.0 <= rate <= 1.0:
        raise ValueError(f"rate must be between 0 and 1, got {rate}")

    keep = (torch.rand_like(x) > rate).float()
    kept = x * keep
    if rate == 1.0:
        # The rescale factor 1 / (1 - rate) is undefined here; returning zeros
        # avoids the 0 / 0 = NaN the plain formula would produce.
        return torch.zeros_like(kept)
    return kept / (1 - rate)
* * *
## 4. Variational Autoencoder (VAE)
Variational Autoencoder (VAE) is a generative model that encodes data into a probability distribution in latent space, rather than a fixed vector. This allows us to sample from latent space to generate new data.
### 4.1 VAE Core Principles
The key innovation of VAE is learning the probability distribution of latent variables:
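* The encoder outputs the mean \(\mu\) and the log-variance \(\log \sigma^{2}\), from which the standard deviation \(\sigma = \exp\left(\tfrac{1}{2} \log \sigma^{2}\right)\) is recovered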
* Sample the latent vector \(z\) from the normal distribution \(\mathcal{N}(\mu, \sigma^{2})\)
* The decoder reconstructs data from \(z\)
To achieve a differentiable sampling process, the **Reparameterization Trick** is used:
$$
z = \mu + \sigma \odot \epsilon, \quad \epsilon \sim \mathcal{N}(0, I)
$$
### 4.2 VAE Implementation
## Example
import torch
import torch.nn as nn
import torch.optim as optim
class VAE(nn.Module):
"""
Variational Autoencoder
"""
def __init__ (self, input_dim, hidden_dim, latent_dim):
super(). __init__ ()
# Encoder: Output mean and log variance
self.encoder= nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
)
self.fc_mu= nn.Linear(hidden_dim, latent_dim)
self.fc_logvar= nn.Linear(hidden_dim, latent_dim)
# Decoder
self.decoder= nn.Sequential(
nn.Lin
YouTip