# Training Custom Models

This tutorial guides you through training a custom retina model with OpenRetina.
## Overview

Training a model in OpenRetina involves:

- Preparing your dataset
- Defining the model architecture
- Configuring training parameters
- Running the training loop
- Evaluating the trained model
## Prerequisites

Before starting, ensure you have installed OpenRetina with development dependencies:

```bash
pip install -e ".[dev]"
```
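A quick import check confirms the environment is set up:

```bash
python -c "import openretina, torch; print(torch.__version__)"
```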
## Setting Up Your Data

First, prepare your own data or use one of the built-in datasets. The snippet below sketches loading the `hoefling_2024` dataset; the exact loading functions and their signatures may differ between OpenRetina versions, so check the `openretina.data_io.hoefling_2024` module for the current API:
```python
# Import data loading functions
from openretina.data_io.hoefling_2024.dataloaders import natmov_dataloaders_v2
from openretina.data_io.hoefling_2024.responses import get_all_responses
from openretina.data_io.hoefling_2024.stimuli import get_all_movies

# Load responses and movies
responses = get_all_responses()
movies = get_all_movies()

# Create dataloaders, holding out clips 0 and 1 for validation
dataloaders = natmov_dataloaders_v2(
    neuron_data_dictionary=responses,
    movies_dictionary=movies,
    validation_clip_indices=[0, 1],
    batch_size=32,
)

# Access the individual splits
train_dataloaders = dataloaders["train"]
validation_dataloaders = dataloaders["validation"]
test_dataloaders = dataloaders["test"]
```
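As a quick sanity check, you can pull one batch from a training session. This is a sketch that assumes the split holds a dictionary of per-session loaders, each yielding `(stimulus, response)` pairs; the exact batch structure may differ between OpenRetina versions:

```python
# Inspect one batch from the first training session.
# Assumption: each per-session loader yields (stimulus, response) tensors.
for session_key, loader in dataloaders["train"].items():
    stimulus, response = next(iter(loader))
    print(session_key, tuple(stimulus.shape), tuple(response.shape))
    break
```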
## Defining Your Model

Next, define your model architecture. This example builds a small core and readout on top of OpenRetina's base classes and wires them together inside a LightningModule. OpenRetina's `openretina.models.core_readout` module also provides ready-made core-readout models; the hand-rolled version below keeps every piece visible:
```python
import torch
from pytorch_lightning import LightningModule

from openretina.modules.core.base_core import Core
from openretina.modules.readout.base import Readout


class SimpleCore(Core):
    def __init__(self, input_channels=2, hidden_channels=32):
        super().__init__()
        self.features = torch.nn.Sequential(
            torch.nn.Conv3d(input_channels, hidden_channels, kernel_size=(5, 3, 3), padding=(2, 1, 1)),
            torch.nn.ReLU(),
            torch.nn.Conv3d(hidden_channels, hidden_channels, kernel_size=(5, 3, 3), padding=(2, 1, 1)),
            torch.nn.ReLU(),
        )

    def forward(self, x):
        return self.features(x)

    def stimulus_shape(self, time_steps, num_batches=1):
        return (num_batches, 2, time_steps, 16, 18)


class SimpleReadout(Readout):
    def __init__(self, input_shape, num_neurons=150):
        super().__init__()
        self.num_neurons = num_neurons
        _, channels, _, height, width = input_shape
        self.linear = torch.nn.Linear(channels * height * width, num_neurons)

    def forward(self, x):
        # x has shape (batch, channels, time, height, width)
        batch, channels, time, height, width = x.shape
        x = x.permute(0, 2, 1, 3, 4)  # (batch, time, channels, height, width)
        x = x.reshape(batch * time, channels * height * width)
        x = self.linear(x)
        return x.reshape(batch, time, self.num_neurons)


class RetinaLightningModel(LightningModule):
    def __init__(self, learning_rate=1e-3):
        super().__init__()
        # Core: extracts spatiotemporal features from the stimulus
        self.core = SimpleCore(input_channels=2, hidden_channels=32)
        # Readout: maps core features to per-neuron responses
        input_shape = (1, 32, 1, 16, 18)  # (batch, channels, time, height, width)
        self.readout = SimpleReadout(input_shape, num_neurons=150)
        # Save hyperparameters
        self.learning_rate = learning_rate
        self.save_hyperparameters()

    def forward(self, x):
        # Chain core and readout
        return self.readout(self.core(x))

    def training_step(self, batch, batch_idx):
        session_key, (stimulus, response) = batch
        y_hat = self(stimulus)
        # Poisson negative log-likelihood; with the default log_input=True,
        # the model output is interpreted as a log firing rate
        loss = torch.nn.functional.poisson_nll_loss(y_hat, response)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        session_key, (stimulus, response) = batch
        y_hat = self(stimulus)
        loss = torch.nn.functional.poisson_nll_loss(y_hat, response)
        self.log("val_loss", loss)
        # Correlation between predicted and recorded responses, averaged over neurons
        with torch.no_grad():
            pred = y_hat.reshape(-1, y_hat.shape[-1])
            target = response.reshape(-1, response.shape[-1])
            self.log("val_correlation", calculate_correlation(pred, target).mean())

    def test_step(self, batch, batch_idx):
        # Mirrors validation_step so Trainer.test() can be used later in this tutorial
        session_key, (stimulus, response) = batch
        y_hat = self(stimulus)
        self.log("test_loss", torch.nn.functional.poisson_nll_loss(y_hat, response))
        pred = y_hat.reshape(-1, y_hat.shape[-1])
        target = response.reshape(-1, response.shape[-1])
        self.log("test_correlation", calculate_correlation(pred, target).mean())

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)


def calculate_correlation(pred, target):
    """Calculate the per-neuron correlation coefficient between predictions and targets."""
    pred_centered = pred - pred.mean(dim=0, keepdim=True)
    target_centered = target - target.mean(dim=0, keepdim=True)
    pred_std = torch.sqrt(torch.sum(pred_centered**2, dim=0))
    target_std = torch.sqrt(torch.sum(target_centered**2, dim=0))
    return torch.sum(pred_centered * target_centered, dim=0) / (pred_std * target_std + 1e-8)
```
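Before wiring the model into a training run, it is worth checking that tensors flow through it with the expected shapes. The following sketch, using only the classes defined above, pushes a random stimulus through the model:

```python
import torch

# Shape check: a random stimulus of the shape declared by SimpleCore.stimulus_shape
model = RetinaLightningModel()
dummy_stimulus = torch.randn(model.core.stimulus_shape(time_steps=50))
with torch.no_grad():
    output = model(dummy_stimulus)
print(output.shape)  # expected: torch.Size([1, 50, 150]) -> (batch, time, neurons)
```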
## Configuring Training with Hydra

OpenRetina uses Hydra for configuration management. Create a configuration file at `config/config.yaml`:
```yaml
# config/config.yaml
training:
  max_epochs: 100
  batch_size: 32
  learning_rate: 0.001

model:
  core:
    type: "SimpleCore"
    input_channels: 2
    hidden_channels: 32
  readout:
    type: "SimpleReadout"
    num_neurons: 150

data:
  dataset: "hoefling_2024"
  validation_clip_indices: [0, 1]
  batch_size: 32
```
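Hydra is built on OmegaConf, so you can sanity-check the file outside of a training run. This is a small sketch; adjust the path to wherever you saved the file:

```python
from omegaconf import OmegaConf

# Load and print the configuration without launching Hydra
cfg = OmegaConf.load("config/config.yaml")
print(OmegaConf.to_yaml(cfg))
print(cfg.training.max_epochs, cfg.data.validation_clip_indices)
```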
## Running the Training

To train your model, save the following as a script (for example `train.py`) and run it:
```python
import hydra
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from torch.utils.data import DataLoader

from openretina.data_io.cyclers import LongCycler
from openretina.data_io.hoefling_2024.dataloaders import natmov_dataloaders_v2
from openretina.data_io.hoefling_2024.responses import get_all_responses
from openretina.data_io.hoefling_2024.stimuli import get_all_movies


@hydra.main(config_path="config", config_name="config")
def train(cfg):
    # Initialize the model
    model = RetinaLightningModel(learning_rate=cfg.training.learning_rate)

    # Load data
    responses = get_all_responses()
    movies = get_all_movies()
    dataloaders = natmov_dataloaders_v2(
        neuron_data_dictionary=responses,
        movies_dictionary=movies,
        validation_clip_indices=cfg.data.validation_clip_indices,
        batch_size=cfg.data.batch_size,
    )

    # Cyclers iterate over the per-session dataloaders for multi-session training
    train_cycler = LongCycler(dataloaders["train"], shuffle=True)
    val_cycler = LongCycler(dataloaders["validation"], shuffle=False)

    # Wrap the cyclers in DataLoaders (batching is handled by the session loaders)
    train_loader = DataLoader(train_cycler, batch_size=None)
    val_loader = DataLoader(val_cycler, batch_size=None)

    # Callbacks: keep the best checkpoints and stop when validation stops improving
    checkpoint_callback = ModelCheckpoint(
        monitor="val_correlation",
        mode="max",
        save_top_k=3,
        filename="{epoch}-{val_correlation:.4f}",
    )
    early_stopping = EarlyStopping(
        monitor="val_correlation",
        patience=10,
        mode="max",
    )

    # Initialize the trainer
    trainer = Trainer(
        max_epochs=cfg.training.max_epochs,
        callbacks=[checkpoint_callback, early_stopping],
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
    )

    # Train the model and save the final weights
    trainer.fit(model, train_loader, val_loader)
    trainer.save_checkpoint("final_model.ckpt")
    return model


if __name__ == "__main__":
    train()
```
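Hydra also lets you override any configuration value from the command line, so you can launch variations without editing the YAML file (assuming the script above is saved as `train.py`):

```bash
python train.py training.max_epochs=50 data.batch_size=16 training.learning_rate=0.0005
```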
## Evaluating the Trained Model

After training, evaluate your model on the test set:
```python
# Load the best model (replace the path with the checkpoint reported by ModelCheckpoint)
best_model = RetinaLightningModel.load_from_checkpoint("best_model.ckpt")

# Cycle through the per-session test dataloaders
test_cycler = LongCycler(dataloaders["test"], shuffle=False)
test_loader = DataLoader(test_cycler, batch_size=None)

# Run the test loop defined in test_step
trainer = Trainer(accelerator="gpu" if torch.cuda.is_available() else "cpu", devices=1)
test_results = trainer.test(best_model, test_loader)
print(f"Test results: {test_results}")
```
## Visualizing Model Filters

Visualize what your model has learned by plotting its first-layer filters:
```python
import matplotlib.pyplot as plt

# Extract the filters of the first convolutional layer in the core
filters = best_model.core.features[0].weight.detach().cpu()

# Plot the spatial profile of the first few filters
# (first input channel, middle time step)
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
time_idx = filters.shape[2] // 2
for i, ax in enumerate(axes.flat):
    if i < filters.shape[0]:
        spatial_filter = filters[i, 0, time_idx].numpy()
        im = ax.imshow(spatial_filter, cmap="RdBu_r")
        ax.set_title(f"Filter {i + 1}")
    ax.axis("off")
plt.colorbar(im, ax=axes.ravel().tolist())
plt.savefig("model_filters.png")
plt.show()
```
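The temporal structure of the same filters can be inspected by plotting the kernel weights over time at the spatial centre, reusing the `filters` tensor extracted above:

```python
import matplotlib.pyplot as plt

# Temporal profile of each first-layer filter (first input channel, spatial centre)
center_h, center_w = filters.shape[3] // 2, filters.shape[4] // 2
plt.figure(figsize=(8, 4))
for i in range(min(6, filters.shape[0])):
    plt.plot(filters[i, 0, :, center_h, center_w].numpy(), label=f"Filter {i + 1}")
plt.xlabel("Time step")
plt.ylabel("Weight")
plt.legend()
plt.savefig("temporal_filters.png")
plt.show()
```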
## Tips for Successful Training

- **Regularization**: Add regularization (for example, L2 weight decay or smoothness penalties on the convolutional filters) to prevent overfitting; see the sketch after this list
- **Learning Rate**: Start with a conservative learning rate (e.g., 1e-3) and adjust based on the training curves
- **Batch Size**: Use the largest batch size that fits in your GPU memory
- **Data Augmentation**: Consider applying data augmentation for more robust models
- **Model Complexity**: Start with a simple model and gradually increase complexity
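As a minimal illustration of the regularization tip, L2 weight decay can be added by overriding `configure_optimizers`; the `weight_decay` value here is only an illustrative starting point to tune on the validation set:

```python
import torch

class RegularizedRetinaModel(RetinaLightningModel):
    # Same model as above, but with L2 weight decay on all parameters
    def configure_optimizers(self):
        # weight_decay=1e-4 is an assumption; tune it for your dataset
        return torch.optim.AdamW(self.parameters(), lr=self.learning_rate, weight_decay=1e-4)
```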
## Next Steps

After training your model, you can:

- Analyze its behavior using in-silico experiments (see the gradient-ascent sketch below)
- Fine-tune it for specific applications
- Save and share it with the community
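As a starting point for in-silico experiments, the sketch below uses plain gradient ascent to find a stimulus that strongly drives a single model neuron; the neuron index, stimulus length, learning rate, and step count are all arbitrary choices:

```python
import torch

# Gradient-ascent sketch: optimize a stimulus to maximize one neuron's mean response
best_model.eval()
for p in best_model.parameters():
    p.requires_grad_(False)  # only the stimulus is optimized

stimulus = torch.zeros(best_model.core.stimulus_shape(time_steps=50), requires_grad=True)
optimizer = torch.optim.Adam([stimulus], lr=0.1)

neuron_idx = 0
for step in range(200):
    optimizer.zero_grad()
    response = best_model(stimulus)            # (batch, time, neurons)
    loss = -response[..., neuron_idx].mean()   # ascend on the neuron's mean response
    loss.backward()
    optimizer.step()

print("Optimized stimulus shape:", tuple(stimulus.shape))
```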