Examples
This page contains comprehensive examples for common use cases.
Example 1: Basic 2D Image Processing
Complete workflow for processing 2D images:
import torch
from qlty import NCYXQuilt

# Configure the quilt for 256x256 images: 64x64 windows moved in steps of 32
# (50% overlap), with border=(8, 8) and border_weight=0.1.
quilt = NCYXQuilt(
    Y=256,
    X=256,
    window=(64, 64),
    step=(32, 32),
    border=(8, 8),
    border_weight=0.1,
)

# Stand-in input: 20 three-channel images
images = torch.randn(20, 3, 256, 256)

# Cut the images into patches
patches = quilt.unstitch(images)
print(f"Created {patches.shape[0]} patches from {images.shape[0]} images")

# Run the patches through your own model (`your_model` is a placeholder)
processed_patches = your_model(patches)

# Reassemble the processed patches; one result per input image is expected
reconstructed, weights = quilt.stitch(processed_patches)
assert reconstructed.shape[0] == images.shape[0]
Example 2: Training with Input-Output Pairs
Training a model on unstitched patches:
from qlty import NCYXQuilt
import torch

quilt = NCYXQuilt(Y=128, X=128, window=(32, 32), step=(16, 16), border=(5, 5))

# Training data
input_images = torch.randn(100, 3, 128, 128)
target_labels = torch.randn(100, 128, 128)

# Unstitch inputs and targets together so the patches stay paired
input_patches, target_patches = quilt.unstitch_data_pair(input_images, target_labels)

# Training loop
# NOTE: `model` and `criterion` are placeholders for your own network and loss.
model.train()
optimizer = torch.optim.Adam(model.parameters())
for inp, tgt in zip(input_patches, target_patches):
    optimizer.zero_grad()
    # unsqueeze(0) adds a leading batch dimension of size 1 to each patch
    output = model(inp.unsqueeze(0))
    loss = criterion(output, tgt.unsqueeze(0))
    loss.backward()
    optimizer.step()
Example 3: Large Dataset with Disk Caching
Processing datasets too large for memory:
from qlty import LargeNCYXQuilt
import torch
import tempfile
import os
import shutil

# Setup: the quilt's on-disk files are created under a fresh temporary directory
temp_dir = tempfile.mkdtemp()
filename = os.path.join(temp_dir, "large_dataset")
quilt = LargeNCYXQuilt(
    filename=filename,
    N=1000,  # 1000 images
    Y=1024, X=1024,  # Large images
    window=(256, 256),
    step=(128, 128),
    border=(20, 20),
    border_weight=0.1,
)

# Load data (or iterate through dataset)
data = torch.randn(1000, 3, 1024, 1024)

# Process all chunks (`model` is a placeholder for your own network)
print(f"Processing {quilt.N_chunks} chunks...")
for i in range(quilt.N_chunks):
    if i % 100 == 0:
        print(f"Progress: {i}/{quilt.N_chunks}")
    index, patch = quilt.unstitch_next(data)
    # Process patch
    with torch.no_grad():
        processed = model(patch.unsqueeze(0))
    # Accumulate
    quilt.stitch(processed, index)

# Get final results: return_mean() gives the mean only; pass std=True to also
# get the standard deviation.
mean_result = quilt.return_mean()
mean_result, std_result = quilt.return_mean(std=True)
print(f"Final shape: {mean_result.shape}")

# Cleanup: remove the cache stores written next to `filename` ...
for suffix in ["_mean_cache.zarr", "_std_cache.zarr", "_norma_cache.zarr",
               "_mean.zarr", "_std.zarr"]:
    path = filename + suffix
    if os.path.exists(path):
        shutil.rmtree(path)
# ... and then the temporary directory itself, so nothing is left behind.
shutil.rmtree(temp_dir, ignore_errors=True)
Example 4: Handling Sparse/Missing Data
Filtering out patches with no valid data:
import torch
from qlty import NCYXQuilt, weed_sparse_classification_training_pairs_2D

quilt = NCYXQuilt(Y=128, X=128, window=(32, 32), step=(16, 16), border=(5, 5))

# Data with missing labels
input_data = torch.randn(50, 3, 128, 128)
labels = torch.ones(50, 128, 128) * (-1)  # All missing initially

# Add some valid data in the central 68x68 region of every label image
labels[:, 30:98, 30:98] = torch.randint(0, 10, (50, 68, 68)).float()

# Unstitch
input_patches, label_patches = quilt.unstitch_data_pair(
    input_data, labels, missing_label=-1
)
print(f"Total patches: {input_patches.shape[0]}")

# Filter valid patches
border_tensor = quilt.border_tensor()
valid_input, valid_labels, removed_mask = weed_sparse_classification_training_pairs_2D(
    input_patches, label_patches, missing_label=-1, border_tensor=border_tensor
)
print(f"Valid patches: {valid_input.shape[0]}")
print(f"Removed patches: {removed_mask.sum().item()}")
Example 5: 3D Volume Processing
Processing 3D medical imaging or microscopy data:
import torch
from qlty import NCZYXQuilt

# 64-voxel cubic windows moved in steps of 32 (50% overlap in each dimension)
quilt = NCZYXQuilt(
    Z=128,
    Y=128,
    X=128,
    window=(64, 64, 64),
    step=(32, 32, 32),
    border=(8, 8, 8),
    border_weight=0.1,
)

# Stand-in 3D data laid out as (N, C, Z, Y, X)
volumes = torch.randn(10, 1, 128, 128, 128)

# Split the volumes into patches
patches = quilt.unstitch(volumes)
print(f"Created {patches.shape[0]} patches from {volumes.shape[0]} volumes")

# Apply your own 3D model (`your_3d_model` is a placeholder)
processed = your_3d_model(patches)

# Reassemble; the result is expected to match the input shape exactly
reconstructed, weights = quilt.stitch(processed)
assert reconstructed.shape == volumes.shape
Example 6: Inference with Softmax Handling
Correct way to handle softmax when stitching:
from qlty import NCYXQuilt
import torch
import torch.nn.functional as F

quilt = NCYXQuilt(Y=256, X=256, window=(64, 64), step=(32, 32), border=(8, 8))
image = torch.randn(1, 3, 256, 256)
patches = quilt.unstitch(image)

# Process patches (get logits, NOT softmax); `model` is a placeholder
with torch.no_grad():
    logits = model(patches)  # Shape: (M, num_classes, 64, 64)

# Stitch logits first
stitched_logits, weights = quilt.stitch(logits)

# THEN apply softmax
probabilities = F.softmax(stitched_logits, dim=1)

# This is the correct order: overlapping logits are combined during stitching,
# and softmax is applied once to the combined logits. Applying softmax to each
# patch and then stitching the probabilities gives a different result.
Example 7: Custom Border Weighting
Experimenting with different border weights:
import torch
from qlty import NCYXQuilt

# Test different border weights
for border_weight in [0.0, 0.1, 0.5, 1.0]:
    quilt = NCYXQuilt(
        Y=128, X=128,
        window=(32, 32),
        step=(16, 16),
        border=(5, 5),
        border_weight=border_weight,
    )
    data = torch.randn(5, 3, 128, 128)

    # Round trip with no processing in between
    patches = quilt.unstitch(data)
    reconstructed, weights = quilt.stitch(patches)

    # Evaluate reconstruction quality (mean absolute difference from the input)
    error = torch.mean(torch.abs(reconstructed - data))
    print(f"Border weight {border_weight}: Error = {error:.6f}")
Example 8: Batch Processing for Efficiency
Processing patches in batches for better GPU utilization:
from qlty import NCYXQuilt
import torch

quilt = NCYXQuilt(Y=512, X=512, window=(128, 128), step=(64, 64), border=(10, 10))
image = torch.randn(1, 3, 512, 512)
patches = quilt.unstitch(image)

# Process in batches (`model` is a placeholder for your own network)
batch_size = 32
processed_patches = []
for i in range(0, len(patches), batch_size):
    batch = patches[i:i+batch_size]  # the final batch may be shorter
    with torch.no_grad():
        output = model(batch)
    processed_patches.append(output)

# Rejoin the per-batch outputs along the patch dimension before stitching
processed_patches = torch.cat(processed_patches, dim=0)
result, weights = quilt.stitch(processed_patches)
Example 9: Combining with DataLoaders
Integrating with PyTorch DataLoaders:
import torch
from torch.utils.data import Dataset, DataLoader
from qlty import NCYXQuilt


class PatchedDataset(Dataset):
    """Dataset that serves (input patch, label patch) pairs.

    All patches are produced once, at construction time, by calling
    ``quilt.unstitch_data_pair(images, labels)``.
    """

    def __init__(self, images, labels, quilt):
        self.quilt = quilt
        self.input_patches, self.label_patches = quilt.unstitch_data_pair(
            images, labels
        )

    def __len__(self):
        """Return the number of input patches."""
        return len(self.input_patches)

    def __getitem__(self, idx):
        """Return the input patch and label patch stored at position ``idx``."""
        return self.input_patches[idx], self.label_patches[idx]


# Create dataset
images = torch.randn(100, 3, 128, 128)
labels = torch.randn(100, 128, 128)
quilt = NCYXQuilt(Y=128, X=128, window=(32, 32), step=(16, 16), border=(5, 5))
dataset = PatchedDataset(images, labels, quilt)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Train
for batch_input, batch_labels in dataloader:
    # Training code...
    pass
Example 10: Error Handling and Validation
Proper error handling:
from qlty import NCYXQuilt
import torch

# Valid usage
try:
    quilt = NCYXQuilt(
        Y=128, X=128,
        window=(32, 32),
        step=(16, 16),
        border=(5, 5),
        border_weight=0.1,
    )
    print("✓ Quilt created successfully")
except ValueError as e:
    print(f"✗ Error: {e}")

# Invalid border_weight
try:
    quilt = NCYXQuilt(Y=128, X=128, window=(32, 32), step=(16, 16),
                      border=(5, 5), border_weight=2.0)  # Invalid!
except ValueError as e:
    print(f"✓ Caught error: {e}")

# Invalid border dimensions
try:
    quilt = NCYXQuilt(Y=128, X=128, window=(32, 32), step=(16, 16),
                      border=(1, 2, 3))  # Wrong size for 2D!
except ValueError as e:
    print(f"✓ Caught error: {e}")
Example 11: Pre-Tokenization for Patch Processing (2D)
What and Why: The pretokenizer_2d module converts image patches into token
sequences so that sequence-based models (like transformers) can work with them.
This is useful for:
Self-supervised learning: Learning representations from patch pairs with known geometric relationships
Contrastive learning: Using overlapping tokens as positive pairs
Sequence models: Converting 2D patches into token sequences with spatial awareness
Efficient batch processing: Processing many patch pairs in parallel with numba acceleration
The key innovation is that it identifies which tokens overlap between two patches that have undergone a known rigid transformation (translation + rotation), providing the overlap information needed for training sequence-based models.
Basic Usage - Single Patch Pair:
import torch
from qlty import extract_patch_pairs, build_sequence_pair, tokenize_patch

# Step 1: Extract patch pairs using qlty's existing functionality
images = torch.randn(5, 3, 128, 128)
patches1, patches2, deltas, rotations = extract_patch_pairs(
    images,
    window=(64, 64),
    num_patches=10,
    delta_range=(10.0, 20.0),
    random_seed=42,
)

# Step 2: Build a sequence pair with overlap information for the first pair.
# This tokenizes both patches and finds overlapping tokens.
dx = deltas[0, 0].item()       # Translation in x
dy = deltas[0, 1].item()       # Translation in y
rot_k90 = rotations[0].item()  # Rotation (0, 1, 2, or 3 for 0°, 90°, 180°, 270°)
result = build_sequence_pair(
    patches1[0],    # First patch: (3, 64, 64)
    patches2[0],    # Second patch: (3, 64, 64)
    dx=dx,
    dy=dy,
    rot_k90=rot_k90,
    patch_size=16,  # Size of each token
    stride=8,       # Stride for overlapping tokens (default: patch_size//2)
)

# Inspect the result
print(f"Tokens from patch1: {result['tokens1'].shape}")  # (T, D) where T=number of tokens
print(f"Tokens from patch2: {result['tokens2'].shape}")  # (T, D)
print(f"Overlapping tokens: {result['overlap_mask1'].sum().item()} out of {result['tokens1'].shape[0]}")

# How to use the entries for training:
# - tokens1, tokens2: Input to your sequence model (e.g., transformer)
# - coords1, coords2: Absolute coordinates for positional encoding
# - overlap_mask1, overlap_mask2: Which tokens have corresponding overlaps
# - overlap_indices1_to_2: Mapping from patch1 tokens to patch2 tokens
# - overlap_fractions: How much each token overlaps (0.0 to 1.0)
Batch Processing - Efficient for Large Datasets:
# Hand the whole batch of patch pairs to build_sequence_pair in a single call
# (much faster than looping over pairs!)
x_shifts = deltas[:, 0]  # (50,) - x translations
y_shifts = deltas[:, 1]  # (50,) - y translations
batch_result = build_sequence_pair(
    patches1,           # (50, 3, 64, 64) - batch of patches
    patches2,           # (50, 3, 64, 64)
    dx=x_shifts,
    dy=y_shifts,
    rot_k90=rotations,  # (50,) - rotations
    patch_size=16,
    stride=8,
)

# The batch result holds padded tensors for efficient processing
print(f"Batch tokens1: {batch_result['tokens1'].shape}")  # (50, T_max, D)
print(f"Sequence lengths: {batch_result['sequence_lengths']}")  # (50,) - actual lengths
print(f"Overlap counts: {batch_result['overlap_pair_counts']}")  # (50,) - overlaps per pair

# sequence_lengths lets you mask the padding in your model;
# overlap_pair_counts helps you understand the data distribution.
Tokenization Only - When You Just Need Tokens:
# Tokenize a single patch without computing any overlap information
patch = torch.randn(3, 64, 64)
tokens, coords = tokenize_patch(
    patch,
    patch_size=16,
    stride=8,
)

print(f"Created {tokens.shape[0]} tokens")
print(f"Token shape: {tokens.shape[1]}")  # 3*16*16 = 768 dimensions
print(f"Coordinates shape: {coords.shape}")  # (T, 2) - (y, x) for each token

# tokens: input to sequence models
# coords: input to positional encoding
Real-World Use Case - Self-Supervised Learning:
import torch
import torch.nn as nn
from qlty import extract_patch_pairs, build_sequence_pair

# Draw patch pairs from unlabeled images
images = torch.randn(100, 3, 256, 256)
patches1, patches2, deltas, rotations = extract_patch_pairs(
    images,
    window=(128, 128),
    num_patches=20,
    delta_range=(20.0, 40.0),
)

# Build sequence pairs for every patch pair at once
batch_result = build_sequence_pair(
    patches1,
    patches2,
    deltas[:, 0],
    deltas[:, 1],
    rotations,
    patch_size=32,
    stride=16,
)
# Train a transformer to predict overlapping tokens
class PatchTransformer(nn.Module):
    """Transformer encoder that maps a sequence of patch tokens to predicted tokens.

    Args:
        token_dim: Length of each flattened input token; also the output size.
        hidden_dim: Width of the transformer. Must be divisible by the 8
            attention heads used by the encoder layers.
    """

    def __init__(self, token_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Linear(token_dim, hidden_dim)
        # Learned projection of token coordinates into the hidden space.
        # Assumes the last dimension of `coords` has size 2 - adjust if not.
        self.positional_encoding = nn.Linear(2, hidden_dim)
        self.transformer = nn.TransformerEncoder(
            # batch_first=True: inputs are (batch, tokens, features), so
            # attention runs over the tokens of each sample, not across samples.
            nn.TransformerEncoderLayer(hidden_dim, nhead=8, batch_first=True),
            num_layers=6,
        )
        self.predictor = nn.Linear(hidden_dim, token_dim)

    def forward(self, tokens, coords, mask):
        """Predict one output token per input token.

        Args:
            tokens: Token tensor of shape (batch, T, token_dim).
            coords: Token coordinates of shape (batch, T, 2); cast to the
                dtype of `tokens` before being projected.
            mask: Accepted for interface compatibility; not used here.

        Returns:
            Tensor of shape (batch, T, token_dim).
        """
        # Add positional encoding from coords
        pos_enc = self.positional_encoding(coords.to(tokens.dtype))
        x = self.embedding(tokens) + pos_enc
        x = self.transformer(x)
        return self.predictor(x)
model = PatchTransformer(token_dim=3*32*32, hidden_dim=512)

# Training loop
for epoch in range(10):
    for i in range(0, len(patches1), 32):  # Process in batches
        batch_idx = slice(i, i+32)
        result = build_sequence_pair(
            patches1[batch_idx], patches2[batch_idx],
            deltas[batch_idx, 0], deltas[batch_idx, 1], rotations[batch_idx],
            patch_size=32, stride=16
        )

        # Get overlapping tokens
        tokens1 = result['tokens1']  # (32, T_max, D)
        tokens2 = result['tokens2']  # (32, T_max, D)
        overlap_mask = result['overlap_mask1']  # (32, T_max)
        # Not used by this simplified loss; shown because it is part of the result
        overlap_indices = result['overlap_indices1_to_2']  # (32, T_max)

        # Predict tokens2 from tokens1
        predicted = model(tokens1, result['coords1'], overlap_mask)

        # Loss only on overlapping tokens
        # (simplified - actual implementation would handle padding)
        loss = nn.functional.mse_loss(
            predicted[overlap_mask],
            tokens2[overlap_mask]
        )
        # Backprop and update...
Performance Notes:
Batch processing is highly optimized: Uses numba JIT compilation and parallel processing for large batches (N > 5)
Automatic fallback: Falls back to sequential processing for small batches or when numba is unavailable
Memory efficient: Batch tokenization reuses a single NCYXQuilt object
GPU support: All tensors maintain device placement (CPU/GPU)
When to Use:
✅ Training sequence models (transformers) on image patches
✅ Self-supervised learning with geometric augmentations
✅ Contrastive learning with patch pairs
✅ Any task requiring token-level overlap information
When NOT to Use:
❌ Simple patch extraction (use NCYXQuilt.unstitch() instead)
❌ Stitching patches back together (use NCYXQuilt.stitch() instead)
❌ When you don’t need overlap information