Essential OpenCV Operations
import cv2
import numpy as np
# Load an image from disk. OpenCV delivers the channels in BGR order, not RGB!
img = cv2.imread('photo.jpg')
# cv2.imread does not raise when the file is missing or cannot be decoded;
# it returns None, which would otherwise surface below as an opaque
# cvtColor error. Fail early with a clear message instead.
if img is None:
    raise FileNotFoundError(
        "cv2.imread could not read 'photo.jpg' (missing or unsupported file)"
    )

# Colour-space conversions, both starting from the BGR source image.
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Resize
resized = cv2.resize(img, (224, 224))  # ImageNet standard; dsize is (width, height)

# Filters
blurred = cv2.GaussianBlur(img, (5, 5), 0)  # 5x5 kernel; sigma 0 -> derived from kernel size
edges = cv2.Canny(gray, 50, 150)  # hysteresis thresholds: low=50, high=150

# Normalization (for DL models)
img_norm = img.astype(np.float32) / 255.0  # float32 copy; 8-bit values end up in [0, 1]
mean = [0.485, 0.456, 0.406]  # ImageNet mean
std = [0.229, 0.224, 0.225]  # ImageNet std
# NOTE(review): these per-channel statistics are conventionally given in RGB
# order, while img_norm is still BGR. Convert to RGB (see `rgb`) before
# applying them with (x - mean) / std.
Data Augmentation (torchvision)
from torchvision import transforms
# Training pipeline: random augmentation first, then tensor conversion and
# per-channel normalization with the mean/std defined earlier in the file.
train_tfm = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)
# Validation pipeline: no random transforms, only a fixed resize and centre
# crop followed by the same tensor conversion and normalization as training.
val_tfm = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)
Apply random augmentation to the training data only; validation/test data should receive just the deterministic preprocessing (resize, center crop, normalize).