torch.tensor([1, 2, 3]) | Create tensor from list |
torch.zeros(3, 4) | Tensor of zeros |
torch.ones(2, 3) | Tensor of ones |
torch.empty(2, 2) | Uninitialized tensor |
torch.rand(3, 4) | Random uniform [0, 1) |
torch.randn(3, 4) | Random normal |
torch.arange(0, 10, 2) | Range tensor |
torch.linspace(0, 1, 5) | Evenly spaced |
torch.eye(4) | Identity matrix |
t.shape, t.size() | Tensor shape |
t.dtype | Data type |
t.device | Device (CPU/GPU) |
t.requires_grad | Gradient tracking |
t.numel() | Number of elements |
t.dim() | Number of dimensions |
t.float(), t.double(), t.int() | Convert type |
t.to(torch.float32) | Convert to dtype |
t.cuda() | Move to GPU |
t.cpu() | Move to CPU |
t.to("cuda:0") | Move to specific GPU |
torch.cuda.is_available() | Check GPU availability |
t.view(3, 4) | Reshape (must be contiguous) |
t.reshape(3, 4) | Reshape (flexible) |
t.flatten() | Flatten to 1D |
t.squeeze() | Remove size-1 dimensions |
t.unsqueeze(0) | Add dimension at index |
t.transpose(0, 1) | Swap dimensions |
t.permute(2, 0, 1) | Reorder dimensions |
torch.add(a, b), a + b | Addition |
torch.mul(a, b), a * b | Element-wise multiply |
torch.matmul(a, b), a @ b | Matrix multiplication |
torch.mm(a, b) | 2D matrix multiply |
torch.bmm(a, b) | Batch matrix multiply |
torch.pow(t, 2), t ** 2 | Power |
torch.sqrt(t) | Square root |
torch.exp(t), torch.log(t) | Exp/Log |
t.sum(), t.sum(dim=0) | Sum |
t.mean(), t.mean(dim=0) | Mean |
t.std(), t.var() | Std/Variance |
t.max(), t.min() | Max/Min |
t.argmax(), t.argmin() | Index of max/min |
t.prod() | Product |
torch.cat([a, b], dim=0) | Concatenate |
torch.stack([a, b], dim=0) | Stack (new dim) |
torch.split(t, 2, dim=0) | Split into chunks |
torch.chunk(t, 3, dim=0) | Split into n chunks |
x = torch.tensor([1.0], requires_grad=True) | Enable gradient tracking |
y.backward() | Compute gradients |
x.grad | Access gradient |
x.grad.zero_() | Zero gradients |
with torch.no_grad(): | Disable gradient |
x.detach() | Detach from graph |
x.requires_grad_(True) | Enable gradient in-place |
nn.Linear(in_features, out_features) | Fully connected layer |
nn.Conv2d(in_ch, out_ch, kernel_size) | 2D convolution |
nn.ConvTranspose2d(...) | Transposed convolution |
nn.MaxPool2d(kernel_size) | Max pooling |
nn.AvgPool2d(kernel_size) | Average pooling |
nn.BatchNorm2d(num_features) | Batch normalization |
nn.LayerNorm(normalized_shape) | Layer normalization |
nn.Dropout(p=0.5) | Dropout |
nn.Embedding(num_embed, embed_dim) | Embedding layer |
nn.LSTM(input_size, hidden_size) | LSTM layer |
nn.GRU(input_size, hidden_size) | GRU layer |
nn.ReLU() | ReLU activation |
nn.LeakyReLU(0.1) | Leaky ReLU |
nn.Sigmoid() | Sigmoid |
nn.Tanh() | Tanh |
nn.Softmax(dim=1) | Softmax |
nn.GELU() | GELU activation |
nn.MSELoss() | Mean squared error |
nn.CrossEntropyLoss() | Cross entropy |
nn.BCELoss() | Binary cross entropy |
nn.BCEWithLogitsLoss() | BCE with built-in sigmoid (numerically stable) |
nn.L1Loss() | L1/MAE loss |
nn.NLLLoss() | Negative log likelihood |
class Model(nn.Module): | Define custom model |
def __init__(self): super().__init__() | Initialize model |
def forward(self, x): return self.layer(x) | Forward pass |
nn.Sequential(nn.Linear(10, 5), nn.ReLU()) | Sequential model |
model.parameters() | Get all parameters |
model.named_parameters() | Parameters with names |
model.train() | Set training mode |
model.eval() | Set evaluation mode |
model.to(device) | Move model to device |
torch.optim.SGD(params, lr=0.01) | SGD optimizer |
torch.optim.Adam(params, lr=0.001) | Adam optimizer |
torch.optim.AdamW(params, lr=0.001) | AdamW optimizer |
torch.optim.RMSprop(params, lr=0.01) | RMSprop optimizer |
optimizer.zero_grad() | Zero gradients |
optimizer.step() | Update weights |
torch.optim.lr_scheduler.StepLR(opt, step_size=10) | Step LR decay |
torch.optim.lr_scheduler.ExponentialLR(opt, gamma=0.9) | Exponential decay |
torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max) | Cosine annealing |
scheduler.step() | Update learning rate |
output = model(input) | Forward pass |
loss = criterion(output, target) | Compute loss |
loss.backward() | Backward pass |
optimizer.step() | Update parameters |
torch.nn.utils.clip_grad_norm_(params, max_norm) | Gradient clipping |
DataLoader(dataset, batch_size=32, shuffle=True) | Create DataLoader |
for batch in dataloader: | Iterate batches |
num_workers=4 | Parallel data loading |
pin_memory=True | Pin memory for GPU |
drop_last=True | Drop incomplete batch |
class MyDataset(Dataset): | Custom dataset |
def __len__(self): | Return dataset length |
def __getitem__(self, idx): | Return single sample |
TensorDataset(x_tensor, y_tensor) | Dataset from tensors |
torch.save(model.state_dict(), "model.pt") | Save model weights |
model.load_state_dict(torch.load("model.pt")) | Load model weights |
torch.save(model, "full_model.pt") | Save entire model |
torch.save({"model": m.state_dict(), "opt": o.state_dict()}, "ckpt.pt") | Save checkpoint |