A Detailed Guide to PyTorch Basics
PyTorch is a Python-based scientific computing package aimed at two audiences:
- a replacement for NumPy that harnesses the power of GPUs
- a deep learning research platform that provides maximum flexibility and speed
1. Tensor Basics
import torch
import numpy as np
print("PyTorch 版本:", torch.__version__)
print("CUDA 是否可用:", torch.cuda.is_available())
# 1.1 Creating tensors
print("\n=== Creating Tensors ===")
# From a Python list
x = torch.tensor([1, 2, 3, 4])
print("From list:", x)
# All zeros
zeros = torch.zeros(2, 3)
print("Zeros tensor:\n", zeros)
# All ones
ones = torch.ones(2, 3)
print("Ones tensor:\n", ones)
# Uniform random values in [0, 1)
rand_tensor = torch.rand(2, 3)
print("Random tensor:\n", rand_tensor)
# A range of values (start, end, step)
arange = torch.arange(0, 10, 2)
print("Range tensor:", arange)
# 1.2 Tensor attributes
print("\n=== Tensor Attributes ===")
tensor = torch.rand(3, 4)
print("Shape:", tensor.shape)
print("Number of dimensions:", tensor.dim())
print("Data type:", tensor.dtype)
print("Device:", tensor.device)
2. Tensor Operations
# 2.1 Basic arithmetic
print("\n=== Tensor Arithmetic ===")
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
print("a =", a)
print("b =", b)
print("a + b =", a + b)
print("a - b =", a - b)
print("a * b =", a * b)  # element-wise multiplication
print("a / b =", a / b)
print("a ** 2 =", a ** 2)
# Matrix multiplication
matrix_a = torch.rand(2, 3)
matrix_b = torch.rand(3, 2)
matrix_product = torch.matmul(matrix_a, matrix_b)
print(f"Matrix multiplication: {matrix_a.shape} @ {matrix_b.shape} = {matrix_product.shape}")
# 2.2 Reshaping tensors
print("\n=== Reshaping Tensors ===")
tensor = torch.arange(12)
print("Original tensor:", tensor, "shape:", tensor.shape)
# reshape
reshaped = tensor.reshape(3, 4)
print("reshape(3, 4):\n", reshaped)
# view
viewed = tensor.view(3, 4)
print("view(3, 4):\n", viewed)
# Transpose
transposed = reshaped.T
print("Transpose:\n", transposed)
# 2.3 Indexing and slicing
print("\n=== Indexing and Slicing ===")
tensor = torch.arange(24).reshape(4, 6)
print("Original tensor:\n", tensor)
print("First row:", tensor[0])
print("First column:", tensor[:, 0])
print("Submatrix:\n", tensor[1:3, 2:4])
3. Automatic Differentiation (Autograd)
print("\n=== Autograd ===")
# 3.1 Basic automatic differentiation
x = torch.tensor(2.0, requires_grad=True)
y = x ** 2 + 3 * x + 1
y.backward()
print(f"x = {x}, y = {y}")
print(f"dy/dx = {x.grad}")  # dy/dx = 2x + 3 = 7 at x = 2
# 3.2 Gradients of a multivariable function
x1 = torch.tensor(1.0, requires_grad=True)
x2 = torch.tensor(2.0, requires_grad=True)
z = x1 ** 2 + x1 * x2 + x2 ** 2
z.backward()
print(f"\nMultivariable function: z = x1² + x1*x2 + x2²")
print(f"∂z/∂x1 = {x1.grad}")  # 2*x1 + x2 = 4
print(f"∂z/∂x2 = {x2.grad}")  # x1 + 2*x2 = 5
# 3.3 Zeroing gradients
x1.grad.zero_()
x2.grad.zero_()
print("\nAfter zeroing gradients:")
print(f"x1.grad = {x1.grad}")
print(f"x2.grad = {x2.grad}")
4. Neural Network Basics
import torch.nn as nn
import torch.nn.functional as F
print("\n=== Neural Network Basics ===")
# 4.1 Defining a simple neural network
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(10, 5)  # input 10-dim, output 5-dim
        self.fc2 = nn.Linear(5, 2)   # input 5-dim, output 2-dim
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the network
model = SimpleNet()
print("Network structure:")
print(model)
# 4.2 Inspecting parameters
print("\nNetwork parameters:")
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")
# 4.3 Forward pass
input_data = torch.randn(1, 10)  # batch_size=1, input_size=10
output = model(input_data)
print(f"\nInput shape: {input_data.shape}")
print(f"Output shape: {output.shape}")
print(f"Output: {output}")
5. Loss Functions and Optimizers
print("\n=== Loss Functions and Optimizers ===")
# 5.1 Loss function
criterion = nn.CrossEntropyLoss()
# 5.2 Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# 5.3 Simulating one training step
print("Simulated training step:")
# Dummy data
inputs = torch.randn(4, 10)          # batch_size=4, input_size=10
labels = torch.tensor([0, 1, 0, 1])  # labels for the 4 samples
# Forward pass
outputs = model(inputs)
loss = criterion(outputs, labels)
print(f"Initial loss: {loss.item():.4f}")
# Backward pass
optimizer.zero_grad()  # clear old gradients
loss.backward()        # backpropagate
optimizer.step()       # update parameters
# Forward pass again to see the loss change
outputs_after = model(inputs)
loss_after = criterion(outputs_after, labels)
print(f"Loss after one update: {loss_after.item():.4f}")
6. Datasets and DataLoaders
from torch.utils.data import Dataset, DataLoader
print("\n=== Datasets and DataLoaders ===")
# 6.1 A custom dataset
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Create dummy data
data = torch.randn(100, 10)           # 100 samples, 10 features each
labels = torch.randint(0, 2, (100,))  # 100 binary labels
dataset = CustomDataset(data, labels)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
print(f"Dataset size: {len(dataset)}")
print(f"Number of batches: {len(dataloader)}")
# 6.2 Iterating over the DataLoader
print("\nFirst 3 batches:")
for i, (batch_data, batch_labels) in enumerate(dataloader):
    print(f"Batch {i+1}: data shape {batch_data.shape}, label shape {batch_labels.shape}")
    if i == 2:  # show only the first 3 batches
        break
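In practice the DataLoader replaces the full-batch step from section 5. A minimal sketch of a mini-batch training loop, reusing the model, criterion, and optimizer defined above:
for epoch in range(2):  # two epochs, just to show the pattern
    for batch_data, batch_labels in dataloader:
        optimizer.zero_grad()
        batch_outputs = model(batch_data)
        batch_loss = criterion(batch_outputs, batch_labels)
        batch_loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}: last batch loss = {batch_loss.item():.4f}")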
7. Using the GPU
print("\n=== Using the GPU ===")
# 7.1 Choosing a device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# 7.2 Moving the model and data to the GPU
model = SimpleNet().to(device)
# Create some data and move it to the device
if torch.cuda.is_available():
    data_gpu = torch.randn(4, 10).to(device)
    labels_gpu = torch.tensor([0, 1, 0, 1]).to(device)
    output_gpu = model(data_gpu)
    print(f"GPU result shape: {output_gpu.shape}")
    print(f"GPU result device: {output_gpu.device}")
8. A Complete Training Example
print("\n=== Complete Training Example ===")
# 8.1 Preparing the data
def generate_data(n_samples=1000):
    """Generate a simple binary classification dataset."""
    X = torch.randn(n_samples, 2)
    # Label by distance from the origin: outside the unit circle -> class 1
    y = (X[:, 0]**2 + X[:, 1]**2 > 1).long()
    return X, y

X, y = generate_data(1000)
# Split into training and test sets
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
print(f"Training set: {X_train.shape}, test set: {X_test.shape}")
# 8.2 Defining the model
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(2, 10)
        self.fc2 = nn.Linear(10, 5)
        self.fc3 = nn.Linear(5, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = Classifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# 8.3 Training loop
def train_model(model, X_train, y_train, epochs=100):
    model.train()
    losses = []
    for epoch in range(epochs):
        # Forward pass
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        if epoch % 20 == 0:
            print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}')
    return losses

print("Starting training...")
losses = train_model(model, X_train, y_train, epochs=100)
# 8.4 Evaluating the model
def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs, 1)
        accuracy = (predicted == y_test).float().mean()
    return accuracy.item()

accuracy = evaluate_model(model, X_test, y_test)
print(f"Test accuracy: {accuracy:.4f}")
9. Saving and Loading Models
print("\n=== Saving and Loading Models ===")
# 9.1 Saving the model (state_dict only, the recommended way)
torch.save(model.state_dict(), 'model.pth')
print("Model saved to 'model.pth'")
# 9.2 Loading the model
new_model = Classifier()
new_model.load_state_dict(torch.load('model.pth'))
new_model.eval()
print("Model loaded successfully")
# Verify the loaded model
accuracy_loaded = evaluate_model(new_model, X_test, y_test)
print(f"Loaded model test accuracy: {accuracy_loaded:.4f}")
10. Common Utilities
print("\n=== Common Utilities ===")
# 10.1 Setting the random seed
torch.manual_seed(42)
print("Random seed set to 42")
# 10.2 Converting between tensors and NumPy arrays
# Tensor to NumPy
tensor = torch.tensor([1, 2, 3])
numpy_array = tensor.numpy()
print(f"Tensor: {tensor} -> NumPy: {numpy_array}")
# NumPy to tensor
numpy_array = np.array([4, 5, 6])
tensor_from_numpy = torch.from_numpy(numpy_array)
print(f"NumPy: {numpy_array} -> Tensor: {tensor_from_numpy}")
# 10.3 Controlling gradient computation
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
# Disable gradient tracking when it is not needed
with torch.no_grad():
    y = x * 2
print(f"Without gradient tracking: {y}")
# Or use detach()
z = (x * 2).detach()
print(f"Detached from the graph: {z}")
Sample Output
PyTorch version: 2.0.1
CUDA available: True
=== Creating Tensors ===
From list: tensor([1, 2, 3, 4])
Zeros tensor:
 tensor([[0., 0., 0.],
        [0., 0., 0.]])
Ones tensor:
 tensor([[1., 1., 1.],
        [1., 1., 1.]])
=== Autograd ===
x = tensor(2., requires_grad=True), y = tensor(11., grad_fn=<AddBackward0>)
dy/dx = tensor(7.)
=== Complete Training Example ===
Starting training...
Epoch [0/100], Loss: 0.7234
Epoch [20/100], Loss: 0.5321
Epoch [40/100], Loss: 0.4215
Epoch [60/100], Loss: 0.3521
Epoch [80/100], Loss: 0.3012
Test accuracy: 0.8950
This PyTorch tutorial covers the essential concepts from tensor operations to training a complete model, and is a solid starting point for learning deep learning.