MATLAB Implementation of a BP Neural Network for Handwritten-Character CAPTCHA Recognition
Recognizing handwritten character CAPTCHAs with a BP (backpropagation) neural network
% BP neural network for handwritten character CAPTCHA recognition
% Author: DeepSeek
% Created: 2023-10-25
%% Initialization
clear; clc; close all;
rng(42); % fix the random seed for reproducibility
warning('off', 'all'); % suppress warnings
%% Parameter settings
% Dataset parameters
dataset_path = 'captcha_dataset'; % path to the dataset
train_ratio = 0.8;       % fraction of samples used for training
validation_ratio = 0.1;  % fraction used for validation
test_ratio = 0.1;        % fraction used for testing
image_size = [28, 28];   % target image size after resizing
num_classes = 36;        % number of classes (26 letters + 10 digits)
% Network parameters
hidden_layer_size = 128; % number of hidden-layer neurons
max_epochs = 100;        % maximum number of training epochs
batch_size = 64;         % mini-batch size
learning_rate = 0.01;    % learning rate
momentum = 0.9;          % momentum coefficient
l2_reg = 0.0001;         % L2 regularization coefficient
patience = 10;           % early-stopping patience (epochs)
min_delta = 0.001;       % minimum improvement for early stopping
% Data augmentation parameters
augment_data = true;     % enable data augmentation
rotation_range = 15;     % rotation range in degrees
shift_range = 0.1;       % translation range (fraction of image size)
zoom_range = [0.9, 1.1]; % zoom range
%% Load and preprocess data
fprintf('Loading and preprocessing data...\n');
[images, labels] = load_captcha_dataset(dataset_path, image_size);
fprintf('Dataset size: %d samples\n', size(images, 4));
% Split the dataset
[train_data, train_labels, val_data, val_labels, test_data, test_labels] = ...
    split_dataset(images, labels, train_ratio, validation_ratio, test_ratio);
fprintf('Training: %d samples | Validation: %d samples | Test: %d samples\n', ...
    size(train_data, 4), size(val_data, 4), size(test_data, 4));
% Data augmentation
if augment_data
    fprintf('Augmenting training data...\n');
    [train_data, train_labels] = augment_dataset(train_data, train_labels, ...
        rotation_range, shift_range, zoom_range);
    fprintf('Training set size after augmentation: %d samples\n', size(train_data, 4));
end
% Normalize data
train_data = normalize_data(train_data);
val_data = normalize_data(val_data);
test_data = normalize_data(test_data);
% Visualize a few samples
visualize_samples(train_data, train_labels, 25);
%% Build the BP neural network
fprintf('Building BP neural network...\n');
input_size = prod(image_size); % number of input neurons (28x28 = 784)
output_size = num_classes;     % number of output neurons
% Initialize weights and biases
[w1, b1, w2, b2] = initialize_parameters(input_size, hidden_layer_size, output_size);
% Train the network
fprintf('Training the network...\n');
[best_w1, best_b1, best_w2, best_b2, train_loss_history, val_loss_history, val_acc_history] = ...
    train_network(train_data, train_labels, val_data, val_labels, ...
    w1, b1, w2, b2, max_epochs, batch_size, learning_rate, momentum, l2_reg, patience, min_delta);
%% Evaluate the model
fprintf('Evaluating on the test set...\n');
% Predict with the best parameters
test_predictions = predict(test_data, best_w1, best_b1, best_w2, best_b2);
test_accuracy = mean(vec2ind(test_predictions) == double(test_labels(:))');
fprintf('Test accuracy: %.2f%%\n', test_accuracy * 100);
% Confusion matrix
generate_confusion_matrix(test_labels, test_predictions);
%% Visualize the training process
figure('Position', [100, 100, 1200, 500]);
% Loss curves
subplot(1, 2, 1);
plot(train_loss_history, 'b-', 'LineWidth', 1.5); hold on;
plot(val_loss_history, 'r-', 'LineWidth', 1.5);
xlabel('Epoch');
ylabel('Loss');
title('Training and validation loss');
legend('Training loss', 'Validation loss', 'Location', 'northeast');
grid on;
% Accuracy curve
subplot(1, 2, 2);
plot(val_acc_history, 'g-', 'LineWidth', 1.5);
xlabel('Epoch');
ylabel('Accuracy');
title('Validation accuracy');
ylim([0, 1]);
grid on;
%% Visualize misclassified samples
visualize_errors(test_data, test_labels, test_predictions, 25);
%% Save the model
save('captcha_model.mat', 'best_w1', 'best_b1', 'best_w2', 'best_b2', 'image_size', 'num_classes');
%% ================== Function definitions ==================
%% Load the dataset
function [images, labels] = load_captcha_dataset(dataset_path, image_size)
    % Each subfolder corresponds to one character class
    class_folders = dir(dataset_path);
    class_folders = class_folders([class_folders.isdir]);
    class_folders = class_folders(~ismember({class_folders.name}, {'.', '..'}));
    num_classes_found = numel(class_folders);
    images = zeros([image_size, 1, 0]); % 28 x 28 x 1 x N
    label_idx = zeros(1, 0);            % numeric class index per sample
    sample_count = 0;
    for class_idx = 1:num_classes_found
        class_name = class_folders(class_idx).name;
        class_folder = fullfile(dataset_path, class_name);
        % All images of the current class
        image_files = dir(fullfile(class_folder, '*.png'));
        % Read and preprocess each image
        for img_idx = 1:length(image_files)
            img = imread(fullfile(class_folder, image_files(img_idx).name));
            % Convert to grayscale if needed
            if size(img, 3) == 3
                img = rgb2gray(img);
            end
            % Resize and scale to [0, 1]
            img = im2double(imresize(img, image_size));
            % Append image and class index
            sample_count = sample_count + 1;
            images(:, :, 1, sample_count) = img; %#ok<AGROW>
            label_idx(sample_count) = class_idx; %#ok<AGROW>
        end
    end
    % Categorical labels whose category names are the folder (character) names
    labels = categorical(label_idx, 1:num_classes_found, {class_folders.name});
end
%% Split the dataset
function [train_data, train_labels, val_data, val_labels, test_data, test_labels] = ...
    split_dataset(images, labels, train_ratio, val_ratio, test_ratio)
    num_samples = size(images, 4);
    indices = randperm(num_samples);
    % Split points
    train_end = floor(train_ratio * num_samples);
    val_end = train_end + floor(val_ratio * num_samples);
    % Training set
    train_indices = indices(1:train_end);
    train_data = images(:, :, :, train_indices);
    train_labels = labels(train_indices);
    % Validation set
    val_indices = indices(train_end+1:val_end);
    val_data = images(:, :, :, val_indices);
    val_labels = labels(val_indices);
    % Test set (the remaining samples, approximately test_ratio of the data)
    test_indices = indices(val_end+1:end);
    test_data = images(:, :, :, test_indices);
    test_labels = labels(test_indices);
end
%% Data augmentation
function [augmented_images, augmented_labels] = augment_dataset(images, labels, ...
    rotation_range, shift_range, zoom_range)
    num_samples = size(images, 4);
    augmented_images = images;
    augmented_labels = labels;
    for i = 1:num_samples
        img = images(:, :, :, i);
        % Random rotation
        angle = (2*rand - 1) * rotation_range;
        img_rot = imrotate(img, angle, 'bilinear', 'crop');
        % Random translation
        shift_x = round((2*rand - 1) * shift_range * size(img, 2));
        shift_y = round((2*rand - 1) * shift_range * size(img, 1));
        img_shift = imtranslate(img, [shift_x, shift_y]);
        % Random zoom
        zoom_factor = zoom_range(1) + rand * (zoom_range(2) - zoom_range(1));
        img_zoom = imresize(img, zoom_factor);
        % Crop or pad back to the original size
        [h, w, ~] = size(img);
        [h_z, w_z, ~] = size(img_zoom);
        if zoom_factor > 1
            % Crop the center
            start_row = floor((h_z - h)/2) + 1;
            start_col = floor((w_z - w)/2) + 1;
            img_zoom = img_zoom(start_row:start_row+h-1, start_col:start_col+w-1);
        else
            % Pad, then resize to remove any off-by-one mismatch
            pad_row = floor((h - h_z)/2);
            pad_col = floor((w - w_z)/2);
            img_zoom = padarray(img_zoom, [pad_row, pad_col], 0, 'both');
            img_zoom = imresize(img_zoom, [h, w]);
        end
        % Append the augmented images
        augmented_images(:, :, :, end+1) = img_rot;   %#ok<AGROW>
        augmented_images(:, :, :, end+1) = img_shift; %#ok<AGROW>
        augmented_images(:, :, :, end+1) = img_zoom;  %#ok<AGROW>
        % Append the corresponding labels (one per augmented image)
        augmented_labels(end+1:end+3) = labels(i);    %#ok<AGROW>
    end
end
%% Data normalization
function normalized_data = normalize_data(data)
    % im2double has already scaled the pixels to [0, 1]; only divide by 255
    % if the data is still in the raw uint8 range
    normalized_data = double(data);
    if max(normalized_data(:)) > 1
        normalized_data = normalized_data / 255;
    end
end
%% Visualize samples
function visualize_samples(images, labels, num_samples)
    figure('Name', 'Dataset samples', 'Position', [100, 100, 800, 600]);
    % Pick random samples
    indices = randperm(size(images, 4), num_samples);
    grid_size = ceil(sqrt(num_samples));
    for i = 1:num_samples
        subplot(grid_size, grid_size, i);
        img = images(:, :, :, indices(i));
        imshow(img, []);
        title(char(labels(indices(i)))); % category name = character name
    end
end
%% Initialize network parameters
function [w1, b1, w2, b2] = initialize_parameters(input_size, hidden_size, output_size)
    % Xavier (Glorot) initialization for the weights
    w1 = randn(hidden_size, input_size) * sqrt(2 / (input_size + hidden_size));
    b1 = zeros(hidden_size, 1);
    w2 = randn(output_size, hidden_size) * sqrt(2 / (hidden_size + output_size));
    b2 = zeros(output_size, 1);
end
%% Train the network
function [best_w1, best_b1, best_w2, best_b2, train_loss_history, val_loss_history, val_acc_history] = ...
    train_network(train_data, train_labels, val_data, val_labels, ...
    w1, b1, w2, b2, max_epochs, batch_size, lr, momentum, l2_reg, patience, min_delta)
    num_samples = size(train_data, 4);
    num_batches = ceil(num_samples / batch_size);
    % Initialize momentum buffers
    w1_momentum = zeros(size(w1));
    b1_momentum = zeros(size(b1));
    w2_momentum = zeros(size(w2));
    b2_momentum = zeros(size(b2));
    % History buffers
    train_loss_history = zeros(max_epochs, 1);
    val_loss_history = zeros(max_epochs, 1);
    val_acc_history = zeros(max_epochs, 1);
    % Early-stopping state
    best_val_loss = Inf;
    best_epoch = 0;
    epochs_without_improvement = 0;
    % Best parameters seen so far
    best_w1 = w1;
    best_b1 = b1;
    best_w2 = w2;
    best_b2 = b2;
    for epoch = 1:max_epochs
        epoch_loss = 0;
        % Shuffle the training data
        indices = randperm(num_samples);
        train_data = train_data(:, :, :, indices);
        train_labels = train_labels(indices);
        % Mini-batch training
        for batch = 1:num_batches
            % Current batch
            start_idx = (batch - 1) * batch_size + 1;
            end_idx = min(batch * batch_size, num_samples);
            batch_size_actual = end_idx - start_idx + 1;
            batch_images = train_data(:, :, :, start_idx:end_idx);
            batch_labels = train_labels(start_idx:end_idx);
            % Flatten images into column vectors and one-hot encode the labels
            X = reshape(batch_images, [], batch_size_actual);
            Y = full(ind2vec(double(batch_labels(:))', size(w2, 1)));
            % Forward pass
            [Z1, A1, Z2, A2] = forward_prop(X, w1, b1, w2, b2);
            % Loss (cross-entropy + L2 penalty)
            loss = cross_entropy_loss(A2, Y) + l2_reg * (sum(w1(:).^2) + sum(w2(:).^2));
            epoch_loss = epoch_loss + loss;
            % Backward pass
            [dW1, db1, dW2, db2] = backward_prop(X, Y, Z1, A1, Z2, A2, w1, w2);
            % Add the L2 regularization gradients
            dW1 = dW1 + 2 * l2_reg * w1;
            dW2 = dW2 + 2 * l2_reg * w2;
            % Momentum updates
            w1_momentum = momentum * w1_momentum - lr * dW1;
            w1 = w1 + w1_momentum;
            b1_momentum = momentum * b1_momentum - lr * db1;
            b1 = b1 + b1_momentum;
            w2_momentum = momentum * w2_momentum - lr * dW2;
            w2 = w2 + w2_momentum;
            b2_momentum = momentum * b2_momentum - lr * db2;
            b2 = b2 + b2_momentum;
        end
        % Average training loss for this epoch
        epoch_loss = epoch_loss / num_batches;
        train_loss_history(epoch) = epoch_loss;
        % Evaluate on the validation set
        [val_loss, val_acc] = evaluate(val_data, val_labels, w1, b1, w2, b2, l2_reg);
        val_loss_history(epoch) = val_loss;
        val_acc_history(epoch) = val_acc;
        fprintf('Epoch %d/%d: training loss = %.4f | validation loss = %.4f | validation accuracy = %.2f%%\n', ...
            epoch, max_epochs, epoch_loss, val_loss, val_acc * 100);
        % Check the early-stopping condition
        if val_loss < best_val_loss - min_delta
            best_val_loss = val_loss;
            best_epoch = epoch;
            epochs_without_improvement = 0;
            % Save the best parameters
            best_w1 = w1;
            best_b1 = b1;
            best_w2 = w2;
            best_b2 = b2;
        else
            epochs_without_improvement = epochs_without_improvement + 1;
        end
        % Early stopping
        if epochs_without_improvement >= patience
            fprintf('Early stopping at epoch %d\n', epoch);
            break;
        end
    end
    % Trim the history to the epochs actually run
    train_loss_history = train_loss_history(1:epoch);
    val_loss_history = val_loss_history(1:epoch);
    val_acc_history = val_acc_history(1:epoch);
    fprintf('Best validation loss %.4f at epoch %d\n', best_val_loss, best_epoch);
end
%% Forward propagation
function [Z1, A1, Z2, A2] = forward_prop(X, w1, b1, w2, b2)
    % Hidden layer
    Z1 = w1 * X + b1;
    A1 = relu(Z1);
    % Output layer
    Z2 = w2 * A1 + b2;
    A2 = softmax(Z2);
end
%% ReLU activation
function y = relu(x)
    y = max(0, x);
end
%% ReLU derivative
function y = relu_derivative(x)
    y = double(x > 0);
end
%% Softmax
function y = softmax(x)
    exp_x = exp(x - max(x, [], 1)); % subtract the column max to avoid overflow
    y = exp_x ./ sum(exp_x, 1);
end
%% Cross-entropy loss
function loss = cross_entropy_loss(y_pred, y_true)
    % Small epsilon to avoid log(0)
    epsilon = 1e-8;
    loss = -sum(y_true .* log(y_pred + epsilon), 'all') / size(y_pred, 2);
end
%% Backpropagation
function [dW1, db1, dW2, db2] = backward_prop(X, Y, Z1, A1, Z2, A2, w1, w2)
    m = size(X, 2);
    % Output-layer gradients (softmax + cross-entropy)
    dZ2 = A2 - Y;
    dW2 = (dZ2 * A1') / m;
    db2 = sum(dZ2, 2) / m;
    % Hidden-layer gradients
    dZ1 = (w2' * dZ2) .* relu_derivative(Z1);
    dW1 = (dZ1 * X') / m;
    db1 = sum(dZ1, 2) / m;
end
%% Evaluation
function [loss, accuracy] = evaluate(data, labels, w1, b1, w2, b2, l2_reg)
    X = reshape(data, [], size(data, 4));
    Y = full(ind2vec(double(labels(:))', size(w2, 1)));
    % Forward pass
    [~, ~, ~, A2] = forward_prop(X, w1, b1, w2, b2);
    % Loss (cross-entropy + L2 penalty)
    loss = cross_entropy_loss(A2, Y) + l2_reg * (sum(w1(:).^2) + sum(w2(:).^2));
    % Accuracy
    [~, pred_idx] = max(A2, [], 1);
    [~, true_idx] = max(Y, [], 1);
    accuracy = mean(pred_idx == true_idx);
end
%% Prediction
function predictions = predict(data, w1, b1, w2, b2)
    X = reshape(data, [], size(data, 4));
    [~, ~, ~, A2] = forward_prop(X, w1, b1, w2, b2);
    predictions = A2; % class probabilities, one column per sample
end
%% Confusion matrix
function generate_confusion_matrix(true_labels, predictions)
    [~, pred_idx] = max(predictions, [], 1);
    true_idx = double(true_labels(:))';    % categorical -> class index
    class_names = categories(true_labels); % class names from the categorical labels
    % Overall accuracy
    accuracy = mean(pred_idx == true_idx);
    % Plot the confusion chart
    figure('Position', [100, 100, 800, 700]);
    cc = confusionchart( ...
        categorical(true_idx, 1:numel(class_names), class_names), ...
        categorical(pred_idx, 1:numel(class_names), class_names), ...
        'RowSummary', 'row-normalized', ...
        'ColumnSummary', 'column-normalized');
    cc.Title = sprintf('Confusion matrix (overall accuracy: %.2f%%)', accuracy * 100);
end
%% Visualize misclassified samples
function visualize_errors(data, labels, predictions, max_errors)
    [~, pred_idx] = max(predictions, [], 1);
    true_idx = double(labels(:))';      % categorical -> class index
    class_labels = categories(labels);  % class names from the categorical labels
    % Find misclassified samples
    error_indices = find(pred_idx ~= true_idx);
    if isempty(error_indices)
        fprintf('No misclassified samples!\n');
        return;
    end
    % Limit the number of samples shown
    num_errors = min(length(error_indices), max_errors);
    error_indices = error_indices(1:num_errors);
    figure('Name', 'Misclassified samples', 'Position', [100, 100, 800, 800]);
    grid_size = ceil(sqrt(num_errors));
    for i = 1:num_errors
        idx = error_indices(i);
        img = data(:, :, :, idx);
        subplot(grid_size, grid_size, i);
        imshow(img, []);
        true_label = class_labels{true_idx(idx)};
        pred_label = class_labels{pred_idx(idx)};
        title(sprintf('True: %s\nPredicted: %s', true_label, pred_label), 'Color', 'red');
    end
end
%% Helper: index of the maximum entry in each column
function idx = vec2ind(vec)
    [~, idx] = max(vec, [], 1);
end
Program overview
This MATLAB program implements a complete BP neural network pipeline for recognizing handwritten character CAPTCHAs. Its main components are:
1. Data preprocessing
- Dataset loading: reads the handwritten character images from the given path
- Data augmentation: generates extra training samples by rotation, translation, and zooming
- Normalization: scales pixel values to the [0, 1] range
- Dataset splitting: divides the data into training, validation, and test sets
2. Network construction
- Architecture: input layer (784 neurons) - hidden layer (128 neurons) - output layer (36 neurons)
- Parameter initialization: Xavier initialization for the weights
- Activation functions: ReLU (hidden layer) and softmax (output layer)
- Loss function: cross-entropy loss plus L2 regularization
3. Training
- Mini-batch training: stochastic gradient descent on small batches
- Momentum: speeds up convergence and damps oscillations
- Early stopping: halts training when the validation loss stops improving, to limit overfitting
- Learning-rate scheduling is not part of the base script; it is discussed as an optional improvement under "Performance optimization tips" below
4. Evaluation and visualization
- Metrics: accuracy and loss
- Confusion matrix: per-class classification performance
- Error analysis: visualization of misclassified samples
- Training curves: loss and accuracy plotted over the epochs
5. Model saving
- The best parameters found during training are saved after training completes
Usage
1. Dataset preparation
The program expects a dataset of handwritten CAPTCHA characters organized as follows:
captcha_dataset/
├── 0/ # samples of the digit 0
│ ├── 0_1.png
│ ├── 0_2.png
│ └── ...
├── 1/ # samples of the digit 1
├── ...
├── A/ # samples of the letter A
├── B/ # samples of the letter B
└── ... # remaining letters
Each subfolder corresponds to one character class and contains multiple handwritten samples of that character in PNG format.
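Before training, it can be useful to confirm that the folder layout matches what the loader expects. The short check below is a minimal sketch, assuming only the dataset_path variable from the script; it counts the PNG files in each class folder:
% Minimal sanity check of the dataset layout
class_folders = dir(dataset_path);
class_folders = class_folders([class_folders.isdir]);
class_folders = class_folders(~ismember({class_folders.name}, {'.', '..'}));
total = 0;
for k = 1:numel(class_folders)
    n = numel(dir(fullfile(dataset_path, class_folders(k).name, '*.png')));
    fprintf('Class %-3s: %d samples\n', class_folders(k).name, n);
    total = total + n;
end
fprintf('Total: %d samples in %d classes\n', total, numel(class_folders));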
2. Parameter configuration
Several parameters can be configured at the top of the script:
% Dataset parameters
dataset_path = 'captcha_dataset'; % path to the dataset
image_size = [28, 28]; % target image size
% Network parameters
hidden_layer_size = 128; % number of hidden-layer neurons
max_epochs = 100; % maximum number of training epochs
batch_size = 64; % mini-batch size
learning_rate = 0.01; % learning rate
% Data augmentation parameters
augment_data = true; % enable data augmentation
rotation_range = 15; % rotation range (degrees)
shift_range = 0.1; % translation range (fraction of image size)
zoom_range = [0.9, 1.1]; % zoom range
3. Running the program
Run the whole MATLAB script; it performs the following steps automatically:
- Load and preprocess the data
- Build the neural network
- Train the network while monitoring performance
- Evaluate the model on the test set
- Generate the visualizations
- Save the best model
4. Interpreting the results
The program produces several visualizations:
- Dataset samples: a random selection of training images
- Training curves: training and validation loss, plus validation accuracy
- Confusion matrix: per-class classification performance
- Misclassified samples: misclassified images with their true and predicted labels
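Once captcha_model.mat has been saved, a single image can be classified without rerunning the training. The snippet below is a minimal, self-contained sketch: test_image.png is a placeholder file name, and the digits-then-letters class ordering assumes the folder layout shown above (0-9 followed by A-Z, the order in which dir returns the folders):
% Classify one image with the saved model (sketch)
load('captcha_model.mat', 'best_w1', 'best_b1', 'best_w2', 'best_b2', 'image_size');
class_names = [cellstr(('0':'9')'); cellstr(('A':'Z')')]; % assumes 0-9, A-Z folder order
img = imread('test_image.png');                           % placeholder input image
if size(img, 3) == 3
    img = rgb2gray(img);
end
img = im2double(imresize(img, image_size));      % same preprocessing as training
x = reshape(img, [], 1);                         % flatten to a column vector
a1 = max(0, best_w1 * x + best_b1);              % hidden layer (ReLU)
z2 = best_w2 * a1 + best_b2;
p = exp(z2 - max(z2)) / sum(exp(z2 - max(z2)));  % softmax probabilities
[confidence, idx] = max(p);
fprintf('Predicted character: %s (confidence %.1f%%)\n', class_names{idx}, confidence * 100);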
Algorithm principles
1. BP neural network
A BP (backpropagation) neural network is a multi-layer feed-forward network trained with the backpropagation algorithm:
- Forward pass: the input is propagated through the network to produce an output
- Loss computation: the difference between the predicted output and the true label is measured
- Backward pass: gradients of the loss with respect to the weights are computed
- Weight update: the parameters are adjusted by gradient descent
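A standard way to verify that the backward pass is correct is a finite-difference gradient check on a tiny random problem. The sketch below assumes the forward_prop, backward_prop, and cross_entropy_loss helpers defined in the script above (for example, pasted at the end of the script as a temporary check) and compares one analytic gradient entry with its numerical estimate:
% Finite-difference check of backward_prop on a tiny random problem (sketch)
rng(0);
X = randn(10, 5);                          % 10 inputs, 5 samples
Y = full(ind2vec(randi(3, 1, 5), 3));      % random one-hot targets, 3 classes
w1 = randn(4, 10) * 0.1; b1 = zeros(4, 1);
w2 = randn(3, 4) * 0.1;  b2 = zeros(3, 1);
[Z1, A1, Z2, A2] = forward_prop(X, w1, b1, w2, b2);
[dW1, ~, ~, ~] = backward_prop(X, Y, Z1, A1, Z2, A2, w1, w2);
eps_fd = 1e-6;                             % finite-difference step
w1p = w1; w1p(1, 1) = w1p(1, 1) + eps_fd;
w1m = w1; w1m(1, 1) = w1m(1, 1) - eps_fd;
[~, ~, ~, A2p] = forward_prop(X, w1p, b1, w2, b2);
[~, ~, ~, A2m] = forward_prop(X, w1m, b1, w2, b2);
num_grad = (cross_entropy_loss(A2p, Y) - cross_entropy_loss(A2m, Y)) / (2 * eps_fd);
fprintf('analytic %.6f vs numerical %.6f\n', dW1(1, 1), num_grad);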
2. Key components
- Activation functions:
  - ReLU: $f(x) = \max(0, x)$, which mitigates the vanishing-gradient problem
  - Softmax: $S(z_i) = \frac{e^{z_i}}{\sum_{j=1}^{K} e^{z_j}}$, used for the multi-class output
- Loss function: cross-entropy loss (averaged over the $N$ samples of a batch, as in the code)
  $L = -\frac{1}{N}\sum_{i=1}^{N} \sum_{j=1}^{K} y_{ij} \log(\hat{y}_{ij})$
- Optimizer: stochastic gradient descent with momentum
  $v_t = \mu v_{t-1} - \eta \nabla_\theta J(\theta_t)$
  $\theta_{t+1} = \theta_t + v_t$
- Regularization: L2 regularization against overfitting
  $J_{\text{reg}} = J + \lambda \sum_i \theta_i^2$
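As a quick numeric illustration of the softmax and cross-entropy formulas, using the softmax and cross_entropy_loss helpers defined in the script above:
% Toy example: softmax followed by cross-entropy for a single sample
z = [2.0; 1.0; 0.1];           % raw output-layer scores for 3 classes
p = softmax(z);                % approx [0.659; 0.242; 0.099]
y = [1; 0; 0];                 % one-hot true label (class 1)
L = cross_entropy_loss(p, y);  % -log(0.659), approx 0.417
fprintf('p = [%.3f %.3f %.3f], loss = %.3f\n', p, L);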
3. Data augmentation
Several augmentation techniques are used to improve generalization:
- Random rotation: rotate each image within ±15 degrees
- Random translation: shift each image horizontally and vertically
- Random zoom: rescale each image between 90% and 110%
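If the Deep Learning Toolbox is available, the same kind of augmentation can also be expressed with imageDataAugmenter instead of the hand-written augment_dataset function. A minimal sketch, assuming the images live in the captcha_dataset folder; the resulting datastore is intended for trainNetwork-style training (see the CNN example later), not the hand-written loop:
% Equivalent augmentation with the Deep Learning Toolbox (optional alternative)
imds = imageDatastore('captcha_dataset', 'IncludeSubfolders', true, ...
    'LabelSource', 'foldernames');
aug = imageDataAugmenter( ...
    'RandRotation', [-15 15], ...       % degrees
    'RandXTranslation', [-3 3], ...     % pixels (~10% of 28)
    'RandYTranslation', [-3 3], ...
    'RandScale', [0.9 1.1]);
augimds = augmentedImageDatastore([28 28], imds, ...
    'ColorPreprocessing', 'rgb2gray', 'DataAugmentation', aug);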
Performance optimization tips
- Learning-rate scheduling:
% Decay the learning rate by 5% per epoch after epoch 30
% (placed at the end of the epoch loop in train_network, where the rate is named lr)
if epoch > 30
    lr = lr * 0.95;
end
- Batch normalization:
% Batch normalization applied to a layer's pre-activation
% (gamma and beta are additional learnable scale/shift parameters)
function [Z_out, norm_Z] = batch_norm(Z, gamma, beta)
    mu = mean(Z, 2);                      % per-feature mean over the batch
    sigma = std(Z, 0, 2);                 % per-feature standard deviation
    norm_Z = (Z - mu) ./ (sigma + 1e-8);  % standardize
    Z_out = gamma .* norm_Z + beta;       % learnable scale and shift
end
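A possible placement inside forward_prop, sketched under the assumption that gamma1 and beta1 are created alongside w1 and b1 (e.g. gamma1 = ones(hidden_layer_size, 1), beta1 = zeros(hidden_layer_size, 1)) and updated with their own gradients during backpropagation:
% Sketch: batch-normalized hidden layer inside forward_prop
Z1 = w1 * X + b1;
[Z1_bn, ~] = batch_norm(Z1, gamma1, beta1);
A1 = relu(Z1_bn);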
- Adaptive optimizer:
% Adam update, as a replacement for SGD with momentum
function [w, m, v] = adam_update(w, grad, m, v, t, lr)
    beta1 = 0.9;    % decay rate for the first-moment estimate
    beta2 = 0.999;  % decay rate for the second-moment estimate
    epsilon = 1e-8;
    m = beta1 * m + (1 - beta1) * grad;       % first-moment estimate
    v = beta2 * v + (1 - beta2) * (grad.^2);  % second-moment estimate
    m_hat = m / (1 - beta1^t);                % bias correction
    v_hat = v / (1 - beta2^t);
    w = w - lr * m_hat ./ (sqrt(v_hat) + epsilon);
end
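A possible way to wire this into the batch loop of train_network, sketched with hypothetical moment buffers (m_w1, v_w1, etc.) initialized to zeros of the matching sizes and a step counter t starting at 0:
% Sketch: replacing the momentum updates inside the batch loop with Adam
t = t + 1;
[w1, m_w1, v_w1] = adam_update(w1, dW1, m_w1, v_w1, t, lr);
[b1, m_b1, v_b1] = adam_update(b1, db1, m_b1, v_b1, t, lr);
[w2, m_w2, v_w2] = adam_update(w2, dW2, m_w2, v_w2, t, lr);
[b2, m_b2, v_b2] = adam_update(b2, db2, m_b2, v_b2, t, lr);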
- Transfer learning:
% Use a pretrained network as a feature extractor (requires the AlexNet support package).
% AlexNet expects 227x227x3 input, so the images are resized and replicated to 3 channels.
pretrained_net = alexnet;
train_rgb = repmat(imresize(train_data, [227 227]), [1, 1, 3, 1]);
features = activations(pretrained_net, train_rgb, 'fc7');
Extended applications
Multi-character CAPTCHA recognition:
- Add a character segmentation step (see the sketch after this list)
- Recognize each segmented character individually
- Combine the per-character results into the final string
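One simple segmentation approach, sketched under the assumption of dark characters on a light background in a hypothetical file multi_captcha.png, is to binarize the image and crop the bounding box of each connected component:
% Sketch: split a multi-character CAPTCHA into individual 28x28 characters
captcha = imread('multi_captcha.png');   % hypothetical input image
if size(captcha, 3) == 3
    captcha = rgb2gray(captcha);
end
bw = ~imbinarize(captcha);               % characters as foreground
bw = bwareaopen(bw, 20);                 % drop small noise blobs
stats = regionprops(bw, 'BoundingBox');
boxes = sortrows(reshape([stats.BoundingBox], 4, []).', 1);  % left-to-right order
chars = cell(1, size(boxes, 1));
for k = 1:size(boxes, 1)
    crop = imcrop(captcha, boxes(k, :));
    chars{k} = im2double(imresize(crop, [28 28]));  % ready for the classifier
end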
Convolutional neural network (CNN):
% Replace the fully connected network with a CNN (Deep Learning Toolbox)
layers = [
    imageInputLayer([28 28 1])
    convolution2dLayer(3, 32, 'Padding', 'same')
    batchNormalizationLayer
    reluLayer
    maxPooling2dLayer(2, 'Stride', 2)
    convolution2dLayer(3, 64, 'Padding', 'same')
    batchNormalizationLayer
    reluLayer
    fullyConnectedLayer(128)
    reluLayer
    fullyConnectedLayer(36)
    softmaxLayer
    classificationLayer];
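To actually train this architecture, the layer array is passed to trainNetwork together with training options. A minimal sketch, assuming train_data/val_data/test_data are 28x28x1xN arrays and the corresponding labels are categorical, as produced by the script above:
% Sketch: training the CNN with the Deep Learning Toolbox
options = trainingOptions('adam', ...
    'MaxEpochs', 20, ...
    'MiniBatchSize', 64, ...
    'ValidationData', {val_data, val_labels}, ...
    'Shuffle', 'every-epoch', ...
    'Plots', 'training-progress', ...
    'Verbose', false);
net = trainNetwork(train_data, train_labels, layers, options);
pred = classify(net, test_data);
fprintf('CNN test accuracy: %.2f%%\n', mean(pred == test_labels(:)) * 100);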
Real-time recognition system:
- Integrate camera input (see the sketch below)
- Add an image preprocessing stage
- Build a GUI front end
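Camera capture itself is only a few lines with the MATLAB Support Package for USB Webcams; the sketch below just grabs a frame and applies the same preprocessing used for training, with segmentation and the GUI layered on top of it:
% Sketch: grab a frame from a webcam and preprocess it for the classifier
% (requires the MATLAB Support Package for USB Webcams)
cam = webcam;                                    % connect to the default camera
frame = snapshot(cam);                           % capture one RGB frame
gray = rgb2gray(frame);
candidate = im2double(imresize(gray, [28 28]));  % same preprocessing as training
imshow(candidate, []);
clear cam;                                       % release the camera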