# 粤港澳大湾区（黄埔）国际算法算例大赛-古籍文档图像识别与分析（下）

## 2.训练配置

ch_PP-OCRv3_det_cml.yml

# Global run settings for the PP-OCRv3 CML text-detection distillation job.
Global:
  character_dict_path: ../mb.txt  # custom dictionary
  debug: false
  use_gpu: true
  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/ch_PP-OCR_v3_det/
  save_epoch_step: 100
  # [start_iteration, interval]: evaluate every 400 iterations from iteration 0
  eval_batch_step:
    - 0
    - 400
  cal_metric_during_train: false
  pretrained_model: null
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_en/img_10.jpg
  save_res_path: ./checkpoints/det_db/predicts_db.txt
  distributed: true
# Distillation architecture: two MobileNetV3 students and one frozen
# ResNet_vd (50-layer) teacher, all using the DB detection algorithm.
# NOTE(review): every `Head: / name: DBHead` pair below was absent from the
# flattened text (its `k` / `kernel_list` values were stranded under `Neck`);
# restored to match the upstream PaddleOCR CML config - confirm.
Architecture:
  name: DistillationModel
  algorithm: Distillation
  model_type: det
  Models:
    Student:
      pretrained: null
      model_type: det
      algorithm: DB
      Transform: null
      Backbone:
        name: MobileNetV3
        scale: 0.5
        model_name: large
        disable_se: true
      Neck:
        name: RSEFPN
        out_channels: 96
        shortcut: true
      Head:
        name: DBHead
        k: 50
    Student2:
      pretrained: null
      model_type: det
      algorithm: DB
      Transform: null
      Backbone:
        name: MobileNetV3
        scale: 0.5
        model_name: large
        disable_se: true
      Neck:
        name: RSEFPN
        out_channels: 96
        shortcut: true
      Head:
        name: DBHead
        k: 50
    Teacher:
      # NOTE(review): the teacher is frozen but no pretrained weights are
      # given; the students would then distill from an untrained network.
      # Supply the teacher checkpoint path here - confirm.
      pretrained: null
      freeze_params: true
      return_all_feats: false
      model_type: det
      algorithm: DB
      Backbone:
        name: ResNet_vd
        in_channels: 3
        layers: 50
      Neck:
        name: LKPAN
        out_channels: 256
      Head:
        name: DBHead
        kernel_list: [7, 2, 2]
        k: 50
# Combined distillation loss: teacher->student dilated DB loss, mutual
# learning between the two students, and the plain DB loss on each student.
Loss:
  name: CombinedLoss
  loss_config_list:
    # NOTE(review): this item's header line was missing from the flattened
    # text. The training log reports `dila_dbloss_Student_Teacher`, which
    # corresponds to DistillationDilaDBLoss - confirm.
    - DistillationDilaDBLoss:
        weight: 1.0
        model_name_pairs:
          - ["Student", "Teacher"]
          - ["Student2", "Teacher"]
        key: maps
        balance_loss: true
        main_loss_type: DiceLoss
        alpha: 5
        beta: 10
        ohem_ratio: 3
    - DistillationDMLLoss:
        # `model_name_pairs` appeared twice in this mapping; duplicate keys
        # are invalid YAML and most parsers silently keep the last one, so
        # only that last value is retained here.
        model_name_pairs: ["Student", "Student2"]
        maps_name: "thrink_maps"
        weight: 1.0
        key: maps
    - DistillationDBLoss:
        weight: 1.0
        model_name_list: ["Student", "Student2"]
        balance_loss: true
        main_loss_type: DiceLoss
        alpha: 5
        beta: 10
        ohem_ratio: 3
# Optimizer with a cosine learning-rate schedule and L2 regularization.
Optimizer:
  # NOTE(review): the optimizer `name` was missing from the flattened text;
  # `beta1`/`beta2` imply an Adam-family optimizer and the upstream config
  # uses Adam - confirm.
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Cosine
    learning_rate: 0.001
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 5.0e-05
# DB post-processing applied to the "Student" model's output.
PostProcess:
  name: DistillationDBPostProcess
  model_name: ["Student"]
  thresh: 0.3
  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
# Detection metric evaluated on the "Student" model; hmean is the main indicator.
Metric:
  name: DistillationMetric
  base_metric_name: DetMetric
  main_indicator: hmean
  key: "Student"
# Training dataset and data loader
Train:
  dataset:
    name: SimpleDataSet
    data_dir: /home/aistudio/dataset/train/image
    label_file_list:
      - /home/aistudio/dataset/train/label.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null
      - CopyPaste: null
      - IaaAugment:
          augmenter_args:
            - type: Fliplr
              args:
                p: 0.5
            - type: Affine
              args:
                rotate:
                  - -10
                  - 10
            - type: Resize
              args:
                size:
                  - 0.5
                  - 3
      - EastRandomCropData:
          size:
            - 960
            - 960
          max_tries: 50
          keep_ratio: true
      - MakeBorderMap:
          shrink_ratio: 0.4
          thresh_min: 0.3
          thresh_max: 0.7
      - MakeShrinkMap:
          shrink_ratio: 0.4
          min_text_size: 8
      - NormalizeImage:
          scale: 1./255.
          mean:
            - 0.485
            - 0.456
            - 0.406
          std:
            - 0.229
            - 0.224
            - 0.225
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          # NOTE(review): `threshold_mask` and `shrink_mask` were absent from
          # the flattened text; the upstream DB configs keep all five keys in
          # this order - confirm.
          keep_keys:
            - image
            - threshold_map
            - threshold_mask
            - shrink_map
            - shrink_mask
  # NOTE(review): the `loader:` line was missing from the flattened text;
  # restored so the batching options are not read as dataset keys - confirm.
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 12
    num_workers: 4
# Evaluation dataset and data loader
# NOTE(review): evaluation reads the same images and label file as training
# (train/label.txt), so the reported metric is not a held-out score - confirm
# whether a separate validation split was intended.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /home/aistudio/dataset/train/image
    label_file_list:
      - /home/aistudio/dataset/train/label.txt
    transforms:
      # NOTE(review): the `- DecodeImage:` line was missing, leaving
      # `img_mode` / `channel_first` orphaned; restored - confirm.
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null  # class handling label
      - DetResizeForTest: null
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  # NOTE(review): the `loader:` line was missing from the flattened text;
  # restored - confirm.
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 1  # must be 1
    num_workers: 2
# Copy the detection config into the PaddleOCR config directory
!cp ~/ch_PP-OCRv3_det_cml.yml ~/PaddleOCR/configs/det/ch_PP-OCRv3/
# `%export` is not an IPython magic; `%env` sets the variable for the kernel
# and for the `!` subprocesses started from it. The value is taken literally,
# so it is written without quotes.
%env CUDA_VISIBLE_DEVICES=0,1,2,3
# Single-process alternative:
# !python tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Optimizer.base_lr=0.0001
# Multi-GPU training on cards 0-3.
# NOTE(review): the config defines `Optimizer.lr.learning_rate`, not
# `Optimizer.base_lr`; confirm that this override has the intended effect.
!python3 -m paddle.distributed.launch --ips="localhost" --gpus '0,1,2,3' tools/train.py -c  configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Optimizer.base_lr=0.0001
[2022/09/13 20:07:57] ppocr INFO: epoch: [121/500], global_step: 2050, lr: 0.000869, dila_dbloss_Student_Teacher: 1.288603, dila_dbloss_Student2_Teacher: 1.283752, loss: 6.739372, dml_thrink_maps_0: 0.002895, db_Student_loss_shrink_maps: 1.410274, db_Student_loss_threshold_maps: 0.388946, db_Student_loss_binary_maps: 0.280807, db_Student2_loss_shrink_maps: 1.414650, db_Student2_loss_threshold_maps: 0.391234, db_Student2_loss_binary_maps: 0.281578, avg_reader_cost: 7.17047 s, avg_batch_cost: 11.48100 s, avg_samples: 12.0, ips: 1.04521 samples/s, eta: 20:44:22

# 五、识别数据集准备

# Run ppocr/utils/gen_label.py in "rec" mode to convert the official ground
# truth files into recognition label files, once for the training list and
# once for the evaluation list.
!python ppocr/utils/gen_label.py --mode="rec" --input_path="../dataset/train/train.txt" --output_label="../dataset/train/train_rec_gt_label.txt"
!python ppocr/utils/gen_label.py --mode="rec" --input_path="../dataset/train/eval.txt" --output_label="../dataset/train/eval_rec_gt_label.txt"

# 六、识别模型训练

## 1.预训练模型下载

# Enter the pretrained-model directory and unpack the PP-OCRv3 recognition
# training weights.
# NOTE(review): no download command appears in this cell, so the tar archive
# is presumably already present in this directory - confirm.
%cd ~/PaddleOCR/pretrain_models
!tar -xvf  ch_PP-OCRv3_rec_train.tar

## 2.配置训练参数

# Global run settings for the PP-OCRv3 recognition distillation job.
Global:
  debug: false
  use_gpu: true
  epoch_num: 800
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec_ppocr_v3_distillation
  save_epoch_step: 3
  eval_batch_step: [0, 2000]
  cal_metric_during_train: true
  # Pretrained model
  pretrained_model: pretrain_models/ch_PP-OCRv3_rec_train//best_accuracy.pdparams
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_words/ch/word_1.jpg
  # Custom character dictionary
  character_dict_path: ../mb.txt
  # Anchor reused by the model heads through *max_text_length
  max_text_length: &max_text_length 25
  infer_mode: false
  use_space_char: true
  distributed: true
  save_res_path: ./output/rec/predicts_ppocrv3_distillation.txt
# Optimizer with a piecewise learning-rate schedule and L2 regularization.
Optimizer:
  # NOTE(review): the optimizer `name` was missing from the flattened text;
  # `beta1`/`beta2` imply an Adam-family optimizer and the upstream config
  # uses Adam - confirm.
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Piecewise
    decay_epochs: [700, 800]
    values: [0.0005, 0.00005]
    warmup_epoch: 5
  regularizer:
    name: L2
    factor: 3.0e-05
# Distillation architecture: Teacher and Student are identical SVTR models on
# a MobileNetV1Enhance backbone, and neither is frozen.
# NOTE(review): the `Head: / name: MultiHead / head_list:` wrapper and the
# `- CTCHead:` / `- SARHead:` entries were absent from the flattened text.
# They are restored to match the upstream PP-OCRv3 distillation config, which
# the CTC and SAR losses in this file rely on - confirm.
Architecture:
  model_type: &model_type "rec"
  name: DistillationModel
  algorithm: Distillation
  Models:
    Teacher:
      pretrained: null
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: SVTR
      Transform: null
      Backbone:
        name: MobileNetV1Enhance
        scale: 0.5
        last_conv_stride: [1, 2]
        last_pool_type: avg
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 64
                depth: 2
                hidden_dims: 120
                use_guide: true
              Head:
                fc_decay: 0.00001
          - SARHead:
              enc_dim: 512
              max_text_length: *max_text_length
    Student:
      pretrained: null
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: SVTR
      Transform: null
      Backbone:
        name: MobileNetV1Enhance
        scale: 0.5
        last_conv_stride: [1, 2]
        last_pool_type: avg
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 64
                depth: 2
                hidden_dims: 120
                use_guide: true
              Head:
                fc_decay: 0.00001
          - SARHead:
              enc_dim: 512
              max_text_length: *max_text_length
# Combined distillation loss: mutual learning on the CTC and SAR heads, an L2
# distance on the backbone output, and the CTC and SAR losses on both models.
# NOTE(review): the `key: head_out`, `multi_head` and `dis_head` lines were
# absent from the flattened text; without them the two DistillationDMLLoss
# entries differ only by name and weight. Restored to match the upstream
# PP-OCRv3 distillation config - confirm.
Loss:
  name: CombinedLoss
  loss_config_list:
    - DistillationDMLLoss:
        weight: 1.0
        act: "softmax"
        use_log: true
        model_name_pairs:
          - ["Student", "Teacher"]
        key: head_out
        multi_head: true
        dis_head: ctc
        name: dml_ctc
    - DistillationDMLLoss:
        weight: 0.5
        act: "softmax"
        use_log: true
        model_name_pairs:
          - ["Student", "Teacher"]
        key: head_out
        multi_head: true
        dis_head: sar
        name: dml_sar
    - DistillationDistanceLoss:
        weight: 1.0
        mode: "l2"
        model_name_pairs:
          - ["Student", "Teacher"]
        key: backbone_out
    - DistillationCTCLoss:
        weight: 1.0
        model_name_list: ["Student", "Teacher"]
        key: head_out
        multi_head: true
    - DistillationSARLoss:
        weight: 1.0
        model_name_list: ["Student", "Teacher"]
        key: head_out
        multi_head: true
# CTC label decoding for both the Student and the Teacher outputs.
PostProcess:
  name: DistillationCTCLabelDecode
  model_name: ["Student", "Teacher"]
  # NOTE(review): these two keys were absent from the flattened text; restored
  # to match the upstream multi-head distillation config - confirm.
  key: head_out
  multi_head: true
# Recognition metric evaluated on the "Student" model; accuracy is the main indicator.
Metric:
  name: DistillationMetric
  base_metric_name: RecMetric
  main_indicator: acc
  key: "Student"
  ignore_space: false
# Training dataset (paths changed for this competition) and data loader
Train:
  dataset:
    name: SimpleDataSet
    data_dir: /home/aistudio/dataset/train/image
    ext_op_transform_idx: 1
    label_file_list:
      - /home/aistudio/dataset/train/train_rec_gt_label.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - RecConAug:
          prob: 0.5
          ext_data_num: 2
          image_shape: [48, 320, 3]
      - RecAug: null
      - MultiLabelEncode: null
      - RecResizeImg:
          image_shape: [3, 48, 320]
      - KeepKeys:
          keep_keys:
            - image
            - label_ctc
            - label_sar
            - length
            - valid_ratio
  # NOTE(review): the `loader:` line was missing from the flattened text;
  # restored so the batching options are not read as dataset keys - confirm.
  loader:
    shuffle: true
    batch_size_per_card: 128
    drop_last: true
    num_workers: 4
# Evaluation dataset (paths changed for this competition) and data loader
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /home/aistudio/dataset/train/image
    ext_op_transform_idx: 1
    label_file_list:
      - /home/aistudio/dataset/train/eval_rec_gt_label.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - MultiLabelEncode: null
      - RecResizeImg:
          image_shape: [3, 48, 320]
      - KeepKeys:
          keep_keys:
            - image
            - label_ctc
            - label_sar
            - length
            - valid_ratio
  # NOTE(review): the `loader:` line was missing from the flattened text;
  # restored - confirm.
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    num_workers: 4
# Return to the home directory and copy the finished recognition config into
# the PaddleOCR config directory
%cd ~
!cp  ~/ch_PP-OCRv3_rec_distillation.yml ~/PaddleOCR/configs/rec/PP-OCRv3/

## 3.模型训练

%cd ~/PaddleOCR/
# Multi-GPU training; the --gpus argument selects the card ids
!python -m paddle.distributed.launch --gpus '0,1,2,3'  tools/train.py -c configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml

# 七、串联推理

## 1.模型导出

# Export the detection model to ./inference/det
# NOTE(review): weights are read from ./my_exps/det/, while the detection
# config saves checkpoints under ./output/ch_PP-OCR_v3_det/ - confirm the
# best checkpoint was copied to my_exps first.
!python tools/export_model.py -c  configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml  -o Global.pretrained_model=./my_exps/det/best_accuracy Global.save_inference_dir=./inference/det
# Export the recognition model to ./inference/rec
# NOTE(review): weights are read from ./my_exps/rec/, while the recognition
# config saves checkpoints under ./output/rec_ppocr_v3_distillation - confirm.
!python tools/export_model.py -c configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml -o Global.pretrained_model=./my_exps/rec/best_accuracy Global.save_inference_dir=./inference/rec

## 2.串联推理

# Run the chained detection + recognition pipeline on one training image,
# using the exported inference models
! python tools/infer/predict_system.py \
--det_model_dir=inference/det \
--rec_model_dir=inference/rec \
--image_dir="/home/aistudio/dataset/train/image/image_0.jpg" \
--rec_image_shape=3,48,320
# Show the image
# NOTE(review): `plt` and `img` are not defined in this cell; presumably an
# earlier cell imports matplotlib.pyplot and loads the result image - confirm.
plt.figure(figsize=(10, 8))
plt.imshow(img)

|
15天前
|

|
4天前
|

【眼疾病识别】图像识别+深度学习技术+人工智能+卷积神经网络算法+计算机课设+Python+TensorFlow

30 9
|
14天前
|

【7月更文挑战第23天】在Python编程中，掌握算法复杂度—时间与空间消耗，是提升程序效能的关键。算法如冒泡排序（$O(n^2)$时间/$O(1)$空间），或使用Python内置函数找最大值（$O(n)$时间），需精确诊断与优化。数据结构如哈希表可将查找从$O(n)$降至$O(1)$。运用timeit模块评估性能，深入理解数据结构和算法，使Python代码更高效。持续实践与学习，精通复杂度管理。
35 9
|
15天前
|

【7月更文挑战第22天】在大数据领域，Python算法效率至关重要。本文深入解析时间与空间复杂度，用大O表示法衡量执行时间和存储需求。通过冒泡排序(O(n^2)时间，O(1)空间)与快速排序(平均O(n log n)时间，O(log n)空间)实例，展示Python代码实现与复杂度分析。策略包括算法适配、分治法应用及空间换取时间优化。掌握这些，可提升大数据处理能力，持续学习实践是关键。
29 1
|
15天前
|

【7月更文挑战第22天】在编程中，时间复杂度和空间复杂度是评估算法效率的关键。时间复杂度衡量执行时间随数据量增加的趋势，空间复杂度关注算法所需的内存。在实际应用中，开发者需权衡两者，根据场景选择合适算法，如快速排序（平均O(n log n)，最坏O(n^2)，空间复杂度O(log n)至O(n)）适合大规模数据，而归并排序（稳定O(n log n)，空间复杂度O(n)）在内存受限或稳定性要求高时更有利。通过优化，如改进基准选择或减少复制，可平衡这两者。理解并智慧地选择算法是提升代码效率的关键。
23 1
|
1天前
|

【数据挖掘】PCA 主成分分析算法过程及原理讲解

7 0
|
1天前
|

【数据挖掘】十大算法之PageRank连接分析算法

6 0
|
26天前
|

【7月更文挑战第11天】快速排序是编程基础，以O(n log n)时间复杂度和原址排序著称。其核心是“分而治之”，通过选择基准元素分割数组并递归排序两部分。优化包括：选择中位数作基准、尾递归优化、小数组用简单排序。以下是一个考虑优化的Python实现片段，展示了随机基准选择。通过实践和优化，能提升算法技能。**
28 3
|
1天前
|

【8月更文挑战第5天】随着人工智能技术的飞速发展，深度学习已成为图像识别领域的核心技术之一。本文将介绍深度学习在图像识别中的基本原理和应用实例，并通过代码示例展示如何利用深度学习进行图像识别任务的实现。
11 4
|
2天前
|

【8月更文挑战第4天】本文将探讨深度学习技术在图像识别领域的应用，并通过实际案例展示其在解决现实问题中的有效性。我们将介绍一些基本的深度学习模型和算法，并提供代码示例来说明如何实现这些技术。通过本文的学习，读者将能够理解深度学习技术在图像识别中的重要性和应用价值。
10 5