应用场景
竖排繁体文字识别系统主要适用于以下场景:
古籍文献数字化:古代书籍多为竖排繁体,需要转换为现代横排格式
港澳台地区文档处理:这些地区的部分书籍和文档仍保留竖排繁体的排版习惯
书法作品识别:传统书法作品多为竖排繁体
历史档案整理:民国及更早时期的档案多为竖排繁体
学术研究:研究古代文献时需要将竖排繁体转为可编辑文本
工具下载:
咕嘎竖排繁体简体中文图片OCR文字识别专用版
百度网盘:https://pan.baidu.com/s/1eH5IKRbNTD5JSkcIXNJTcw?pwd=8888
腾讯云盘:https://share.weiyun.com/tUsrbtHp
原文参考:
https://mp.weixin.qq.com/s/D8QARsz0xvWRaAfaJtLqKQ
详细代码实现
- 图像预处理
import cv2
import numpy as np
from PIL import Image
def preprocess_image(image_path):
    """Load an image and prepare it for OCR.

    The image is converted to grayscale, binarized with Otsu's threshold,
    denoised, and finally passed through a morphological closing with a
    2x2 kernel.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The processed single-channel image produced by ``cv2.morphologyEx``.

    Raises:
        ValueError: If ``cv2.imread`` could not load the image.
    """
    # Read the image. cv2.imread reports failure by returning None instead
    # of raising, which would otherwise surface as an opaque cvtColor error.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(
            f"Cannot read image (missing or undecodable file): {image_path}"
        )
    # Convert to grayscale.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Binarize; with THRESH_OTSU the threshold argument (0) is ignored.
    _, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    # Remove noise.
    denoised = cv2.fastNlMeansDenoising(binary, h=10)
    # Enhance edges with a morphological closing.
    kernel = np.ones((2, 2), np.uint8)
    enhanced = cv2.morphologyEx(denoised, cv2.MORPH_CLOSE, kernel)
    return enhanced
竖排文字检测与旋转
def detect_text_direction(image):
    """Detect the text direction using OpenCV.

    A minimum-area rectangle is fitted around all pixels greater than 0 and
    its angle is normalized before being compared against a 15-degree
    threshold.

    Args:
        image: Image array; pixels with a value greater than 0 are used as
            the point set.

    Returns:
        90 when the normalized angle is below 15 degrees in magnitude (the
        text is treated as horizontal and needs a 90-degree rotation),
        otherwise 0. Also 0 when the image has no pixel greater than 0.
    """
    coords = np.column_stack(np.where(image > 0))
    # A blank image gives no points to fit a rectangle to; leave it as is.
    if coords.size == 0:
        return 0
    # np.where yields platform-sized integers (usually int64), while
    # cv2.minAreaRect only accepts 32-bit integer or float point sets.
    angle = cv2.minAreaRect(coords.astype(np.float32))[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    # An angle below 15 degrees is treated as horizontal text, which needs
    # to be rotated by 90 degrees.
    if abs(angle) < 15:
        return 90
    return 0
def rotate_image(image, angle):
    """Rotate an image about its center without cropping it.

    Args:
        image: Image array whose first two dimensions are height and width.
        angle: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image. The output canvas is enlarged to the bounding
        box of the rotated image, so a 90-degree rotation of a non-square
        page keeps all of its content. For an angle that is a multiple of
        360 an unchanged copy of the input is returned.
    """
    # Nothing to do for a full turn; skip the resampling entirely.
    if angle % 360 == 0:
        return image.copy()

    (h, w) = image.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Size of the bounding box of the rotated image. Warping onto the
    # original (w, h) canvas would cut off whatever falls outside of it.
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])
    new_w = int(round(h * sin + w * cos))
    new_h = int(round(h * cos + w * sin))

    # Shift the transform so the rotated image is centered on the new canvas.
    M[0, 2] += new_w / 2 - center[0]
    M[1, 2] += new_h / 2 - center[1]

    rotated = cv2.warpAffine(
        image,
        M,
        (new_w, new_h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return rotated
- 使用咕嘎OCR进行竖排繁体识别
import requests
import json
import base64
def gugu_ocr_vertical(image_path, lang='zh-Hant', timeout=30):
    """Recognize vertical text in an image through the Gugu OCR HTTP API.

    The image is preprocessed, rotated according to the detected text
    direction, PNG-encoded, base64-encoded and posted as JSON.

    Args:
        image_path: Path of the image file to recognize.
        lang: Language code sent in the ``lang`` field of the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The recognized text taken from ``result['data']['text']``.

    Raises:
        RuntimeError: If the image cannot be PNG-encoded, the response is
            not valid JSON, or the API reports a code other than 200.
    """
    # Read and preprocess the image.
    img = preprocess_image(image_path)
    # Detect the text direction and rotate accordingly.
    angle = detect_text_direction(img)
    rotated_img = rotate_image(img, angle)
    # Convert to base64; imencode reports failure through its first value.
    encoded_ok, buffer = cv2.imencode('.png', rotated_img)
    if not encoded_ok:
        raise RuntimeError(f"Cannot encode image as PNG: {image_path}")
    img_base64 = base64.b64encode(buffer).decode('utf-8')
    # Call the Gugu OCR API.
    url = "https://api.guguocr.com/v1/recognize"
    headers = {'Content-Type': 'application/json'}
    payload = {
        'image': img_base64,
        'lang': lang,
        'vertical': True,  # mark the input as vertical text
        'config': {
            'preserve_layout': False,
            'output_format': 'plain'
        }
    }
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.post(
        url, headers=headers, data=json.dumps(payload), timeout=timeout
    )
    try:
        result = response.json()
    except ValueError as err:
        # A gateway error page or an empty body is not JSON.
        raise RuntimeError(
            f"OCR识别失败: HTTP {response.status_code}"
        ) from err
    if result.get('code') == 200:
        return result['data']['text']
    # .get keeps the intended error from being masked by a KeyError when
    # the failure payload carries no 'message' field.
    raise RuntimeError(f"OCR识别失败: {result.get('message')}")
竖排转横排处理
def vertical_to_horizontal(text):
    """Transpose column-oriented text into row-oriented text.

    Vertical text is usually arranged by columns and has to be converted
    into rows: the lines are padded with spaces to a common length and the
    resulting character matrix is transposed.

    Args:
        text: Newline-separated text.

    Returns:
        Newline-joined text whose i-th line holds the i-th character of
        every input line; lines shorter than the longest one contribute a
        space.
    """
    lines = text.split('\n')
    max_len = max(len(line) for line in lines)
    # Pad every line so that all of them have the same length.
    padded_lines = [line.ljust(max_len) for line in lines]
    # Transposing the matrix turns the columns into rows.
    horizontal_text = '\n'.join(
        ''.join(column) for column in zip(*padded_lines)
    )
    return horizontal_text
- 繁体转简体
from opencc import OpenCC
def traditional_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese via OpenCC.

    Args:
        text: Text to convert.

    Returns:
        The result of ``OpenCC('t2s').convert(text)``.
    """
    # 't2s' is the Traditional-to-Simplified conversion.
    return OpenCC('t2s').convert(text)
主流程整合
def process_vertical_text(image_path, output_format='horizontal_simplified'):
    """Run the full pipeline: OCR, optional transposition, optional t2s.

    Args:
        image_path: Path of the image file to process.
        output_format: Controls the post-processing by substring: the text
            is transposed to horizontal layout when it contains
            ``'horizontal'`` and converted to Simplified Chinese when it
            contains ``'simplified'``.

    Returns:
        The recognized text after the selected post-processing steps.
    """
    # 1. OCR recognition.
    traditional_text = gugu_ocr_vertical(image_path)
    # 2. Vertical-to-horizontal conversion.
    if 'horizontal' in output_format:
        text = vertical_to_horizontal(traditional_text)
    else:
        text = traditional_text
    # 3. Traditional-to-Simplified conversion.
    if 'simplified' in output_format:
        text = traditional_to_simplified(text)
    return text
优化与总结
优化策略
性能优化:
实现本地缓存机制,避免重复处理相同图片
使用多线程处理批量图片
对小型图片进行适当放大以提高识别率
识别率优化:
结合多种OCR引擎结果进行投票选择
实现后处理校正算法,基于统计语言模型修正识别错误
针对特定古籍字体训练专用模型
用户体验优化:
添加实时预览功能,显示识别结果与原始图片对比
实现拖拽上传和多文件批量处理
添加历史记录功能,方便用户查看之前的处理记录
完整示例代码
import cv2
import numpy as np
import requests
import json
import base64
from PIL import Image
from opencc import OpenCC
class VerticalTextOCR:
    """Pipeline for recognizing vertical Traditional Chinese text in images.

    Combines image preprocessing, text-direction detection, rotation, a call
    to the Gugu OCR HTTP API, vertical-to-horizontal transposition and
    Traditional-to-Simplified conversion.
    """

    def __init__(self):
        """Create the OpenCC converter used by ``process``."""
        # The initializer must be spelled __init__; a method named ``init``
        # is never called on construction and leaves ``self.cc`` undefined.
        self.cc = OpenCC('t2s')

    def preprocess_image(self, image_path):
        """Load an image and prepare it for OCR.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The grayscale, Otsu-binarized, denoised and morphologically
            closed image.

        Raises:
            ValueError: If ``cv2.imread`` could not load the image.
        """
        img = cv2.imread(image_path)
        # cv2.imread reports failure by returning None instead of raising.
        if img is None:
            raise ValueError(
                f"Cannot read image (missing or undecodable file): {image_path}"
            )
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        denoised = cv2.fastNlMeansDenoising(binary, h=10)
        kernel = np.ones((2, 2), np.uint8)
        enhanced = cv2.morphologyEx(denoised, cv2.MORPH_CLOSE, kernel)
        return enhanced

    def detect_text_direction(self, image):
        """Return the rotation (90 or 0 degrees) suggested for ``image``.

        Args:
            image: Image array; pixels greater than 0 form the point set
                for the minimum-area rectangle.

        Returns:
            90 when the normalized rectangle angle is below 15 degrees in
            magnitude, otherwise 0. Also 0 for an image without any pixel
            greater than 0.
        """
        coords = np.column_stack(np.where(image > 0))
        # A blank image gives no points to fit a rectangle to.
        if coords.size == 0:
            return 0
        # np.where yields platform-sized integers (usually int64), while
        # cv2.minAreaRect only accepts 32-bit integer or float point sets.
        angle = cv2.minAreaRect(coords.astype(np.float32))[-1]
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        return 90 if abs(angle) < 15 else 0

    def rotate_image(self, image, angle):
        """Rotate an image about its center without cropping it.

        Args:
            image: Image array whose first two dimensions are height and
                width.
            angle: Rotation angle in degrees.

        Returns:
            The rotated image on a canvas enlarged to the rotated bounding
            box, or an unchanged copy when ``angle`` is a multiple of 360.
        """
        # Nothing to do for a full turn; skip the resampling entirely.
        if angle % 360 == 0:
            return image.copy()

        (h, w) = image.shape[:2]
        center = (w / 2, h / 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Warping onto the original (w, h) canvas would cut off a non-square
        # image, so compute the bounding box of the rotated image instead.
        cos = abs(M[0, 0])
        sin = abs(M[0, 1])
        new_w = int(round(h * sin + w * cos))
        new_h = int(round(h * cos + w * sin))

        # Shift the transform so the result is centered on the new canvas.
        M[0, 2] += new_w / 2 - center[0]
        M[1, 2] += new_h / 2 - center[1]

        rotated = cv2.warpAffine(
            image,
            M,
            (new_w, new_h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )
        return rotated

    def gugu_ocr(self, image_path, lang='zh-Hant', timeout=30):
        """Recognize vertical text in an image through the Gugu OCR API.

        Args:
            image_path: Path of the image file to recognize.
            lang: Language code sent in the ``lang`` field of the request.
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            The recognized text taken from ``result['data']['text']``.

        Raises:
            RuntimeError: If the image cannot be PNG-encoded, the response
                is not valid JSON, or the API reports a code other than 200.
        """
        img = self.preprocess_image(image_path)
        angle = self.detect_text_direction(img)
        rotated_img = self.rotate_image(img, angle)
        # imencode reports failure through its first return value.
        encoded_ok, buffer = cv2.imencode('.png', rotated_img)
        if not encoded_ok:
            raise RuntimeError(f"Cannot encode image as PNG: {image_path}")
        img_base64 = base64.b64encode(buffer).decode('utf-8')
        url = "https://api.guguocr.com/v1/recognize"
        headers = {'Content-Type': 'application/json'}
        payload = {
            'image': img_base64,
            'lang': lang,
            'vertical': True,
            'config': {
                'preserve_layout': False,
                'output_format': 'plain'
            }
        }
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(
            url, headers=headers, data=json.dumps(payload), timeout=timeout
        )
        try:
            result = response.json()
        except ValueError as err:
            # A gateway error page or an empty body is not JSON.
            raise RuntimeError(
                f"OCR识别失败: HTTP {response.status_code}"
            ) from err
        if result.get('code') == 200:
            return result['data']['text']
        # .get keeps the intended error from being masked by a KeyError.
        raise RuntimeError(f"OCR识别失败: {result.get('message')}")

    def vertical_to_horizontal(self, text):
        """Transpose column-oriented text into row-oriented text.

        Args:
            text: Newline-separated text.

        Returns:
            Newline-joined text whose i-th line holds the i-th character of
            every input line; shorter lines contribute a space.
        """
        lines = text.split('\n')
        max_len = max(len(line) for line in lines)
        # Pad to a rectangle so the transpose keeps every character.
        padded_lines = [line.ljust(max_len) for line in lines]
        horizontal_text = '\n'.join(
            ''.join(column) for column in zip(*padded_lines)
        )
        return horizontal_text

    def process(self, image_path, output_format='horizontal_simplified'):
        """Run OCR and the post-processing selected by ``output_format``.

        Args:
            image_path: Path of the image file to process.
            output_format: The text is transposed when this contains
                ``'horizontal'`` and converted to Simplified Chinese when
                it contains ``'simplified'``.

        Returns:
            The recognized text after the selected post-processing steps.
        """
        traditional_text = self.gugu_ocr(image_path)
        if 'horizontal' in output_format:
            text = self.vertical_to_horizontal(traditional_text)
        else:
            text = traditional_text
        if 'simplified' in output_format:
            text = self.cc.convert(text)
        return text
使用示例
# Script entry point. The guard must compare the dunder ``__name__`` with
# "__main__"; the bare names ``name`` / "main" raise NameError at import.
if __name__ == "__main__":
    ocr = VerticalTextOCR()
    result = ocr.process("ancient_book_page.png")
    print("识别结果:")
    print(result)
总结
本系统实现了竖排繁体图片文字的识别、旋转、横排转换和繁简转换的全流程处理。
关键点包括:
- 使用图像预处理技术提高OCR识别率
- 自动检测文字方向并进行适当旋转
- 利用咕嘎OCR的竖排文字识别能力
- 实现竖排到横排的矩阵转换算法
- 整合OpenCC实现高质量的繁简转换
未来可进一步优化的方向包括:
- 添加深度学习模型来提高古籍异体字的识别率
- 实现更智能的排版保留功能
- 开发桌面版和移动端应用
- 添加批处理和自动化工作流功能
此系统特别适合古籍数字化、历史研究、跨地区文档处理等场景,能够有效提高竖排繁体文字的处理效率。