在文字识别OCR中,这种JSON格式的东西怎么转换成txt或者Word文档?HTTP/1.1 200 OK [Date: Mon, 25 Sep 2023 05:17:28 GMT, Content-Type: application/json;charset=UTF-8, Content-Length: 2519, Connection: keep-alive, Keep-Alive: timeout=25, Vary: Accept-Encoding, Vary: Accept-Encoding, Server: Tengine, X-Ca-Request-Id: 75574F4F-9B3A-4A9C-A460-C883475FC714]
13:17:28.966 [main] DEBUG org.apache.http.wire - << "{"page_list":[{"angle":0,"doc_index":1,"height":524,"orgHeight":524,"orgWidth":761,"page_id":0,"subject_list":[{"content_list_info":[{"doc_index":1,"pos":[{"x":6,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":9,"y":0}]}],"ids":[],"is_multipage":false,"prism_wordsInfo":[{"pos":[{"x":5,"y":0},{"x":2,"y":0},{"x":2,"y":0},{"x":6,"y":0}],"word":"5.[0xe8][0x8b][0xa5]"},{"pos":[{"x":2,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":2,"y":0}],"word":"x+y=3[0xef][0xbc][0x8c]xy=1[0xef][0xbc][0x8c]"},{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"[0xef][0xbc][0x8c][0xe5][0x88][0x99]"},{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"$$x ^ { 2 } + y ^ { 2 } =$$"},{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"."},{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"$$\left( x - y \right) ^ { 2 } =$$"}],"text":"5.[0xe8][0x8b][0xa5] x+y=3[0xef][0xbc][0x8c]xy=1[0xef][0xbc][0x8c] [0xef][0xbc][0x8c][0xe5][0x88][0x99] $$x ^ { 2 } + y ^ { 2 } =$$ . 
$$\left( x - y \right) ^ { 2 } =$$ "},{"content_list_info":[{"doc_index":1,"pos":[{"x":7,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":9,"y":0}]}],"ids":[],"is_multipage":false,"prism_wordsInfo":[{"pos":[{"x":8,"y":0},{"x":2,"y":0},{"x":3,"y":0},{"x":9,"y":0}],"word":"6.[0xe8][0x8b][0xa5]"},{"pos":[{"x":2,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":3,"y":0}],"word":"$$\left( x + y \right) ^ { 2 } = 9 [0xef][0xbc][0x8c] \left( x - y \right) ^ { 2 } = 5 [0xef][0xbc][0x8c]$$"},{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"[0xe5][0x88][0x99]xy="}],"text":"6.[0xe8][0x8b][0xa5] $$\left( x + y \right) ^ { 2 } = 9 [0xef][0xbc][0x8c] \left( x - y \right) ^ { 2 } = 5 [0xef][0xbc][0x8c]$$ [0xe5][0x88][0x99]xy= "},{"content_list_info":[{"doc_index":1,"pos":[{"x":11,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":13,"y":0}]}],"ids":[],"is_multipage":false,"prism_wordsInfo":[{"pos":[{"x":11,"y":0},{"x":4,"y":0},{"x":4,"y":0},{"x":12,"y":0}],"word":"7.[0xe8][0x8b][0xa5]"},{"pos":[{"x":3,"y":0},{"x":1,"y":0},{"x":1,"y":0},{"x":4,"y":0}],"word":"$$x ^ { 2 } + k x + 1 6$$"},{"pos":[{"x":1,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":1,"y":0}],"word":"[0xe6][0x98][0xaf][0xe5][0xae][0x8c][0xe5][0x85][0xa8][0xe5][0xb9][0xb3][0xe6][0x96][0xb9][0xe5][0x85][0xac][0xe5][0xbc][0x8f][0xef][0xbc][0x8c][0xe5][0x88][0x99][0xe6][0x95][0xb4][0xe6][0x95][0xb0]"},{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"k="}],"text":"7.[0xe8][0x8b][0xa5] $$x ^ { 2 } + k x + 1 6$$ [0xe6][0x98][0xaf][0xe5][0xae][0x8c][0xe5][0x85][0xa8][0xe5][0xb9][0xb3][0xe6][0x96][0xb9][0xe5][0x85][0xac][0xe5][0xbc][0x8f][0xef][0xbc][0x8c][0xe5][0x88][0x99][0xe6][0x95][0xb4][0xe6][0x95][0xb0] k= 
"},{"content_list_info":[{"doc_index":1,"pos":[{"x":15,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":16,"y":0}]}],"ids":[],"is_multipage":false,"prism_wordsInfo":[{"pos":[{"x":14,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":15,"y":0}],"word":"8.[0xe8][0xaf][0xa5][0xe5][0x9b][0xbe][0xe5][0x8f][0xaf][0xe4][0xbb][0xa5][0xe7][0x94][0xa8][0xe6][0x9d][0xa5][0xe9][0xaa][0x8c][0xe8][0xaf][0x81][0xe6][0x88][0x91][0xe4][0xbb][0xac][0xe5][0xad][0xa6][0xe8][0xbf][0x87][0xe7][0x9a][0x84][0xe4][0xb9][0x98][0xe6][0xb3][0x95][0xe5][0x85][0xac][0xe5][0xbc][0x8f]"}],"text":"8.[0xe8][0xaf][0xa5][0xe5][0x9b][0xbe][0xe5][0x8f][0xaf][0xe4][0xbb][0xa5][0xe7][0x94][0xa8][0xe6][0x9d][0xa5][0xe9][0xaa][0x8c][0xe8][0xaf][0x81][0xe6][0x88][0x91][0xe4][0xbb][0xac][0xe5][0xad][0xa6][0xe8][0xbf][0x87][0xe7][0x9a][0x84][0xe4][0xb9][0x98][0xe6][0xb3][0x95][0xe5][0x85][0xac][0xe5][0xbc][0x8f] "}],"width":761}],"requestId":"75574F4F-9B3A-4A9C-A460-C883475FC714"}"
13:17:28.967 [main] DEBUG org.apache.http.impl.conn.BasicClientConnectionManager - Releasing connection org.apache.http.impl.conn.ManagedClientConnectionImpl@19d37183
13:17:28.967 [main] DEBUG org.apache.http.impl.conn.BasicClientConnectionManager - Connection can be kept alive for 25000 MILLISECONDS
要将JSON格式的数据转换为txt或word文档,你需要首先解析JSON数据,然后将识别到的文本提取出来。以下是一个使用Python进行解析和转换的简单示例:
import json
# Sample OCR response used by the example below.
# It is written as adjacent raw-string pieces so that the JSON escapes
# (`\\left`, `\\right`, `\n`) reach the JSON parser unchanged instead of being
# interpreted by Python first.
# NOTE: the `[0x..]` tokens are kept exactly as they appear in the pasted
# HTTP wire log; they look like hex-escaped UTF-8 bytes of the Chinese
# characters, which a real API response would contain directly.
json_data = (
    r'{"page_list":[{"angle":0,"doc_index":1,"height":524,"orgHeight":524,"orgWidth":761,"page_id":0,"subject_list":['
    # Subject 1
    r'{"content_list_info":[{"doc_index":1,"pos":[{"x":6,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":9,"y":0}]}],"ids":[],"is_multipage":false,"prism_wordsInfo":['
    r'{"pos":[{"x":5,"y":0},{"x":2,"y":0},{"x":2,"y":0},{"x":6,"y":0}],"word":"5.[0xe8][0x8b][0xa5]"},'
    r'{"pos":[{"x":2,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":2,"y":0}],"word":"x+y=3[0xef][0xbc][0x8c]xy=1[0xef][0xbc][0x8c]"},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"[0xef][0xbc][0x8c][0xe5][0x88][0x99]"},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"$$x ^ { 2 } + y ^ { 2 } =$$"},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"."},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"$$\\left( x - y \\right) ^ { 2 } =$$"}'
    r'],"text":"5.[0xe8][0x8b][0xa5] x+y=3[0xef][0xbc][0x8c]xy=1[0xef][0xbc][0x8c] [0xef][0xbc][0x8c][0xe5][0x88][0x99] $$x ^ { 2 } + y ^ { 2 } =$$ . $$\\left( x - y \\right) ^ { 2 } =$$ "},'
    # Subject 2
    r'{"content_list_info":[{"doc_index":1,"pos":[{"x":7,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":9,"y":0}]}],"ids":[],"is_multipage":false,"prism_wordsInfo":['
    r'{"pos":[{"x":8,"y":0},{"x":2,"y":0},{"x":3,"y":0},{"x":9,"y":0}],"word":"6.[0xe8][0x8b][0xa5]"},'
    r'{"pos":[{"x":2,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":3,"y":0}],"word":"$$\\left( x + y \\right) ^ { 2 } = 9 [0xef][0xbc][0x8c] \\left( x - y \\right) ^ { 2 } = 5 [0xef][0xbc][0x8c]$$"},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"[0xe5][0x88][0x99]xy="}'
    # The line break that was embedded in this value is written as a JSON
    # "\n" escape; a raw newline inside a JSON string is not valid.
    r'],"text":"6.[0xe8][0x8b][0xa5]\n"},'
    # Subject 3
    r'{"content_list_info":[{"doc_index":1,"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}]}],"ids":[],"is_multipage":false,"prism_wordsInfo":['
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"[0xef][0xbc][0x8c][0xe5][0x88][0x99]"},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"$$x ^ { 2 } + y ^ { 2 } =$$"},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"."},'
    r'{"pos":[{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0},{"x":0,"y":0}],"word":"$$\\left( x - y \\right) ^ { 2 } =$$"}'
    r'],"text":"[0xef][0xbc][0x8c][0xe5][0x88][0x99] $$x ^ { 2 } + y ^ { 2 } =$$ . $$\\left( x - y \\right) ^ { 2 } =$$ "}'
    # Close subject_list, the page object, page_list and the outer object
    # (the last two closers were missing in the original literal).
    r'],"page_count":1,"total_page_count":1,"word_count":5}]}'
)
# Convert an OCR JSON response into a plain-text file.
def convert_json_to_txt(json_data, output_path='recognized_text.txt'):
    """Extract the recognized text from an OCR response and save it as text.

    The response is expected to look like
    ``{"page_list": [{"subject_list": [{"text": ..., "prism_wordsInfo":
    [{"word": ...}, ...]}, ...]}, ...]}``. Each ``subject_list`` entry
    becomes one line of output, in page order.

    Args:
        json_data: The OCR response, either as raw JSON (``str``, ``bytes``
            or ``bytearray``) or as an already-parsed ``dict``.
        output_path: File to write (UTF-8). Defaults to
            ``recognized_text.txt`` in the current directory.

    Returns:
        The text that was written to ``output_path``.

    Raises:
        json.JSONDecodeError: If ``json_data`` is text but not valid JSON.
        OSError: If ``output_path`` cannot be opened for writing.
    """
    if isinstance(json_data, (str, bytes, bytearray)):
        # strict=False lets raw control characters (e.g. a literal newline)
        # appear inside JSON string values instead of failing the parse.
        json_data = json.loads(json_data, strict=False)

    lines = []
    for page in json_data.get('page_list', []):
        for subject in page.get('subject_list', []):
            text = subject.get('text')
            if text is None:
                # No pre-joined text: rebuild it from the individual words.
                text = ' '.join(
                    info.get('word', '')
                    for info in subject.get('prism_wordsInfo', [])
                )
            # The values carry trailing spaces/newlines; drop them and skip
            # entries that contain nothing else.
            text = text.strip()
            if text:
                lines.append(text)

    result = '\n'.join(lines)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(result)
    return result
# Call the function to convert the JSON data
convert_json_to_txt(json_data)
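这个程序首先解析JSON数据,然后遍历每一页的识别结果,再遍历每一行的识别结果,最后将识别结果合并为一个字符串并保存为txt文档。如果需要Word文档,可以安装python-docx库,用Document()创建文档,对每一行文本调用add_paragraph(),最后用save('recognized_text.docx')保存。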
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。