1.文档参考地址
https://docmind.console.aliyun.com/file/largeModelVersion
2.文档智能解析
2.1样式效果如下
2.2SDK在线文档
https://help.aliyun.com/zh/document-mind/developer-reference/docstructure?spm=a2c4g.11186623.0.0.4d5810faBqxTpF
2.3使用python语言调用sdk
2.3.1下载相关依赖包
pip install alibabacloud_tea_openapi
pip install alibabacloud_docmind_api20220711==1.4.1
2.3.2配置身份认证
https://help.aliyun.com/zh/sdk/developer-reference/v2-manage-python-access-credentials?spm=a2c4g.11186623.0.i8
2.3.3如果待处理的文件较大,导致处理耗时较长,需要增加以下超时配置
建立连接超时时间
config.connect_timeout = 60000
读取资源超时时间
config.read_timeout = 60000
2.3.4处理本地文件
```python
# -*- coding: utf-8 -*-
import sys
from typing import List
from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_tea_util import models as util_models
from alibabacloud_credentials.client import Client as CredClient
def create_client() -> docmind_api20220711Client:
    """Build a Document Mind API client authenticated with the default credentials.

    The access key pair is read through the Credentials client, so access
    credentials must be configured before this sample is run.

    @return: Client
    @throws Exception
    """
    # Initialise the Credentials client with the default credential provider
    # (this sample relies on the default Credentials configuration file).
    cred = CredClient()
    config = open_api_models.Config(
        # AccessKey ID obtained from the configured credentials.
        access_key_id=cred.get_access_key_id(),
        # AccessKey Secret obtained from the configured credentials.
        access_key_secret=cred.get_access_key_secret(),
    )
    # Service endpoint to call. A plain string literal is enough here: the
    # value contains no placeholders, so no f-string is needed.
    config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com'
    return docmind_api20220711Client(config)
def main() -> None:
    """Submit a local PDF file for document structure parsing and print the job id.

    The file ``./example.pdf`` is opened in binary mode and streamed to the
    service. Failures of the API call are reported on stderr instead of being
    silently discarded; a failure to open the local file still propagates to
    the caller, as before.
    """
    client = create_client()
    runtime = util_models.RuntimeOptions()
    # Use a context manager so the file handle is always closed, even when the
    # request fails.
    with open("./example.pdf", "rb") as file_stream:
        request = docmind_api20220711_models.SubmitDocStructureJobAdvanceRequest(
            # file_url_object: stream of the local file.
            file_url_object=file_stream,
            # file_name: file name; it must include the file type.
            file_name='123.pdf',
            # file_name_extension: file extension; the original sample notes
            # that it is an alternative to file_name (either one may be given).
            file_name_extension='pdf',
        )
        try:
            response = client.submit_doc_structure_job_advance(request, runtime)
            # The response is nested as body -> data -> attribute, and the
            # attribute names start with a lower-case letter. Here the
            # returned business id is printed.
            print(response.body.data.id)
        except Exception as error:
            # Not every exception carries a ``message`` attribute, so fall
            # back to str(error) rather than raising AttributeError here.
            message = getattr(error, 'message', None) or str(error)
            print(UtilClient.assert_as_string(message), file=sys.stderr)
```
2.3.5处理在线文件
```python
# -*- coding: utf-8 -*-
import sys
from typing import List
from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_credentials.client import Client as CredClient
def create_client() -> docmind_api20220711Client:
    """Build a Document Mind API client authenticated with the default credentials.

    The access key pair is read through the Credentials client, so access
    credentials must be configured before this sample is run.

    @return: Client
    @throws Exception
    """
    # Initialise the Credentials client with the default credential provider
    # (this sample relies on the default Credentials configuration file).
    cred = CredClient()
    config = open_api_models.Config(
        # AccessKey ID obtained from the configured credentials.
        access_key_id=cred.get_access_key_id(),
        # AccessKey Secret obtained from the configured credentials.
        access_key_secret=cred.get_access_key_secret(),
    )
    # Service endpoint to call. A plain string literal is enough here: the
    # value contains no placeholders, so no f-string is needed.
    config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com'
    return docmind_api20220711Client(config)
def main() -> None:
    """Submit an online file (by URL) for document structure parsing and print the job id.

    Failures of the API call are reported on stderr instead of being silently
    discarded.
    """
    client = create_client()
    request = docmind_api20220711_models.SubmitDocStructureJobRequest(
        # file_url: URL of the file to process.
        file_url='https://example.com/example.pdf',
        # file_name: file name; it must include the file type.
        file_name='123.pdf',
        # file_name_extension: file extension; the original sample notes that
        # it is an alternative to file_name (either one may be given).
        file_name_extension='pdf',
    )
    try:
        response = client.submit_doc_structure_job(request)
        # The response is nested as body -> data -> attribute, and the
        # attribute names start with a lower-case letter. Here the returned
        # business id is printed.
        print(response.body.data.id)
    except Exception as error:
        # Not every exception carries a ``message`` attribute, so fall back to
        # str(error) rather than raising AttributeError inside the handler.
        message = getattr(error, 'message', None) or str(error)
        print(UtilClient.assert_as_string(message), file=sys.stderr)