PyMuPDF 1.24.4 中文文档(四)(4)https://developer.aliyun.com/article/1559457
如何显示来自 JSON 数据的列表
此示例使用一些 JSON 数据输入来填充故事,还包含一些视觉文本格式化,并显示如何添加链接。
文件:
docs/samples/json-example.py
查看示例
"""Lay out a JSON list of temple buildings into a PDF with a PyMuPDF Story.

Every JSON record becomes a group of paragraphs (one per attribute). The
"position" attribute is rendered as a Google Maps link, all other attributes
as a small coloured bold label followed by the value in a larger font.
Records are separated by a horizontal line.
"""

import json

import pymupdf

my_json = """
[
    {
        "name": "Five-storied Pagoda",
        "temple": "Rurikō-ji",
        "founded": "middle Muromachi period, 1442",
        "region": "Yamaguchi, Yamaguchi",
        "position": "34.190181,131.472917"
    },
    {
        "name": "Founder's Hall",
        "temple": "Eihō-ji",
        "founded": "early Muromachi period",
        "region": "Tajimi, Gifu",
        "position": "35.346144,137.129189"
    },
    {
        "name": "Fudōdō",
        "temple": "Kongōbu-ji",
        "founded": "early Kamakura period",
        "region": "Kōya, Wakayama",
        "position": "34.213103,135.580397"
    },
    {
        "name": "Goeidō",
        "temple": "Nishi Honganji",
        "founded": "Edo period, 1636",
        "region": "Kyoto",
        "position": "34.991394,135.751689"
    },
    {
        "name": "Golden Hall",
        "temple": "Murō-ji",
        "founded": "early Heian period",
        "region": "Uda, Nara",
        "position": "34.536586819357986,136.0395548452301"
    },
    {
        "name": "Golden Hall",
        "temple": "Fudō-in",
        "founded": "late Muromachi period, 1540",
        "region": "Hiroshima",
        "position": "34.427014,132.471117"
    },
    {
        "name": "Golden Hall",
        "temple": "Ninna-ji",
        "founded": "Momoyama period, 1613",
        "region": "Kyoto",
        "position": "35.031078,135.713811"
    },
    {
        "name": "Golden Hall",
        "temple": "Mii-dera",
        "founded": "Momoyama period, 1599",
        "region": "Ōtsu, Shiga",
        "position": "35.013403,135.852861"
    },
    {
        "name": "Golden Hall",
        "temple": "Tōshōdai-ji",
        "founded": "Nara period, 8th century",
        "region": "Nara, Nara",
        "position": "34.675619,135.784842"
    },
    {
        "name": "Golden Hall",
        "temple": "Tō-ji",
        "founded": "Momoyama period, 1603",
        "region": "Kyoto",
        "position": "34.980367,135.747686"
    },
    {
        "name": "Golden Hall",
        "temple": "Tōdai-ji",
        "founded": "middle Edo period, 1705",
        "region": "Nara, Nara",
        "position": "34.688992,135.839822"
    },
    {
        "name": "Golden Hall",
        "temple": "Hōryū-ji",
        "founded": "Asuka period, by 693",
        "region": "Ikaruga, Nara",
        "position": "34.614317,135.734458"
    },
    {
        "name": "Golden Hall",
        "temple": "Daigo-ji",
        "founded": "late Heian period",
        "region": "Kyoto",
        "position": "34.951481,135.821747"
    },
    {
        "name": "Keigū-in Main Hall",
        "temple": "Kōryū-ji",
        "founded": "early Kamakura period, before 1251",
        "region": "Kyoto",
        "position": "35.015028,135.705425"
    },
    {
        "name": "Konpon-chūdō",
        "temple": "Enryaku-ji",
        "founded": "early Edo period, 1640",
        "region": "Ōtsu, Shiga",
        "position": "35.070456,135.840942"
    },
    {
        "name": "Korō",
        "temple": "Tōshōdai-ji",
        "founded": "early Kamakura period, 1240",
        "region": "Nara, Nara",
        "position": "34.675847,135.785069"
    },
    {
        "name": "Kōfūzō",
        "temple": "Hōryū-ji",
        "founded": "early Heian period",
        "region": "Ikaruga, Nara",
        "position": "34.614439,135.735428"
    },
    {
        "name": "Large Lecture Hall",
        "temple": "Hōryū-ji",
        "founded": "middle Heian period, 990",
        "region": "Ikaruga, Nara",
        "position": "34.614783,135.734175"
    },
    {
        "name": "Lecture Hall",
        "temple": "Zuiryū-ji",
        "founded": "early Edo period, 1655",
        "region": "Takaoka, Toyama",
        "position": "36.735689,137.010019"
    },
    {
        "name": "Lecture Hall",
        "temple": "Tōshōdai-ji",
        "founded": "Nara period, 763",
        "region": "Nara, Nara",
        "position": "34.675933,135.784842"
    },
    {
        "name": "Lotus Flower Gate",
        "temple": "Tō-ji",
        "founded": "early Kamakura period",
        "region": "Kyoto",
        "position": "34.980678,135.746314"
    },
    {
        "name": "Main Hall",
        "temple": "Akishinodera",
        "founded": "early Kamakura period",
        "region": "Nara, Nara",
        "position": "34.703769,135.776189"
    }
]
"""

# The top-level JSON value is an array, so the result is a Python list of
# dictionaries (one dictionary per building), despite the variable's name.
my_dict = json.loads(my_json)

MEDIABOX = pymupdf.paper_rect("letter")  # output page format: Letter
WHERE = MEDIABOX + (36, 36, -36, -36)  # page rectangle shrunk by 36 on every side
writer = pymupdf.DocumentWriter("json-example.pdf")  # create the writer

story = pymupdf.Story()
body = story.body

for entry in my_dict:
    # One paragraph per attribute of the record.
    for attribute, value in entry.items():
        para = body.add_paragraph()

        if attribute == "position":
            para.set_fontsize(10)
            # The link target needs a scheme to be an absolute URL; the
            # visible label keeps the shorter scheme-less form.
            label = f"www.google.com/maps/@{value},14z"
            para.add_link(f"https://{label}", label)
        else:
            # Attribute name: small, dark red, bold.
            para.add_span()
            para.set_color("#990000")
            para.set_fontsize(14)
            para.set_bold()
            para.add_text(f"{attribute} ")
            # Attribute value: larger font in a separate span.
            para.add_span()
            para.set_fontsize(18)
            para.add_text(f"{value}")

    # Visually separate this record from the next one.
    body.add_horizontal_line()

# This while condition will check a value from the Story `place` method
# for whether all content for the story has been written (0), otherwise
# more content is waiting to be written (1)
more = 1
while more:
    device = writer.begin_page(MEDIABOX)  # make new page
    more, _ = story.place(WHERE)
    story.draw(device)
    writer.end_page()  # finish page

writer.close()  # close output file

del story
使用备用的 Story.write*() 函数
Story.write*() 函数提供了使用故事(Story)功能的另一种方法:调用代码无需自己实现反复调用 Story.place() 和 Story.draw() 等的循环,代价是必须提供至少一个 rectfn() 回调。
如何使用 Story.write() 进行基本布局
此脚本将其自身源代码布局到每页的四个矩形中。
文件:
docs/samples/story-write.py
查看示例
""" Demo script for PyMuPDF's `Story.write()` method. This is a way of laying out a story into a PDF document, that avoids the need to write a loop that calls `story.place()` and `story.draw()`. Instead just a single function call is required, albeit with a `rectfn()` callback that returns the rectangles into which the story is placed. """ import html import pymupdf # Create html containing multiple copies of our own source code. # with open(__file__) as f: text = f.read() text = html.escape(text) html = f''' <!DOCTYPE html> <body> <h1>Contents of {__file__}</h1> <h2>Normal</h2> <pre> {text} </pre> <h2>Strong</h2> <strong> <pre> {text} </pre> </strong> <h2>Em</h2> <em> <pre> {text} </pre> </em> </body> ''' def rectfn(rect_num, filled): ''' We return four rectangles per page in this order: 1 3 2 4 ''' page_w = 800 page_h = 600 margin = 50 rect_w = (page_w - 3*margin) / 2 rect_h = (page_h - 3*margin) / 2 if rect_num % 4 == 0: # New page. mediabox = pymupdf.Rect(0, 0, page_w, page_h) else: mediabox = None # Return one of four rects in turn. rect_x = margin + (rect_w+margin) * ((rect_num // 2) % 2) rect_y = margin + (rect_h+margin) * (rect_num % 2) rect = pymupdf.Rect(rect_x, rect_y, rect_x + rect_w, rect_y + rect_h) #print(f'rectfn(): rect_num={rect_num} filled={filled}. Returning: rect={rect}') return mediabox, rect, None story = pymupdf.Story(html, em=8) out_path = __file__.replace('.py', '.pdf') writer = pymupdf.DocumentWriter(out_path) story.write(writer, rectfn) writer.close()
此脚本动态创建 HTML 内容,并根据 .heading 值非零的 ElementPosition 项添加目录部分。
目录部分位于文档开头,因此对目录的修改可能会改变文档其余部分的页码,进而导致目录中的页码不正确。
因此,脚本使用 Story.write_stabilized() 重复进行布局,直到结果稳定为止。
文件:
docs/samples/story-write-stabilized.py
查看示例
""" Demo script for PyMuPDF's `pymupdf.Story.write_stabilized()`. `pymupdf.Story.write_stabilized()` is similar to `pymupdf.Story.write()`, except instead of taking a fixed html document, it does iterative layout of dynamically-generated html content (provided by a callback) to a `pymupdf.DocumentWriter`. For example this allows one to add a dynamically-generated table of contents section while ensuring that page numbers are patched up until stable. """ import textwrap import pymupdf def rectfn(rect_num, filled): ''' We return one rect per page. ''' rect = pymupdf.Rect(10, 20, 290, 380) mediabox = pymupdf.Rect(0, 0, 300, 400) #print(f'rectfn(): rect_num={rect_num} filled={filled}') return mediabox, rect, None def contentfn(positions): ''' Returns html content, with a table of contents derived from `positions`. ''' ret = '' ret += textwrap.dedent(''' <!DOCTYPE html> <body> <h2>Contents</h2> <ul> ''') # Create table of contents with links to all <h1..6> sections in the # document. for position in positions: if position.heading and (position.open_close & 1): text = position.text if position.text else '' if position.id: ret += f" <li><a href=\"#{position.id}\">{text}</a>\n" else: ret += f" <li>{text}\n" ret += f" <ul>\n" ret += f" <li>page={position.page_num}\n" ret += f" <li>depth={position.depth}\n" ret += f" <li>heading={position.heading}\n" ret += f" <li>id={position.id!r}\n" ret += f" <li>href={position.href!r}\n" ret += f" <li>rect={position.rect}\n" ret += f" <li>text={text!r}\n" ret += f" <li>open_close={position.open_close}\n" ret += f" </ul>\n" ret += '</ul>\n' # Main content. ret += textwrap.dedent(f''' <h1>First section</h1> <p>Contents of first section. <h1>Second section</h1> <p>Contents of second section. <h2>Second section first subsection</h2> <p>Contents of second section first subsection. <h1>Third section</h1> <p>Contents of third section. 
</body> ''') ret = ret.strip() with open(__file__.replace('.py', '.html'), 'w') as f: f.write(ret) return ret; out_path = __file__.replace('.py', '.pdf') writer = pymupdf.DocumentWriter(out_path) pymupdf.Story.write_stabilized(writer, contentfn, rectfn) writer.close()
此脚本与上文“如何使用 Story.write_stabilized()”一节中描述的脚本类似,但生成的 PDF 还包含与原始 HTML 中的内部链接相对应的链接。
这是通过使用 Story.write_stabilized_with_links() 完成的;它与 Story.write_stabilized() 稍有不同:
- 它不接受 DocumentWriter writer 参数。
- 它返回一个 PDF Document 实例。
这样做的原因有些复杂;例如 [DocumentWriter 并不一定是 PDF 编写器,因此不适用于特定于 PDF 的 API。]
文件:
docs/samples/story-write-stabilized-links.py
查看示例
""" Demo script for PyMuPDF's `pymupdf.Story.write_stabilized_with_links()`. `pymupdf.Story.write_stabilized_links()` is similar to `pymupdf.Story.write_stabilized()` except that it creates a PDF `pymupdf.Document` that contains PDF links generated from all internal links in the original html. """ import textwrap import pymupdf def rectfn(rect_num, filled): ''' We return one rect per page. ''' rect = pymupdf.Rect(10, 20, 290, 380) mediabox = pymupdf.Rect(0, 0, 300, 400) #print(f'rectfn(): rect_num={rect_num} filled={filled}') return mediabox, rect, None def contentfn(positions): ''' Returns html content, with a table of contents derived from `positions`. ''' ret = '' ret += textwrap.dedent(''' <!DOCTYPE html> <body> <h2>Contents</h2> <ul> ''') # Create table of contents with links to all <h1..6> sections in the # document. for position in positions: if position.heading and (position.open_close & 1): text = position.text if position.text else '' if position.id: ret += f" <li><a href=\"#{position.id}\">{text}</a>\n" else: ret += f" <li>{text}\n" ret += f" <ul>\n" ret += f" <li>page={position.page_num}\n" ret += f" <li>depth={position.depth}\n" ret += f" <li>heading={position.heading}\n" ret += f" <li>id={position.id!r}\n" ret += f" <li>href={position.href!r}\n" ret += f" <li>rect={position.rect}\n" ret += f" <li>text={text!r}\n" ret += f" <li>open_close={position.open_close}\n" ret += f" </ul>\n" ret += '</ul>\n' # Main content. ret += textwrap.dedent(f''' <h1>First section</h1> <p>Contents of first section. <ul> <li>External <a href="https://artifex.com/">link to https://artifex.com/</a>. <li><a href="#idtest">Link to IDTEST</a>. <li><a href="#nametest">Link to NAMETEST</a>. </ul> <h1>Second section</h1> <p>Contents of second section. <h2>Second section first subsection</h2> <p>Contents of second section first subsection. <p id="idtest">IDTEST <h1>Third section</h1> <p>Contents of third section. <p><a name="nametest">NAMETEST</a>. 
</body> ''') ret = ret.strip() with open(__file__.replace('.py', '.html'), 'w') as f: f.write(ret) return ret; out_path = __file__.replace('.py', '.pdf') document = pymupdf.Story.write_stabilized_with_links(contentfn, rectfn) document.save(out_path)
脚注
您对此页面有任何反馈吗?
此软件按原样提供,不附带任何明示或暗示的保证。此软件根据许可分发,未经该许可明确授权不得复制、修改或分发。请参阅 artifex.com 获取许可信息,或联系位于美国加利福尼亚州旧金山市(CA 94129)Mesa Street 39 号 Suite 108A 的 Artifex Software Inc. 了解更多信息。
此文档涵盖所有版本,直到 1.24.4。
### 如何使用 Story.write() 进行基本布局
此脚本将其自身的源代码多次布局到每页四个矩形中。
文件:
docs/samples/story-write.py
查看示例
""" Demo script for PyMuPDF's `Story.write()` method. This is a way of laying out a story into a PDF document, that avoids the need to write a loop that calls `story.place()` and `story.draw()`. Instead just a single function call is required, albeit with a `rectfn()` callback that returns the rectangles into which the story is placed. """ import html import pymupdf # Create html containing multiple copies of our own source code. # with open(__file__) as f: text = f.read() text = html.escape(text) html = f''' <!DOCTYPE html> <body> <h1>Contents of {__file__}</h1> <h2>Normal</h2> <pre> {text} </pre> <h2>Strong</h2> <strong> <pre> {text} </pre> </strong> <h2>Em</h2> <em> <pre> {text} </pre> </em> </body> ''' def rectfn(rect_num, filled): ''' We return four rectangles per page in this order: 1 3 2 4 ''' page_w = 800 page_h = 600 margin = 50 rect_w = (page_w - 3*margin) / 2 rect_h = (page_h - 3*margin) / 2 if rect_num % 4 == 0: # New page. mediabox = pymupdf.Rect(0, 0, page_w, page_h) else: mediabox = None # Return one of four rects in turn. rect_x = margin + (rect_w+margin) * ((rect_num // 2) % 2) rect_y = margin + (rect_h+margin) * (rect_num % 2) rect = pymupdf.Rect(rect_x, rect_y, rect_x + rect_w, rect_y + rect_h) #print(f'rectfn(): rect_num={rect_num} filled={filled}. Returning: rect={rect}') return mediabox, rect, None story = pymupdf.Story(html, em=8) out_path = __file__.replace('.py', '.pdf') writer = pymupdf.DocumentWriter(out_path) story.write(writer, rectfn) writer.close()
如何为目录创建迭代布局,并使用 Story.write_stabilized()
此脚本动态创建 HTML 内容,并根据 .heading 值非零的 ElementPosition 项添加目录部分。
目录部分位于文档开头,因此对目录的修改可能会改变文档其余部分的页码,进而导致目录中的页码不正确。
因此,脚本使用 Story.write_stabilized() 来重复布局,直到稳定为止。
文件:
docs/samples/story-write-stabilized.py
查看示例
""" Demo script for PyMuPDF's `pymupdf.Story.write_stabilized()`. `pymupdf.Story.write_stabilized()` is similar to `pymupdf.Story.write()`, except instead of taking a fixed html document, it does iterative layout of dynamically-generated html content (provided by a callback) to a `pymupdf.DocumentWriter`. For example this allows one to add a dynamically-generated table of contents section while ensuring that page numbers are patched up until stable. """ import textwrap import pymupdf def rectfn(rect_num, filled): ''' We return one rect per page. ''' rect = pymupdf.Rect(10, 20, 290, 380) mediabox = pymupdf.Rect(0, 0, 300, 400) #print(f'rectfn(): rect_num={rect_num} filled={filled}') return mediabox, rect, None def contentfn(positions): ''' Returns html content, with a table of contents derived from `positions`. ''' ret = '' ret += textwrap.dedent(''' <!DOCTYPE html> <body> <h2>Contents</h2> <ul> ''') # Create table of contents with links to all <h1..6> sections in the # document. for position in positions: if position.heading and (position.open_close & 1): text = position.text if position.text else '' if position.id: ret += f" <li><a href=\"#{position.id}\">{text}</a>\n" else: ret += f" <li>{text}\n" ret += f" <ul>\n" ret += f" <li>page={position.page_num}\n" ret += f" <li>depth={position.depth}\n" ret += f" <li>heading={position.heading}\n" ret += f" <li>id={position.id!r}\n" ret += f" <li>href={position.href!r}\n" ret += f" <li>rect={position.rect}\n" ret += f" <li>text={text!r}\n" ret += f" <li>open_close={position.open_close}\n" ret += f" </ul>\n" ret += '</ul>\n' # Main content. ret += textwrap.dedent(f''' <h1>First section</h1> <p>Contents of first section. <h1>Second section</h1> <p>Contents of second section. <h2>Second section first subsection</h2> <p>Contents of second section first subsection. <h1>Third section</h1> <p>Contents of third section. 
</body> ''') ret = ret.strip() with open(__file__.replace('.py', '.html'), 'w') as f: f.write(ret) return ret; out_path = __file__.replace('.py', '.pdf') writer = pymupdf.DocumentWriter(out_path) pymupdf.Story.write_stabilized(writer, contentfn, rectfn) writer.close()
如何使用 Story.write_stabilized_with_links() 进行迭代布局并创建 PDF 链接
此脚本类似于上文“如何使用 Story.write_stabilized()”一节中描述的脚本,但生成的 PDF 还包含与原始 HTML 中的内部链接相对应的链接。
这是通过使用 Story.write_stabilized_with_links() 完成的;它与 Story.write_stabilized() 略有不同:
- 它不需要 DocumentWriter writer 参数。
- 它返回一个 PDF Document 实例。
其中原因有些复杂;例如[DocumentWriter 不一定是 PDF 编写器,因此在 PDF 特定的 API 中并不适用。]
文件:
docs/samples/story-write-stabilized-links.py
查看示例
""" Demo script for PyMuPDF's `pymupdf.Story.write_stabilized_with_links()`. `pymupdf.Story.write_stabilized_links()` is similar to `pymupdf.Story.write_stabilized()` except that it creates a PDF `pymupdf.Document` that contains PDF links generated from all internal links in the original html. """ import textwrap import pymupdf def rectfn(rect_num, filled): ''' We return one rect per page. ''' rect = pymupdf.Rect(10, 20, 290, 380) mediabox = pymupdf.Rect(0, 0, 300, 400) #print(f'rectfn(): rect_num={rect_num} filled={filled}') return mediabox, rect, None def contentfn(positions): ''' Returns html content, with a table of contents derived from `positions`. ''' ret = '' ret += textwrap.dedent(''' <!DOCTYPE html> <body> <h2>Contents</h2> <ul> ''') # Create table of contents with links to all <h1..6> sections in the # document. for position in positions: if position.heading and (position.open_close & 1): text = position.text if position.text else '' if position.id: ret += f" <li><a href=\"#{position.id}\">{text}</a>\n" else: ret += f" <li>{text}\n" ret += f" <ul>\n" ret += f" <li>page={position.page_num}\n" ret += f" <li>depth={position.depth}\n" ret += f" <li>heading={position.heading}\n" ret += f" <li>id={position.id!r}\n" ret += f" <li>href={position.href!r}\n" ret += f" <li>rect={position.rect}\n" ret += f" <li>text={text!r}\n" ret += f" <li>open_close={position.open_close}\n" ret += f" </ul>\n" ret += '</ul>\n' # Main content. ret += textwrap.dedent(f''' <h1>First section</h1> <p>Contents of first section. <ul> <li>External <a href="https://artifex.com/">link to https://artifex.com/</a>. <li><a href="#idtest">Link to IDTEST</a>. <li><a href="#nametest">Link to NAMETEST</a>. </ul> <h1>Second section</h1> <p>Contents of second section. <h2>Second section first subsection</h2> <p>Contents of second section first subsection. <p id="idtest">IDTEST <h1>Third section</h1> <p>Contents of third section. <p><a name="nametest">NAMETEST</a>. 
</body> ''') ret = ret.strip() with open(__file__.replace('.py', '.html'), 'w') as f: f.write(ret) return ret; out_path = __file__.replace('.py', '.pdf') document = pymupdf.Story.write_stabilized_with_links(contentfn, rectfn) document.save(out_path)
脚注
对本页面有任何反馈吗?
该软件按原样提供,不提供任何明示或暗示的担保。该软件在许可下分发,未经许可明确授权,不得复制、修改或分发。有关详细信息,请参阅artifex.com,或联系美国旧金山 CA 94129 Mesa 街 39 号 108A 套房的 Artifex Software Inc.。
此文档涵盖所有版本直到 1.24.4。