Apache PDFBox 是一个开源的 Java 工具库,用于操作 PDF 文档。PDFBox 允许创建新的 PDF 文档、操作现有文档,以及从文档中提取内容。
获取 Apache PDFBox 下载地址,目前版本 2.0.25,核心 jar 是 pdfbox-2.0.25.jar,其他还有几个 jar 可以根据需要进行导入。
jar 包下载与源码学习:在这里可以下载全部的 jar 和一些命令行工具;下载 pdfbox-2.0.25-src.zip 源码后,可以看到 examples 文件夹里有不少示例。
如果帮助到你了麻烦点个赞或收藏哦,会不断更新的
pdf合并
package pdfUtity; import org.apache.pdfbox.multipdf.PDFMergerUtility; import java.io.File; import java.io.IOException; import java.util.Scanner; //import org.apache.pdfbox.util.PDFMergerUtility; public class pdfMerge { private static String[] getPdfs(String fileAddress) throws IOException { File file = new File(fileAddress); String[] pdfs; if (file.isDirectory()) { pdfs = file.list(); return pdfs; } else { throw new IOException("输入的路径有问题"); } } public static void main(String[] args) throws Exception { Scanner in = new Scanner(System.in); PDFMergerUtility mergePdf = new PDFMergerUtility(); System.out.println("请输入要合并的PDF文件所在的文件夹路径"); String fileAddress = in.nextLine(); System.out.println("你输入的路径是:" + fileAddress); String destinationFileName = "javaweb2020.pdf"; String[] pdfs = getPdfs(fileAddress); for (int i = 0; i < pdfs.length; i++) mergePdf.addSource(fileAddress + File.separator + pdfs[i]); mergePdf.setDestinationFileName(destinationFileName); System.out.println("合并比较费时间,请等待个几分钟吧!"); mergePdf.mergeDocuments(); System.out.print("合并完成"); } }
pdf拆分为单页
package pdfUtity; import org.apache.pdfbox.multipdf.Splitter; import org.apache.pdfbox.pdmodel.PDDocument; import java.io.File; import java.io.IOException; import java.util.List; import java.util.Iterator; public class pdfSplitter { public static void main(String[] args)throws IOException { //把需要拆分的pdf文件加载进来 File file = new File("d://desktopfile//pdfs//1.pdf"); PDDocument document = PDDocument.load(file); //创建一个拆分器对象 Splitter splitter = new Splitter(); //list中存放好被拆分的pdf对象 其中内容是pdf的每一页 List<PDDocument>Pages = splitter.split(document); //创建迭代器对象 Iterator<PDDocument>iterator = Pages.listIterator(); //saving splits as individual PDF document int i = 1; while(iterator.hasNext()) { PDDocument pd = iterator.next(); pd.save("d://desktopfile//pdfPhotos//"+i++ +".pdf"); } System.out.println("pdf拆分成功"); document.close(); } }
pdf转换为图片
package pdfUtity;/* * 读取 pdf,将其中的某一页另存为 png 图片 */ import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.tools.imageio.ImageIOUtil; public class PDFSavePNG { public static void main(String[] args) { try { // 打开来源 pdf PDDocument pdfDocument = PDDocument.load(new File("d://desktopfile//pdfs//1.pdf")); PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument); for(int pageNumber=0;pageNumber<pdfDocument.getNumberOfPages();pageNumber++){ // 提取的页码 // int pageNumber = 0; // 以300 dpi 读取存入 BufferedImage 对象 int dpi = 300; BufferedImage buffImage = pdfRenderer.renderImageWithDPI(pageNumber, dpi, ImageType.RGB); // 将 BufferedImage 写入到 png ImageIOUtil.writeImage(buffImage, "d://desktopfile//pdfPhotos//"+pageNumber+".png", dpi); // 关闭文档 } pdfDocument.close(); } catch (InvalidPasswordException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
图片转换为pdf
package pdfUtity; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.List; /** * @author: Serendipity * Date: 2022/3/16 22:40 * Description: */ public class photoToPDF { /** * 多图片合成pdf的限制后缀 */ private static final List IMAGE_SUFFIX = Arrays.asList("jpg", "png", "jpeg"); /** * 多个图片合成一个pdf * * @param imgFolder 多图片的文件夹路径 例如:"D:\\image\\" * @param target 合并的图片路径 "D:\\image\\merge.pdf" * @throws IOException */ public static void manyImageToOnePdf(String imgFolder, String target) throws IOException { PDDocument doc = new PDDocument(); //创建一个空的pdf文件 doc.save(target); PDPage page; PDImageXObject pdImage; PDPageContentStream contents; BufferedImage bufferedImage; String fileName; float w, h; String suffix; File tempFile; int index; File folder = new File(imgFolder); for (int i = 0; i < folder.listFiles().length; i++) { tempFile = folder.listFiles()[i]; if (!tempFile.isFile()) { continue; } fileName = tempFile.getName(); index = fileName.lastIndexOf("."); if (index == -1) { continue; } //获取文件的后缀 suffix = fileName.substring(index + 1); //如果文件后缀不是图片格式,跳过当前循环 if (!IMAGE_SUFFIX.contains(suffix)) { continue; } bufferedImage = ImageIO.read(folder.listFiles()[i]); //Retrieving the page pdImage = LosslessFactory.createFromImage(doc, bufferedImage); w = pdImage.getWidth(); h = pdImage.getHeight(); page = new PDPage(new PDRectangle(w, h)); contents = new PDPageContentStream(doc, page); contents.drawImage(pdImage, 0, 0, w, h); System.out.println("Image inserted"); contents.close(); doc.addPage(page); } //保存pdf doc.save(target); //关闭pdf doc.close(); } public static 
void main(String[] args) { try { manyImageToOnePdf("d://图片","photo.pdf"); } catch (IOException e) { e.printStackTrace(); } } }