编辑
pdf转txt
小工具
package utilw;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Small utility that extracts the text of a PDF file with PDFBox and writes it to a
 * plain-text file.
 */
public class PdfUtil {

    /** PDF read by the no-argument {@link #pdfToTxt()}. */
    private static final String DEFAULT_PDF_PATH = "E:/数据文件/2019_PDF.pdf";

    /** Text file written by the no-argument {@link #pdfToTxt()}. */
    private static final String DEFAULT_TXT_PATH = "E:/数据文件/2019_PDF.txt";

    /**
     * Command-line entry point: converts the default PDF to the default text file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        PdfUtil pdfUtil = new PdfUtil();
        pdfUtil.pdfToTxt();
    }

    /**
     * Converts the default PDF to the default text file.
     *
     * <p>Best-effort: any failure is printed to standard error via its stack trace and is
     * not propagated to the caller.
     */
    public void pdfToTxt() {
        try {
            pdfToTxt(DEFAULT_PDF_PATH, DEFAULT_TXT_PATH);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the text of every page of a PDF and writes it to a text file.
     *
     * <p>The output file is opened only after the text has been extracted, so a PDF that
     * cannot be loaded or parsed does not create or truncate the target file.
     *
     * @param pdfPath path of the PDF file to read
     * @param txtPath path of the text file to write; it is written as UTF-8
     * @throws IOException if the PDF cannot be loaded or parsed, or the text file cannot be
     *         opened or written
     */
    public void pdfToTxt(String pdfPath, String txtPath) throws IOException {
        // try-with-resources closes the document on every path, including failures.
        try (PDDocument document = PDDocument.load(new File(pdfPath))) {
            int endPage = document.getNumberOfPages();
            System.out.println("Total Page: " + endPage);

            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setStartPage(1);
            stripper.setEndPage(endPage);

            // The text parsed out of the PDF.
            String content = stripper.getText(document);

            // Explicit UTF-8 instead of the platform default charset, so the output does
            // not depend on the machine the tool runs on.
            try (PrintWriter writer = new PrintWriter(
                    new OutputStreamWriter(new FileOutputStream(txtPath), StandardCharsets.UTF_8))) {
                writer.write(content);
                // PrintWriter never throws on write; checkError() flushes and reports
                // whether any write failed, so the failure is surfaced here.
                if (writer.checkError()) {
                    throw new IOException("Failed to write extracted text to " + txtPath);
                }
            }
        }
        System.out.println("Get PDF Content ...");
    }
}