1. 添加jar依赖
<dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-compress</artifactId> <version>1.19</version> </dependency> <dependency> <groupId>org.apache.xmlbeans</groupId> <artifactId>xmlbeans</artifactId> <version>3.1.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>4.1.2</version> </dependency> <!-- Library for the Office 2007 and later formats --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>4.1.2</version> </dependency> <!-- Library for the Office 2003 format --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>4.1.2</version> </dependency> <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId> <version>2.0.3</version> </dependency> <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>fr.opensagres.poi.xwpf.converter.core</artifactId> <version>2.0.3</version> </dependency> <!-- jsoup --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency> <!-- hutool --> <dependency> <groupId>cn.hutool</groupId> <artifactId>hutool-all</artifactId> <version>5.0.2</version> </dependency>
2. 代码生成
package com.gccx.core.util;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers that turn the {@code <style>} rules of an HTML page into inline {@code style}
 * attributes, so the body fragment can be embedded without its stylesheet.
 */
public class JsoupUtils {

    /** Matches one flat CSS rule: {@code selector { declarations }}. */
    private static final Pattern CSS_RULE = Pattern.compile("([^{}]+)\\{([^{}]*)\\}");

    /**
     * Collects the CSS rules declared in the {@code <style>} elements of the document head.
     *
     * <p>Rules are located by their braces rather than by line breaks, so the result does not
     * depend on how the stylesheet is formatted, and a missing or empty stylesheet simply
     * yields an empty map.
     *
     * @param doc parsed HTML document
     * @return selector to declarations, in stylesheet order; declarations of a repeated
     *         selector are concatenated
     */
    private static Map<String, String> getHtmlCss(org.jsoup.nodes.Document doc) {
        String styleText = doc.head().select("style").html();
        // Insertion order matters: later rules must be applied after earlier ones.
        Map<String, String> css = new LinkedHashMap<>();
        Matcher rule = CSS_RULE.matcher(styleText);
        while (rule.find()) {
            String selector = rule.group(1).trim();
            String declarations = terminate(rule.group(2));
            if (selector.isEmpty() || declarations.isEmpty()) {
                continue;
            }
            css.merge(selector, declarations, String::concat);
        }
        return css;
    }

    /**
     * Trims a declaration list and makes sure it ends with {@code ;} so that several lists
     * can be concatenated safely.
     */
    private static String terminate(String declarations) {
        String trimmed = declarations.trim();
        if (trimmed.isEmpty() || trimmed.endsWith(";")) {
            return trimmed;
        }
        return trimmed + ";";
    }

    /**
     * Inlines the stylesheet rules of {@code html} into the elements of its body.
     *
     * <p>For every element the declarations of all matching rules are appended in stylesheet
     * order, followed by the element's own inline style (which therefore still wins).
     *
     * @param html complete HTML page
     * @return inner HTML of the body with the rules inlined
     */
    public static String changeHtmlCssLineStyle(String html) {
        org.jsoup.nodes.Document doc = Jsoup.parse(html);
        Element body = doc.body();

        // Accumulate per element first so that several matching rules are merged
        // instead of the last one overwriting the others.
        Map<Element, StringBuilder> inlined = new IdentityHashMap<>();
        for (Map.Entry<String, String> rule : getHtmlCss(doc).entrySet()) {
            for (Element target : body.select(rule.getKey())) {
                inlined.computeIfAbsent(target, k -> new StringBuilder()).append(rule.getValue());
            }
        }
        for (Map.Entry<Element, StringBuilder> entry : inlined.entrySet()) {
            Element target = entry.getKey();
            String merged = entry.getValue().append(target.attr("style")).toString();
            target.attr("style", merged);
        }
        return body.html();
    }
}
package com.gccx.core.util;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.URLUtil;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.usermodel.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.commons.CommonsMultipartFile;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Converts uploaded Word documents (.doc / .docx) to HTML and extracts their pictures.
 */
public class WordToHtmlUtil {

    /** logger */
    private static final Logger logger = LoggerFactory.getLogger(WordToHtmlUtil.class);

    /**
     * Converts a .docx upload to HTML.
     *
     * @param file uploaded .docx file
     * @return the HTML, or {@code null} when the upload is empty or is not a .docx file
     * @throws IOException if the upload cannot be read or converted
     */
    public static String Word2007ToHtml(MultipartFile file) throws IOException {
        if (isBlankUpload(file)) {
            logger.error("Sorry File does not Exists!");
            return null;
        }
        if (!hasExtension(file, ".docx")) {
            logger.error("Enter only MS Office 2007+ files");
            return null;
        }
        try (InputStream in = file.getInputStream();
             XWPFDocument document = new XWPFDocument(in);
             StringWriter html = new StringWriter()) {
            // Converting into a Writer avoids a bytes -> String round trip
            // through the platform default charset.
            XHTMLConverter.getInstance().convert(document, html, null);
            return html.toString();
        }
    }

    /**
     * Converts a .doc upload to HTML without storing its pictures.
     *
     * @param file uploaded .doc file
     * @return the HTML, or {@code null} when the upload is empty or is not a .doc file
     * @throws IOException                  if the upload cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException         if the HTML cannot be serialized
     */
    public static String Word2003ToHtml(MultipartFile file)
            throws IOException, ParserConfigurationException, TransformerException {
        if (isBlankUpload(file)) {
            logger.error("Sorry File does not Exists!");
            return null;
        }
        if (!hasExtension(file, ".doc")) {
            logger.error("Enter only MS Office 2003 files");
            return null;
        }
        try (InputStream input = file.getInputStream();
             HWPFDocument wordDocument = new HWPFDocument(input)) {
            WordToHtmlConverter converter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            converter.processDocument(wordDocument);
            return serializeHtml(converter.getDocument(), "utf-8");
        }
    }

    /**
     * Converts a .doc upload to HTML and writes every embedded picture to a local directory.
     *
     * <p>Each picture is written to {@code docsTempImages + suggestedName}, and that same
     * string is used as the picture reference in the HTML, so {@code docsTempImages} is
     * expected to end with a path separator. The HTML is serialized as gb2312.
     *
     * @param docsTempImages directory prefix for the extracted pictures
     * @param file           uploaded .doc file
     * @return the HTML, or {@code null} when the upload is empty or is not a .doc file
     * @throws IOException                  if the upload cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException         if the HTML cannot be serialized
     */
    public static String Word2003ToHtmlAndSaveImage(String docsTempImages, MultipartFile file)
            throws IOException, ParserConfigurationException, TransformerException {
        if (isBlankUpload(file)) {
            logger.error("Sorry File does not Exists!");
            return null;
        }
        if (!hasExtension(file, ".doc")) {
            logger.error("Enter only MS Office 2003 files");
            return null;
        }
        try (InputStream input = file.getInputStream();
             HWPFDocument wordDocument = new HWPFDocument(input)) {
            WordToHtmlConverter converter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            // Tell the converter where pictures are stored.
            converter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                                          float widthInches, float heightInches) {
                    return saveTempPicture(docsTempImages, suggestedName, content);
                }
            });
            converter.processDocument(wordDocument);
            return serializeHtml(converter.getDocument(), "gb2312");
        }
    }

    /**
     * Collects the pictures of a Word upload (.doc or .docx) and stores them under
     * {@code uploadPath}.
     *
     * <p>For .docx the pictures embedded in the runs of the body paragraphs are read from the
     * document itself. For .doc the files found in {@code docsTempImages} are copied, and that
     * directory is deleted afterwards.
     *
     * @param uploadPath     directory the pictures are stored in
     * @param docsTempImages temporary picture directory filled by
     *                       {@link #Word2003ToHtmlAndSaveImage(String, MultipartFile)}
     * @param file           the uploaded Word file
     * @return picture name to stored picture URL; empty for any other file type
     * @throws IOException if the upload cannot be read
     */
    public static Map<String, String> getImageMaps(String uploadPath, String docsTempImages, MultipartFile file)
            throws IOException {
        HashMap<String, String> map = new HashMap<>();
        if (hasExtension(file, ".docx")) {
            try (InputStream in = file.getInputStream();
                 XWPFDocument document = new XWPFDocument(in)) {
                for (XWPFParagraph paragraph : document.getParagraphs()) {
                    for (XWPFRun run : paragraph.getRuns()) {
                        for (XWPFPicture picture : run.getEmbeddedPictures()) {
                            XWPFPictureData pictureData = picture.getPictureData();
                            if (pictureData == null) {
                                continue;
                            }
                            String fileName = pictureData.getFileName();
                            map.put(fileName, saveImageToLocalWithByte(fileName, pictureData.getData(), uploadPath));
                        }
                    }
                }
            }
        } else if (hasExtension(file, ".doc")) {
            try {
                // listFiles() is null when the directory was never created,
                // e.g. because the document contained no pictures.
                File[] pictures = new File(docsTempImages).listFiles();
                if (pictures != null) {
                    for (File picture : pictures) {
                        if (!picture.isFile()) {
                            continue;
                        }
                        String name = picture.getName();
                        BufferedInputStream inputStream = FileUtil.getInputStream(picture);
                        map.put(name, saveImageToLocalWithStream(name, inputStream, uploadPath));
                    }
                }
            } finally {
                // Remove the temporary directory.
                FileUtil.del(docsTempImages);
            }
        }
        return map;
    }

    /**
     * Tells whether the upload is missing or has no content.
     */
    private static boolean isBlankUpload(MultipartFile file) {
        return file == null || file.isEmpty() || file.getSize() <= 0;
    }

    /**
     * Tells whether the original file name of the upload ends with {@code extension},
     * ignoring case. An upload without a file name never matches.
     */
    private static boolean hasExtension(MultipartFile file, String extension) {
        String name = file.getOriginalFilename();
        return name != null
                && name.regionMatches(true, name.length() - extension.length(), extension, 0, extension.length());
    }

    /**
     * Serializes a DOM document as indented HTML and decodes the bytes with the same
     * charset they were written in.
     */
    private static String serializeHtml(Document htmlDocument, String encoding)
            throws IOException, TransformerException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(buffer));
        return buffer.toString(encoding);
    }

    /**
     * Writes one extracted picture to {@code docsTempImages + suggestedName}, creating the
     * directory when needed. Failures are logged and do not abort the conversion.
     *
     * @return the path that is used as the picture reference in the HTML
     */
    private static String saveTempPicture(String docsTempImages, String suggestedName, byte[] content) {
        File imgDir = new File(docsTempImages);
        if (!imgDir.exists()) {
            imgDir.mkdirs();
        }
        String target = docsTempImages + suggestedName;
        try (OutputStream os = new FileOutputStream(new File(target))) {
            os.write(content);
        } catch (IOException e) {
            logger.error("Failed to save picture {}", target, e);
        }
        return target;
    }

    /**
     * Stores a picture given as a byte array and returns its URL.
     *
     * @param name       picture name
     * @param data       picture bytes
     * @param uploadPath directory to store the picture in
     * @return URL of the stored picture
     */
    private static String saveImageToLocalWithByte(String name, byte[] data, String uploadPath) {
        // Resolve against the directory so a missing trailing separator cannot glue
        // the directory name and the file name together (same layout as savePic).
        FileUtil.writeBytes(data, new File(uploadPath, name).getPath());
        // Host and port of your own application, needed for the picture URL in the HTML;
        // adapt to wherever the pictures are actually served from.
        String ipAndPort = "";
        return URLUtil.normalize(ipAndPort + name);
    }

    /**
     * Stores a picture given as a stream and returns its URL.
     *
     * @param name        picture name
     * @param inputStream picture content; closed by this call
     * @param uploadPath  directory to store the picture in
     * @return URL of the stored picture
     */
    private static String saveImageToLocalWithStream(String name, InputStream inputStream, String uploadPath) {
        savePic(uploadPath, inputStream, name);
        // Host and port of your own application, needed for the picture URL in the HTML;
        // adapt to wherever the pictures are actually served from.
        String ipAndPort = "";
        return URLUtil.normalize(ipAndPort + name);
    }

    /**
     * Copies a stream into {@code path/fileName}, creating the directory when needed.
     * Both streams are always closed; failures are logged and not propagated.
     *
     * @param path        target directory
     * @param inputStream picture content
     * @param fileName    target file name
     */
    private static void savePic(String path, InputStream inputStream, String fileName) {
        File targetDir = new File(path);
        if (!targetDir.exists()) {
            targetDir.mkdirs();
        }
        try (InputStream in = inputStream;
             OutputStream os = new FileOutputStream(targetDir.getPath() + File.separator + fileName)) {
            copy(in, os);
        } catch (IOException | RuntimeException e) {
            logger.error("Failed to save picture {} to {}", fileName, path, e);
        }
    }

    /**
     * Copies everything from {@code in} to {@code out}; closes neither stream.
     */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buffer = new byte[8192];
        int len;
        while ((len = in.read(buffer)) != -1) {
            out.write(buffer, 0, len);
        }
    }

    /**
     * Wraps a local file in a commons-fileupload {@link FileItem}; used by {@link #main}.
     * A read failure is logged and the (possibly incomplete) item is still returned.
     *
     * @param filePath path of the local file, with either {@code /} or {@code \} separators
     * @return a file item named after the last path segment
     */
    static FileItem createFileItem(String filePath) {
        FileItemFactory factory = new DiskFileItemFactory(16, null);
        String textFieldName = "textField";
        // Last path segment, whichever separator was used; also works without an extension.
        String normalized = filePath.replace("\\", "/");
        String fileName = normalized.substring(normalized.lastIndexOf('/') + 1);
        FileItem item = factory.createItem(textFieldName, "text/plain", true, fileName);
        try (InputStream fis = new FileInputStream(new File(filePath));
             OutputStream os = item.getOutputStream()) {
            copy(fis, os);
        } catch (IOException e) {
            logger.error("Failed to read {}", filePath, e);
        }
        return item;
    }

    /**
     * Manual smoke test: converts one local .doc and one local .docx file.
     */
    public static void main(String[] args) throws Exception {
        String path1 = "/Users/name/Downloads/全款合同.doc";
        String path2 = "/Users/name/Downloads/买卖合同.docx";
        MultipartFile file1 = new CommonsMultipartFile(createFileItem(path1));
        MultipartFile file2 = new CommonsMultipartFile(createFileItem(path2));
        System.out.println(WordToHtmlUtil.Word2003ToHtml(file1));
        // Convert the 2007+ document.
        WordToHtmlUtil.Word2007ToHtml(file2);
    }
}
3. 页面调整
- 字体:font-family:SimSun; font-family:Times New Roman
- 表格宽度:.t1{width:100%;}
- 表格td样式:去掉 width:1.1131945in;
- body:style="width: 72%;margin: 0 auto;line-height: 150%;"