开发者社区> 问答> 正文

html2pdf导出,windows下正常,linux下乱码

本来直接用的是jsoup,换到linux后就乱码了,最后发现在linux下连读取文件都会乱码。
在linux下把网页内容的字节流保存为本地xml文件是正常的,没有乱码,但是再读取这个文件时就乱码了。
各位大神,这是什么原因啊?代码里的编码都是对应的,windows下一切正常,换到linux就乱码了。

public String convert2PDF() {
PdfContentByte content = null;
BaseFont base = null;
Rectangle pageRect = null;
String pdfPath = context
.getRealPath("/pdfIn/"
+ (new SimpleDateFormat("yyyyMMddHHmmssSSS")
.format(new Date()) + ".pdf"));
String outPath = context
.getRealPath("/pdfOut/"
+ (new SimpleDateFormat("yyyyMMddHHmmssSSS")
.format(new Date()) + ".pdf"));
    String fontPath = context.getRealPath("/font/msyh.ttf");
    String contextPath = context.getContextPath();
    //
    FileOutputStream fos;
    InputStream is;
    try {
        jsp = jsp == null ? "" : jsp;
        //
        URL url = new URL(jsp);
        byte bytes[] = new byte[1024 * 1000];

        String tmpXml = context.getRealPath("/tmpXml/"
                + (new SimpleDateFormat("yyyyMMddHHmmssSSS")
                        .format(new Date()) + ".html"));
        File xml = new File(tmpXml);
        if (!xml.getParentFile().exists())
            xml.getParentFile().mkdirs();
        if (!xml.exists())
            xml.createNewFile();

        int index = 0;
        is = url.openStream();
        int count = is.read(bytes, index, 1024 * 100);
        while (count != -1) {
            index += count;
            count = is.read(bytes, index, 1);
        }
        fos = new FileOutputStream(xml);
        System.out.println(index);
        fos.write(bytes, 0, index);
        // is.close();
        fos.close();
        FileInputStream fis = new FileInputStream(xml);
        InputStreamReader isr = new InputStreamReader(fis, "utf-8");
        BufferedReader br = new BufferedReader(isr);
        StringBuffer sb = new StringBuffer();
        String line = "";
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }
        br.close();
        System.err.println(sb.toString());
        //TODO 读取本地文件乱码问题 
        org.jsoup.nodes.Document doc1 = Jsoup.parse(sb.toString());
        // org.jsoup.nodes.Document doc2 = Jsoup.parse(xml, "GBK");
        System.out.println(doc1.toString());
        // System.out.println(doc2.toString());
        File tmp = new File(pdfPath);
        if (!tmp.getParentFile().exists())
            tmp.getParentFile().mkdirs();
        // System.out.println("-- created -in===" + tmp.getPath());
        Document document = new Document();
        PdfWriter writer = PdfWriter.getInstance(document,
                new FileOutputStream(tmp));
        document.open();
        // Connection conn = Jsoup.connect(jsp);
        // conn.header(
        // "User-Agent",
        // "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36");
        // org.jsoup.nodes.Document doc = conn.timeout(5000).get();
        // doc1.select("div#getpdf").remove();
        InputStream in = new ByteArrayInputStream(doc1.toString().getBytes(
                "utf-8"));
        // System.out
        // .println("-- FileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStream");
        XMLWorkerHelper.getInstance().parseXHtml(writer, document, in,
                Charset.forName("utf-8"));
        // System.out
        // .println("-- FileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStream");
        document.close();
        File out = new File(outPath);
        if (!out.getParentFile().exists())
            out.getParentFile().mkdirs();
        if (!out.exists())
            out.createNewFile();
        System.out.println("-- created -out===" + out.getPath());
        PdfReader pdfReader = new PdfReader(tmp.getPath());
        PdfStamper pdfStamper = new PdfStamper(pdfReader,
                new FileOutputStream(out));
        //
        PdfGState gs = new PdfGState();
        base = BaseFont.createFont("STSong-Light",   "UniGB-UCS2-H",   BaseFont.NOT_EMBEDDED);
// base = BaseFont.createFont(fontPath, BaseFont.IDENTITY_H,
// BaseFont.NOT_EMBEDDED);
System.out.println("-- -fontPath===" + fontPath);
if (base == null || pdfStamper == null) {
msg = "文件生成失败!";
ActionContext.getContext().put("msg", msg);
path = "error";
}
// 设置透明度为0.4
gs.setFillOpacity(0.4f);
gs.setStrokeOpacity(0.4f);
int toPage = pdfStamper.getReader().getNumberOfPages();
for (int i = 1; i <= toPage; i++) {
pageRect = pdfStamper.getReader().getPageSizeWithRotation(i);
// 计算水印X,Y坐标
float x = pageRect.getWidth() / 2;
float y = pageRect.getHeight() / 2;
// 获得PDF最顶层
content = pdfStamper.getOverContent(i);
content.saveState();
// set Transparency
content.setGState(gs);
content.beginText();
content.setColorFill(BaseColor.GRAY);
content.setFontAndSize(base, 60);
// 水印文字成45度角倾斜
content.showTextAligned(Element.ALIGN_CENTER, "eeeee", x, y, 45);
content.endText();
}
//
pdfStamper.close();
// tmp.delete();
// path = jsp.split(contextPath)[0] + contextPath+"/"+
// out.getPath().replace("\",
// "/").split(contextPath)[1].split("/")[1]+"/"+out.getPath().replace("\",
// "/").split(contextPath)[1].split("/")[2];
path = out.getPath().replace("\", "/").split("pdfOut")[0]
+ "pdfOut/$"
+ out.getPath().replace("\", "/").split("pdfOut")[1]
.split("/")[1];
System.out.println("-- created -pdf path===" + path);
} catch (Exception ex) {
ex.printStackTrace();
msg = "文件生成异常!";
ActionContext.getContext().put("msg", msg);
path = "error";
} finally {
content = null;
base = null;
pageRect = null;
}
return SUCCESS;
}

展开
收起
杨冬芳 2016-07-18 13:22:55 3251 0
1 条回答
写回答
取消 提交回答
  • IT从业

    linux和windows下的文件格式不一样,你读写文件的方式可能有问题。

    2019-07-17 19:57:43
    赞同 展开评论 打赏
问答排行榜
最热
最新

相关电子书

更多
Alibaba Cloud Linux 3 发布 立即下载
ECS系统指南之Linux系统诊断 立即下载
ECS运维指南 之 Linux系统诊断 立即下载