本来是直接用 jsoup 抓取页面的,换到 Linux 后出现了乱码,最后发现在 Linux 下连读取本地文件都会乱码。
在 Linux 下,把网页内容的字节流保存为本地 xml 文件是正常的,没有乱码;但之后再读取这个文件时就乱码了。
请教各位大神,这是什么原因?代码里的编码都是对应的,在 Windows 下一切正常,换到 Linux 就乱码了。
public String convert2PDF() {
PdfContentByte content = null;
BaseFont base = null;
Rectangle pageRect = null;
String pdfPath = context
.getRealPath("/pdfIn/"
+ (new SimpleDateFormat("yyyyMMddHHmmssSSS")
.format(new Date()) + ".pdf"));
String outPath = context
.getRealPath("/pdfOut/"
+ (new SimpleDateFormat("yyyyMMddHHmmssSSS")
.format(new Date()) + ".pdf"));
String fontPath = context.getRealPath("/font/msyh.ttf");
String contextPath = context.getContextPath();
//
FileOutputStream fos;
InputStream is;
try {
jsp = jsp == null ? "" : jsp;
//
URL url = new URL(jsp);
byte bytes[] = new byte[1024 * 1000];
String tmpXml = context.getRealPath("/tmpXml/"
+ (new SimpleDateFormat("yyyyMMddHHmmssSSS")
.format(new Date()) + ".html"));
File xml = new File(tmpXml);
if (!xml.getParentFile().exists())
xml.getParentFile().mkdirs();
if (!xml.exists())
xml.createNewFile();
int index = 0;
is = url.openStream();
int count = is.read(bytes, index, 1024 * 100);
while (count != -1) {
index += count;
count = is.read(bytes, index, 1);
}
fos = new FileOutputStream(xml);
System.out.println(index);
fos.write(bytes, 0, index);
// is.close();
fos.close();
FileInputStream fis = new FileInputStream(xml);
InputStreamReader isr = new InputStreamReader(fis, "utf-8");
BufferedReader br = new BufferedReader(isr);
StringBuffer sb = new StringBuffer();
String line = "";
while ((line = br.readLine()) != null) {
sb.append(line);
}
br.close();
System.err.println(sb.toString());
//TODO 读取本地文件乱码问题
org.jsoup.nodes.Document doc1 = Jsoup.parse(sb.toString());
// org.jsoup.nodes.Document doc2 = Jsoup.parse(xml, "GBK");
System.out.println(doc1.toString());
// System.out.println(doc2.toString());
File tmp = new File(pdfPath);
if (!tmp.getParentFile().exists())
tmp.getParentFile().mkdirs();
// System.out.println("-- created -in===" + tmp.getPath());
Document document = new Document();
PdfWriter writer = PdfWriter.getInstance(document,
new FileOutputStream(tmp));
document.open();
// Connection conn = Jsoup.connect(jsp);
// conn.header(
// "User-Agent",
// "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36");
// org.jsoup.nodes.Document doc = conn.timeout(5000).get();
// doc1.select("div#getpdf").remove();
InputStream in = new ByteArrayInputStream(doc1.toString().getBytes(
"utf-8"));
// System.out
// .println("-- FileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStream");
XMLWorkerHelper.getInstance().parseXHtml(writer, document, in,
Charset.forName("utf-8"));
// System.out
// .println("-- FileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStreamFileInputStream");
document.close();
File out = new File(outPath);
if (!out.getParentFile().exists())
out.getParentFile().mkdirs();
if (!out.exists())
out.createNewFile();
System.out.println("-- created -out===" + out.getPath());
PdfReader pdfReader = new PdfReader(tmp.getPath());
PdfStamper pdfStamper = new PdfStamper(pdfReader,
new FileOutputStream(out));
//
PdfGState gs = new PdfGState();
base = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
// base = BaseFont.createFont(fontPath, BaseFont.IDENTITY_H,
// BaseFont.NOT_EMBEDDED);
System.out.println("-- -fontPath===" + fontPath);
if (base == null || pdfStamper == null) {
msg = "文件生成失败!";
ActionContext.getContext().put("msg", msg);
path = "error";
}
// 设置透明度为0.4
gs.setFillOpacity(0.4f);
gs.setStrokeOpacity(0.4f);
int toPage = pdfStamper.getReader().getNumberOfPages();
for (int i = 1; i <= toPage; i++) {
pageRect = pdfStamper.getReader().getPageSizeWithRotation(i);
// 计算水印X,Y坐标
float x = pageRect.getWidth() / 2;
float y = pageRect.getHeight() / 2;
// 获得PDF最顶层
content = pdfStamper.getOverContent(i);
content.saveState();
// set Transparency
content.setGState(gs);
content.beginText();
content.setColorFill(BaseColor.GRAY);
content.setFontAndSize(base, 60);
// 水印文字成45度角倾斜
content.showTextAligned(Element.ALIGN_CENTER, "eeeee", x, y, 45);
content.endText();
}
//
pdfStamper.close();
// tmp.delete();
// path = jsp.split(contextPath)[0] + contextPath+"/"+
// out.getPath().replace("\",
// "/").split(contextPath)[1].split("/")[1]+"/"+out.getPath().replace("\",
// "/").split(contextPath)[1].split("/")[2];
path = out.getPath().replace("\", "/").split("pdfOut")[0]
+ "pdfOut/$"
+ out.getPath().replace("\", "/").split("pdfOut")[1]
.split("/")[1];
System.out.println("-- created -pdf path===" + path);
} catch (Exception ex) {
ex.printStackTrace();
msg = "文件生成异常!";
ActionContext.getContext().put("msg", msg);
path = "error";
} finally {
content = null;
base = null;
pageRect = null;
}
return SUCCESS;
}
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。