【java】RTF转HTML或者TEXT

简介: 【java】RTF转HTML或者TEXT

公司需要处理旧的数据,所以有了 RTF 转 HTML 和 TEXT 的需求。解决方案如下,暂不支持表格和图片的转换。

RTF转HTML

工具类(下面的 RtfToTextExtUtil 与后文"RTF转Text"一节的工具类相同;RTF2HTML.java 的源码见文末)

package com.croot.rims.utils;


import io.netty.util.internal.StringUtil;
import org.springframework.stereotype.Component;

import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.rtf.RTFEditorKit;
import java.io.*;

/**
 * Extracts plain text from RTF markup held in a string and strips HTML tags from rich text.
 *
 * <p>NOTE(review): {@link #readRtf(String)} keeps its working objects in the instance fields
 * below so that {@link #writeRtf(File)} can serialise the most recently parsed document. The
 * class is registered as a component, so if one instance is shared between threads those fields
 * are shared too — confirm how the bean is used before calling it concurrently.
 */
@Component
public class RtfToTextExtUtil {

    /** Encodings probed by {@link #getEncoding(String)}, in the order they are tried. */
    private static final String[] CANDIDATE_ENCODINGS = {
        "GB2312", "ISO-8859-1", "GBK", "ASCII", "UTF-8"
    };

    /** Result of the last successful {@link #readRtf(String)} call. */
    String text;
    /** Document populated by the last {@link #readRtf(String)} call that parsed RTF. */
    DefaultStyledDocument dsd;
    /** Editor kit used by the last {@link #readRtf(String)} call that parsed RTF. */
    RTFEditorKit rtf;

    /**
     * Converts an RTF string to plain text.
     *
     * <p>Input that is null or empty yields an empty string. Input shorter than 10 characters,
     * or whose first 8 characters do not contain <code>{\rtf</code>, is returned unchanged.
     *
     * @param str the RTF source, or arbitrary text
     * @return the extracted text; an empty string if parsing or re-decoding fails
     */
    public String readRtf(String str) {
        if (StringUtil.isNullOrEmpty(str)) {
            return "";
        }
        if (str.length() < 10) {
            return str;
        }
        if (!str.substring(0, 8).contains("{\\rtf")) {
            return str;
        }

        rtf = new RTFEditorKit();
        dsd = new DefaultStyledDocument();
        try {
            rtf.read(str2InputStream(str), dsd, 0);
            // Extract the parsed text once instead of once per use.
            String parsed = dsd.getText(0, dsd.getLength());
            String encode = getEncoding(parsed);

            // The parsed characters are turned back into bytes and decoded as GBK.
            // NOTE(review): presumably this compensates for the editor kit surfacing \'xx
            // escapes as single-byte characters — confirm against the target JDK.
            if (encode.equals("UTF-8")) {
                text = new String(parsed.getBytes(encode), "UTF-8");
                text = new String(text.getBytes("ISO-8859-1"), "GBK");
            } else {
                text = new String(parsed.getBytes(encode), "GBK");
            }
            return text;
        } catch (Exception e) {
            // Best effort: any parsing/decoding failure is reported and mapped to "".
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Wraps the bytes of a string in an input stream.
     *
     * <p>NOTE(review): the bytes are produced with the platform default charset, so input that
     * contains non-ASCII characters yields different bytes on different machines.
     *
     * @param str the string to expose as a stream
     * @return a stream over the string's bytes
     */
    public static InputStream str2InputStream(String str) {
        return new ByteArrayInputStream(str.getBytes());
    }

    /**
     * Writes the document parsed by the last {@link #readRtf(String)} call to a file as RTF.
     * Failures are reported on standard error and otherwise ignored.
     *
     * @param out the file to write
     */
    public void writeRtf(File out) {
        // try-with-resources closes the stream even when writing fails.
        try (FileOutputStream stream = new FileOutputStream(out)) {
            rtf.write(stream, dsd, 0, dsd.getLength());
        } catch (IOException | BadLocationException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the first candidate encoding in which the string survives an encode/decode
     * round trip. Candidates are tried in the order GB2312, ISO-8859-1, GBK, ASCII, UTF-8.
     *
     * @param str the text to probe
     * @return the name of the first encoding that round-trips, or "" if none does or
     *         {@code str} is null
     */
    public String getEncoding(String str) {
        if (str == null) {
            return "";
        }
        for (String encode : CANDIDATE_ENCODINGS) {
            try {
                if (str.equals(new String(str.getBytes(encode), encode))) {
                    return encode;
                }
            } catch (UnsupportedEncodingException ignored) {
                // This JVM does not provide the charset; move on to the next candidate.
            }
        }
        return "";
    }

    /**
     * Removes HTML tags from rich-text content. Opening {@code <p>} tags (with or without
     * attributes) and {@code <br>} tags become line breaks; every other tag is deleted.
     *
     * @param content the HTML fragment; may be null
     * @return the content without tags, or "" when {@code content} is null
     */
    public static String stripHtml(String content) {
        if (content == null) {
            return "";
        }
        // Paragraph start -> line break. Matches "<p>" as well as "<p attr=...>", but not
        // other tags that merely start with "p" such as "<pre>".
        String result = content.replaceAll("(?i)<p(\\s[^>]*)?>", "\r\n");
        // <br>, <br/> and <br /> -> line break.
        result = result.replaceAll("(?i)<br\\s*/?>", "\r\n");
        // Drop whatever else sits between '<' and '>', including tags that span lines.
        result = result.replaceAll("<[^>]*>", "");
        return result;
    }

}

测试类

1.转数据库的数据

    /**
     * Demo: converts an RTF string (as it would be stored in a database column) to HTML and
     * prints the result.
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String rtfSource =
                "{\\rtf1\\ansi\\ansicpg936\\deff0{\\fonttbl{\\f0\\fmodern\\fprq6\\fcharset134 \\'cb\\'ce\\'cc\\'e5;}{\\f1\\fnil\\fcharset0 MS Sans Serif;}}\n"
                + "\\viewkind4\\uc1\\pard\\lang2052\\f0\\fs18\\'ce\\'ca\\'cc\\'e2\\'c3\\'e8\\'ca\\'f6*\n"
                + "\\par     ETF\\'bf\\'cd\\'bb\\'a7\\'b6\\'cb\\'b2\\'bb\\'c4\\'dc\\'ca\\'b9\\'d3\\'c3F5\\'b2\\'e9\\'d1\\'af\\'a1\\'a1\n"
                + "\\par \\'ce\\'ca\\'cc\\'e2\\'d3\\'b0\\'cf\\'ec*\n"
                + "\\par     ETF\\'bf\\'cd\\'bb\\'a7\\'b6\\'cb\\'b2\\'bb\\'c4\\'dc\\'ca\\'b9\\'d3\\'c3F5\\'b2\\'e9\\'d1\\'af\n"
                + "\\par \\'cf\\'d6\\'b3\\'a1\\'c1\\'d9\\'ca\\'b1\\'bd\\'e2\\'be\\'f6\\'b7\\'bd\\'b0\\'b8*\n"
                + "\\par     \\'d4\\'f6\\'bc\\'d3ETF\\'bf\\'cd\\'bb\\'a7\\'b6\\'cb\\'b5\\'c7\\'c2\\'bd\\'c8\\'a8\\'cf\\'de\\f1\\fs17 \n"
                + "\\par }\n";

        // Run the RTF -> HTML conversion.
        RTF2HTML converter = new RTF2HTML();
        String html = converter.convertRTFToHTML(rtfSource);

        // Work around garbled Chinese text: re-read the characters as GBK bytes, then print.
        byte[] rawBytes = html.getBytes("ISO8859_1");
        System.out.println(new String(rawBytes, "GBK"));
    }
 

2.转文件

/**
 * Demo: converts every RTF file in one directory to an HTML file in another directory.
 */
public class Test {

    public static void main(String[] args) {
        convertRTFToHTML("F:\\rtf", "F:\\html");
    }

    /**
     * Converts each RTF file directly inside {@code sourceFile} to an HTML file of the same base
     * name inside {@code targetFile}. A file that cannot be converted is skipped and the
     * remaining files are still processed.
     *
     * @param sourceFile source directory holding the RTF files to convert
     * @param targetFile target directory that receives the generated HTML files
     */
    public static void convertRTFToHTML(String sourceFile, String targetFile) {
        for (String fileUrl : getFiles(sourceFile)) {
            try {
                // Run the RTF -> HTML conversion.
                String rlt = new RTF2HTML().convertRTFToHTML(new File(fileUrl));
                if (StringUtils.isBlank(rlt)) {
                    // Nothing was produced for this file; move on to the next one
                    // instead of abandoning the rest of the batch.
                    continue;
                }
                // Work around garbled Chinese text: re-read the characters as GBK bytes.
                String context = new String(rlt.getBytes("ISO8859_1"), "GBK");

                // Build the target name from the source file name, replacing only the
                // last extension and leaving path handling to java.io.File.
                String sourceName = new File(fileUrl).getName();
                int dot = sourceName.lastIndexOf('.');
                String baseName = dot > 0 ? sourceName.substring(0, dot) : sourceName;
                File target = new File(targetFile, baseName + ".html");

                // Write the complete document; the writer is closed even if writing fails.
                try (BufferedWriter out = new BufferedWriter(new FileWriter(target))) {
                    out.write(context);
                }
            } catch (Exception e) {
                // Per-file boundary: report the failure and continue with the next file.
                e.printStackTrace();
            }
        }
    }

    /**
     * Lists the files located directly in a directory; files in sub-directories are not
     * included.
     *
     * @param path the directory to list
     * @return the paths of the regular files found; empty when {@code path} is not a
     *         directory or cannot be read
     */
    public static List<String> getFiles(String path) {
        List<String> files = new ArrayList<>();
        File[] entries = new File(path).listFiles();
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            return files;
        }
        for (File entry : entries) {
            if (entry.isFile()) {
                files.add(entry.toString());
            }
        }
        return files;
    }
}

RTF转Text

工具类

/**
 * Extracts plain text from RTF markup held in a string and strips HTML tags from rich text.
 *
 * <p>NOTE(review): {@link #readRtf(String)} keeps its working objects in the instance fields
 * below so that {@link #writeRtf(File)} can serialise the most recently parsed document. The
 * class is registered as a component, so if one instance is shared between threads those fields
 * are shared too — confirm how the bean is used before calling it concurrently.
 */
@Component
public class RtfToTextExtUtil {

    /** Encodings probed by {@link #getEncoding(String)}, in the order they are tried. */
    private static final String[] CANDIDATE_ENCODINGS = {
        "GB2312", "ISO-8859-1", "GBK", "ASCII", "UTF-8"
    };

    /** Result of the last successful {@link #readRtf(String)} call. */
    String text;
    /** Document populated by the last {@link #readRtf(String)} call that parsed RTF. */
    DefaultStyledDocument dsd;
    /** Editor kit used by the last {@link #readRtf(String)} call that parsed RTF. */
    RTFEditorKit rtf;

    /**
     * Converts an RTF string to plain text.
     *
     * <p>Input that is null or empty yields an empty string. Input shorter than 10 characters,
     * or whose first 8 characters do not contain <code>{\rtf</code>, is returned unchanged.
     *
     * @param str the RTF source, or arbitrary text
     * @return the extracted text; an empty string if parsing or re-decoding fails
     */
    public String readRtf(String str) {
        if (StringUtil.isNullOrEmpty(str)) {
            return "";
        }
        if (str.length() < 10) {
            return str;
        }
        if (!str.substring(0, 8).contains("{\\rtf")) {
            return str;
        }

        rtf = new RTFEditorKit();
        dsd = new DefaultStyledDocument();
        try {
            rtf.read(str2InputStream(str), dsd, 0);
            // Extract the parsed text once instead of once per use.
            String parsed = dsd.getText(0, dsd.getLength());
            String encode = getEncoding(parsed);

            // The parsed characters are turned back into bytes and decoded as GBK.
            // NOTE(review): presumably this compensates for the editor kit surfacing \'xx
            // escapes as single-byte characters — confirm against the target JDK.
            if (encode.equals("UTF-8")) {
                text = new String(parsed.getBytes(encode), "UTF-8");
                text = new String(text.getBytes("ISO-8859-1"), "GBK");
            } else {
                text = new String(parsed.getBytes(encode), "GBK");
            }
            return text;
        } catch (Exception e) {
            // Best effort: any parsing/decoding failure is reported and mapped to "".
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Wraps the bytes of a string in an input stream.
     *
     * <p>NOTE(review): the bytes are produced with the platform default charset, so input that
     * contains non-ASCII characters yields different bytes on different machines.
     *
     * @param str the string to expose as a stream
     * @return a stream over the string's bytes
     */
    public static InputStream str2InputStream(String str) {
        return new ByteArrayInputStream(str.getBytes());
    }

    /**
     * Writes the document parsed by the last {@link #readRtf(String)} call to a file as RTF.
     * Failures are reported on standard error and otherwise ignored.
     *
     * @param out the file to write
     */
    public void writeRtf(File out) {
        // try-with-resources closes the stream even when writing fails.
        try (FileOutputStream stream = new FileOutputStream(out)) {
            rtf.write(stream, dsd, 0, dsd.getLength());
        } catch (IOException | BadLocationException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the first candidate encoding in which the string survives an encode/decode
     * round trip. Candidates are tried in the order GB2312, ISO-8859-1, GBK, ASCII, UTF-8.
     *
     * @param str the text to probe
     * @return the name of the first encoding that round-trips, or "" if none does or
     *         {@code str} is null
     */
    public String getEncoding(String str) {
        if (str == null) {
            return "";
        }
        for (String encode : CANDIDATE_ENCODINGS) {
            try {
                if (str.equals(new String(str.getBytes(encode), encode))) {
                    return encode;
                }
            } catch (UnsupportedEncodingException ignored) {
                // This JVM does not provide the charset; move on to the next candidate.
            }
        }
        return "";
    }

    /**
     * Removes HTML tags from rich-text content. Opening {@code <p>} tags (with or without
     * attributes) and {@code <br>} tags become line breaks; every other tag is deleted.
     *
     * @param content the HTML fragment; may be null
     * @return the content without tags, or "" when {@code content} is null
     */
    public static String stripHtml(String content) {
        if (content == null) {
            return "";
        }
        // Paragraph start -> line break. Matches "<p>" as well as "<p attr=...>", but not
        // other tags that merely start with "p" such as "<pre>".
        String result = content.replaceAll("(?i)<p(\\s[^>]*)?>", "\r\n");
        // <br>, <br/> and <br /> -> line break.
        result = result.replaceAll("(?i)<br\\s*/?>", "\r\n");
        // Drop whatever else sits between '<' and '>', including tags that span lines.
        result = result.replaceAll("<[^>]*>", "");
        return result;
    }

}

RTF2HTML.java

public class RTF2HTML {
 
    /**
     * Marker searched for in RTF content to detect a table.
     */
    public static final String TABLE = "\\trowd";
 
    /**
     * Marker searched for in RTF content to detect a picture.
     */
    public static final String PICTURE = "\\*\\shppic";
 
    /**
     *  Tracks which formatting is currently open in the generated HTML and, for each document
     *  element, appends the opening/closing tags needed to switch to that element's attributes.
     *
     *@author     bmartins
     *@created    22 de Agosto de 2002
     */
    private class HTMLStateMachine {

        /** Values of the HTML {@code align} attribute, indexed by alignment code 0..2. */
        private String[] alignNames = { "left", "center", "right" };
        /**
         *  When {@code false}, {@link #updateState} does not process font-family attributes.
         */
        public boolean acceptFonts;
        /** Font family of the currently open {@code <font face>} tag; "" when none is open. */
        private String fontName;
        /** Colour of the currently open {@code <font color>} tag; null when none is open. */
        private Color color;
        /** Font size attribute last seen; -1 when no {@code <font size>} tag is open. */
        private int size;
        /** Index into {@link #alignNames} of the open {@code <div>}; -1 when none is open. */
        private int alignment;
        private boolean bold;
        private boolean italic;
        private boolean underline;
        private double firstLineIndent;
        /** Indents for which the currently open indent table (if any) was written. */
        private double oldLeftIndent;
        private double oldRightIndent;
        private double leftIndent;
        private double rightIndent;
        /** Set when a paragraph element is seen; cleared by {@link #performFirstLineIndent}. */
        private boolean firstLine;

        /**
         *  Creates a state machine with no formatting open and font handling enabled.
         */
        HTMLStateMachine() {
            acceptFonts = true;
            fontName = "";
            alignment = -1;
            bold = false;
            italic = false;
            underline = false;
            color = null;
            size = -1;
            firstLineIndent = 0.0D;
            oldLeftIndent = 0.0D;
            oldRightIndent = 0.0D;
            leftIndent = 0.0D;
            rightIndent = 0.0D;
            firstLine = false;
        }

        /**
         *  Brings the tracked state in line with an element's attributes, appending whatever
         *  tags are needed for the transition.
         *
         *@param  attributeset  attributes of the element being entered
         *@param  stringbuffer  HTML output to append to
         *@param  element       the element being entered; a "paragraph" element re-arms the
         *                      first-line indent
         */
        public void updateState(
                AttributeSet attributeset,
                StringBuffer stringbuffer,
                Element element) {
            if (element.getName().equalsIgnoreCase("paragraph")) {
                firstLine = true;
            }
            leftIndent =
                    updateDouble(attributeset, leftIndent, StyleConstants.LeftIndent);
            rightIndent =
                    updateDouble(attributeset, rightIndent, StyleConstants.RightIndent);
            // The indent table for the previous indents is closed before any inline tag is
            // touched; the table for the new indents is opened only after all of them.
            if (leftIndent != oldLeftIndent || rightIndent != oldRightIndent) {
                closeIndentTable(stringbuffer, oldLeftIndent, oldRightIndent);
            }
            bold = updateBoolean(attributeset, StyleConstants.Bold, "b", bold, stringbuffer);
            italic =
                    updateBoolean(attributeset, StyleConstants.Italic, "i", italic, stringbuffer);
            underline =
                    updateBoolean(
                            attributeset,
                            StyleConstants.Underline,
                            "u",
                            underline,
                            stringbuffer);
            size = updateFontSize(attributeset, size, stringbuffer);
            color = updateFontColor(attributeset, color, stringbuffer);
            if (acceptFonts) {
                fontName = updateFontName(attributeset, fontName, stringbuffer);
            }
            alignment = updateAlignment(attributeset, alignment, stringbuffer);
            firstLineIndent =
                    updateDouble(attributeset, firstLineIndent, StyleConstants.FirstLineIndent);
            if (leftIndent != oldLeftIndent || rightIndent != oldRightIndent) {
                openIndentTable(stringbuffer, leftIndent, rightIndent);
                oldLeftIndent = leftIndent;
                oldRightIndent = rightIndent;
            }
        }

        /**
         *  Opens the table used to render indentation, unless both indents are zero. The left
         *  indent becomes a leading cell holding one {@code &nbsp;} per 4 units of indent.
         *
         *@param  stringbuffer  HTML output to append to
         *@param  d             left indent
         *@param  d1            right indent (only checked for being non-zero here)
         */
        private void openIndentTable(
                StringBuffer stringbuffer,
                double d,
                double d1) {
            if (d != 0.0D || d1 != 0.0D) {
                closeSubsetTags(stringbuffer);
                stringbuffer.append("<table><tr>");
                String padding = getSpaceTab((int) (d / 4D));
                if (padding.length() > 0) {
                    stringbuffer.append("<td>" + padding + "</td>");
                }
                stringbuffer.append("<td>");
            }
        }

        /**
         *  Closes the table opened by {@link #openIndentTable}, unless both indents are zero.
         *  The right indent becomes a trailing cell holding one {@code &nbsp;} per 4 units.
         *
         *@param  stringbuffer  HTML output to append to
         *@param  d             left indent (only checked for being non-zero here)
         *@param  d1            right indent
         */
        private void closeIndentTable(
                StringBuffer stringbuffer,
                double d,
                double d1) {
            if (d != 0.0D || d1 != 0.0D) {
                closeSubsetTags(stringbuffer);
                stringbuffer.append("</td>");
                String padding = getSpaceTab((int) (d1 / 4D));
                if (padding.length() > 0) {
                    stringbuffer.append("<td>" + padding + "</td>");
                }
                stringbuffer.append("</tr></table>");
            }
        }

        /**
         *  Closes everything that is still open: inline tags, the alignment {@code <div>} and
         *  the indent table.
         *
         *@param  stringbuffer  HTML output to append to
         */
        public void closeTags(StringBuffer stringbuffer) {
            closeSubsetTags(stringbuffer);
            closeTag(alignment, -1, "div", stringbuffer);
            alignment = -1;
            closeIndentTable(stringbuffer, oldLeftIndent, oldRightIndent);
        }

        /**
         *  Closes the open inline tags (bold, italic, underline and the three kinds of font
         *  tag) and resets the corresponding state.
         *
         *@param  stringbuffer  HTML output to append to
         */
        private void closeSubsetTags(StringBuffer stringbuffer) {
            closeTag(bold, "b", stringbuffer);
            closeTag(italic, "i", stringbuffer);
            closeTag(underline, "u", stringbuffer);
            closeTag(color, "font", stringbuffer);
            closeTag(fontName, "font", stringbuffer);
            closeTag(size, -1, "font", stringbuffer);
            bold = false;
            italic = false;
            underline = false;
            color = null;
            fontName = "";
            size = -1;
        }

        /**
         *  Appends a closing tag when the flag is set.
         *
         *@param  flag          whether the tag is currently open
         *@param  s             tag name
         *@param  stringbuffer  HTML output to append to
         */
        private void closeTag(
                boolean flag,
                String s,
                StringBuffer stringbuffer) {
            if (flag) {
                stringbuffer.append("</" + s + ">");
            }
        }

        /**
         *  Appends a closing tag when a colour is set.
         *
         *@param  color1        the tracked colour; null means no tag is open
         *@param  s             tag name
         *@param  stringbuffer  HTML output to append to
         */
        private void closeTag(
                Color color1,
                String s,
                StringBuffer stringbuffer) {
            if (color1 != null) {
                stringbuffer.append("</" + s + ">");
            }
        }

        /**
         *  Appends a closing tag when the tracked string value is non-empty.
         *
         *@param  s             the tracked value; "" means no tag is open
         *@param  s1            tag name
         *@param  stringbuffer  HTML output to append to
         */
        private void closeTag(String s, String s1, StringBuffer stringbuffer) {
            if (s.length() > 0) {
                stringbuffer.append("</" + s1 + ">");
            }
        }

        /**
         *  Appends a closing tag when the tracked value is greater than the "unset" value.
         *
         *@param  i             the tracked value
         *@param  j             the value that means no tag is open
         *@param  s             tag name
         *@param  stringbuffer  HTML output to append to
         */
        private void closeTag(
                int i,
                int j,
                String s,
                StringBuffer stringbuffer) {
            if (i > j) {
                stringbuffer.append("</" + s + ">");
            }
        }

        /**
         *  Switches the alignment {@code <div>} when the element specifies a different
         *  alignment. Code 3 is rendered like code 0 ("left"); codes outside 0..2 are ignored.
         *
         *@param  attributeset  attributes of the element being entered
         *@param  k             alignment currently open, or -1
         *@param  stringbuffer  HTML output to append to
         *@return               the alignment that is open afterwards
         */
        private int updateAlignment(
                AttributeSet attributeset,
                int k,
                StringBuffer stringbuffer) {
            Object obj = attributeset.getAttribute(StyleConstants.Alignment);
            if (obj == null) {
                return k;
            }
            int requested = ((Integer) obj).intValue();
            if (requested == 3) {
                requested = 0;
            }
            if (requested != k && requested >= 0 && requested <= 2) {
                if (k > -1) {
                    stringbuffer.append("</div>");
                }
                stringbuffer.append("<div align=\"" + alignNames[requested] + "\">");
                return requested;
            }
            return k;
        }

        /**
         *  Switches the {@code <font color>} tag when the element specifies a different
         *  foreground colour.
         *
         *@param  attributeset  attributes of the element being entered
         *@param  color3        colour currently open, or null
         *@param  stringbuffer  HTML output to append to
         *@return               the colour that is open afterwards
         */
        private Color updateFontColor(
                AttributeSet attributeset,
                Color color3,
                StringBuffer stringbuffer) {
            Object obj = attributeset.getAttribute(StyleConstants.Foreground);
            if (obj == null) {
                return color3;
            }
            Color requested = (Color) obj;
            // Compare by value rather than by reference, so that an equal colour held in a
            // different Color instance does not produce a redundant </font><font> pair.
            if (!requested.equals(color3)) {
                if (color3 != null) {
                    stringbuffer.append("</font>");
                }
                stringbuffer.append(
                        "<font color=\"#" + makeColorString(requested) + "\">");
            }
            return requested;
        }

        /**
         *  Switches the {@code <font face>} tag when the element specifies a different font
         *  family.
         *
         *@param  attributeset  attributes of the element being entered
         *@param  s2            font family currently open, or ""
         *@param  stringbuffer  HTML output to append to
         *@return               the font family that is open afterwards
         */
        private String updateFontName(
                AttributeSet attributeset,
                String s2,
                StringBuffer stringbuffer) {
            Object obj = attributeset.getAttribute(StyleConstants.FontFamily);
            if (obj == null) {
                return s2;
            }
            String requested = (String) obj;
            if (!requested.equals(s2)) {
                if (!s2.equals("")) {
                    stringbuffer.append("</font>");
                }
                stringbuffer.append("<font face=\"" + requested + "\">");
            }
            return requested;
        }

        /**
         *  Reads a {@code Float}-valued attribute.
         *
         *@param  attributeset  attributes of the element being entered
         *@param  d2            value to keep when the attribute is absent
         *@param  obj           attribute key
         *@return               the attribute value, or {@code d2} when it is absent
         */
        private double updateDouble(
                AttributeSet attributeset,
                double d2,
                Object obj) {
            Object value = attributeset.getAttribute(obj);
            if (value == null) {
                return d2;
            }
            return ((Float) value).floatValue();
        }

        /**
         *  Switches the {@code <font size>} tag when the element specifies a different font
         *  size. The HTML size written is the attribute value divided by 4 (integer division).
         *
         *@param  attributeset  attributes of the element being entered
         *@param  k             font size currently open, or -1
         *@param  stringbuffer  HTML output to append to
         *@return               the font size that is open afterwards
         */
        private int updateFontSize(
                AttributeSet attributeset,
                int k,
                StringBuffer stringbuffer) {
            Object obj = attributeset.getAttribute(StyleConstants.FontSize);
            if (obj == null) {
                return k;
            }
            int requested = ((Integer) obj).intValue();
            if (requested != k) {
                if (k != -1) {
                    stringbuffer.append("</font>");
                }
                stringbuffer.append("<font size=\"" + requested / 4 + "\">");
            }
            return requested;
        }

        /**
         *  Opens or closes a simple on/off tag when the element's boolean attribute differs
         *  from the tracked state.
         *
         *@param  attributeset  attributes of the element being entered
         *@param  obj           attribute key
         *@param  s             tag name
         *@param  flag2         whether the tag is currently open
         *@param  stringbuffer  HTML output to append to
         *@return               whether the tag is open afterwards
         */
        private boolean updateBoolean(
                AttributeSet attributeset,
                Object obj,
                String s,
                boolean flag2,
                StringBuffer stringbuffer) {
            Object value = attributeset.getAttribute(obj);
            if (value == null) {
                return flag2;
            }
            boolean requested = ((Boolean) value).booleanValue();
            if (requested != flag2) {
                stringbuffer.append(requested ? "<" + s + ">" : "</" + s + ">");
            }
            return requested;
        }

        /**
         *  Formats a colour as six lower-case hexadecimal digits (RRGGBB, alpha discarded).
         *
         *@param  color1  the colour to format
         *@return         the zero-padded hexadecimal RGB value
         */
        private String makeColorString(Color color1) {
            return String.format("%06x", color1.getRGB() & 0xffffff);
        }

        /**
         *  Prefixes the text with the first-line indent if this is the first text since a
         *  paragraph element was seen; one {@code &nbsp;} is used per 4 units of indent.
         *
         *@param  s2  the text about to be written
         *@return     the text, indented when it starts a paragraph with a non-zero
         *            first-line indent
         */
        public String performFirstLineIndent(String s2) {
            if (!firstLine) {
                return s2;
            }
            firstLine = false;
            if (firstLineIndent == 0.0D) {
                return s2;
            }
            return getSpaceTab((int) (firstLineIndent / 4D)) + s2;
        }

        /**
         *  Builds a run of non-breaking spaces.
         *
         *@param  i  number of {@code &nbsp;} entities wanted
         *@return    {@code i} entities concatenated; "" when {@code i} is not positive
         */
        public String getSpaceTab(int i) {
            StringBuilder spaces = new StringBuilder();
            for (int j = 0; j < i; j++) {
                spaces.append("&nbsp;");
            }
            return spaces.toString();
        }

    }
 
    /**
     *  Creates a converter; nothing is initialised beyond the field defaults.
     */
    public RTF2HTML() {
    }
 
    // Length, in characters, of the RTF string most recently given to convertRTFToHTML(String).
    private int sizeCount = 0;
 
    /**
     *  Returns the size of the most recently converted RTF input.
     *
     *@return    the character count recorded by the last convertRTFToHTML(String) call,
     *           or 0 if none has been made
     */
    public int originalSize() {
        return sizeCount;
    }
 
    /**
     *  Reads all characters from a reader and converts them from RTF to HTML. The reader is
     *  consumed to its end and is not closed.
     *
     *@param  input            source of the RTF text
     *@return                  the HTML document produced by convertRTFToHTML(String)
     *@exception  IOException  if reading from the reader fails
     */
    public String convertRTFToHTML(Reader input) throws IOException {
        BufferedReader buffered = new BufferedReader(input);
        StringBuffer contents = new StringBuffer();
        for (int ch = buffered.read(); ch != -1; ch = buffered.read()) {
            contents.append((char) ch);
        }
        String rtfText = contents.toString();
        return convertRTFToHTML(rtfText);
    }
 
    /**
     *  Converts RTF text to an HTML document. The converted body is split on whitespace and
     *  re-joined with single spaces; tokens starting with {@code http://} or {@code https://}
     *  are wrapped in a hyperlink. The input length is recorded for originalSize().
     *
     *@param  s4  the RTF text
     *@return     the HTML, wrapped in {@code <html><body>} ... {@code </body></html>}
     */
    public String convertRTFToHTML(String s4) {
        sizeCount = s4.length();
        String converted = convertRTFStringToHTML(s4, new HTMLStateMachine());

        // A builder avoids re-copying the whole result for every token.
        StringBuilder html = new StringBuilder("<html><body>");
        StringTokenizer st = new StringTokenizer(converted);
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            if (token.startsWith("http://") || token.startsWith("https://")) {
                html.append("<a href='").append(token).append("'>").append(token).append("</a>");
            } else {
                html.append(token);
            }
            html.append(' ');
        }
        return html.append("</body></html>").toString();
    }
 
    /**
     *  Reads an input stream to its end, decoding it with the platform default charset, and
     *  converts the text from RTF to HTML. The stream is not closed.
     *
     *@param  input            source of the RTF bytes
     *@return                  the HTML document produced by convertRTFToHTML(String)
     *@exception  IOException  if reading from the stream fails
     */
    public String convertRTFToHTML(InputStream input) throws IOException {
        // The Reader overload performs the same buffered read-to-end and conversion.
        Reader decoded = new InputStreamReader(input);
        return convertRTFToHTML(decoded);
    }
 
    /**
     *  Reads an RTF file (decoded with the platform default charset) and converts it to HTML.
     *  Files containing the {@link #TABLE} or {@link #PICTURE} marker are not converted: a
     *  message is printed to standard output and {@code null} is returned.
     *
     *@param  input            the RTF file
     *@return                  the HTML document, or {@code null} when the file contains a
     *                         table or a picture
     *@exception  IOException  if the file cannot be opened or read
     */
    public String convertRTFToHTML(File input) throws IOException {
        StringBuilder sb = new StringBuilder();
        // try-with-resources releases the file handle even when reading fails.
        try (BufferedReader strm = new BufferedReader(new FileReader(input))) {
            int s;
            while ((s = strm.read()) != -1) {
                sb.append((char) s);
            }
        }
        String rtfStr = sb.toString();
        // Tables are not supported by the conversion.
        if (rtfStr.contains(TABLE)) {
            System.out.println("文件:" + input.getName() + "中包含表格,暂时无法转换!");
            return null;
        }
        // Pictures are not supported by the conversion.
        if (rtfStr.contains(PICTURE)) {
            System.out.println("文件:" + input.getName() + "中包含图片,暂时无法转换!");
            return null;
        }
        return convertRTFToHTML(rtfStr);
    }
 
    /**
     *  Downloads RTF content over HTTP (following redirects, decoding the response with the
     *  platform default charset) and converts it to HTML.
     *
     *@param  input            URL of the RTF resource; its connection must be an
     *                         {@code HttpURLConnection}
     *@return                  the HTML document produced by convertRTFToHTML(String)
     *@exception  IOException  if connecting or reading fails
     */
    public String convertRTFToHTML(URL input) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) input.openConnection();
        conn.setAllowUserInteraction(false);
        conn.setRequestProperty(
                "User-agent",
                "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)");
        conn.setInstanceFollowRedirects(true);
        conn.connect();
        StringBuilder sb = new StringBuilder();
        // Closing the reader closes the response stream, also when reading fails.
        try (BufferedReader strm =
                new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            int s;
            while ((s = strm.read()) != -1) {
                sb.append((char) s);
            }
        }
        return convertRTFToHTML(sb.toString());
    }
 
    /**
     *  Description of the Method
     *
     *@param  s  Description of the Parameter
     *@return    Description of the Return Value
     */
    public String convertRTFStringToHTML(String s) {
        // Same pipeline as the two-argument overload (parse the RTF into a
        // new styled document, then scan it), driven by a brand-new state
        // machine created for this call.
        return convertRTFStringToHTML(s, new HTMLStateMachine());
    }
 
    /**
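     *  Converts RTF text to HTML using the supplied state machine.
     *
     *@param  s2                the RTF text to convert
     *@param  htmlstatemachine  the state machine used while scanning the
     *                          parsed document
     *@return                   the HTML built by scanning the parsed
     *                          document; an empty string when scanning fails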
     */
    private String convertRTFStringToHTML(
            String s2,
            HTMLStateMachine htmlstatemachine) {
        // Step 1: parse the RTF text into a new, empty styled document.
        RTFEditorKit kit = new RTFEditorKit();
        DefaultStyledDocument parsed = new DefaultStyledDocument();
        readString(s2, parsed, kit);
        // Step 2: walk the document and emit HTML through the state machine.
        return scanDocument(parsed, htmlstatemachine);
    }
 
    /**
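     *  Parses RTF text into the given document. Any exception raised while
     *  reading is swallowed, leaving the document with whatever was inserted
     *  before the failure.
     *
     *@param  s             the RTF text; its bytes are taken with the
     *                      platform default charset
     *@param  document      the document that receives the parsed content,
     *                      inserted at offset 0
     *@param  rtfeditorkit  the editor kit that performs the parsing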
     */
    private void readString(
            String s,
            Document document,
            RTFEditorKit rtfeditorkit) {
        try {
            // String.getBytes() with no argument uses the platform default charset.
            byte[] rawBytes = s.getBytes();
            rtfeditorkit.read(new ByteArrayInputStream(rawBytes), document, 0);
        } catch (Exception ignored) {
            // Deliberate best effort: a failed read is not reported and the
            // document keeps whatever content was inserted before the error.
        }
    }
 
    /**
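     *  Walks the document's element tree and builds the HTML output, then
     *  asks the state machine to close its tags.
     *
     *@param  document          the parsed document to scan
     *@param  htmlstatemachine  the state machine that tracks and emits tags
     *@return                   the generated HTML, or an empty string when
     *                          any exception occurs during scanning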
     */
    private String scanDocument(
            Document document,
            HTMLStateMachine htmlstatemachine) {
        try {
            StringBuffer html = new StringBuffer();
            // Visit every element below the root, appending markup and text.
            recurseElements(
                    document.getDefaultRootElement(),
                    document,
                    html,
                    htmlstatemachine);
            // Let the state machine append whatever closing tags it still owes.
            htmlstatemachine.closeTags(html);
            return html.toString();
        } catch (Exception ignored) {
            // Any failure discards the partial output and yields an empty string.
            return "";
        }
    }
 
    /**
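     *  Visits every descendant of an element in pre-order: each child is
     *  passed to scanAttributes before its own children are visited.
     *
     *@param  element           the element whose children are visited
     *@param  document          the document the elements belong to
     *@param  stringbuffer      the buffer that collects the HTML output
     *@param  htmlstatemachine  the state machine passed on to scanAttributes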
     */
    private void recurseElements(
            Element element,
            Document document,
            StringBuffer stringbuffer,
            HTMLStateMachine htmlstatemachine) {
        int index = 0;
        // The child count is re-read on every pass, as in a plain for loop.
        while (index < element.getElementCount()) {
            Element child = element.getElement(index);
            // Emit this child first, then descend into its own children.
            scanAttributes(child, document, stringbuffer, htmlstatemachine);
            recurseElements(child, document, stringbuffer, htmlstatemachine);
            index++;
        }
    }
 
    /**
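     *  Emits the output for a single element. The state machine is updated
     *  with the element's attributes; if the element is named "content"
     *  (case-insensitive), its text is appended with tabs replaced by the
     *  result of getSpaceTab(8), newlines replaced by {@code <br />} plus a
     *  newline, and the state machine's first-line indent applied. A
     *  BadLocationException while fetching the text is silently ignored.
     *
     *@param  element           the element to emit
     *@param  document          the document the element's text is read from
     *@param  stringbuffer      the buffer that collects the HTML output
     *@param  htmlstatemachine  the state machine that tracks and emits tags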
     */
    private void scanAttributes(
            Element element,
            Document document,
            StringBuffer stringbuffer,
            HTMLStateMachine htmlstatemachine) {
        try {
            // Fetch the element's text first: if the offsets are invalid the
            // state machine must not be touched for this element.
            int start = element.getStartOffset();
            int end = element.getEndOffset();
            String text = document.getText(start, end - start);

            htmlstatemachine.updateState(
                    element.getAttributes(), stringbuffer, element);

            // Only elements named "content" contribute text to the output.
            if (!element.getName().equalsIgnoreCase("content")) {
                return;
            }
            String withTabs = text.replaceAll("\\t", htmlstatemachine.getSpaceTab(8));
            String withBreaks = withTabs.replaceAll("\\n", "<br />\n");
            stringbuffer.append(htmlstatemachine.performFirstLineIndent(withBreaks));
        } catch (BadLocationException ignored) {
            // Invalid offsets: this element is skipped without any output.
        }
    }
 
    /**
     *  Converts the RTF content of a file and returns the result as a stream.
     *
     *@param  in             the RTF file to read
     *@return                a stream over the bytes of the converted text
     *@exception  Exception  if the file cannot be opened or the conversion
     *                       fails
     */
    public InputStream parse(File in) throws Exception {
        // parse(InputStream) drains the stream completely and hands back an
        // in-memory ByteArrayInputStream, so the file stream can be closed as
        // soon as it returns. The previous version never closed it.
        try (InputStream fileStream = new FileInputStream(in)) {
            return parse(fileStream);
        }
    }
 
    /**
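     *  Downloads RTF content over an HTTP connection, converts it and
     *  returns the result as a stream.
     *
     *@param  in             the URL to download from; it is opened as an
     *                       HttpURLConnection
     *@return                a stream over the bytes of the converted text
     *@exception  Exception  if connecting, reading or converting fails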
     */
    public InputStream parse(URL in) throws Exception {
        // The cast means only HTTP(S) URLs are accepted, as before.
        HttpURLConnection conn = (HttpURLConnection) in.openConnection();
        conn.setAllowUserInteraction(false);
        conn.setRequestProperty("User-agent", "www.tumba.pt");
        conn.setInstanceFollowRedirects(true);
        conn.connect();
        // parse(InputStream) drains the response completely and returns an
        // in-memory stream, so the network stream is closed right after it
        // returns. The previous version never closed it.
        try (InputStream response = conn.getInputStream()) {
            return parse(response);
        }
    }
 
    /**
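     *  Reads all text from the stream, converts it with the string-based
     *  RTF conversion and returns the result as an in-memory stream. The
     *  input stream is not closed by this method.
     *
     *@param  in             the stream supplying the RTF text, decoded with
     *                       the platform default charset
     *@return                a stream over the bytes of the converted text
     *@exception  Exception  if reading or converting fails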
     */
    public InputStream parse(InputStream in) throws Exception {
        // Decode with the platform default charset and collect every character.
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        StringBuffer collected = new StringBuffer();
        for (int ch = reader.read(); ch != -1; ch = reader.read()) {
            collected.append((char) ch);
        }
        // Convert the text, then expose the result's bytes (platform default
        // charset again) as an in-memory stream.
        String html = convertRTFToHTML(collected.toString());
        byte[] htmlBytes = html.getBytes();
        return new ByteArrayInputStream(htmlBytes);
    }
 
}

测试类

/**
 * Small manual check: feeds a sample RTF string to
 * {@code RtfToTextExtUtil.readRtf} and prints whatever it returns.
 */
public class wwww {

    /** Sample RTF input; it declares code page 936 and uses \'xx hex escapes for its text. */
    private static final String SAMPLE_RTF =
            "{\\rtf1\\ansi\\ansicpg936\\deff0{\\fonttbl{\\f0\\fmodern\\fprq6\\fcharset134 \\'cb\\'ce\\'cc\\'e5;}{\\f1\\fnil\\fcharset0 MS Sans Serif;}}\n" +
                "\\viewkind4\\uc1\\pard\\lang2052\\f0\\fs18\\'ce\\'ca\\'cc\\'e2\\'c3\\'e8\\'ca\\'f6*\n" +
                "\\par     ETF\\'bf\\'cd\\'bb\\'a7\\'b6\\'cb\\'b2\\'bb\\'c4\\'dc\\'ca\\'b9\\'d3\\'c3F5\\'b2\\'e9\\'d1\\'af\\'a1\\'a1\n" +
                "\\par \\'ce\\'ca\\'cc\\'e2\\'d3\\'b0\\'cf\\'ec*\n" +
                "\\par     ETF\\'bf\\'cd\\'bb\\'a7\\'b6\\'cb\\'b2\\'bb\\'c4\\'dc\\'ca\\'b9\\'d3\\'c3F5\\'b2\\'e9\\'d1\\'af\n" +
                "\\par \\'cf\\'d6\\'b3\\'a1\\'c1\\'d9\\'ca\\'b1\\'bd\\'e2\\'be\\'f6\\'b7\\'bd\\'b0\\'b8*\n" +
                "\\par     \\'d4\\'f6\\'bc\\'d3ETF\\'bf\\'cd\\'bb\\'a7\\'b6\\'cb\\'b5\\'c7\\'c2\\'bd\\'c8\\'a8\\'cf\\'de\\f1\\fs17 \n" +
                "\\par }\n";

    /**
     * Runs the sample through the converter and prints the result to stdout.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        RtfToTextExtUtil converter = new RtfToTextExtUtil();
        System.out.println(converter.readRtf(SAMPLE_RTF));
    }
}


目录
相关文章
|
7月前
|
安全 Go
Golang深入浅出之-Go语言模板(text/template):动态生成HTML
【4月更文挑战第24天】Go语言标准库中的`text/template`包用于动态生成HTML和文本,但不熟悉其用法可能导致错误。本文探讨了三个常见问题:1) 忽视模板执行错误,应确保正确处理错误;2) 忽视模板安全,应使用`html/template`包防止XSS攻击;3) 模板结构不合理,应合理组织模板以提高可维护性。理解并运用这些最佳实践,能提升Go语言模板编程的效率和安全性,助力构建稳健的Web应用。
101 0
|
7月前
|
前端开发 Java 测试技术
《手把手教你》系列技巧篇(十三)-java+ selenium自动化测试-元素定位大法之By partial link text(详细教程)
【4月更文挑战第5天】本文介绍了Partial Link Text,这是一种用于网页自动化测试的方法,允许部分匹配超链接文本,尤其适用于长链接不便完全输入的情况。为确保准确性,建议选择具有唯一性的字符组合。文章列举了8种常用的定位方法,包括id、name、class name等,并重点讲解了partial link text。实战部分展示了如何通过 PartialLinkText 在百度首页找到并点击“打哭伊藤!孙颖莎:过瘾”这一热搜链接的代码示例。
119 2
|
7月前
|
前端开发 Java 测试技术
《手把手教你》系列技巧篇(十二)-java+ selenium自动化测试-元素定位大法之By link text(详细教程)
【4月更文挑战第4天】本文介绍了link text在自动化测试中的应用。Link text是指网页中链接的文字描述,点击可跳转至其他页面。文章列举了8种常用的定位方法,其中着重讲解了link text定位,并通过实例展示了如何使用Java代码实现点击百度首页的“奥运奖牌榜 最新排名”链接,进入相应页面。如果link text不准确,则无法定位到元素,这说明linkText是精准匹配,而非模糊匹配。文章还提到了partial link text作为link text的模糊匹配版本,将在后续内容中介绍。
126 4
|
3月前
|
XML JavaScript Java
NekoHTML 是一个基于Java的HTML扫描器和标签补全器
**NekoHTML** 是一个基于Java的HTML扫描器和标签补全器(tag balancer),由J. Andrew Clark开发。它主要用于解析HTML文档,并能够“修正”许多在编写HTML文档过程中常犯的错误,如增补缺失的父元素、自动用结束标签关闭相应的元素,以及处理不匹配的内嵌元素标签等。这使得程序能够以标准的XML接口来访问HTML文档中的信息。 ### NekoHTML的主要特点包括: 1. **错误修正**:能够自动修正HTML中的常见错误,如未闭合的标签等。 2. **DOM树生成**:将HTML源代码转化为DOM(Document Object Model)结构,便
|
2月前
|
机器学习/深度学习 JSON JavaScript
LangChain-21 Text Splitters 内容切分器 支持多种格式 HTML JSON md Code(JS/Py/TS/etc) 进行切分并输出 方便将数据进行结构化后检索
LangChain-21 Text Splitters 内容切分器 支持多种格式 HTML JSON md Code(JS/Py/TS/etc) 进行切分并输出 方便将数据进行结构化后检索
42 0
|
7月前
|
设计模式 前端开发 Java
Java与HTML的深度融合:技术解析与应用实践
Java与HTML的深度融合:技术解析与应用实践
443 1
|
6月前
|
Java Maven
Java在线预览(word转html)--强势推荐
Java在线预览(word转html)--强势推荐
165 0
|
6月前
|
前端开发 安全 Java
家政系统(用户端)介绍Java18+前端html+后端springboot
家政系统(用户端)介绍Java18+前端html+后端springboot
60 0
|
7月前
|
前端开发 JavaScript Java
java使用jodd操作html
java使用jodd操作html
75 2
|
7月前
|
JSON Java 数据格式
No converter for [class java.util.LinkedHashMap] with preset Content-Type 'text/json;charset=UTF-8']问题
【5月更文挑战第21天】No converter for [class java.util.LinkedHashMap] with preset Content-Type 'text/json;charset=UTF-8']问题
2418 0