Dom4j SAXReader Constructors

简介: 用 Dom4j 的 SAXReader.read(String) 读取含中文目录的 Windows 绝对路径(如 D:\test\中文文件夹名\namespaces.xml)时会抛出 "unknown protocol: d" 异常,而 read(File) 可以正常读取;本文结合示例输出与相关源码说明原因。

 

Dom4j读取xml:
eg1:

package xml;

import java.io.File;

import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

/**
 * Demo comparing {@code SAXReader.read(String)} with {@code SAXReader.read(File)}
 * for several Windows paths: one whose directory name contains non-ASCII
 * characters, one containing a space, and a plain one.
 *
 * <p>Each attempt prints {@code success} to standard output when the read
 * returns normally, or the stack trace of the {@link DocumentException}
 * when it does not.
 */
public class XmlReader_Dom4j {

    public static void main(String[] args) {
        final String chineseDirPath = "D:\\test\\中文文件夹名\\namespaces.xml";
        // Original author's note: this String-based read is the one expected to fail.
        readXml(chineseDirPath);
        // Same location, but handed over as a File instead of a String.
        readXml(new File(chineseDirPath));

        readXml("D:\\test\\path withWhiteSpace\\namespaces.xml");

        readXml("D:\\test\\normal\\namespaces.xml");
    }

    /**
     * Reads the XML document named by a path string and reports the outcome.
     *
     * @param path location passed unchanged to {@code SAXReader.read(String)}
     */
    private static void readXml(String path) {
        try {
            new SAXReader().read(path);
            System.out.println("success");
        } catch (DocumentException failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Reads the XML document behind a {@link File} and reports the outcome.
     *
     * @param xmlFile file passed unchanged to {@code SAXReader.read(File)}
     */
    private static void readXml(File xmlFile) {
        try {
            new SAXReader().read(xmlFile);
            System.out.println("success");
        } catch (DocumentException failure) {
            failure.printStackTrace();
        }
    }

}

Output:

org.dom4j.DocumentException: unknown protocol: d Nested exception: unknown protocol: d
    at org.dom4j.io.SAXReader.read(SAXReader.java:484)
    at org.dom4j.io.SAXReader.read(SAXReader.java:321)
    at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11)
Nested exception: 
java.net.MalformedURLException: unknown protocol: d
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source)
    at org.dom4j.io.SAXReader.read(SAXReader.java:465)
    at org.dom4j.io.SAXReader.read(SAXReader.java:321)
    at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11)
Nested exception: java.net.MalformedURLException: unknown protocol: d
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source)
    at org.dom4j.io.SAXReader.read(SAXReader.java:465)
    at org.dom4j.io.SAXReader.read(SAXReader.java:321)
    at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11)
success
success
success

 

Source code of org.dom4j.io.SAXReader#read(String):

    /**
     * <p>
     * Parses a Document, using SAX, from a location given as a string.
     * </p>
     * 
     * <p>
     * The string is wrapped in an {@link InputSource} exactly as supplied. If
     * an encoding has been set on this reader it is applied to that source,
     * and parsing is then delegated to the <code>InputSource</code> overload.
     * </p>
     * 
     * <p>
     * NOTE(review): the original documentation says a systemId containing a
     * <code>':'</code> character is assumed to be a URL and anything else a
     * file name. That decision is not made in this method; it depends on how
     * the system identifier is resolved further down. For finer-grained
     * control over this mechanism, pass a {@link URL} or a {@link File}
     * instead of a {@link String}.
     * </p>
     * 
     * @param systemId
     *            is a URL for a document or a file name.
     * 
     * @return the newly created Document instance
     * 
     * @throws DocumentException
     *             if an error occurs during parsing.
     */
    public Document read(String systemId) throws DocumentException {
        InputSource inputSource = new InputSource(systemId);

        // Only override the source's encoding when one was configured.
        if (this.encoding != null) {
            inputSource.setEncoding(this.encoding);
        }

        return read(inputSource);
    }

 

eg2:

    /**
     * Runs the same location through {@code newUrl} in two spellings: first
     * with a {@code file:///} prefix (also read through {@code readXml}),
     * then as a bare Windows path. Banner lines mark the start and end of
     * the run on standard output.
     *
     * @throws MalformedURLException declared because {@code newUrl} declares it
     */
    private static void testWithUrl() throws MalformedURLException {
        System.out.println("=============testWithUrlBegin=============");

        final String withFileScheme = "file:///D:\\test\\中文文件夹名\\namespaces.xml";
        newUrl(withFileScheme);
        readXml(withFileScheme);

        final String bareWindowsPath = "D:\\test\\中文文件夹名\\namespaces.xml";
        newUrl(bareWindowsPath);

        System.out.println("=============testWithUrlEnd=============");
    }

    /**
     * Attempts to construct a {@link URL} from the given string and discards
     * the result; the only observable effect is the stack trace printed when
     * construction fails.
     *
     * <p>Every {@link Exception} raised by the constructor is caught here, so
     * none reaches the caller even though the signature declares
     * {@link MalformedURLException}.
     *
     * @param path the string handed to the {@code URL} constructor
     * @throws MalformedURLException declared by the signature only
     */
    private static void newUrl(String path) throws MalformedURLException {
        try {
            // Constructed purely to see whether the constructor accepts the string.
            new URL(path);
        } catch (Exception constructionFailure) {
            constructionFailure.printStackTrace();
        }
    }

    /**
     * Reads the document named by {@code path} with a fresh {@code SAXReader}.
     * On success it prints whether the document has content, followed by
     * {@code success}; on a {@code DocumentException} it prints the stack
     * trace instead.
     *
     * @param path location passed unchanged to {@code SAXReader.read(String)}
     */
    private static void readXml(String path) {
        try {
            final Document parsed = new SAXReader().read(path);
            System.out.println("document.hasContent():" + parsed.hasContent());
            System.out.println("success");
        } catch (DocumentException failure) {
            failure.printStackTrace();
        }
    }

 

Output:

=============testWithUrlBegin=============
document.hasContent():true
success
java.net.MalformedURLException: unknown protocol: d
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at xml.XmlReader_Dom4j.newUrl(XmlReader_Dom4j.java:50)
    at xml.XmlReader_Dom4j.testWithUrl(XmlReader_Dom4j.java:43)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:13)
=============testWithUrlEnd=============

 

saxReader.read(xmlFile)不报错的原因:

    /**
     * <p>
     * Reads a Document from the given <code>File</code>.
     * </p>
     * 
     * <p>
     * The file is opened as a byte stream and wrapped in an
     * <code>InputSource</code>. If an encoding has been set on this reader it
     * is applied to that source. The source's system id is set to a
     * <code>file://</code> string built from the file's absolute path, with
     * backslashes replaced by forward slashes.
     * </p>
     * 
     * <p>
     * The stream opened here is closed before this method returns, whether
     * parsing succeeds or fails.
     * </p>
     * 
     * @param file
     *            is the <code>File</code> to read from.
     * 
     * @return the newly created Document instance
     * 
     * @throws DocumentException
     *             if the file cannot be opened or an error occurs during
     *             parsing.
     */
    public Document read(File file) throws DocumentException {
        /*
         * We cannot convert the file to an URL because if the filename
         * contains '#' characters, there will be problems with the URL in
         * the InputSource (because a URL like
         * http://myhost.com/index#anchor is treated the same as
         * http://myhost.com/index) Thanks to Christian Oetterli
         */
        FileInputStream in;
        try {
            in = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            throw new DocumentException(e.getMessage(), e);
        }

        try {
            InputSource source = new InputSource(in);
            if (this.encoding != null) {
                source.setEncoding(this.encoding);
            }
            String path = file.getAbsolutePath();

            if (path != null) {
                // Code taken from Ant FileUtils
                StringBuffer sb = new StringBuffer("file://");

                // add an extra slash for filesystems with drive-specifiers
                if (!path.startsWith(File.separator)) {
                    sb.append("/");
                }

                path = path.replace('\\', '/');
                sb.append(path);

                source.setSystemId(sb.toString());
            }

            return read(source);
        } finally {
            // This method opened the stream, so it releases it on every exit
            // path rather than relying on the parser to do so.
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Best-effort cleanup: a failure to close must not replace the
                // parse result or mask the exception already propagating.
            }
        }
    }

 

java.net.URL(URL.java)中抛出异常的位置:

    /**
     * Builds a <code>URL</code> from its individual parts:
     * <code>protocol</code>, <code>host</code>, <code>port</code>,
     * <code>file</code> and an optional stream <code>handler</code>.
     *
     * <p>A <code>port</code> of <code>-1</code> means the protocol's default
     * port should be used. A <code>null</code> <code>handler</code> means a
     * default stream handler is looked up for the protocol, as outlined for:
     *     java.net.URL#URL(java.lang.String, java.lang.String, int,
     *                      java.lang.String)
     *
     * <p>If the handler is not null and there is a security manager,
     * the security manager's <code>checkPermission</code>
     * method is called with a
     * <code>NetPermission("specifyStreamHandler")</code> permission.
     * This may result in a SecurityException.
     *
     * <p>The protocol is lower-cased before it is stored. A non-null host
     * that contains <code>':'</code> and does not already start with
     * <code>"["</code> is wrapped in square brackets. The only input checks
     * made here are the port range check, which runs only when
     * <code>host</code> is not null, and the final handler lookup.
     *
     * @param      protocol   the name of the protocol to use.
     * @param      host       the name of the host.
     * @param      port       the port number on the host.
     * @param      file       the file on the host
     * @param      handler    the stream handler for the URL.
     * @exception  MalformedURLException  if an unknown protocol is specified,
     *             or if <code>host</code> is not null and <code>port</code>
     *             is less than <code>-1</code>.
     * @exception  SecurityException
     *        if a security manager exists and its
     *        <code>checkPermission</code> method doesn't allow
     *        specifying a stream handler explicitly.
     * @see        java.lang.System#getProperty(java.lang.String)
     * @see        java.net.URL#setURLStreamHandlerFactory(
     *            java.net.URLStreamHandlerFactory)
     * @see        java.net.URLStreamHandler
     * @see        java.net.URLStreamHandlerFactory#createURLStreamHandler(
     *            java.lang.String)
     * @see        SecurityManager#checkPermission
     * @see        java.net.NetPermission
     */
    public URL(String protocol, String host, int port, String file,
               URLStreamHandler handler) throws MalformedURLException {
        if (handler != null) {
            SecurityManager securityManager = System.getSecurityManager();
            if (securityManager != null) {
                // check for permission to specify a handler
                checkSpecifyHandler(securityManager);
            }
        }

        protocol = protocol.toLowerCase();
        this.protocol = protocol;

        if (host != null) {
            /*
             * if host is a literal IPv6 address,
             * we will make it conform to RFC 2732
             */
            boolean needsBrackets = host.indexOf(':') >= 0
                    && !host.startsWith("[");
            if (needsBrackets) {
                host = "[" + host + "]";
            }
            this.host = host;

            if (port < -1) {
                throw new MalformedURLException("Invalid port number :" +
                                                port);
            }
            this.port = port;
            authority = (port == -1) ? host : host + ":" + port;
        }

        Parts parts = new Parts(file);
        path = parts.getPath();
        query = parts.getQuery();

        // The stored file is the path, plus "?query" when a query is present.
        this.file = (query != null) ? path + "?" + query : path;
        ref = parts.getRef();

        // Note: we don't do validation of the URL here. Too risky to change
        // right now, but worth considering for future reference. -br
        if (handler == null) {
            // No explicit handler: look one up for the protocol.
            handler = getURLStreamHandler(protocol);
            if (handler == null) {
                throw new MalformedURLException("unknown protocol: " + protocol);
            }
        }
        this.handler = handler;
    }

 

目录
打赏
0
0
0
0
95
分享
相关文章
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
HTML DOM 节点树
HTML DOM 节点是指在 HTML 文档对象模型中,文档中的所有内容都被视为节点。整个文档是一个文档节点,每个 HTML 元素是元素节点,元素内的文本是文本节点,属性是属性节点,注释是注释节点。DOM 将文档表示为节点树,节点之间有父子和同胞关系。
HTML DOM 节点
HTML DOM(文档对象模型)将HTML文档视为节点树,其中每个部分都是节点:文档本身是文档节点,HTML元素是元素节点,元素内的文本是文本节点,属性是属性节点,注释是注释节点。节点间存在父子及同胞关系,形成层次结构。
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)