Dom4j读取xml:
eg1:
package xml;

import java.io.File;

import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

/**
 * Small demo that parses the same kind of XML file with dom4j's
 * {@link SAXReader}, once through the {@code String} overload and once
 * through the {@code File} overload, for several Windows paths.
 */
public class XmlReader_Dom4j {

    /**
     * Runs the four parse attempts in a fixed order. Every attempt prints
     * either {@code success} or the stack trace of the failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String chineseFolderPath = "D:\\test\\中文文件夹名\\namespaces.xml";
        // Passing this path as a plain String is the failing case being
        // demonstrated; the DocumentException is caught and printed by readXml.
        readXml(chineseFolderPath);

        // Same location, but handed over as a File instead of a String.
        final File xmlFile = new File(chineseFolderPath);
        readXml(xmlFile);

        final String whitespacePath = "D:\\test\\path withWhiteSpace\\namespaces.xml";
        readXml(whitespacePath);

        final String plainPath = "D:\\test\\normal\\namespaces.xml";
        readXml(plainPath);
    }

    /**
     * Parses the document identified by {@code path} via
     * {@code SAXReader.read(String)} and reports the outcome on the console.
     *
     * @param path location string passed to the reader unchanged
     */
    private static void readXml(String path) {
        try {
            new SAXReader().read(path);
            System.out.println("success");
        } catch (DocumentException parseFailure) {
            parseFailure.printStackTrace();
        }
    }

    /**
     * Parses {@code xmlFile} via {@code SAXReader.read(File)} and reports the
     * outcome on the console.
     *
     * @param xmlFile file passed to the reader
     */
    private static void readXml(File xmlFile) {
        try {
            new SAXReader().read(xmlFile);
            System.out.println("success");
        } catch (DocumentException parseFailure) {
            parseFailure.printStackTrace();
        }
    }
}
Output:
org.dom4j.DocumentException: unknown protocol: d Nested exception: unknown protocol: d at org.dom4j.io.SAXReader.read(SAXReader.java:484) at org.dom4j.io.SAXReader.read(SAXReader.java:321) at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24) at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11) Nested exception: java.net.MalformedURLException: unknown protocol: d at java.net.URL.<init>(Unknown Source) at java.net.URL.<init>(Unknown Source) at java.net.URL.<init>(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source) at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source) at org.dom4j.io.SAXReader.read(SAXReader.java:465) at org.dom4j.io.SAXReader.read(SAXReader.java:321) at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24) at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11) Nested exception: java.net.MalformedURLException: unknown protocol: d at java.net.URL.<init>(Unknown Source) at java.net.URL.<init>(Unknown Source) at java.net.URL.<init>(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source) at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source) at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source) at 
com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source) at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source) at org.dom4j.io.SAXReader.read(SAXReader.java:465) at org.dom4j.io.SAXReader.read(SAXReader.java:321) at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24) at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11) success success success
Source code (org.dom4j.io.SAXReader#read(String systemId)):
/**
 * Reads a Document from the given URL or file name using SAX.
 *
 * <p>
 * The string is wrapped, unchanged, in an {@link InputSource} as its system
 * id; this method performs no inspection or conversion of its own. For finer
 * grained control over how the location is interpreted, pass a {@link URL}
 * or a {@link File} instance instead of a {@link String}.
 * </p>
 *
 * @param systemId
 *            a URL for a document or a file name
 *
 * @return the newly created Document instance
 *
 * @throws DocumentException
 *             if an error occurs during parsing
 */
public Document read(String systemId) throws DocumentException {
    final InputSource inputSource = new InputSource(systemId);

    // Without a configured encoding the source is parsed as-is.
    if (this.encoding == null) {
        return read(inputSource);
    }

    inputSource.setEncoding(this.encoding);
    return read(inputSource);
}
eg2:
/**
 * Compares two spellings of the same file location: one with an explicit
 * {@code file:///} prefix and one as a bare Windows path. Each is fed to
 * {@link #newUrl(String)}; only the prefixed form is also parsed.
 *
 * @throws MalformedURLException declared for the callee's signature
 */
private static void testWithUrl() throws MalformedURLException {
    System.out.println("=============testWithUrlBegin=============");

    final String prefixedLocation = "file:///D:\\test\\中文文件夹名\\namespaces.xml";
    newUrl(prefixedLocation);
    readXml(prefixedLocation);

    final String bareLocation = "D:\\test\\中文文件夹名\\namespaces.xml";
    newUrl(bareLocation);

    System.out.println("=============testWithUrlEnd=============");
}

/**
 * Attempts to construct a {@link URL} from {@code path}. The instance is
 * discarded; any exception raised by the constructor is printed rather
 * than propagated.
 *
 * @param path string handed to the {@code URL} constructor
 * @throws MalformedURLException declared, although failures are caught here
 */
private static void newUrl(String path) throws MalformedURLException {
    try {
        new URL(path);
    } catch (Exception constructionFailure) {
        constructionFailure.printStackTrace();
    }
}

/**
 * Parses the document identified by {@code path} with a fresh
 * {@code SAXReader}, then prints whether the document has content followed
 * by {@code success}. A {@code DocumentException} is printed instead.
 *
 * @param path location string passed to {@code SAXReader.read(String)}
 */
private static void readXml(String path) {
    try {
        final Document parsed = new SAXReader().read(path);
        final boolean hasContent = parsed.hasContent();
        System.out.println("document.hasContent():" + hasContent);
        System.out.println("success");
    } catch (DocumentException parseFailure) {
        parseFailure.printStackTrace();
    }
}
Output:
=============testWithUrlBegin============= document.hasContent():true success java.net.MalformedURLException: unknown protocol: d at java.net.URL.<init>(Unknown Source) at java.net.URL.<init>(Unknown Source) at java.net.URL.<init>(Unknown Source) at xml.XmlReader_Dom4j.newUrl(XmlReader_Dom4j.java:50) at xml.XmlReader_Dom4j.testWithUrl(XmlReader_Dom4j.java:43) at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:13) =============testWithUrlEnd=============
saxReader.read(xmlFile)不报错的原因:
/** * <p> * Reads a Document from the given <code>File</code> * </p> * * @param file * is the <code>File</code> to read from. * * @return the newly created Document instance * * @throws DocumentException * if an error occurs during parsing. */ public Document read(File file) throws DocumentException { try { /* * We cannot convert the file to an URL because if the filename * contains '#' characters, there will be problems with the URL in * the InputSource (because a URL like * http://myhost.com/index#anchor is treated the same as * http://myhost.com/index) Thanks to Christian Oetterli */ InputSource source = new InputSource(new FileInputStream(file)); if (this.encoding != null) { source.setEncoding(this.encoding); } String path = file.getAbsolutePath(); if (path != null) { // Code taken from Ant FileUtils StringBuffer sb = new StringBuffer("file://"); // add an extra slash for filesystems with drive-specifiers if (!path.startsWith(File.separator)) { sb.append("/"); } path = path.replace('\\', '/'); sb.append(path); source.setSystemId(sb.toString()); } return read(source); } catch (FileNotFoundException e) { throw new DocumentException(e.getMessage(), e); } }
java.net.URL(URL.java)中抛出异常的位置:
/** * Creates a <code>URL</code> object from the specified * <code>protocol</code>, <code>host</code>, <code>port</code> * number, <code>file</code>, and <code>handler</code>. Specifying * a <code>port</code> number of <code>-1</code> indicates that * the URL should use the default port for the protocol. Specifying * a <code>handler</code> of <code>null</code> indicates that the URL * should use a default stream handler for the protocol, as outlined * for: * java.net.URL#URL(java.lang.String, java.lang.String, int, * java.lang.String) * * <p>If the handler is not null and there is a security manager, * the security manager's <code>checkPermission</code> * method is called with a * <code>NetPermission("specifyStreamHandler")</code> permission. * This may result in a SecurityException. * * No validation of the inputs is performed by this constructor. * * @param protocol the name of the protocol to use. * @param host the name of the host. * @param port the port number on the host. * @param file the file on the host * @param handler the stream handler for the URL. * @exception MalformedURLException if an unknown protocol is specified. * @exception SecurityException * if a security manager exists and its * <code>checkPermission</code> method doesn't allow * specifying a stream handler explicitly. 
* @see java.lang.System#getProperty(java.lang.String) * @see java.net.URL#setURLStreamHandlerFactory( * java.net.URLStreamHandlerFactory) * @see java.net.URLStreamHandler * @see java.net.URLStreamHandlerFactory#createURLStreamHandler( * java.lang.String) * @see SecurityManager#checkPermission * @see java.net.NetPermission */ public URL(String protocol, String host, int port, String file, URLStreamHandler handler) throws MalformedURLException { if (handler != null) { SecurityManager sm = System.getSecurityManager(); if (sm != null) { // check for permission to specify a handler checkSpecifyHandler(sm); } } protocol = protocol.toLowerCase(); this.protocol = protocol; if (host != null) { /** * if host is a literal IPv6 address, * we will make it conform to RFC 2732 */ if (host != null && host.indexOf(':') >= 0 && !host.startsWith("[")) { host = "["+host+"]"; } this.host = host; if (port < -1) { throw new MalformedURLException("Invalid port number :" + port); } this.port = port; authority = (port == -1) ? host : host + ":" + port; } Parts parts = new Parts(file); path = parts.getPath(); query = parts.getQuery(); if (query != null) { this.file = path + "?" + query; } else { this.file = path; } ref = parts.getRef(); // Note: we don't do validation of the URL here. Too risky to change // right now, but worth considering for future reference. -br if (handler == null && (handler = getURLStreamHandler(protocol)) == null) { throw new MalformedURLException("unknown protocol: " + protocol); } this.handler = handler; }