des...
四种解析方式:
DOM
SAX
JDOM
DOM4J
解析目标-books.xml文件
<?xml version="1.0" encoding="UTF-8" ?> <books> <book id="1"> <name>XML深入浅出</name> <author>Imooc</author> <year>2014</year> <price>89</price> </book> <book id="2"> <name>Java从入门到精通</name> <author>Imooc</author> <price>369</price> </book> </books>
1. DOM方式解析
package imooc

import org.junit.Test
import org.w3c.dom.Node
import org.xml.sax.SAXException
import java.io.IOException
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.parsers.ParserConfigurationException

/**
 * DOM-based parsing demo for books.xml.
 *
 * @author futao
 * Created on 2017/11/2 - 18:19.
 */
class XML {

    /**
     * Parses books.xml with the JAXP DOM API and prints, for every `book` element,
     * its attributes followed by its child elements and their text.
     *
     * Parsing failures are deliberately not caught so that the test fails
     * instead of silently passing after printing a stack trace.
     *
     * @throws ParserConfigurationException if no DocumentBuilder can be created
     * @throws SAXException if the document is not well-formed
     * @throws IOException if the file cannot be read
     */
    @Test
    fun DOM() {
        // 1. Create the DocumentBuilderFactory.
        val builderFactory = DocumentBuilderFactory.newInstance()
        // 2. Create the DocumentBuilder.
        val documentBuilder = builderFactory.newDocumentBuilder()
        // 3. Load books.xml into an in-memory DOM tree.
        val parse = documentBuilder.parse("D:\\src\\springhibernate\\springshibernate\\src\\test\\kotlin\\imooc\\books.xml")

        // All <book> elements of the document.
        val bookNodeList = parse.getElementsByTagName("book")
        println("book节点个数为${bookNodeList.length}")

        for (i in 0 until bookNodeList.length) {
            val bookNode = bookNodeList.item(i)

            // Attributes of the current book; the summary line is printed once per book.
            val bookNodeAttributes = bookNode.attributes
            println("第${i + 1} 个book节点公有${bookNodeAttributes.length}个属性")
            for (j in 0 until bookNodeAttributes.length) {
                val attribute = bookNodeAttributes.item(j)
                println("属性:" + attribute)
                println(attribute.nodeName)
                println(attribute.nodeValue)
            }

            // Children are processed once per book, outside the attribute loop.
            // Whitespace and line breaks show up as text nodes, so only element
            // nodes are kept; every index is inspected so the last element is
            // not lost when the XML has no trailing whitespace.
            val bookNodeChildNodes = bookNode.childNodes
            val childElements = (0 until bookNodeChildNodes.length)
                    .map { bookNodeChildNodes.item(it) }
                    .filter { it.nodeType == Node.ELEMENT_NODE }
            println("第${i + 1} 本书共有${childElements.size}个子节点")

            childElements.forEach {
                print("子节点" + it.nodeName)
                // nodeValue of an element node is null; textContent returns the
                // element's text and does not fail on an empty element.
                println(" 对应的值为 " + it.textContent)
            }
        }
    }
}
结果
book节点个数为2 第1 个book节点公有1个属性 属性:id="1" id 1 第1 本书共有4个子节点 子节点name 对应的值为 XML深入浅出 子节点author 对应的值为 Imooc 子节点year 对应的值为 2014 子节点price 对应的值为 89 第2 个book节点公有1个属性 属性:id="2" id 2 第2 本书共有3个子节点 子节点name 对应的值为 Java从入门到精通 子节点author 对应的值为 Imooc 子节点price 对应的值为 369
2.SAX方式解析
Book.class
package imooc; /** * @author futao * Created on 2017/11/3 - 11:05. */ public class Book { private int id; private String name; private String author; private String year; private String price; public int getId() { return id; } public void setId(int id) { this.id = id; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; } public String getYear() { return year; } public void setYear(String year) { this.year = year; } public String getPrice() { return price; } public void setPrice(String price) { this.price = price; } }
/**
 * Parses books.xml with a SAX parser; all element handling is delegated to
 * [MySAXParserHandler].
 */
@Test
fun bySAX() {
    val sAXParserFactory = SAXParserFactory.newInstance()
    val sAXParser = sAXParserFactory.newSAXParser()
    // The callbacks are implemented by MySAXParserHandler.
    sAXParser.parse("D:\\src\\springhibernate\\springshibernate\\src\\test\\kotlin\\imooc\\books.xml", MySAXParserHandler())
}

/**
 * SAX handler that prints every `book` element with its attributes and child
 * elements, collects each one into a [Book], and prints the collected list as
 * JSON when the document ends.
 */
class MySAXParserHandler : DefaultHandler() {
    // 1-based index of the book element currently being visited.
    private var bookIndex = 0
    // Books collected so far.
    private val bookList = ArrayList<Book>()
    // Book currently being filled.
    var book = Book()
    // Trimmed text of the most recently closed element.
    var trim = ""
    // Text of the current element. SAX may report one text run through several
    // characters() calls, so chunks are accumulated instead of overwritten.
    private val textBuffer = StringBuilder()

    /**
     * Called for every opening tag. A `book` tag starts a new [Book] and prints
     * its attributes; any other tag except the `books` root prints its name as
     * a child-node header.
     */
    override fun startElement(uri: String?, localName: String?, qName: String?, attributes: Attributes?) {
        super.startElement(uri, localName, qName, attributes)
        // Text seen before this tag belongs to the parent, not to this element.
        textBuffer.setLength(0)
        trim = ""
        if (qName == "book") {
            book = Book()
            bookList.add(book)
            bookIndex++
            println("开始遍历第$bookIndex 本书")
            if (attributes != null) {
                for (i in 0 until attributes.length) {
                    print("属性名为:" + attributes.getQName(i))
                    println("属性值为:" + attributes.getValue(i))
                    // Only the id attribute is numeric; other attributes must
                    // not be parsed as the id.
                    if (attributes.getQName(i) == "id") {
                        book.id = attributes.getValue(i).toInt()
                    }
                }
            }
        } else if (qName != "books") {
            print("子节点名字为: $qName 子节点的值为: ")
        }
    }

    /**
     * Called for every closing tag. Stores the accumulated text in the matching
     * [Book] property and prints it; a closing `book` tag prints the
     * end-of-book marker instead.
     */
    override fun endElement(uri: String?, localName: String?, qName: String?) {
        super.endElement(uri, localName, qName)
        trim = textBuffer.toString().trim()
        textBuffer.setLength(0)
        when (qName) {
            "name" -> book.name = trim
            "author" -> book.author = trim
            "year" -> book.year = trim
            "price" -> book.price = trim
        }
        if (qName == "book") {
            println("========================第$bookIndex 本书遍历完成")
        } else if (qName != "books") {
            // Completes the line started by the child-node header.
            println(trim)
        }
    }

    /**
     * Marks the start of parsing.
     */
    override fun startDocument() {
        super.startDocument()
        println("SAX解析开始...")
    }

    /**
     * Marks the end of parsing and prints the collected books as pretty JSON.
     */
    override fun endDocument() {
        super.endDocument()
        println("SAX解析结束...")
        println(GsonBuilder().serializeNulls().setPrettyPrinting().create().toJson(bookList))
    }

    /**
     * Receives a chunk of character data and appends it to the text of the
     * current element.
     */
    override fun characters(ch: CharArray?, start: Int, length: Int) {
        super.characters(ch, start, length)
        if (ch != null) {
            textBuffer.append(String(ch, start, length))
        }
    }
}
结果
SAX解析开始... 开始遍历第1 本书 属性名为:id属性值为:1 子节点名字为: name 子节点的值为: XML深入浅出 子节点名字为: author 子节点的值为: Imooc 子节点名字为: year 子节点的值为: 2014 子节点名字为: price 子节点的值为: 89 ========================第1 本书遍历完成 开始遍历第2 本书 属性名为:id属性值为:2 子节点名字为: name 子节点的值为: Java从入门到精通 子节点名字为: author 子节点的值为: Imooc 子节点名字为: price 子节点的值为: 369 ========================第2 本书遍历完成 SAX解析结束... [ { "id": 1, "name": "XML深入浅出", "author": "Imooc", "year": "2014", "price": "89" }, { "id": 2, "name": "Java从入门到精通", "author": "Imooc", "year": null, "price": "369" } ]
3.JDOM
Maven依赖
<!-- https://mvnrepository.com/artifact/org.jdom/jdom --> <dependency> <groupId>org.jdom</groupId> <artifactId>jdom</artifactId> <version>2.0.2</version> </dependency>
主要代码
package imooc.imooc.jdom

import com.google.gson.GsonBuilder
import imooc.Book
import org.jdom2.input.SAXBuilder
import org.junit.Test
import java.io.FileInputStream
import java.io.InputStreamReader

/**
 * JDOM-based parsing demo for books.xml.
 *
 * @author futao
 * Created on 2017/11/3 - 13:41.
 */
class Jdom {

    /**
     * Parses books.xml with JDOM, prints every book's attributes and child
     * elements, and finally prints the collected [Book] list as pretty JSON.
     */
    @Test
    fun testJdom() {
        val list = ArrayList<Book>()
        // 1. Create the SAXBuilder.
        val saxBuilder = SAXBuilder()
        /*
         * Ways to fix garbled characters:
         * 1. change the encoding of the XML file itself, or
         * 2. wrap the FileInputStream in an InputStreamReader with an explicit charset.
         */
        // 2. Load the XML file through a reader with an explicit charset. `use`
        //    closes the underlying stream even when build() throws.
        val document = FileInputStream("D:\\src\\springhibernate\\springshibernate\\src\\test\\resources\\books.xml").use { stream ->
            saxBuilder.build(InputStreamReader(stream, "UTF-8"))
        }
        // 3. Root element.
        val rootElement = document.rootElement
        // 4. Direct children of the root, one per book.
        val rootChildElement = rootElement.children
        // withIndex() supplies the position directly, avoiding an indexOf()
        // lookup on every iteration.
        for ((index, bookElement) in rootChildElement.withIndex()) {
            println("==================== 开始解析第${index + 1}本书==================== ")
            val book = Book()
            // Attributes of the book element.
            for (k in bookElement.attributes) {
                println(k.name + " : " + k.value)
                assignment(book, k.name, k.value)
            }
            // Child elements of the book element and their text.
            for (j in bookElement.children) {
                println(j.name + " : " + j.value)
                assignment(book, j.name, j.value)
            }
            list.add(book)
        }
        println(GsonBuilder().serializeNulls().setPrettyPrinting().create().toJson(list))
    }

    /**
     * Copies [value] into the [book] property named by [property]; names other
     * than id, name, author, year and price are ignored.
     *
     * @return the same [book] instance
     */
    private fun assignment(book: Book, property: String, value: String): Book {
        when (property) {
            "id" -> book.id = value.toInt()
            "name" -> book.name = value
            "author" -> book.author = value
            "year" -> book.year = value
            "price" -> book.price = value
        }
        return book
    }
}
结果
==================== 开始解析第1本书==================== id : 1 name : XML深入浅出 author : Imooc year : 2014 price : 89 ==================== 开始解析第2本书==================== id : 2 name : Java从入门到精通 author : Imooc price : 369 [ { "id": 1, "name": "XML深入浅出", "author": "Imooc", "year": "2014", "price": "89" }, { "id": 2, "name": "Java从入门到精通", "author": "Imooc", "year": null, "price": "369" } ]
4.DOM4J
maven依赖
<!-- https://mvnrepository.com/artifact/dom4j/dom4j --> <dependency> <groupId>dom4j</groupId> <artifactId>dom4j</artifactId> <version>1.6.1</version> </dependency>
主要代码
package imooc.imooc.dom4j

import com.google.gson.GsonBuilder
import imooc.Book
import org.dom4j.Attribute
import org.dom4j.Element
import org.dom4j.io.SAXReader
import org.junit.Test
import java.io.File

/**
 * DOM4J-based parsing demo for books.xml.
 *
 * @author futao
 * Created on 2017/11/3 - 14:37.
 */
class DOM4JTest {

    /**
     * Reads books.xml with DOM4J, prints the attributes and child elements of
     * every element under the root, and prints the resulting [Book] list as
     * pretty JSON.
     */
    @Test
    fun testDOM4J() {
        val books = ArrayList<Book>()
        val xmlFile = File("D:\\src\\springhibernate\\springshibernate\\src\\test\\resources\\books.xml")
        val root = SAXReader().read(xmlFile).rootElement

        // Walk the root's child elements through its element iterator.
        var position = 0
        root.elementIterator().forEach { node ->
            val bookElement = node as Element
            position++
            println("===============开始解析第${position}本书===============")
            val current = Book()

            // Attributes of the book element.
            for (rawAttribute in bookElement.attributes()) {
                val attribute = rawAttribute as Attribute
                println(attribute.name + " : " + attribute.value)
                assignment(current, attribute.name, attribute.value)
            }

            // Child elements of the book element and their trimmed text.
            bookElement.elementIterator().forEach { rawChild ->
                val child = rawChild as Element
                val text = child.textTrim
                println(child.name + " : " + text)
                assignment(current, child.name, text)
            }

            books.add(current)
        }

        val gson = GsonBuilder().serializeNulls().setPrettyPrinting().create()
        println(gson.toJson(books))
    }

    /**
     * Stores [value] in the [book] property selected by [property]
     * (id, name, author, year or price); any other name leaves the book untouched.
     *
     * @return the [book] that was passed in
     */
    private fun assignment(book: Book, property: String, value: String): Book = book.apply {
        when (property) {
            "id" -> id = value.toInt()
            "name" -> name = value
            "author" -> author = value
            "year" -> year = value
            "price" -> price = value
        }
    }
}
结果
===============开始解析第1本书=============== id : 1 name : XML深入浅出 author : Imooc year : 2014 price : 89 ===============开始解析第2本书=============== id : 2 name : Java从入门到精通 author : Imooc price : 369 [ { "id": 1, "name": "XML深入浅出", "author": "Imooc", "year": "2014", "price": "89" }, { "id": 2, "name": "Java从入门到精通", "author": "Imooc", "year": null, "price": "369" } ]
读取效率
/**
 * Runs the four parsing demos one after another and prints the elapsed time of
 * each in milliseconds.
 *
 * NOTE(review): each parser runs exactly once, in a fixed order, so the figures
 * presumably include one-time start-up cost (class loading etc.) — treat them
 * as a rough indication rather than a benchmark.
 */
@Test
fun efficiencyCompare() {
    // Times one parser run using the monotonic clock and prints "<label>: <ms>".
    fun timed(label: String, parse: () -> Unit) {
        val startNanos = System.nanoTime()
        parse()
        println("$label: ${(System.nanoTime() - startNanos) / 1_000_000}")
    }

    timed("DOM4J耗时") { testDOM4J() }
    timed("JDOM耗时") { Jdom().testJdom() }
    // The DOM demo in class XML is named DOM().
    timed("DOM耗时") { XML().DOM() }
    timed("SAX耗时") { XML().bySAX() }
}
结果
DOM4J耗时: 379 JDOM耗时: 141 DOM耗时: 52 SAX耗时: 32