des...
四种解析方式:
DOM
SAX
JDOM
DOM4J
解析目标-books.xml文件
<?xml version="1.0" encoding="UTF-8" ?>
<books>
<book id="1">
<name>XML深入浅出</name>
<author>Imooc</author>
<year>2014</year>
<price>89</price>
</book>
<book id="2">
<name>Java从入门到精通</name>
<author>Imooc</author>
<price>369</price>
</book>
</books>
1. DOM方式解析
package imooc
import org.junit.Test
import org.xml.sax.SAXException
import java.io.IOException
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.parsers.ParserConfigurationException
import javax.xml.soap.Node
/**
 * Parses `books.xml` with the JDK DOM API and prints every `book` element's
 * attributes and child elements.
 *
 * @author futao
 * Created on 2017/11/2 - 18:19.
 */
class XML {
    /**
     * Loads the document into a DOM tree, then prints, for each `book` node,
     * its attributes followed by its element children and their text.
     *
     * Parser, I/O and SAX failures are caught and their stack traces printed.
     */
    @Test
    fun DOM() {
        // 1. Create a DocumentBuilderFactory.
        val builderFactory = DocumentBuilderFactory.newInstance()
        try {
            // 2. Create a DocumentBuilder.
            val documentBuilder = builderFactory.newDocumentBuilder()
            // 3. Parse books.xml into an in-memory Document.
            val parse = documentBuilder.parse("D:\\src\\springhibernate\\springshibernate\\src\\test\\kotlin\\imooc\\books.xml")
            // All <book> nodes in the document.
            val bookNodeList = parse.getElementsByTagName("book")
            println("book节点个数为${bookNodeList.length}")
            for (i in 0 until bookNodeList.length) {
                val bookNode = bookNodeList.item(i)

                // Attributes of the current book node.
                val bookNodeAttributes = bookNode.attributes
                println("第${i + 1} 个book节点公有${bookNodeAttributes.length}个属性")
                for (j in 0 until bookNodeAttributes.length) {
                    val attribute = bookNodeAttributes.item(j)
                    println("属性:" + attribute)
                    println(attribute.nodeName)
                    println(attribute.nodeValue)
                }

                // Children are handled once per book, outside the attribute loop,
                // so the output does not depend on how many attributes a book has.
                val bookNodeChildNodes = bookNode.childNodes
                // Whitespace and line breaks between tags show up as text nodes,
                // so keep only real element nodes. The interface is fully
                // qualified so this does not rely on javax.xml.soap.Node.
                val childElements = (0 until bookNodeChildNodes.length)
                    .map { bookNodeChildNodes.item(it) }
                    .filter { it.nodeType == org.w3c.dom.Node.ELEMENT_NODE }
                println("第${i + 1} 本书共有${childElements.size}个子节点")
                childElements.forEach { child ->
                    print("子节点" + child.nodeName)
                    // An element's own nodeValue is null; its text lives in the
                    // first child node (textContent would also work). An empty
                    // element has no first child, so fall back to "".
                    println(" 对应的值为 " + child.firstChild?.nodeValue.orEmpty())
                }
            }
        } catch (e: ParserConfigurationException) {
            e.printStackTrace()
        } catch (e: IOException) {
            e.printStackTrace()
        } catch (e: SAXException) {
            e.printStackTrace()
        }
    }
}
结果
book节点个数为2
第1 个book节点公有1个属性
属性:id="1"
id
1
第1 本书共有4个子节点
子节点name 对应的值为 XML深入浅出
子节点author 对应的值为 Imooc
子节点year 对应的值为 2014
子节点price 对应的值为 89
第2 个book节点公有1个属性
属性:id="2"
id
2
第2 本书共有3个子节点
子节点name 对应的值为 Java从入门到精通
子节点author 对应的值为 Imooc
子节点price 对应的值为 369
2.SAX方式解析
Book.java
package imooc;
/**
 * Plain mutable bean describing one {@code <book>} entry of books.xml.
 *
 * <p>Field declaration order is kept as-is because the JSON printed by the
 * parsers follows it.
 *
 * @author futao
 * Created on 2017/11/3 - 11:05.
 */
public class Book {

    /** Value of the {@code id} attribute. */
    private int id;
    /** Text of the {@code <name>} child. */
    private String name;
    /** Text of the {@code <author>} child. */
    private String author;
    /** Text of the {@code <year>} child; stays {@code null} when absent. */
    private String year;
    /** Text of the {@code <price>} child. */
    private String price;

    /** @return the book id */
    public int getId() {
        return this.id;
    }

    /** @param id the book id */
    public void setId(final int id) {
        this.id = id;
    }

    /** @return the book title */
    public String getName() {
        return this.name;
    }

    /** @param name the book title */
    public void setName(final String name) {
        this.name = name;
    }

    /** @return the author name */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author name */
    public void setAuthor(final String author) {
        this.author = author;
    }

    /** @return the publication year as text, or {@code null} if never set */
    public String getYear() {
        return this.year;
    }

    /** @param year the publication year as text */
    public void setYear(final String year) {
        this.year = year;
    }

    /** @return the price as text */
    public String getPrice() {
        return this.price;
    }

    /** @param price the price as text */
    public void setPrice(final String price) {
        this.price = price;
    }
}
/**
 * Parses `books.xml` with a SAX parser; all element and text callbacks are
 * delivered to a fresh [MySAXParserHandler].
 */
@Test
fun bySAX() {
    SAXParserFactory.newInstance()
        .newSAXParser()
        // The handler class MySAXParserHandler does the actual processing.
        .parse(
            "D:\\src\\springhibernate\\springshibernate\\src\\test\\kotlin\\imooc\\books.xml",
            MySAXParserHandler()
        )
}
/**
 * SAX handler that builds one [Book] per `book` element, printing progress as
 * it goes, and prints the collected books as JSON when the document ends.
 *
 * Text is accumulated per element because a SAX parser may deliver one run of
 * character data through several [characters] calls.
 */
class MySAXParserHandler : DefaultHandler() {
    // 1-based index of the book currently being parsed
    private var bookIndex = 0

    // Every book seen so far
    private val bookList = ArrayList<Book>()

    // Raw text collected since the last start/end tag
    private val textBuffer = StringBuilder()

    // Book currently being filled in
    var book = Book()

    // Trimmed text collected since the last start/end tag
    var trim = ""

    /**
     * Called for every start tag. Starts a new [Book] on `book`, and prints the
     * name of any other element except the root `books`.
     */
    override fun startElement(uri: String?, localName: String?, qName: String?, attributes: Attributes?) {
        super.startElement(uri, localName, qName, attributes)
        // Drop whitespace or sibling text collected before this tag, so an
        // empty element never inherits stale text.
        textBuffer.setLength(0)
        trim = ""
        when (qName) {
            "book" -> {
                book = Book()
                bookList.add(book)
                bookIndex++
                println("开始遍历第$bookIndex 本书")
                if (attributes != null) {
                    for (i in 0 until attributes.length) {
                        print("属性名为:" + attributes.getQName(i))
                        println("属性值为:" + attributes.getValue(i))
                        // Only the id attribute is numeric; any other attribute
                        // must not be parsed as the book id.
                        if (attributes.getQName(i) == "id") {
                            book.id = attributes.getValue(i).toInt()
                        }
                    }
                }
            }
            "books", null -> Unit
            else -> print("子节点名字为: $qName 子节点的值为: ")
        }
    }

    /**
     * Called for every end tag. Stores the collected text into the matching
     * [Book] property and finishes the output line started in [startElement].
     */
    override fun endElement(uri: String?, localName: String?, qName: String?) {
        super.endElement(uri, localName, qName)
        val text = textBuffer.toString().trim()
        textBuffer.setLength(0)
        trim = text
        when (qName) {
            "book" -> println("========================第$bookIndex 本书遍历完成")
            "books", null -> Unit
            else -> {
                when (qName) {
                    "name" -> book.name = text
                    "author" -> book.author = text
                    "year" -> book.year = text
                    "price" -> book.price = text
                }
                // Terminates the "子节点名字为: ..." line, even for empty elements.
                println(text)
            }
        }
    }

    /**
     * Marks the start of parsing.
     */
    override fun startDocument() {
        super.startDocument()
        println("SAX解析开始...")
    }

    /**
     * Marks the end of parsing and prints all collected books as pretty JSON.
     */
    override fun endDocument() {
        super.endDocument()
        println("SAX解析结束...")
        println(GsonBuilder().serializeNulls().setPrettyPrinting().create().toJson(bookList))
    }

    /**
     * Receives character data; appends it to the buffer for the current element.
     */
    override fun characters(ch: CharArray?, start: Int, length: Int) {
        super.characters(ch, start, length)
        if (ch == null) return
        textBuffer.append(String(ch, start, length))
        trim = textBuffer.toString().trim()
    }
}
结果
SAX解析开始...
开始遍历第1 本书
属性名为:id属性值为:1
子节点名字为: name 子节点的值为: XML深入浅出
子节点名字为: author 子节点的值为: Imooc
子节点名字为: year 子节点的值为: 2014
子节点名字为: price 子节点的值为: 89
========================第1 本书遍历完成
开始遍历第2 本书
属性名为:id属性值为:2
子节点名字为: name 子节点的值为: Java从入门到精通
子节点名字为: author 子节点的值为: Imooc
子节点名字为: price 子节点的值为: 369
========================第2 本书遍历完成
SAX解析结束...
[
{
"id": 1,
"name": "XML深入浅出",
"author": "Imooc",
"year": "2014",
"price": "89"
},
{
"id": 2,
"name": "Java从入门到精通",
"author": "Imooc",
"year": null,
"price": "369"
}
]
3.JDOM
Maven依赖
<!-- https://mvnrepository.com/artifact/org.jdom/jdom -->
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
<version>2.0.2</version>
</dependency>
主要代码
package imooc.imooc.jdom
import com.google.gson.GsonBuilder
import imooc.Book
import org.jdom2.input.SAXBuilder
import org.junit.Test
import java.io.FileInputStream
import java.io.InputStreamReader
/**
 * Parses `books.xml` with JDOM2 and prints the resulting [Book] list as JSON.
 *
 * @author futao
 * Created on 2017/11/3 - 13:41.
 */
class Jdom {
    /**
     * Reads the file as UTF-8, converts every child of the root element into a
     * [Book], printing each attribute and child element along the way.
     */
    @Test
    fun testJdom() {
        val list = ArrayList<Book>()
        // 1. Create a SAXBuilder.
        val saxBuilder = SAXBuilder()
        /*
         * Fixing garbled (mis-decoded) text:
         * 1. change the encoding declared in the XML file, or
         * 2. wrap the FileInputStream in an InputStreamReader with an explicit charset.
         */
        // 2. Load the XML through a reader; `use` closes the reader (and the
        //    underlying file stream) once the document has been built.
        val document = InputStreamReader(
            FileInputStream("D:\\src\\springhibernate\\springshibernate\\src\\test\\resources\\books.xml"),
            "UTF-8"
        ).use { reader -> saxBuilder.build(reader) }
        // 3. Root element.
        val rootElement = document.rootElement
        // 4. Children of the root, one per book. withIndex avoids an indexOf
        //    lookup on every iteration.
        for ((index, bookElement) in rootElement.children.withIndex()) {
            println("==================== 开始解析第${index + 1}本书==================== ")
            val book = Book()
            // Attributes of the book element.
            for (attribute in bookElement.attributes) {
                println(attribute.name + " : " + attribute.value)
                assignment(book, attribute.name, attribute.value)
            }
            // Child elements and their text.
            for (child in bookElement.children) {
                println(child.name + " : " + child.value)
                assignment(book, child.name, child.value)
            }
            list.add(book)
        }
        println(GsonBuilder().serializeNulls().setPrettyPrinting().create().toJson(list))
    }

    /**
     * Copies [value] into the property of [book] named by [property].
     * Unknown property names are ignored.
     *
     * @return the same [book] instance
     */
    private fun assignment(book: Book, property: String, value: String): Book {
        when (property) {
            "id" -> book.id = value.toInt()
            "name" -> book.name = value
            "author" -> book.author = value
            "year" -> book.year = value
            "price" -> book.price = value
        }
        return book
    }
}
结果
==================== 开始解析第1本书====================
id : 1
name : XML深入浅出
author : Imooc
year : 2014
price : 89
==================== 开始解析第2本书====================
id : 2
name : Java从入门到精通
author : Imooc
price : 369
Disconnected from the target VM, address: '127.0.0.1:59271', transport: 'socket'
[
{
"id": 1,
"name": "XML深入浅出",
"author": "Imooc",
"year": "2014",
"price": "89"
},
{
"id": 2,
"name": "Java从入门到精通",
"author": "Imooc",
"year": null,
"price": "369"
}
]
4.DOM4J
maven依赖
<!-- https://mvnrepository.com/artifact/dom4j/dom4j -->
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
</dependency>
主要代码
package imooc.imooc.dom4j
import com.google.gson.GsonBuilder
import imooc.Book
import org.dom4j.Attribute
import org.dom4j.Element
import org.dom4j.io.SAXReader
import org.junit.Test
import java.io.File
/**
 * Parses `books.xml` with dom4j and prints the resulting [Book] list as JSON.
 *
 * @author futao
 * Created on 2017/11/3 - 14:37.
 */
class DOM4JTest {
    /**
     * Converts every child of the root element into a [Book], printing each
     * attribute and child element along the way.
     */
    @Test
    fun testDOM4J() {
        val list = ArrayList<Book>()
        val saxReader = SAXReader()
        val document = saxReader.read(File("D:\\src\\springhibernate\\springshibernate\\src\\test\\resources\\books.xml"))
        // Root element.
        val rootElement = document.rootElement
        // elementIterator() walks the root's child elements. Its items are
        // untyped here, so each one is cast once into a named local.
        for ((index, node) in rootElement.elementIterator().withIndex()) {
            val bookElement = node as Element
            println("===============开始解析第${index + 1}本书===============")
            val book = Book()
            // Attributes of the book element.
            bookElement.attributes()
                .map { it as Attribute }
                .forEach { attribute ->
                    println(attribute.name + " : " + attribute.value)
                    assignment(book, attribute.name, attribute.value)
                }
            // Child elements and their trimmed text.
            for (childNode in bookElement.elementIterator()) {
                val child = childNode as Element
                val text = child.textTrim
                println(child.name + " : " + text)
                assignment(book, child.name, text)
            }
            list.add(book)
        }
        println(GsonBuilder().serializeNulls().setPrettyPrinting().create().toJson(list))
    }

    /**
     * Copies [value] into the property of [book] named by [property].
     * Unknown property names are ignored.
     *
     * @return the same [book] instance
     */
    private fun assignment(book: Book, property: String, value: String): Book {
        when (property) {
            "id" -> book.id = value.toInt()
            "name" -> book.name = value
            "author" -> book.author = value
            "year" -> book.year = value
            "price" -> book.price = value
        }
        return book
    }
}
结果
===============开始解析第1本书===============
id : 1
name : deep
name : XML深入浅出
author : Imooc
year : 2014
price : 89
===============开始解析第2本书===============
id : 2
name : Java从入门到精通
author : Imooc
price : 369
[
{
"id": 1,
"name": "XML深入浅出",
"author": "Imooc",
"year": "2014",
"price": "89"
},
{
"id": 2,
"name": "Java从入门到精通",
"author": "Imooc",
"year": null,
"price": "369"
}
]
读取效率
/**
 * Runs each of the four parsers once and prints how many milliseconds each run took.
 *
 * NOTE(review): every parser is timed a single time in a fixed order, so the
 * numbers are only a rough indication — presumably the first run also pays
 * one-time start-up costs; confirm before drawing conclusions.
 */
@Test
fun efficiencyCompare() {
    // Times one parser run and prints it in the "<label>耗时: <ms>" format.
    fun timed(label: String, parse: () -> Unit) {
        val startTime = System.currentTimeMillis()
        parse()
        println("${label}耗时: ${System.currentTimeMillis() - startTime}")
    }

    timed("DOM4J") { testDOM4J() }
    timed("JDOM") { Jdom().testJdom() }
    // The DOM test method on XML is named DOM(), not byDOM().
    timed("DOM") { XML().DOM() }
    timed("SAX") { XML().bySAX() }
}
结果
DOM4J耗时: 379
JDOM耗时: 141
DOM耗时: 52
SAX耗时: 32