前言
最近有个同事遇到一个 XML 解析的问题,说是很复杂,让我帮忙看看。我解析完之后在这里分享一下,希望遇到同样问题的朋友可以借鉴参考。
所谓的复杂xml
<?xml version="1.0" encoding="UTF-8"?> <getOutptTplAsnResponse xmlns="http://tempuri.org/"> <getOutptTplAsnResult> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata" id="NewDataSet"> <xs:element name="NewDataSet" msdata:IsDataSet="true" msdata:UseCurrentLocale="true"> <xs:complexType> <xs:choice minOccurs="0" maxOccurs="unbounded"> <xs:element name="ds"> <xs:complexType> <xs:sequence> <xs:element name="SHPMT_NBR" type="xs:string" minOccurs="0"/> <xs:element name="TO_WHSE" type="xs:string" minOccurs="0"/> <xs:element name="ASN_NBR" type="xs:string" minOccurs="0"/> <xs:element name="ASN_NBR_DTL" type="xs:string" minOccurs="0"/> <xs:element name="SEASON" type="xs:string" minOccurs="0"/> <xs:element name="SIZE_DESC" type="xs:string" minOccurs="0"/> <xs:element name="REC_QTY" type="xs:decimal" minOccurs="0"/> <xs:element name="BATCH_NBR" type="xs:string" minOccurs="0"/> <xs:element name="MFG_DATE" type="xs:dateTime" minOccurs="0"/> <xs:element name="XPIRE_DATE" type="xs:dateTime" minOccurs="0"/> <xs:element name="INV_LOCK" type="xs:string" minOccurs="0"/> <xs:element name="PROC_STAT_CODE" type="xs:decimal" minOccurs="0"/> <xs:element name="CREATE_DATE_TIME" type="xs:dateTime" minOccurs="0"/> <xs:element name="USER_ID" type="xs:string" minOccurs="0"/> <xs:element name="SKU_ATTR_1" type="xs:string" minOccurs="0"/> <xs:element name="ASN_ORGN_TYPE" type="xs:string" minOccurs="0"/> <xs:element name="OUT_NBR" type="xs:string" minOccurs="0"/> <xs:element name="UNITS_RCVD" type="xs:decimal" minOccurs="0"/> <xs:element name="CARRIER_DATE" type="xs:dateTime" minOccurs="0"/> <xs:element name="TO_DATE" type="xs:dateTime" minOccurs="0"/> <xs:element name="CARRIER_COMPANY" type="xs:string" minOccurs="0"/> <xs:element name="CARRIER_CITY" type="xs:string" minOccurs="0"/> <xs:element name="MAX_DATE" type="xs:decimal" minOccurs="0"/> <xs:element name="CARRIER_TYPE" type="xs:string" minOccurs="0"/> <xs:element name="CASE" 
type="xs:decimal" minOccurs="0"/> <xs:element name="UNITS" type="xs:decimal" minOccurs="0"/> <xs:element name="TEMPERATURE" type="xs:decimal" minOccurs="0"/> <xs:element name="TEMP_OUT" type="xs:string" minOccurs="0"/> <xs:element name="COLD_CARRIER" type="xs:string" minOccurs="0"/> <xs:element name="SAMP_CASES" type="xs:decimal" minOccurs="0"/> <xs:element name="SAMP_UNITS" type="xs:decimal" minOccurs="0"/> <xs:element name="OUTPT_ASN_ID" type="xs:decimal" minOccurs="0"/> <xs:element name="CARTON_TYPE" type="xs:string" minOccurs="0"/> </xs:sequence> </xs:complexType> </xs:element> </xs:choice> </xs:complexType> </xs:element> </xs:schema> <diffgr:diffgram xmlns:diffgr="urn:schemas-microsoft-com:xml-diffgram-v1" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata"> <NewDataSet xmlns=""> <ds diffgr:id="ds1" msdata:rowOrder="0"> <SHPMT_NBR>000009459</SHPMT_NBR> <TO_WHSE>SY1</TO_WHSE> <ASN_NBR_DTL>000009459</ASN_NBR_DTL> <SEASON>CS</SEASON> <SIZE_DESC>000008</SIZE_DESC> <REC_QTY>5</REC_QTY> <BATCH_NBR>20210824</BATCH_NBR> <MFG_DATE>2021-02-01T00:00:00+08:00</MFG_DATE> <XPIRE_DATE>2029-02-02T00:00:00+08:00</XPIRE_DATE> <INV_LOCK>1</INV_LOCK> <PROC_STAT_CODE>0</PROC_STAT_CODE> <CREATE_DATE_TIME>2021-08-24T16:12:01+08:00</CREATE_DATE_TIME> <USER_ID>xxx</USER_ID> <ASN_ORGN_TYPE>P</ASN_ORGN_TYPE> <UNITS_RCVD>5</UNITS_RCVD> <MAX_DATE>0</MAX_DATE> <CASE>0</CASE> <UNITS>0</UNITS> <TEMPERATURE>0</TEMPERATURE> <SAMP_CASES>0</SAMP_CASES> <SAMP_UNITS>0</SAMP_UNITS> <OUTPT_ASN_ID>20500</OUTPT_ASN_ID> </ds> <ds diffgr:id="ds2" msdata:rowOrder="1"> <SHPMT_NBR>CSP0058229</SHPMT_NBR> <TO_WHSE>SY1</TO_WHSE> <ASN_NBR>JD-2022-05-24-0003</ASN_NBR> <ASN_NBR_DTL>620</ASN_NBR_DTL> <SEASON>CS</SEASON> <SIZE_DESC>020100003</SIZE_DESC> <REC_QTY>8</REC_QTY> <BATCH_NBR>2022-403</BATCH_NBR> <MFG_DATE>2021-02-01T00:00:00+08:00</MFG_DATE> <XPIRE_DATE>2029-03-03T00:00:00+08:00</XPIRE_DATE> <INV_LOCK>1</INV_LOCK> <PROC_STAT_CODE>0</PROC_STAT_CODE> 
<CREATE_DATE_TIME>2022-05-24T10:35:15+08:00</CREATE_DATE_TIME> <USER_ID>xx</USER_ID> <ASN_ORGN_TYPE>P</ASN_ORGN_TYPE> <UNITS_RCVD>10</UNITS_RCVD> <MAX_DATE>0</MAX_DATE> <CASE>0</CASE> <UNITS>0</UNITS> <TEMPERATURE>0</TEMPERATURE> <SAMP_CASES>0</SAMP_CASES> <SAMP_UNITS>0</SAMP_UNITS> <OUTPT_ASN_ID>26174</OUTPT_ASN_ID> </ds> <ds diffgr:id="ds3" msdata:rowOrder="2"> <SHPMT_NBR>CSP0058229</SHPMT_NBR> <TO_WHSE>SY1</TO_WHSE> <ASN_NBR>JD-2022-05-24-0003</ASN_NBR> <ASN_NBR_DTL>620</ASN_NBR_DTL> <SEASON>CS</SEASON> <SIZE_DESC>020100003</SIZE_DESC> <REC_QTY>2</REC_QTY> <BATCH_NBR>20323-1</BATCH_NBR> <MFG_DATE>2021-02-03T00:00:00+08:00</MFG_DATE> <XPIRE_DATE>2029-04-02T00:00:00+08:00</XPIRE_DATE> <INV_LOCK>1</INV_LOCK> <PROC_STAT_CODE>0</PROC_STAT_CODE> <CREATE_DATE_TIME>2022-05-24T10:35:15+08:00</CREATE_DATE_TIME> <USER_ID>xxxxx</USER_ID> <ASN_ORGN_TYPE>P</ASN_ORGN_TYPE> <UNITS_RCVD>10</UNITS_RCVD> <MAX_DATE>0</MAX_DATE> <CASE>0</CASE> <UNITS>0</UNITS> <TEMPERATURE>0</TEMPERATURE> <SAMP_CASES>0</SAMP_CASES> <SAMP_UNITS>0</SAMP_UNITS> <OUTPT_ASN_ID>26175</OUTPT_ASN_ID> </ds> </NewDataSet> </diffgr:diffgram> </getOutptTplAsnResult> </getOutptTplAsnResponse>
解析思路
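使用 dom4j,把路径以 > 作为分隔符进行分割,从根节点的子节点开始逐级向下定位(路径中不包含根节点本身,节点名使用不带命名空间前缀的本地名,例如 diffgr:diffgram 写作 diffgram),定位到目标数据的上一层节点,然后通过 getElement 获取单个元素,或者通过 getElements 获取该节点下的一组子元素(例如多条相同标签的 ds 记录)。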
解析实战
1.引入 dom4j 依赖
<!-- dom4j: provides the org.dom4j Document/Element/XMLWriter classes imported by the XmlUtil helper in this article -->
<dependency> <groupId>org.dom4j</groupId> <artifactId>dom4j</artifactId> <version>2.1.3</version> </dependency>
2.引入我的xml解析工具类
import org.dom4j.Document; import org.dom4j.DocumentHelper; import org.dom4j.Element; import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.List; /** * Dom4j读取Element * * @author hfl */ public class XmlUtil { private static Logger logger = LoggerFactory.getLogger(XmlUtil.class.getName()); public static List<Element> getElements(Document document, String path) { return getElement(document, path).elements(); } public static Element getElement(Document document, String path) { String[] paths = path.split(">"); Element result = document.getRootElement(); for (String item : paths) { try { result = result.element(item); } catch (Exception e) { logger.error("get element error: ", e); } } return result; } public static String getValue(Document document, String path) { String[] paths = path.split(">"); Element result = document.getRootElement(); for (String item : paths) { try { result = result.element(item); } catch (Exception e) { logger.error("get element error: ", e); } } return result.getText(); } public static Document create() { // 创建一个xml文档 Document doc = DocumentHelper.createDocument(); Element university = doc.addElement("university"); university.addAttribute("name", "tsu"); // 注释 university.addComment("这个是根节点"); Element college = university.addElement("college"); college.addAttribute("name", "cccccc"); college.setText("text"); return doc; } public static void write(Document doc) { try { File file = new File("src/dom4j-modify.xml"); if (file.exists()) { file.delete(); } file.createNewFile(); XMLWriter out = new XMLWriter(new FileWriter(file)); out.write(doc); out.flush(); out.close(); } catch (IOException e) { e.printStackTrace(); } } }
3.开始解析测试类
/**
 * Demonstrates reading the sample XML with {@code XmlUtil}: every child of
 * {@code NewDataSet} is flattened into a name-to-text map and logged as JSON.
 */
@Slf4j
public class DomTest {

    /**
     * Parses the XML file and logs one JSON object per child element of
     * {@code getOutptTplAsnResult>diffgram>NewDataSet}.
     *
     * @throws Exception if the file cannot be read or parsed
     */
    @Test
    public void xmlToList() throws Exception {
        // NOTE(review): absolute Windows path from the author's machine; adjust before running.
        File xmlFile = new File("D:\\comp\\springboot2\\02\\vehicle-business\\vehicle-business-service\\src\\test\\java\\com\\wuzheng\\vehicle\\business\\domtest\\xmltest.xml");
        Document document = new SAXReader().read(xmlFile);

        List<Element> rows = XmlUtil.getElements(document, "getOutptTplAsnResult>diffgram>NewDataSet");
        Gson gson = new Gson();
        rows.forEach(row -> {
            log.info("recordlist ----------------");
            // Child element name -> trimmed text.
            Map<String, String> fields = row.elements()
                    .stream()
                    .collect(Collectors.toMap(child -> child.getName(), child -> child.getTextTrim()));
            log.info("recordlist = {}", gson.toJson(fields));
        });
    }
}
4.查看效果:
3条全部解析完毕!!!
大功告成!!