首先说明一下:这篇文章主要是我自己学习时随手写的,读者看起来可能会觉得比较乱。推荐大家参考另一篇条理更清晰的文章:http://www.cnblogs.com/xuxm2007/archive/2011/01/16/1936610.html
示例代码(使用 xml.dom.minidom):
#coding=utf-8
# Demo of xml.dom.minidom: list every <link> tag of a parsed document,
# inspect the attributes of one element node, then build a tiny DOM tree
# from scratch and reorder its children.
#
# Every print call receives exactly one pre-formatted argument, so the
# output is the same whether `print` is the Python 2 statement or the
# Python 3 function.
from xml.dom import minidom
from xml.dom.minidom import getDOMImplementation

# minidom is an XML parser, so the input page must be well-formed XML.
doc = minidom.parse("d:\\hello.html")

# First pass: print each <link> element in a tag-like form.
# The spaces inside the quotes are deliberate: they reproduce the layout
# of the original comma-separated print statements.
nodes = doc.getElementsByTagName("link")
for link in nodes:
    print('< %s type=" %s " rel=" %s " href=" %s " />' % (
        link.tagName,
        link.getAttribute("type"),
        link.getAttribute("rel"),
        link.getAttribute("href"),
    ))

# Second pass: print only the attribute values, separated by spaces.
print("通过另外一种方式获得link标签")
linknodes = doc.getElementsByTagName("link")
for link in linknodes:
    print("%s %s %s" % (
        link.getAttribute("type"),
        link.getAttribute("rel"),
        link.getAttribute("href"),
    ))

# Inspect a single node: the first <link> element.
# Raises IndexError when the document has no <link> element.
node = linknodes[0]
print(dir(node))
print(node.parentNode)
print(node.prefix)
print("%s %s %s" % (node.nodeType, node.nodeValue, node.nodeName))
print(node.localName)
print(node.childNodes)
print("%s %s" % (node.firstChild, node.lastChild))
print(node.attributes)
print(node.namespaceURI)
print(node.nextSibling)
print("--" * 10)
print(node.tagName)
print("===" * 20)

# Build a new document whose root element is <some_tag>.
impl = getDOMImplementation()
newdoc = impl.createDocument(None, "some_tag", None)
top_element = newdoc.documentElement

node1 = newdoc.createTextNode("node1")
node2 = newdoc.createTextNode("node2")
node3 = newdoc.createTextNode("node3")
top_element.appendChild(node1)
top_element.appendChild(node2)
top_element.appendChild(node3)

# Detach node3 and re-insert it in front of node2, giving the child
# order node1, node3, node2.
top_element.removeChild(node3)
top_element.insertBefore(node3, node2)
print(top_element.childNodes)
|
运行结果:
< link type=" text/css " rel=" stylesheet " href=" http://www.cnblogs.com/css/common.css " />
< link type=" text/css " rel=" stylesheet " href=" http://www.cnblogs.com/Skins/kubrick/style.css " />
< link type=" text/css " rel=" stylesheet " href=" http://www.cnblogs.com/css/common2.css " />
< link type=" text/css " rel=" stylesheet " href=" http://common.cnblogs.com/css/shCore.css " />
< link type=" text/css " rel=" stylesheet " href=" http://common.cnblogs.com/css/shThemeDefault.css " />
< link type=" application/rss+xml " rel=" alternate " href=" http://www.cnblogs.com/rollenholt/rss " />
< link type=" application/rsd+xml " rel=" EditURI " href=" http://www.cnblogs.com/rollenholt/rsd.xml " />
< link type=" application/wlwmanifest+xml " rel=" wlwmanifest " href=" http://www.cnblogs.com/rollenholt/wlwmanifest.xml " />
通过另外一种方式获得link标签
text/css stylesheet http://www.cnblogs.com/css/common.css
text/css stylesheet http://www.cnblogs.com/Skins/kubrick/style.css
text/css stylesheet http://www.cnblogs.com/css/common2.css
text/css stylesheet http://common.cnblogs.com/css/shCore.css
text/css stylesheet http://common.cnblogs.com/css/shThemeDefault.css
application/rss+xml alternate http://www.cnblogs.com/rollenholt/rss
application/rsd+xml EditURI http://www.cnblogs.com/rollenholt/rsd.xml
application/wlwmanifest+xml wlwmanifest http://www.cnblogs.com/rollenholt/wlwmanifest.xml
['ATTRIBUTE_NODE', 'CDATA_SECTION_NODE', 'COMMENT_NODE', 'DOCUMENT_FRAGMENT_NODE', 'DOCUMENT_NODE', 'DOCUMENT_TYPE_NODE', 'ELEMENT_NODE', 'ENTITY_NODE', 'ENTITY_REFERENCE_NODE', 'NOTATION_NODE', 'PROCESSING_INSTRUCTION_NODE', 'TEXT_NODE', '__doc__', '__init__', '__module__', '__nonzero__', '__repr__', '_attrs', '_attrsNS', '_call_user_data_handler', '_child_node_types', '_get_attributes', '_get_childNodes', '_get_firstChild', '_get_lastChild', '_get_localName', '_get_tagName', '_magic_id_nodes', 'appendChild', 'attributes', 'childNodes', 'cloneNode', 'firstChild', 'getAttribute', 'getAttributeNS', 'getAttributeNode', 'getAttributeNodeNS', 'getElementsByTagName', 'getElementsByTagNameNS', 'getInterface', 'getUserData', 'hasAttribute', 'hasAttributeNS', 'hasAttributes', 'hasChildNodes', 'insertBefore', 'isSameNode', 'isSupported', 'lastChild', 'localName', 'namespaceURI', 'nextSibling', 'nodeName', 'nodeType', 'nodeValue', 'normalize', 'ownerDocument', 'parentNode', 'prefix', 'previousSibling', 'removeAttribute', 'removeAttributeNS', 'removeAttributeNode', 'removeAttributeNodeNS', 'removeChild', 'replaceChild', 'schemaType', 'setAttribute', 'setAttributeNS', 'setAttributeNode', 'setAttributeNodeNS', 'setIdAttribute', 'setIdAttributeNS', 'setIdAttributeNode', 'setUserData', 'tagName', 'toprettyxml', 'toxml', 'unlink', 'writexml']
<DOM Element: head at 0x1b3e968>
None
1 None link
link
[]
None None
<xml.dom.minidom.NamedNodeMap object at 0x01B4D648>
http://www.w3.org/1999/xhtml
<DOM Text node "u'\n'">
--------------------
link
============================================================
[<DOM Text node "'node1'">, <DOM Text node "'node3'">, <DOM Text node "'node2'">]
|