python解析xml模块封装代码

1411次阅读  |  发布于5年以前

有如下的xml文件:

复制代码 代码如下:

<?xml version="1.0" encoding="utf-8" ?>
<root>
<childs>
<child name="first">1</child>
<child value="2">2</child>
</childs>
</root>

下面介绍python解析xml文件的几种方法,使用python模块实现。

方式1,python模块实现自动遍历所有节点:

复制代码 代码如下:

#!/usr/bin/env python

# -*- coding: utf-8 -*-

from xml.sax.handler import ContentHandler
from xml.sax import parse
class TestHandle(ContentHandler):
    """SAX handler that prints every parse event and collects text chunks.

    Each chunk of character data reported by the parser (including the
    whitespace between elements) is appended to the list given to the
    constructor, so the caller can inspect it once parsing has finished.
    """

    def __init__(self, inlist):
        """Store the list that will receive the character data.

        Args:
            inlist: A mutable list; it is appended to in place.
        """
        # ContentHandler is an old-style class on Python 2, so the base
        # initializer is called explicitly instead of through super().
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the name of an opening tag and its attribute names."""
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of a closing tag."""
        print('endname %s' % (name,))

    def characters(self, chars):
        """Print a chunk of character data and record it in ``inlist``."""
        print('chars %s' % (chars,))
        self.inlist.append(chars)

# Run the SAX demo only when the file is executed as a script.
if __name__ == '__main__':
    lt = []
    # The handler appends every text chunk it sees to ``lt``.
    parse('test.xml', TestHandle(lt))
    print(lt)

结果:
[html] view plaincopy
name: root attrs: []
chars

name: childs attrs: []
chars

name: child attrs: [u'name']
chars 1
endname child
chars

name: child attrs: [u'value']
chars 2
endname child
chars

endname childs
chars

endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2,python模块实现获取根节点,按需查找指定节点:

复制代码 代码如下:

#!/usr/bin/env python

# -*- coding: utf-8 -*-

from xml.dom import minidom
# Sample API error response used by the demo.
# NOTE(review): the element names were reconstructed from the text content and
# the printed results shown in the article (node name "request", attribute
# name="first"); confirm against the original post.
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
'''


def doxml(xmlstr):
    """Parse an XML string with minidom and print details of the root's children.

    The whole document and the root element are printed first. Then, for
    every direct child of the root that is not a text node, its ``name``
    attribute, node name, child count and leading data are printed,
    followed by the built-in help for each of its members.

    Args:
        xmlstr: The XML document as a string.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when a comment or doctype
    # precedes it, which firstChild does not guarantee.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        if child.nodeType == child.TEXT_NODE:
            # Whitespace between elements shows up as text nodes; skip them.
            continue
        print('child node attribute name: %s' % (child.getAttribute('name'),))
        print('child node name: %s' % (child.nodeName,))
        print('child node len: %s' % (len(child.childNodes),))
        # An empty element has no first child, so fall back to an empty string.
        first = child.firstChild
        print('child data: %s' % (getattr(first, 'data', ''),))
        print('=======================================')
        print('more help info to see:')
        for med in dir(child):
            # dir() yields attribute *names*; look the member up so help()
            # documents the member itself rather than an unrelated topic.
            help(getattr(child, med))

# Run the minidom demo only when the file is executed as a script.
if __name__ == '__main__':
    doxml(xmlstr)

结果:
[html] view plaincopy
Dom:
<?xml version="1.0" ?>

/2/photos/square/type.xml 21301 auth faild! root: /2/photos/square/type.xml 21301 auth faild! /2/photos/square/type.xml child node attribute name: first child node name: request child node len: 1 child data: /2/photos/square/type.xml ======================================= more help info to see:

两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。

=====补充分割线================
实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8的编码,而且xml文件的头部申明也必须是utf-8,申明为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码申明,然后转换编码为utf-8再用minidom解码,实际测试为可行,不过有点累赘的感觉。

本节是 python解析xml模块封装代码 的第二部分。
====写xml内容的分割线=========

复制代码 代码如下:

#!/usr/bin/env python

# encoding: utf-8

from xml.dom import minidom

class xmlwrite(object):
    """Thin wrapper around minidom for building a result XML file.

    The document root is always ``<api>``. Nodes are added with
    ``write_node`` (or one of the ``write_*`` shortcuts), and the parent is
    addressed by a simple slash-separated path relative to the root, for
    example ``/Case[1]/``. A step may carry a zero-based index in square
    brackets; a step without an index means the first matching child.
    Attribute predicates are not supported.
    """

    def __init__(self, resultfile):
        """Create an empty document.

        Args:
            resultfile: Path that ``save_xml`` will write to.
        """
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Build the empty DOM and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Resolve a path relative to the root element.

        Args:
            xpath: Slash-separated steps such as ``/Case[1]/``; empty steps
                are ignored, so ``/`` resolves to the root itself.

        Returns:
            The matching node, or None when any step has no match.

        Raises:
            ValueError: If the text inside ``[...]`` is not an integer.
        """
        current = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                continue
            bracket = step.find('[')
            if bracket > -1:
                tagname = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # No index given: use the whole step as the tag name.
                # Slicing with the -1 returned by find() would drop its
                # last character.
                tagname = step
                index = 0
            matches = [child for child in current.childNodes
                       if child.nodeName == tagname]
            if index < 0 or index >= len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Create an element and append it under the node addressed by ``parent``.

        Args:
            parent: Path of the parent node, relative to the root.
            nodename: Tag name of the new element.
            value: Text content; a falsy value creates an empty element.
            attribute: Optional dict of attribute names to values.
            CDATA: When true, ``value`` is stored as a CDATA section.

        If the parent path cannot be resolved, a message is printed and the
        element is appended to the root instead.
        """
        node = self.dom.createElement(nodename)
        if value:
            if CDATA:
                payload = self.dom.createCDATASection(value)
            else:
                payload = self.dom.createTextNode(value)
            node.appendChild(payload)
        # Attributes are independent of the text content, so they are applied
        # even when the element has no value.
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, TypeError, ValueError):
            # Non-string path or malformed index: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append ``<StartTime>`` to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append ``<EndTime>`` to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append ``<PassCount>`` to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append ``<FailCount>`` to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append an empty ``<Case>`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append ``<No>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append ``<URL>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append ``<DBData>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append ``<APIData>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append ``<DBSQL>`` (as CDATA) to the ``index``-th ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append ``<APIXPath>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Serialize the document as UTF-8 and write it to ``resultfile``."""
        # toxml(encoding=...) returns encoded bytes, so the file is opened in
        # binary mode; the with-block closes it even if the write fails.
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(self.dom.toxml(encoding='utf-8'))

# Demo: build a small report with two <Case> entries and save it to disk.
if __name__ == '__main__':
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    # Create both <Case> containers first so they can be addressed by index.
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()

以上封装了minidom,支持通过xpath来写节点,不支持xpath带属性的匹配,但支持带索引的匹配。
比如:/root/child[1], 表示root的第2个child节点。

Copyright© 2013-2020

All Rights Reserved 京ICP备2023019179号-8