编程 Python

python解析xml模块封装代码

Posted in Python onFebruary 07, 2014

有如下的xml文件：

<?xml version="1.0" encoding="utf-8" ?>  
<root>  
<childs>  
<child name='first' >1</child>  
<child value="2">2</child>  
</childs>  
</root>

下面介绍python解析xml文件的几种方法，使用python模块实现。

方式1，python模块实现自动遍历所有节点：

#!/usr/bin/env python  
# -*- coding: utf-8 -*-  
from xml.sax.handler import ContentHandler  
from xml.sax import parse
class TestHandle(ContentHandler):  
    def __init__(self, inlist):  
        self.inlist = inlist      def startElement(self,name,attrs):  
        print 'name:',name, 'attrs:',attrs.keys()  
    def endElement(self,name):  
        print 'endname',name  
    def characters(self,chars):  
        print 'chars',chars  
        self.inlist.append(chars)  
              
if __name__ == '__main__':  
    lt = []  
    parse('test.xml', TestHandle(lt))  
    print lt

结果：
[html] view plaincopy
name: root attrs: []
chars

endname childs
chars

endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2，python模块实现获取根节点，按需查找指定节点：

#!/usr/bin/env python    
# -*- coding: utf-8 -*-    
from xml.dom import minidom    
xmlstr = '''''<?xml version="1.0" encoding="UTF-8"?> 
<hash> 
    <request name='first'>/2/photos/square/type.xml</request> 
    <error_code>21301</error_code> 
    <error>auth faild!</error> 
</hash> 
'''  
def doxml(xmlstr):  
    dom = minidom.parseString(xmlstr)      
    print 'Dom:'      
    print dom.toxml()        root = dom.firstChild      
    print 'root:'      
    print root.toxml()    
    childs = root.childNodes    
    for child in childs:  
        print child.toxml()  
        if child.nodeType == child.TEXT_NODE:  
            pass  
        else:  
            print 'child node attribute name:', child.getAttribute('name')  
            print 'child node name:', child.nodeName  
            print 'child node len:',len(child.childNodes)  
            print 'child data:',child.childNodes[0].data  
            print '======================================='  
            print 'more help info to see:'  
            for med in dir(child):  
                print help(med)      
                
if __name__ == '__main__':    
    doxml(xmlstr)

结果：
[html] view plaincopy
Dom:
<?xml version="1.0" ?><hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
root:
<hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>

<request name="first">/2/photos/square/type.xml</request>
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:
两种方法各有其优点，python的xml处理模块太多，目前只用到这2个。

=====补充分割线================
实际工作中发现python的mimidom无法解析其它编码的xml，只能解析utf-8的编码，而其xml文件的头部申明也必须是utf-8，为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码申明，然后转换编码为utf-8再用minidom解码，实际测试为可行，不过有点累赘的感觉。

本节是 python解析xml模块封装代码的第二部分。
====写xml内容的分割线=========

#!\urs\bin\env python  
#encoding: utf-8  
from xml.dom import minidom  class xmlwrite:  
    def __init__(self, resultfile):  
        self.resultfile = resultfile  
        self.rootname = 'api'  
        self.__create_xml_dom()  
    def __create_xml_dom(self):  
        xmlimpl = minidom.getDOMImplementation()  
        self.dom = xmlimpl.createDocument(None, self.rootname, None)  
        self.root = self.dom.documentElement  
    def __get_spec_node(self, xpath):  
        patharr = xpath.split(r'/')  
        parentnode = self.root  
        exist = 1  
        for nodename in patharr:  
            if nodename.strip() == '':  
                continue  
            if not exist:  
                return None  
            spcindex = nodename.find('[')  
            if spcindex > -1:  
                index = int(nodename[spcindex+1:-1])  
            else:  
                index = 0  
            count = 0  
            childs = parentnode.childNodes  
            for child in childs:  
                if child.nodeName == nodename[:spcindex]:  
                    if count == index:  
                        parentnode = child  
                        exist = 1  
                        break  
                    count += 1  
                    continue  
                else:  
                    exist = 0  
        return parentnode  
          
    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):  
        node = self.dom.createElement(nodename)  
        if value:  
            if CDATA:  
                nodedata = self.dom.createCDATASection(value)  
            else:  
                nodedata = self.dom.createTextNode(value)  
            node.appendChild(nodedata)  
            if attribute and isinstance(attribute, dict):  
                for key, value in attribute.items():  
                    node.setAttribute(key, value)     
        try:  
            parentnode = self.__get_spec_node(parent)  
        except:  
            print 'Get parent Node Fail, Use the Root as parent Node'  
            parentnode = self.root  
        parentnode.appendChild(node)  
      
    def write_start_time(self, time):  
        self.write_node('/','StartTime', time)  
    def write_end_time(self, time):  
        self.write_node('/','EndTime', time)      
    def write_pass_count(self, count):  
        self.write_node('/','PassCount', count)     
    def write_fail_count(self, count):  
        self.write_node('/','FailCount', count)     
    def write_case(self):  
        self.write_node('/','Case', None)     
    def write_case_no(self, index, value):  
        self.write_node('/Case[%s]/' % index,'No', value)  
    def write_case_url(self, index, value):  
        self.write_node('/Case[%s]/' % index,'URL', value)  
    def write_case_dbdata(self, index, value):  
        self.write_node('/Case[%s]/' % index,'DBData', value)  
    def write_case_apidata(self, index, value):  
        self.write_node('/Case[%s]/' % index,'APIData', value)  
    def write_case_dbsql(self, index, value):  
        self.write_node('/Case[%s]/' % index,'DBSQL', value, CDATA=True)  
    def write_case_apixpath(self, index, value):  
        self.write_node('/Case[%s]/' % index,'APIXPath', value)         
    def save_xml(self):  
        myfile = file(self.resultfile, 'w')  
        self.dom.writexml(myfile, encoding='utf-8')  
        myfile.close()  
if __name__ == '__main__':  
      xr = xmlwrite(r'D:\test.xml')  
      xr.write_start_time('2223')  
      xr.write_end_time('444')        
      xr.write_pass_count('22')  
      xr.write_fail_count('33')    
      xr.write_case()  
      xr.write_case()  
      xr.write_case_no(0, '0')  
      xr.write_case_url(0, 'http://www.google.com')     
      xr.write_case_url(0, 'http://www.google.com')     
      xr.write_case_dbsql(0, 'select * from ')  
      xr.write_case_dbdata(0, 'dbtata')  
      xr.write_case_apixpath(0, '/xpath')  
      xr.write_case_apidata(0, 'apidata')  
      xr.write_case_no(1, '1')         
      xr.write_case_url(1, 'http://www.baidu.com')     
      xr.write_case_url(1, 'http://www.baidu.com')     
      xr.write_case_dbsql(1, 'select 1 from ')  
      xr.write_case_dbdata(1, 'dbtata1')  
      xr.write_case_apixpath(1, '/xpath1')  
      xr.write_case_apidata(1, 'apidata1')  
      xr.save_xml()

以上封装了minidom，支持通过xpath来写节点，不支持xpath带属性的匹配，但支持带索引的匹配。
比如：/root/child[1], 表示root的第2个child节点。

python解析xml模块封装代码

声明：登载此文出于传递更多信息之目的，并不意味着赞同其观点或证实其描述。

Python 相关文章推荐

将Python代码打包为jar软件的简单方法

Aug 04 Python

Python列出一个文件夹及其子目录的所有文件

Jun 30 Python

Python设计模式之抽象工厂模式

Aug 25 Python

python爬虫之自动登录与验证码识别

Jun 15 Python

python按照多个条件排序的方法

Feb 08 Python

在python里面运用多继承方法详解

Jul 01 Python

linux中如何使用python3获取ip地址

Jul 15 Python

使用turtle绘制五角星、分形树

Oct 06 Python

pytorch-RNN进行回归曲线预测方式

Jan 14 Python

Python新手学习标准库模块命名

May 29 Python

python中numpy.empty()函数实例讲解

Feb 05 Python

Golang Web 框架Iris安装部署

Aug 14 Python

python 解析XML python模块xml.dom解析xml实例代码

Feb 07 #Python

python合并文本文件示例

Feb 07 #Python

python实现哈希表

Feb 07 #Python

python处理cookie详解

Feb 07 #Python

urllib2自定义opener详解

Feb 07 #Python

python解析html开发库pyquery使用方法

Feb 07 #Python

python3.3实现乘法表示例

Feb 07 #Python