python操作xml文件示例

1018次阅读  |  发布于5年以前

复制代码 代码如下:

def get_seed_data(filename, verbose=True):
    """Parse a seed-configuration XML file into a flat list of seed dicts.

    The document is walked as ``system`` -> ``section`` -> ``seed``. Every
    ``seed`` element produces one dict carrying the attributes of its
    enclosing ``system`` and ``section`` plus its own data.

    Args:
        filename: Path (or file object) accepted by ``minidom.parse``.
        verbose: When true (the default), print each parsed seed to stdout.
            Pass ``False`` to parse silently.

    Returns:
        A list of dicts with the keys ``crawl_cycle``, ``system_id``,
        ``system_name``, ``section_id``, ``section_name``, ``seed_id`` and
        ``userblog_url``. The three ``*_id`` values are converted to ``int``;
        the others are the text/attribute strings from the document.

    Raises:
        IndexError: If a ``section`` has no ``crawl_cycle`` element, a
            ``seed`` has no ``userblog_url`` element, or either element is
            empty.
        ValueError: If an ``id`` attribute used for a seed is not an integer.
    """
    # Imported locally so the function is usable without relying on a
    # module-level import that the surrounding snippet never shows.
    from xml.dom import minidom

    dom = minidom.parse(filename)
    root = dom.documentElement
    seed_list = []
    for system_node in root.getElementsByTagName("system"):
        system_id = system_node.getAttribute("id")
        system_name = system_node.getAttribute("name")
        if verbose:
            print(system_node.nodeName + ' id=' + system_id)
        for section_node in system_node.getElementsByTagName("section"):
            section_id = section_node.getAttribute('id')
            section_name = section_node.getAttribute('name')
            crawl_cycle = _first_child_text(section_node, "crawl_cycle")
            for seed_node in section_node.getElementsByTagName('seed'):
                # The int() conversions stay inside the seed loop so that a
                # system/section without seeds is never validated, exactly
                # as before.
                seed = {
                    'crawl_cycle': crawl_cycle,
                    'system_id': int(system_id),
                    'system_name': system_name,
                    'section_id': int(section_id),
                    'section_name': section_name,
                    'seed_id': int(seed_node.getAttribute('id')),
                    'userblog_url': _first_child_text(seed_node,
                                                      'userblog_url'),
                }
                if verbose:
                    _print_seed(seed)
                # NOTE: the per-seed os.system('pause') debugging call was
                # removed; it blocked parsing on a keypress and only exists
                # on Windows shells.
                seed_list.append(seed)
    return seed_list


def _first_child_text(node, tag_name):
    """Return the value of the first child node of the first *tag_name* element under *node*."""
    return node.getElementsByTagName(tag_name)[0].childNodes[0].nodeValue


def _print_seed(seed):
    """Print one parsed seed dict in the snippet's original report layout."""
    print('-------------------------------------------')
    print('system_id:%d' % seed['system_id'])
    print('system_name:%s' % seed['system_name'])
    print(' section_id:%d' % seed['section_id'])
    print(' section_name:%s' % seed['section_name'])
    print('  seed_id:%d' % seed['seed_id'])
    print('  userblog_url:%s' % seed['userblog_url'])
    print('=========================')
    print(seed)

复制代码 代码如下:

<?xml version="1.0" encoding="utf-8" ?>

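<!-- The element tags of this sample were lost when the page was extracted.
     The structure below follows what get_seed_data reads; the root element
     name, the id/name attributes and the crawl_cycle values are placeholders. -->
<seeds>
  <system id="1" name="system_1">
    <section id="1" name="section_1">
      <crawl_cycle>1</crawl_cycle>
      <seed id="1"><userblog_url>http://aaa.com.cn/loveissuuny</userblog_url></seed>
      <seed id="2"><userblog_url>http://aaa.com.cn/loveissuuny</userblog_url></seed>
      <seed id="3"><userblog_url>http://aaa.com.cn/sanxiazaixian</userblog_url></seed>
    </section>
    <section id="2" name="section_2">
      <crawl_cycle>1</crawl_cycle>
      <seed id="4"><userblog_url>http://aaa.com.cn/twocold</userblog_url></seed>
      <seed id="5"><userblog_url>http://aaa.com.cn/u/1233526741</userblog_url></seed>
    </section>
  </system>
</seeds>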

Copyright© 2013-2020

All Rights Reserved 京ICP备2023019179号-8