zoukankan      html  css  js  c++  java
  • Python3实现xml转json文件

    本文使用 Python 标准库中的 xml.etree.ElementTree 模块,Python 版本为 3.6.6。

    from xml.etree import ElementTree
    import json
    
    # Child-layout markers returned by checkxmlchildrentype(). getResults() tests
    # the returned value for truthiness, so LISTTYPE is truthy and DICTTYPE falsy.
    LISTTYPE = 1  # at least two direct children share the same tag
    DICTTYPE = 0  # all direct child tags are distinct (or there are no children)
    
    def getDictResults(res_dicts, iters):
        """Merge the converted direct children of ``iters`` into its dict entry.

        Every direct child is converted by ``iterxml`` into one shared dict
        keyed by the child's tag; that dict is then merged into
        ``res_dicts[iters.tag]`` with ``dict.update``.

        Args:
            res_dicts: Dict that already holds an entry (a dict) for
                ``iters.tag``; it is mutated in place.
            iters: The parent ``xml.etree.ElementTree.Element``.

        Returns:
            None. Nothing is changed when ``iters`` has no children.
        """
        child_results = {}
        # Iterate the element itself: Element.getchildren() was removed in
        # Python 3.9 and raises AttributeError there.
        for child in iters:
            iterxml(child, child_results)

        if child_results:
            res_dicts[iters.tag].update(child_results)
    
    def getListResults(res_dicts, iters):
        """Store the converted children of ``iters`` when some child tags repeat.

        Each direct child is converted by ``iterxml`` into its own single-key
        dict ``{child.tag: value}``. Then:

        * if ``res_dicts[iters.tag]`` is still empty (the parent contributed no
          attributes or text), it is replaced by the list of those dicts;
        * otherwise each child is merged into the parent's dict, and children
          whose tag is already present are collected into a list under that tag.

        Args:
            res_dicts: Dict that already holds an entry (a dict) for
                ``iters.tag``; it is mutated in place.
            iters: The parent ``xml.etree.ElementTree.Element``.

        Returns:
            None. Nothing is changed when ``iters`` has no children.
        """
        result_lists = []
        # Iterate the element itself: Element.getchildren() was removed in
        # Python 3.9 and raises AttributeError there.
        for child in iters:
            child_dict = {}
            iterxml(child, child_dict)
            result_lists.append(child_dict)

        if not result_lists:
            return

        target = res_dicts[iters.tag]
        if not target:
            # The parent has nothing of its own: represent it as a plain list.
            res_dicts[iters.tag] = result_lists
            return

        for resobj in result_lists:
            # iterxml() always produces exactly one key: the child's tag.
            resobjkey = next(iter(resobj))
            value = resobj[resobjkey]
            existing = target.get(resobjkey)
            if existing is None:
                target[resobjkey] = value
            elif isinstance(existing, list):
                # NOTE(review): a first occurrence whose own value is already a
                # list is indistinguishable from an accumulated list here, so a
                # later sibling is appended into it (behaviour kept as-is).
                existing.append(value)
            else:
                # Second occurrence of this key: promote the scalar to a list.
                target[resobjkey] = [existing, value]
    
    def checkxmlchildrentype(iters):
        """Classify the direct children of ``iters`` by whether their tags repeat.

        Args:
            iters: An ``xml.etree.ElementTree.Element``.

        Returns:
            ``DICTTYPE`` when every direct child has a distinct tag (including
            the case of no children), ``LISTTYPE`` when any tag occurs twice.
        """
        # Iterate the element itself: Element.getchildren() was removed in
        # Python 3.9 and raises AttributeError there.
        tags = [child.tag for child in iters]
        if len(set(tags)) == len(tags):
            return DICTTYPE
        return LISTTYPE
    
    def getResults(res_dicts, iters):
        """Dispatch child conversion for ``iters`` based on its child layout.

        ``checkxmlchildrentype`` decides the layout: a truthy result routes to
        ``getListResults``, a falsy one to ``getDictResults``. The chosen
        handler's return value is passed through unchanged.
        """
        handler = getListResults if checkxmlchildrentype(iters) else getDictResults
        return handler(res_dicts, iters)
    
    def iterxml(iter, res_dicts):
        """Convert element ``iter`` into an entry ``res_dicts[iter.tag]``.

        The entry starts as a dict holding the element's attributes, plus the
        stripped text under the key ``"__XmlTagText__"`` when that text is not
        blank. If the element has children, ``getResults`` then folds them in
        (and may replace the entry).

        Args:
            iter: The ``xml.etree.ElementTree.Element`` to convert. The name
                shadows the builtin but is kept for caller compatibility.
            res_dicts: Output dict, mutated in place; any existing entry for
                ``iter.tag`` is overwritten.

        Returns:
            None.
        """
        node = {}
        res_dicts[iter.tag] = node

        # Attributes become plain key/value pairs of the entry.
        node.update(iter.attrib)

        text = (iter.text or "").strip()
        if text:
            node["__XmlTagText__"] = text

        # len(element) is the child count; Element.getchildren() was removed in
        # Python 3.9 and raises AttributeError there.
        if len(iter):
            getResults(res_dicts, iter)
    
    def parserxmltojson(file_path):
        """Parse an XML document into nested dicts/lists ready for ``json.dumps``.

        Args:
            file_path: Source passed straight to ``ElementTree.parse``.

        Returns:
            A dict with a single key, the root element's tag, on success.
            If parsing raises, the error is printed and ``""`` is returned.
        """
        try:
            tree = ElementTree.parse(file_path)
        except Exception as e:
            # Common error messages and their usual causes:
            # - "multi-byte encodings are not supported": convert the file to UTF-8.
            # - "encoding specified in XML declaration is incorrect": the declared
            #   XML encoding differs from the file's actual character set.
            # - "syntax error": malformed XML, garbled characters, etc.
            # - "not well-formed (invalid token)": an editor re-saved the file as
            #   ASCII or similar, or the file's character set differs from the
            #   declared XML encoding.
            print("Parser {} Error, Errmsg: {}".format(file_path, e))
            return ""

        # ElementTree.parse() either returns a tree or raises, so no None check
        # is needed here.
        report = {}
        iterxml(tree.getroot(), report)
        return report
    
    if __name__ == "__main__":
        # Convert the sample document, then write and echo the same JSON text.
        converted = parserxmltojson("test.xml")
        rendered = json.dumps(converted, ensure_ascii=False, indent=4)
        with open("test.json", "w", encoding="utf-8") as out_file:
            out_file.write(rendered)
        print(rendered)

  • 相关阅读:
    QAbstractItemModel使用样例与解析(Model::index使用了createIndex,它会被销毁吗?被销毁了,因为栈对象出了括号就会被销毁)
    更多的人为了追求自己真正热爱的事,甚至会在职业生涯刚开始时拒绝许多高薪工作,这样的人最终都成了真正的赢家。
    MYSQL分库分表之sharding-jdbc第四篇
    MYSQL分库分表之 Sharding-JDBC第三篇
    MySQL分库分表之Sharding-JDBC第二篇
    MySQL分库分表之Sharding-JDBC第一篇
    增加复杂度的12危险信号
    ASP.NET-Core-Web-API-Best-Practices-Guide
    聚合
    浏览器输入www.baidu.com后干啥了-web性能优化指南
  • 原文地址:https://www.cnblogs.com/frisk/p/12634427.html
Copyright © 2011-2022 走看看