Python3实现xml转json文件

当前位置:
首页 > 编程开发 > 数据分析 >

Python3实现xml转json文件
本文使用 Python 标准库 xml.etree.ElementTree 实现，测试环境为 Python 3.6.6。
from xml.etree import ElementTree
import json

# Return codes of checkxmlchildrentype(): children with repeated tags are
# collected as a list (LISTTYPE); children with unique tags are merged into
# a dict (DICTTYPE).
LISTTYPE = 1
DICTTYPE = 0


def getDictResults(res_dicts, iters):
    """Merge the converted children of ``iters`` into ``res_dicts[iters.tag]``.

    Used when every direct child has a distinct tag, so each child can be
    stored under its own tag name.

    Args:
        res_dicts: Mapping that already holds a dict entry for ``iters.tag``.
        iters: The parent ``Element`` whose children are converted.
    """
    result_dicts = {}
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in iters:
        iterxml(child, result_dicts)

    if result_dicts:
        res_dicts[iters.tag].update(result_dicts)


def getListResults(res_dicts, iters):
    """Store the converted children of ``iters`` when some tags repeat.

    If the parent entry is still empty (no attributes, no text), it is
    replaced by a list of single-key dicts, one per child, in document order.
    Otherwise the children are merged into the existing dict, and a key that
    occurs more than once is turned into a list of its values.

    Args:
        res_dicts: Mapping that already holds a dict entry for ``iters.tag``.
        iters: The parent ``Element`` whose children are converted.
    """
    result_lists = []
    for child in iters:
        child_dict = {}
        iterxml(child, child_dict)
        result_lists.append(child_dict)

    if not result_lists:
        return

    target = res_dicts[iters.tag]
    if not target:
        # The parent carries nothing of its own: represent it as a plain list.
        res_dicts[iters.tag] = result_lists
        return

    for resobj in result_lists:
        # iterxml() writes exactly one key (the child's tag) into each dict.
        key, value = next(iter(resobj.items()))
        existing = target.get(key)
        if existing is None:
            target[key] = value
        elif isinstance(existing, list):
            existing.append(value.copy())
        else:
            # Second occurrence of this key: promote the value to a list.
            target[key] = [existing, value.copy()]


def checkxmlchildrentype(iters):
    """Return ``DICTTYPE`` if all direct child tags are unique, else ``LISTTYPE``."""
    tags = [child.tag for child in iters]
    if len(set(tags)) == len(tags):
        return DICTTYPE
    return LISTTYPE


def getResults(res_dicts, iters):
    """Dispatch to the list or dict collector based on the children's tags."""
    if checkxmlchildrentype(iters) == LISTTYPE:
        return getListResults(res_dicts, iters)
    return getDictResults(res_dicts, iters)


def iterxml(iter, res_dicts):
    """Convert the element ``iter`` and store it in ``res_dicts[iter.tag]``.

    Attributes become string entries, non-blank text is stored stripped under
    the ``"__XmlTagText__"`` key, and children are converted recursively.

    Args:
        iter: The ``Element`` to convert (name kept for compatibility even
            though it shadows the builtin inside this function).
        res_dicts: Dict that receives the result; callers pass ``{}`` for the
            root element.
    """
    node = dict(iter.attrib)
    res_dicts[iter.tag] = node

    text = iter.text
    if text is not None and text.strip() != "":
        node["__XmlTagText__"] = text.strip()

    # len(element) is the number of direct children; it replaces the removed
    # getchildren() call. getResults() may rebind res_dicts[iter.tag] to a
    # list, so it receives the container rather than ``node``.
    if len(iter):
        getResults(res_dicts, iter)

def parserxmltojson(file_path):
    """Parse the XML file at ``file_path`` into a JSON-serialisable structure.

    Args:
        file_path: Path of the XML document to read.

    Returns:
        A dict keyed by the root element's tag on success. If the file cannot
        be parsed, a message is printed and the empty string is returned.
    """
    report = {}

    try:
        tree = ElementTree.parse(file_path)
    except Exception as err:
        # Error messages seen in practice and what they usually mean:
        #   "multi-byte encodings are not supported"
        #       -> convert the file to UTF-8.
        #   "encoding specified in XML declaration is incorrect"
        #       -> the declared XML encoding differs from the file's charset.
        #   "syntax error"
        #       -> malformed markup, garbled characters, etc.
        #   "not well-formed (invalid token)"
        #       -> an editor re-saved the file as ASCII or similar, or the
        #          file's charset does not match the declared XML encoding.
        print("Parser {} Error, Errmsg: {}".format(file_path, err))
        return ""
    else:
        if tree is None:
            print("{} is None.".format(file_path))
            return ""

    # iterxml() fills ``report`` in place, starting from the root element.
    iterxml(tree.getroot(), report)
    return report

if __name__ == "__main__":
    # Convert the sample document, then emit the same pretty-printed JSON
    # text to both test.json and standard output.
    converted = parserxmltojson("test.xml")
    rendered = json.dumps(converted, ensure_ascii=False, indent=4)
    with open("test.json", "w", encoding="utf-8") as out_file:
        out_file.write(rendered)
    print(rendered)
栏目列表