zoukankan      html  css  js  c++  java
  • python读xml写到txt

    读取xml信息,写到txt中。

    这个版本在当前路径下执行,只处理当前目录中的 xml 文件(不包含子目录)。

    # -*- coding: utf-8 -*-
    
    import os
    import xml.dom.minidom
    
    def extract_xml_to_txt(srcdir, dstdir):
        """Convert each ``.xml`` file directly inside *srcdir* into a ``.txt`` file in *dstdir*.

        For every ``LineInfo`` element found in a document, one line is written:
        the eight corner attributes (``ptLTX``, ``ptLTY``, ``ptLBX``, ``ptLBY``,
        ``ptRTX``, ``ptRTY``, ``ptRBX``, ``ptRBY``) followed by ``Chars``, joined
        with commas. An attribute that is absent is written as an empty field.
        Field values are not escaped, so a comma inside ``Chars`` is written as-is.

        Args:
            srcdir: Directory that is listed (non-recursively) for ``.xml`` files.
            dstdir: Existing directory that receives the ``.txt`` files; each
                output keeps the source file's base name.

        Raises:
            OSError: If a directory or file cannot be read or written.
            xml.parsers.expat.ExpatError: If a source file is not well-formed XML.
        """
        coord_attrs = ("ptLTX", "ptLTY", "ptLBX", "ptLBY",
                       "ptRTX", "ptRTY", "ptRBX", "ptRBY")
        for filename in os.listdir(srcdir):
            # Require a real ".xml" extension. A name that merely ends in "xml"
            # would map onto itself and be truncated when srcdir == dstdir.
            if not filename.endswith('.xml'):
                continue
            file = os.path.join(srcdir, filename)
            # Swap only the extension; str.replace would touch every ".xml"
            # occurrence in the name.
            dstfile = os.path.join(dstdir, os.path.splitext(filename)[0] + '.txt')
            print("processing file", dstfile)
            # Parse before opening the output so that a malformed document does
            # not leave an empty .txt file or an unclosed handle behind.
            content_tree = xml.dom.minidom.parse(file)
            content = content_tree.documentElement
            print(content)
            with open(dstfile, 'w', encoding='utf-8') as file_lineinfo:
                for lineinfo in content.getElementsByTagName('LineInfo'):
                    fields = []
                    for attr in coord_attrs:
                        # minidom's getAttribute returns "" for a missing
                        # attribute, so every field is always defined and no
                        # value leaks in from the previous element.
                        value = lineinfo.getAttribute(attr)
                        if lineinfo.hasAttribute(attr):
                            # "ptLTX" is reported as "LTX:", and so on.
                            print(attr[2:] + ":", value)
                        fields.append(value)
                    chars = lineinfo.getAttribute("Chars").strip('\n')
                    if lineinfo.hasAttribute("Chars"):
                        print("Chars:", chars)
                    fields.append(chars)
                    file_lineinfo.write(','.join(fields) + '\n')
    
    
    if __name__ == '__main__':
        # Script entry point: the current working directory serves as both the
        # source of the .xml files and the destination of the .txt files.
        working_dir = os.getcwd()
        extract_xml_to_txt(working_dir, working_dir)

    这个版本会递归处理当前路径及其所有子目录下的 xml 文件。

    # -*- coding: utf-8 -*-
    
    import os
    import xml.dom.minidom
    
    def getFiles(path, suffix):
        """Collect the paths of all files below *path* whose names end with *suffix*.

        The tree is traversed with ``os.walk``, so files in nested
        sub-directories are included. Each returned entry is the walked
        directory joined with the file name.

        Args:
            path: Root directory of the traversal.
            suffix: Trailing string a file name must have, e.g. ``'.xml'``.

        Returns:
            A list of matching file paths, in the order ``os.walk`` yields them.
        """
        matches = []
        for root, _dirs, names in os.walk(path):
            for name in names:
                if name.endswith(suffix):
                    matches.append(os.path.join(root, name))
        return matches
    
    def extract_xml_to_txt(srcdir, dstdir):
        """Recursively convert the ``.xml`` files under *srcdir* into ``.txt`` files under *dstdir*.

        Source files are located with ``getFiles(srcdir, '.xml')``. Each output
        file is placed at the same path relative to *dstdir* that its source has
        relative to *srcdir*, with the extension changed to ``.txt``; missing
        destination sub-directories are created.

        For every ``LineInfo`` element, one line is written: the eight corner
        attributes (``ptLTX``, ``ptLTY``, ``ptLBX``, ``ptLBY``, ``ptRTX``,
        ``ptRTY``, ``ptRBX``, ``ptRBY``) followed by ``Chars``, joined with
        commas. An absent corner attribute is written as an empty field.
        Elements whose ``Chars`` is empty (or absent) or whose ``ptRBY`` equals
        ``'-1'`` are skipped. Field values are not escaped.

        Args:
            srcdir: Root directory searched for ``.xml`` files.
            dstdir: Root directory that receives the ``.txt`` files.

        Raises:
            OSError: If a directory or file cannot be read, created or written.
            xml.parsers.expat.ExpatError: If a source file is not well-formed XML.
        """
        coord_attrs = ("ptLTX", "ptLTY", "ptLBX", "ptLBY",
                       "ptRTX", "ptRTY", "ptRBX", "ptRBY")
        # getFiles already filters on the suffix and returns paths that include
        # srcdir, so they are used directly instead of being joined again.
        for file in getFiles(srcdir, '.xml'):
            # Mirror the source layout under dstdir. Joining dstdir with the
            # full source path would ignore dstdir for absolute paths and
            # duplicate the srcdir prefix for relative ones.
            rel_path = os.path.relpath(file, srcdir)
            dstfile = os.path.join(dstdir, os.path.splitext(rel_path)[0] + '.txt')
            print("processing file", dstfile)
            # Parse before opening the output so that a malformed document does
            # not leave an empty .txt file or an unclosed handle behind.
            content_tree = xml.dom.minidom.parse(file)
            content = content_tree.documentElement
            print(content)
            dst_parent = os.path.dirname(dstfile)
            if dst_parent:
                os.makedirs(dst_parent, exist_ok=True)
            with open(dstfile, 'w', encoding='utf-8') as file_lineinfo:
                for lineinfo in content.getElementsByTagName('LineInfo'):
                    fields = []
                    for attr in coord_attrs:
                        # minidom's getAttribute returns "" for a missing
                        # attribute, so every field is always defined and no
                        # value leaks in from the previous element.
                        value = lineinfo.getAttribute(attr)
                        if lineinfo.hasAttribute(attr):
                            # "ptLTX" is reported as "LTX:", and so on.
                            print(attr[2:] + ":", value)
                        fields.append(value)
                    chars = lineinfo.getAttribute("Chars").strip('\n')
                    if lineinfo.hasAttribute("Chars"):
                        print("Chars:", chars)
                    # fields[-1] is ptRBY, the last of the corner attributes.
                    if chars == "" or fields[-1] == '-1':
                        continue
                    fields.append(chars)
                    file_lineinfo.write(','.join(fields) + '\n')
    
    
    if __name__ == '__main__':
        # Script entry point: convert everything under the current working
        # directory and write the results back into the same tree.
        base_dir = os.getcwd()
        extract_xml_to_txt(base_dir, base_dir)
  • 相关阅读:
    拓扑排序
    最短路径(Dijkstra,SPFA,Floyd)
    最小生成树(Prim)
    最长公共子序列(DP)(二种数组实现+扩展)
    HDU3068(最长回文串)
    python pip 阿里云内网安装地址
    python matplotlib画图改为可写中文
    win10 安装 basemap
    Liunx 安装basemap
    Docker 命令大全
  • 原文地址:https://www.cnblogs.com/juluwangshier/p/13266461.html
Copyright © 2011-2022 走看看