zoukankan      html  css  js  c++  java
  • python 比较两文件夹的内容,具有通用性。

    #-*-coding:utf-8-*-    
     
    #===============================================================================  
    # 目录对比工具(包含子目录 ),并列出
    # 1、A比B多了哪些文件  
    # 2、B比A多了哪些文件  
    # 3、二者相同的文件:文件大小相同 VS 文件大小不同  (Size相同文件不打印:与Size不同文件显示未排序)
    # 4、可通过修改,比较文件名且包含文件格式或者只比较文件名但不包括格式后缀。
    #===============================================================================  
     
    import os, time,difflib,sys


    # Module-level lists labelled by the original author as EE / SVN / both.
    # Nothing in the visible code reads or writes them; dirCompare works on
    # its own local collections.
    AFILES = []  # "EE" side (first directory)
    BFILES = []  # "SVN" side (second directory)
    COMMON = []  # entries shared by the EE and SVN sides
    def getPrettyTime(state):
        """Format the modification time of a stat result for display.

        state -- an object exposing ``st_mtime`` (seconds since the epoch),
                 such as the value returned by ``os.stat``.

        Returns the local time as a ``'yy-mm-dd HH:MM:SS'`` string.
        """
        modified = time.localtime(state.st_mtime)
        return time.strftime('%y-%m-%d %H:%M:%S', modified)

    # def getpathsize(dir): #获取文件大小的函数,未用上,仅供学习.故注释掉
    #     size=0
    #     for root, dirs, files in os.walk(dir):
    #     #root:目录:str 如: C:CopySVNSystemObjectTopoProcedureBuilt-in
    #     #dirs:目录名称:列表: 如 ['Parsers']
    #     #files:名称:列表: 如 ['011D0961FB42416AA49D5E82945DE7E9.og',...]
    #     #file:目录:str, 如 011D0961FB42416AA49D5E82945DE7E9.og
    #         for file in files:
    #             path = os.path.join(root,file)
    #             size = os.path.getsize(path)
    #     return size

    def _relativeFiles(top):
        """Return the set of all file paths below *top*, relative to *top*."""
        found = set()
        for root, dirs, files in os.walk(top):
            for name in files:
                # os.walk's root carries no trailing separator, so the name
                # must be joined (not concatenated) to keep sub-directory
                # entries such as 'sub/x.txt' intact.
                found.add(os.path.relpath(os.path.join(root, name), top))
        return found


    def _readSortedLines(path):
        """Return the lines of the file at *path* in sorted order."""
        # Binary mode: the bytes are compared verbatim, so non-text files
        # cannot trigger decoding errors or newline translation.
        with open(path, 'rb') as handle:
            return sorted(handle)


    def _contentMismatch(pathA, pathB):
        """Return the message fragment describing how two files differ, or None.

        The comparison is made on the sorted lines of each file, so two files
        holding the same lines in a different order count as equal.
        """
        linesA = _readSortedLines(pathA)
        linesB = _readSortedLines(pathB)
        if len(linesA) != len(linesB):
            return " lines size not equal "
        if linesA != linesB:
            return " content not equal "
        return None


    def _writeRecords(name, records, mode):
        """Write *records*, one per line, to *name* in the current directory.

        Nothing is written (and no file is created) when *records* is empty.
        """
        if not records:
            return
        with open(os.path.join(os.getcwd(), name), mode) as out:
            out.writelines(record + '\n' for record in records)


    def dirCompare(apath,bpath):
        """Compare two directory trees and write the differences to text files.

        apath -- root of the first tree (reported as "EEresource").
        bpath -- root of the second tree (reported as "SVN").

        Files are matched by their path relative to the respective root,
        including the extension. Four report files are produced in the current
        working directory, one record per line:

        diff.txt          common files whose sizes differ
        comment_diff.txt  common files of equal size whose sorted lines differ
        Aonly.txt         relative paths found only under apath
        Bonly.txt         relative paths found only under bpath

        A report file is only created when it has at least one record.
        Aonly.txt and Bonly.txt are deleted before being rewritten, whereas
        diff.txt and comment_diff.txt are appended to, so the latter two
        accumulate records across runs.

        Raises OSError if a listed file cannot be stat'ed or opened.
        """
        setA = _relativeFiles(apath)
        setB = _relativeFiles(bpath)
        # Single-argument print produces the same text on Python 2 and 3.
        print('%s All files numbers: %d' % (apath, len(setA)))
        print('%s All files numbers: %d' % (bpath, len(setB)))

        sizeDiffs = []
        contentDiffs = []
        for rel in sorted(setA & setB):
            fileA = os.path.join(apath, rel)
            fileB = os.path.join(bpath, rel)
            sA = os.path.getsize(fileA)
            sB = os.path.getsize(fileB)
            if sA != sB:
                sizeDiffs.append(
                    "File Name=%s    EEresource file size:%d   !=  SVN file size:%d"
                    % (rel, sA, sB))
                continue
            # Same size does not imply same content, so look at the lines too.
            reason = _contentMismatch(fileA, fileB)
            if reason is not None:
                contentDiffs.append(fileA + reason + fileB)

        _writeRecords('diff.txt', sizeDiffs, 'a')
        _writeRecords('comment_diff.txt', contentDiffs, 'a')

        # Files present on one side only; set difference avoids scanning lists.
        aonlyFiles = sorted(setA - setB)
        bonlyFiles = sorted(setB - setA)
        print('%s only files numbers: %d' % (apath, len(aonlyFiles)))
        print('%s only files numbers: %d' % (bpath, len(bonlyFiles)))

        # Drop the one-sided reports of any earlier run before rewriting them.
        for stale in ('Aonly.txt', 'Bonly.txt'):
            target = os.path.join(os.getcwd(), stale)
            if os.path.exists(target):
                os.remove(target)

        _writeRecords('Aonly.txt', aonlyFiles, 'a')
        _writeRecords('Bonly.txt', bonlyFiles, 'a')

    if __name__ == '__main__':
        # Two positional arguments are required: the "EE" directory and the
        # "SVN" directory. Exit with a usage message instead of an IndexError
        # traceback when they are missing; extra arguments are ignored.
        if len(sys.argv) < 3:
            sys.exit('usage: %s <FolderEE> <FolderSVN>' % sys.argv[0])
        FolderEE = sys.argv[1]
        FolderSVN = sys.argv[2]
        dirCompare(FolderEE, FolderSVN)
        print("done!")

    ps:本文参考 http://www.cnblogs.com/luo-mao/p/5872532.html 猫儿爹,经修改符合自身使用,感谢作者。

  • 相关阅读:
    力拓题目 5-8-575,657,707,771
    力拓题目1-4-7,217,344,557
    解码,编码,文件的基本操作
    集合类型内置方法和拷贝浅拷贝深拷贝
    列表元祖字典内置方法
    数字类型内置方法
    字符串类型内置方法
    hdu2262 高斯消元
    hdu1757 构造矩阵
    poj1222 高斯消元
  • 原文地址:https://www.cnblogs.com/yangwithtao/p/6937349.html
Copyright © 2011-2022 走看看