zoukankan      html  css  js  c++  java
  • splitFile2SmallFile

     1. Split a file into several smaller files

     1 """
     2 This is a customizable version of the standard Unix split command-line
     3 utility; because it is written in Python, it also works on Windows and can be
     4 easily modified; because it exports a function, its logic can also be imported
     5 and reused in other applications.
     6 """
     7 import sys,os
     kilobytes = 1024
     # 1000 KiB rather than 1024 KiB; presumably mirrors floppy-disk "megabytes"
     # (a 1.44 MB floppy holds 1440 KiB) -- confirm before changing.
     megabytes = kilobytes * 1000
     chunksize = int(1.4 * megabytes)  # default part size: roughly a floppy


     def split(fromfile, todir, chunksize=chunksize):
         """Split the file *fromfile* into numbered part files inside *todir*.

         Parts are named ``part0001``, ``part0002``, ... and each holds at most
         *chunksize* bytes read from *fromfile* in binary mode.

         If *todir* does not exist it is created with ``os.mkdir``; otherwise
         every entry currently listed in it is deleted with ``os.remove``
         before any part is written.

         Returns the number of part files written (0 for an empty input file).

         Raises:
             OSError: propagated unchanged from the directory and file
                 operations; the caller is expected to handle it.
             AssertionError: if more than 9999 parts were produced, since the
                 4-digit part names would no longer all have the same width.
         """
         if not os.path.exists(todir):
             os.mkdir(todir)
         else:
             # Start from an empty directory so stale parts never mix with new ones.
             for fname in os.listdir(todir):
                 os.remove(os.path.join(todir, fname))

         partnum = 0
         # Context managers guarantee both handles are closed even when a read
         # or write fails part-way through.
         with open(fromfile, 'rb') as source:
             while True:
                 chunk = source.read(chunksize)
                 if not chunk:
                     break
                 partnum += 1
                 filename = os.path.join(todir, 'part%04d' % partnum)
                 with open(filename, 'wb') as part:
                     part.write(chunk)

         # Raised explicitly (same exception type as the former assert statement)
         # so the check is not stripped when Python runs with -O.
         if partnum > 9999:
             raise AssertionError('too many parts: %d (limit is 9999)' % partnum)
         return partnum
    31 
    if __name__ == '__main__':
        # Command-line driver: "-help" prints usage; fewer than two real
        # arguments switches to interactive prompts; otherwise the file,
        # target directory and optional chunk size are taken from sys.argv.
        if len(sys.argv) == 2 and sys.argv[1] == '-help':
            print('use:split.py [file to split target-dir [chunksize]]')
        else:
            if len(sys.argv) < 3:
                interactive = True
                fromfile = input('File to be split?')
                todir = input('directory to store part files?')
            else:
                interactive = False
                fromfile, todir = sys.argv[1:3]
                if len(sys.argv) == 4:
                    # Overrides the module-level default part size.
                    chunksize = int(sys.argv[3])
            absfrom, absto = map(os.path.abspath, [fromfile, todir])
            print('splitting', absfrom, 'to', absto, 'by', chunksize)
            try:
                parts = split(fromfile, todir, chunksize)
            except Exception as exc:
                # Report ordinary failures, but let KeyboardInterrupt and
                # SystemExit propagate instead of being swallowed.
                print('error during split:')
                print(type(exc), exc)
            else:
                print('split finished:', parts, 'parts are in ', absto)
            if interactive:
                input('press enter key')  # pause if clicked
    55             
    View Code

    split to 200k

    2. CopyAllFiles: recursively copy a directory tree

      1 """
      2 Usage: 'python cpall.py dirFrom dirTo'
      3 Recursive copy of a directory tree. Works like a 'cp -r dirFrom/* dirTo'
      4 Unix command, and assumes that dirFrom and dirTo are both directories.
      5 Was written to get around fatal error messages under Windows drag-and-drop
      6 copies (the first bad file ends the entire copy operation immediately),
      7 but also allows for coding more customized copy operations in Python.
      8 """
      9 
     10 import os,sys
     maxfileload = 100000    # files up to this many bytes are copied in one read
     blksize = 1024 * 500    # read size used for files larger than maxfileload


     def copyfile(pathFrom, pathTo, maxfileload=maxfileload):
         """Copy one file from *pathFrom* to *pathTo*, byte for byte.

         Both files are opened in binary mode, so no text decoding or
         end-of-line translation takes place. A source no larger than
         *maxfileload* bytes is read and written in a single step; a larger
         one is streamed in blocks of ``blksize`` bytes.

         Raises:
             OSError: propagated from ``os.path.getsize``, ``open``, or the
                 read/write calls.
         """
         # Size is checked before anything is opened, so a missing source fails
         # without creating the destination.
         fits_in_memory = os.path.getsize(pathFrom) <= maxfileload
         # The with-statement closes (and flushes) both files even on error.
         with open(pathFrom, 'rb') as fileFrom, open(pathTo, 'wb') as fileTo:
             if fits_in_memory:
                 fileTo.write(fileFrom.read())
             else:
                 while True:
                     bytesFrom = fileFrom.read(blksize)
                     if not bytesFrom:
                         break
                     fileTo.write(bytesFrom)
     30             
     def copytree(dirFrom, dirTo, verbose=0):
         """Copy the contents of *dirFrom* and everything below it into *dirTo*.

         *dirTo* must already exist. Every entry that is not a directory is
         copied with ``copyfile``; every directory is created under *dirTo*
         with ``os.mkdir`` and then copied recursively.

         *verbose* controls tracing: any true value prints each directory
         being copied, a value greater than 1 also prints each file. The same
         level is applied at every depth of the tree.

         A failure on one file or one subdirectory is reported on stdout and
         that entry is skipped; the rest of the copy continues.

         Returns a ``(files, dirs)`` tuple counting the files copied and the
         directories created.
         """
         fcount = dcount = 0
         for filename in os.listdir(dirFrom):
             pathFrom = os.path.join(dirFrom, filename)
             pathTo = os.path.join(dirTo, filename)
             if not os.path.isdir(pathFrom):
                 try:
                     if verbose > 1:
                         print('copying', pathFrom, 'to', pathTo)
                     copyfile(pathFrom, pathTo)
                     fcount += 1
                 except Exception as exc:
                     # Skip the bad file but keep going; Ctrl-C still aborts.
                     print('error copying', pathFrom, 'to', pathTo, '--skipped')
                     print(type(exc), exc)
             else:
                 if verbose:
                     print('copying dir', pathFrom, 'to', pathTo)
                 try:
                     os.mkdir(pathTo)
                     # Pass verbose down so nested levels are traced as well.
                     below = copytree(pathFrom, pathTo, verbose)
                     fcount += below[0]
                     dcount += below[1]
                     dcount += 1
                 except Exception as exc:
                     print('error creating', pathTo, '--skipped')
                     print(type(exc), exc)
         return (fcount, dcount)
     63 
     def getargs():
         """Read and validate the two directory arguments from ``sys.argv``.

         Expects exactly two command-line arguments, ``dirFrom`` and ``dirTo``.
         ``dirFrom`` must be an existing directory. ``dirTo`` is created with
         ``os.mkdir`` when it does not exist; when it does exist it must not be
         the same directory as ``dirFrom``.

         Returns ``(dirFrom, dirTo)`` on success. On any validation problem a
         message is printed and ``None`` is returned.
         """
         try:
             dirFrom, dirTo = sys.argv[1:]
         except ValueError:
             # Wrong number of command-line arguments.
             print('usage error:cpall.py dirFrom dirTo')
             return None

         if not os.path.isdir(dirFrom):
             print('error:dirFrom is not a dir')
         elif not os.path.exists(dirTo):
             os.mkdir(dirTo)
             print('note:dirTo was created')
             return (dirFrom, dirTo)
         else:
             print('warning:dirto already xists')
             # os.path.samefile is preferred when available; otherwise fall back
             # to comparing absolute paths.
             if hasattr(os.path, 'samefile'):
                 same = os.path.samefile(dirFrom, dirTo)
             else:
                 same = os.path.abspath(dirFrom) == os.path.abspath(dirTo)
             if same:
                 print('error :dirfrom same as dirTo')
             else:
                 return (dirFrom, dirTo)
         return None
     89             
     if __name__ == '__main__':
         # Command-line driver: validate the arguments, copy the tree, and
         # report the counts and the elapsed time.
         import time
         distuple = getargs()
         if distuple:
             print('copying...')
             # time.clock() was removed in Python 3.8; perf_counter() is the
             # documented replacement for measuring elapsed time.
             start = time.perf_counter()
             fcount, dcount = copytree(*distuple)
             print('copied', fcount, 'files,', dcount, 'directories')
             print('in ', time.perf_counter() - start, ' seconds')
     99     
    100             
    View Code

    3. Compare two directories and list the files unique to each

     1 """
     2 ############################################################################
     3 Usage: python dirdiff.py dir1-path dir2-path
     4 Compare two directories to find files that exist in one but not the other.
     5 This version uses the os.listdir function and list difference. Note that
     6 this script checks only filenames, not file contents -- see diffall.py for an
     7 extension that does the latter by comparing .read() results.
     8 #############################################################################
     9 """
    10 
    11 import os,sys
    12 
    def reportdiffs(unique1, unique2, dir1, dir2):
        """Print a report of the names found in only one of two directories.

        *unique1* holds the names present only in *dir1* and *unique2* those
        present only in *dir2*. When both are empty a single "identical" line
        is printed; otherwise each non-empty group is listed under a heading
        naming its directory, *dir1* first.
        """
        if not unique1 and not unique2:
            print('directory lists are identical')
            return
        for names, dirname in ((unique1, dir1), (unique2, dir2)):
            if names:
                print('files unique to', dirname)
                for name in names:
                    print('......', name)
    28                 
    def difference(seq1, seq2):
        """Return a list of the items of *seq1* that do not occur in *seq2*.

        The result keeps the order (and any duplicates) of *seq1*. A set
        difference would also work, but sets are unordered, so any
        platform-dependent directory order would be lost.
        """
        only_in_first = []
        for candidate in seq1:
            if candidate in seq2:
                continue
            only_in_first.append(candidate)
        return only_in_first
    36 
    def comparedirs(dir1, dir2, files1=None, files2=None):
        """Compare the name listings of two directories and report differences.

        Only names are compared, never file contents. *files1* / *files2* may
        supply listings the caller already has; when one is ``None`` the
        corresponding directory is read with ``os.listdir``. A bytes argument
        to listdir may be needed for undecodable filenames on some platforms.

        The differences are printed via ``reportdiffs``.

        Returns ``True`` when neither directory has a name the other lacks,
        ``False`` otherwise.
        """
        print('comparing', dir1, 'to', dir2)
        files1 = os.listdir(dir1) if files1 is None else files1
        files2 = os.listdir(dir2) if files2 is None else files2
        unique1 = difference(files1, files2)
        unique2 = difference(files2, files1)
        reportdiffs(unique1, unique2, dir1, dir2)
        # A 2-tuple is always truthy, so "not (unique1, unique2)" was always
        # False; test the two lists themselves.
        return not (unique1 or unique2)
    49 
    def getargs():
        """Return ``(dir1, dir2)`` taken from the command line.

        Expects exactly two arguments after the script name. With any other
        number of arguments a usage line is printed and the process exits
        with status 1 via ``sys.exit``.
        """
        try:
            dir1, dir2 = sys.argv[1:]
        except ValueError:
            # Unpacking fails only when the argument count is not exactly two.
            print('usage:dirdiff.py dir1 dir2')
            sys.exit(1)
        else:
            return dir1, dir2
    59 
    if __name__ == '__main__':
        # Script entry point: compare the two directories named on the
        # command line.
        comparedirs(*getargs())
    63     
    View Code

  • 相关阅读:
    Linux基础命令(一)
    You've made choice
    protege推理
    字符编码
    第二次作业
    数据类型-集合set
    数据类型-元组&字典
    数据类型-列表
    数据类型-数值&字符串
    流程控制之for循环
  • 原文地址:https://www.cnblogs.com/lxk613/p/4827677.html
Copyright © 2011-2022 走看看