1. 将大文件拆分为小文件
I 以二进制方式读取大文件,将其拆分成若干小块,每一块保存为一个独立的文件,全部存放在同一个目录下
II 提供两种操作方式:交互式和命令行模式
#!/usr/bin/python
# -*- coding:utf-8 -*-
# Split a large file into fixed-size part files stored in one directory.
# Works interactively (prompts for the paths) or from the command line:
#     split_file.py [file-to-split target-dir [chunksize]]
import os
import shutil
import sys

megebytes = 1024 * 1000
# Default size in bytes of every part file except possibly the last one.
chunksize = int(1.4 * megebytes)


def clear_dir(target_dir):
    """
    Remove everything inside a directory, keeping the directory itself.

    :param target_dir: directory whose contents are deleted
    :return: None
    """
    for fname in os.listdir(target_dir):
        path = os.path.join(target_dir, fname)
        if os.path.isdir(path) and not os.path.islink(path):
            # os.rmdir only works on empty directories; rmtree also removes
            # sub-directories that still contain files.
            shutil.rmtree(path)
        else:
            # Regular files and symbolic links are unlinked, never followed.
            os.remove(path)


def split(fromfile, todir, chunksize=chunksize):
    """
    Split a file into numbered binary parts named part0001, part0002, ...

    An existing target directory is emptied first; a missing one is created
    together with any missing parent directories.

    :param fromfile: path of the file to split
    :param todir: directory that receives the part files
    :param chunksize: maximum number of bytes written to each part
    :return: number of part files written
    :raises AssertionError: if more than 9999 parts were produced
    """
    if os.path.exists(todir):
        clear_dir(todir)
    else:
        os.makedirs(todir)
    partnum = 0
    with open(fromfile, 'rb') as source:
        while True:
            tmpdata = source.read(chunksize)
            if not tmpdata:
                break
            partnum += 1
            filename = os.path.join(todir, 'part{0:04d}'.format(partnum))
            with open(filename, 'wb') as fileobj:
                fileobj.write(tmpdata)
    # Part names carry a 4-digit counter; beyond 9999 the names grow to five
    # digits and no longer sort in numeric order. Raised explicitly so the
    # check is not stripped when Python runs with -O.
    if partnum > 9999:
        raise AssertionError('too many parts: {0}'.format(partnum))
    return partnum


def main():
    """
    Run the splitter from the command line or interactively.

    With fewer than two path arguments the paths are read from standard
    input; an optional third argument overrides the module-level chunksize.
    """
    global chunksize
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('Use:split_file.py [file-to-split target-dir [chunksize]]')
        return
    if len(sys.argv) < 3:
        interactive = True
        fromfile = input('enter the file to split:')
        todir = input('enter the dir to hold the split info:')
    else:
        interactive = False
        fromfile, todir = sys.argv[1:3]
        if len(sys.argv) == 4:
            chunksize = int(sys.argv[3])
    absfrom, absto = map(os.path.abspath, [fromfile, todir])
    print('spliting from {0} to {1} by {2}'.format(absfrom, absto, chunksize))
    try:
        parts = split(absfrom, absto, chunksize)
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C and SystemExit
        # still propagate, and show the cause instead of hiding it.
        print('error during split')
        print('{0}: {1}'.format(type(exc).__name__, exc))
    else:
        print('split finished:{0} parts are in {1}'.format(parts, absto))
    if interactive:
        print('input any key')


if __name__ == '__main__':
    main()
2. 将拆分之后的文件重新合并
I 将拆分后的各个文件按文件名顺序以二进制方式读取,再以二进制方式依次写入同一个目标文件
II 提供两种操作方式:交互式和命令行模式
# Rebuild a file from the part files stored in one directory.
# Works interactively (prompts for the paths) or from the command line.
import os
import shutil
import sys

# Number of bytes copied per read while concatenating the parts.
readsize = 1024


def join(fromdir, tofile):
    """
    Concatenate the files produced by split_file back into a single file.

    Every entry of fromdir is appended to tofile in sorted name order.

    :param fromdir: directory holding the part files
    :param tofile: path of the file to (re)create
    :return: None
    """
    # os.listdir returns names in arbitrary order; the parts must be
    # appended in name order or the rebuilt file is scrambled.
    partfiles = sorted(os.listdir(fromdir))
    with open(tofile, 'wb') as output:
        for eachpart in partfiles:
            filepath = os.path.join(fromdir, eachpart)
            with open(filepath, 'rb') as fileobj:
                shutil.copyfileobj(fileobj, output, readsize)


if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('using join [from dir nme] [to file name]')
    else:
        if len(sys.argv) != 3:
            fromdir = input('Enter the from dir')
            tofile = input('Enter the to file')
        else:
            fromdir = sys.argv[1]
            tofile = sys.argv[2]
        fromdir, tofile = map(os.path.abspath, [fromdir, tofile])
        print('joining')
        try:
            join(fromdir, tofile)
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C and SystemExit
            # still propagate, and show the cause instead of hiding it.
            print("Error during joining file")
            print('{0}: {1}'.format(type(exc).__name__, exc))
        else:
            print("joining completed")