zoukankan      html  css  js  c++  java
  • python 小程序大文件的拆分合并

    1. 将大文件拆分为小文件

        I 通过二进制的方式读取大文件,将其拆分后以多个小文件的形式存放在同一个目录下面

        II 提供两种操作方式交互式和命令行模式

    #!/usr/bin/python
    # -*- coding:utf-8 -*-
    
    import sys, os
    
    # Size unit used for the default chunk size: 1024 * 1000 bytes.
    # NOTE(review): the name is a misspelling of "megabytes"; it is kept as-is
    # because other code may reference this module-level name.
    megebytes = 1024 * 1000
    # Default number of bytes per part file (1.4 of the unit above, truncated
    # to an int).
    chunksize = int(1.4 * megebytes)
    
    
    def clear_dir(target_dir):
        """
        Remove every entry inside a directory, leaving the directory itself.

        Real sub-directories are removed recursively, so a non-empty
        sub-directory no longer makes the call fail (``os.rmdir`` only removes
        empty directories). Everything else (regular files and symbolic
        links) is unlinked.

        :param target_dir: path of the directory to empty
        :return: None
        :raises OSError: if the directory cannot be listed or an entry cannot
            be removed
        """
        # Imported locally so this function does not rely on a file-level import.
        import shutil

        for fname in os.listdir(target_dir):
            path = os.path.join(target_dir, fname)
            # rmtree refuses to operate on symlinks, so links to directories
            # are unlinked like files instead of being followed.
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            else:
                os.remove(path)
    
    
    def split(fromfile, todir, chunksize=chunksize):
        """
        Split a file into numbered binary part files.

        The source file is read in binary mode, ``chunksize`` bytes at a time,
        and each chunk is written to ``todir`` as ``part0001``, ``part0002``,
        and so on. If ``todir`` already exists, its contents are removed
        first; otherwise it is created together with any missing parents.

        :param fromfile: path of the file to split
        :param todir: directory that receives the part files
        :param chunksize: maximum number of bytes per part file; must be >= 1
        :return: number of part files written
        :raises ValueError: if ``chunksize`` is smaller than 1, or if the file
            would need more than 9999 parts (the four-digit part number
            would overflow and break name ordering)
        :raises OSError: if a file or directory cannot be read or written
        """
        # read(0) returns no data (zero parts, silently) and a negative size
        # reads the whole file at once, so reject both up front.
        if chunksize < 1:
            raise ValueError('chunksize must be at least 1 byte')

        if not os.path.exists(todir):
            os.makedirs(todir)
        else:
            clear_dir(todir)
        partnum = 0

        # Named "source" rather than "input" to avoid shadowing the builtin.
        with open(fromfile, "rb") as source:
            while True:
                tmpdata = source.read(chunksize)
                if not tmpdata:
                    break
                partnum += 1
                # Checked before writing, and with a real exception, so the
                # limit still holds when Python runs with -O.
                if partnum > 9999:
                    raise ValueError('more than 9999 parts needed; use a larger chunksize')
                filename = os.path.join(todir, ('part{0:04d}'.format(partnum)))
                with open(filename, 'wb') as fileobj:
                    fileobj.write(tmpdata)

        return partnum
    
    
    def main():
        """
        Command-line / interactive entry point for splitting a file.

        Usage: ``split_file.py [file-to-split target-dir [chunksize]]``.
        With ``-help`` as the only argument the usage line is printed and
        nothing else happens. With fewer than two positional arguments the
        source file and target directory are asked for interactively. An
        optional third argument overrides the module-level ``chunksize``.

        :return: None
        """
        global chunksize
        if len(sys.argv) == 2 and sys.argv[1] == '-help':
            print('Use:split_file.py [file-to-split target-dir [chunksize]]')
            # Stop here: the code below needs paths that the help path never sets.
            return

        if len(sys.argv) < 3:
            interactive = True
            fromfile = input('enter the file to split:')
            todir = input('enter the dir to hold the split info:')
        else:
            interactive = False
            fromfile, todir = sys.argv[1:3]
            if len(sys.argv) == 4:
                chunksize = int(sys.argv[3])

        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('spliting from {0} to {1} by {2}'.format(absfrom, absto, chunksize))

        try:
            parts = split(absfrom, absto, chunksize)
        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C still interrupts the
            # program; the cause is reported instead of being hidden.
            print('error during split: {0}'.format(exc))
        else:
            print('split finished:{0} parts are in {1}'.format(parts, absto))
        if interactive:
            print('input any key')
    # Run the command-line / interactive front end only when this file is
    # executed as a script, not when it is imported.
    if __name__ == "__main__":
        main()
    

    2. 将拆分之后的文件重新合并

        I 将拆分后的文件以二进制的方式读取,再以二进制的方式保存

        II 提供两种操作方式交互式和命令行模式

    import sys
    import os
    
    # Number of bytes copied per read while joining part files.
    readsize = 1024


    def join(fromdir, tofile):
        """
        Rebuild the original file from the part files written by split_file.

        Every entry of ``fromdir`` is read in binary mode and appended to
        ``tofile`` in sorted name order. Sorting matters: ``os.listdir``
        returns names in arbitrary order, and the zero-padded ``partNNNN``
        names only reproduce the original byte sequence when processed in
        ascending order.

        :param fromdir: directory containing the part files
        :param tofile: path of the file to (re)create
        :return: None
        :raises OSError: if the directory or any file cannot be read or written
        """
        partfiles = sorted(os.listdir(fromdir))
        with open(tofile, 'wb') as output:
            for eachpart in partfiles:
                filepath = os.path.join(fromdir, eachpart)
                with open(filepath, 'rb') as fileobj:
                    while True:
                        # Named "chunk" to avoid shadowing the builtin bytes.
                        chunk = fileobj.read(readsize)
                        if not chunk:
                            break
                        output.write(chunk)
    
    
    # Script entry point: join part files either from command-line arguments
    # (from-dir, to-file) or, when they are not both given, interactively.
    if __name__ == '__main__':
        if len(sys.argv) == 2 and sys.argv[1] == '-help':
            print('using join [from dir nme] [to file name]')
        else:
            if len(sys.argv) != 3:
                fromdir = input('Enter the from dir')
                tofile = input('Enter the to file')
            else:
                fromdir = sys.argv[1]
                tofile = sys.argv[2]

            # Everything below stays inside the else branch: after -help no
            # paths are defined, so there is nothing to join.
            fromdir, tofile = map(os.path.abspath, [fromdir, tofile])
            print('joining')

            try:
                join(fromdir, tofile)
            except Exception as exc:
                # Exception (not a bare except) so Ctrl-C still interrupts
                # the program; the cause is reported instead of being hidden.
                print("Error during joining file: {0}".format(exc))
            else:
                print("joining completed")
    
  • 相关阅读:
    Python Revisited Day 13 (正则表达式)
    Python Revisited Day 06 (面向对象程序设计)
    Python Revisited (变量)
    Python Revisited Day 05(模块)
    Python Revisited Day 04 (控制结构与函数)
    Python Revisited Day 03 (组合数据类型)
    Numpy
    Python Revisited Day 01
    Python3使用openpyxl读写Excel文件
    Python3操作YAML文件
  • 原文地址:https://www.cnblogs.com/someoneHan/p/6246703.html
Copyright © 2011-2022 走看看