zoukankan      html  css  js  c++  java
  • python 小程序大文件的拆分合并

    1. 将大文件拆分为小文件

        I 通过二进制的方式将大文件读取出来,按固定大小拆分后,以多个小文件的形式存放在同一个目录下面

        II 提供两种操作方式:交互式和命令行模式

    #!/usr/bin/python
    # -*- coding:utf-8 -*-
    
    import sys, os
    
    # Size unit used to express the default chunk size: 1024 * 1000 bytes.
    # NOTE(review): this is neither 1000 * 1000 nor 1024 * 1024 -- confirm the
    # mixed unit is intentional.
    megebytes = 1024 * 1000
    # Default size, in bytes, of each part file written by split(): 1.4 of the
    # units above, truncated to an integer.
    chunksize = int(1.4 * megebytes)
    
    
    def clear_dir(target_dir):
        """
        Remove every entry inside ``target_dir``, keeping the directory itself.

        Real sub-directories are removed recursively, so a non-empty
        sub-directory no longer makes the call fail. Everything else (regular
        files, symbolic links, including broken ones) is unlinked.

        :param target_dir: path of the directory to empty
        :return: None
        :raises OSError: if the directory cannot be listed or an entry cannot
            be removed
        """
        import shutil  # local import: only this function needs it

        for fname in os.listdir(target_dir):
            path = os.path.join(target_dir, fname)
            # A symlink pointing at a directory must be unlinked rather than
            # descended into, hence the explicit islink() check.
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            else:
                os.remove(path)
    
    
    def split(fromfile, todir, chunksize=chunksize):
        """
        Split ``fromfile`` into numbered part files stored in ``todir``.

        The source is read in binary mode, ``chunksize`` bytes at a time, and
        each chunk is written to ``todir`` as ``part0001``, ``part0002``, ...
        If ``todir`` already exists its current contents are removed first;
        otherwise it is created, including missing parent directories.

        :param fromfile: path of the file to split
        :param todir: directory that receives the part files
        :param chunksize: maximum size in bytes of each part; must be >= 1
        :return: number of part files written (0 for an empty source file)
        :raises ValueError: if ``chunksize`` is smaller than 1, or if the file
            needs more than 9999 parts (the four-digit part names would no
            longer sort in numeric order)
        :raises OSError: if the source cannot be read or a part cannot be
            written
        """
        max_parts = 9999  # largest number that fits the 'part{0:04d}' pattern

        # Validate before touching todir so that a bad argument never wipes an
        # existing directory. read(0) would yield no parts and a negative size
        # would load the whole file into memory as one part.
        if chunksize < 1:
            raise ValueError(
                'chunksize must be a positive number of bytes, got {0!r}'.format(chunksize))

        if not os.path.exists(todir):
            os.makedirs(todir)
        else:
            clear_dir(todir)

        partnum = 0
        with open(fromfile, 'rb') as source:
            # iter(callable, sentinel) keeps calling read() until it returns b''.
            for partnum, tmpdata in enumerate(
                    iter(lambda: source.read(chunksize), b''), start=1):
                # Checked before writing, and with a real exception rather than
                # an assert, so the limit also holds under ``python -O``.
                if partnum > max_parts:
                    raise ValueError(
                        'file needs more than {0} parts; use a larger chunksize'.format(max_parts))
                filename = os.path.join(todir, 'part{0:04d}'.format(partnum))
                with open(filename, 'wb') as fileobj:
                    fileobj.write(tmpdata)

        return partnum
    
    
    def main():
        """
        Command-line and interactive front end for split().

        Usage: ``split_file.py [file-to-split target-dir [chunksize]]``

        * ``-help`` as the only argument prints the usage line and returns.
        * With fewer than two positional arguments the source file and target
          directory are asked for interactively.
        * Otherwise they are taken from ``sys.argv``; an optional third
          argument overrides the module-level ``chunksize`` (in bytes).

        Errors raised while splitting are reported on standard output together
        with their cause instead of being propagated.

        :return: None
        :raises ValueError: if the chunksize argument is not an integer
        """
        global chunksize
        if len(sys.argv) == 2 and sys.argv[1] == '-help':
            print('Use:split_file.py [file-to-split target-dir [chunksize]]')
            # Help mode has nothing to split; returning here avoids running the
            # code below with the path variables never assigned.
            return

        if len(sys.argv) < 3:
            interactive = True
            fromfile = input('enter the file to split:')
            todir = input('enter the dir to hold the split info:')
        else:
            interactive = False
            fromfile, todir = sys.argv[1:3]
            # Honour the chunk size even when extra arguments follow it.
            if len(sys.argv) > 3:
                chunksize = int(sys.argv[3])

        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('spliting from {0} to {1} by {2}'.format(absfrom, absto, chunksize))

        try:
            parts = split(absfrom, absto, chunksize)
        except Exception as exc:
            # Top-level boundary: report the cause, but let KeyboardInterrupt
            # and SystemExit propagate (a bare except would swallow them).
            print('error during split: {0}'.format(exc))
        else:
            print('split finished:{0} parts are in {1}'.format(parts, absto))
        if interactive:
            print('input any key')
    # Script entry point: run the front end only when this file is executed
    # directly, not when it is imported as a module.
    if __name__ == '__main__':
        # Manual checks kept for reference (disabled):
        #clear_dir("../testdir")
        #split("../testdir1/test.pdf", "../testdir")
        main()
    

    2. 将拆分之后的文件重新合并

        I 将拆分后的文件以二进制的方式读取,再以二进制的方式保存

        II 提供两种操作方式:交互式和命令行模式

    import sys
    import os
    
    readsize = 1024  # bytes read per call while copying a part into the output


    def join(fromdir, tofile):
        """
        Rebuild a file from the part files stored in ``fromdir``.

        Every entry of ``fromdir`` is treated as a part and appended, in
        ascending name order, to ``tofile``. Both sides are opened in binary
        mode and ``tofile`` is overwritten if it already exists.

        :param fromdir: directory holding the part files
        :param tofile: path of the file to (re)create
        :return: None
        :raises OSError: if the directory cannot be listed, a part cannot be
            read, or the output cannot be written
        """
        # os.listdir() returns names in arbitrary order; sorting puts
        # zero-padded names such as part0001, part0002, ... back in sequence
        # so the output is not scrambled.
        partfiles = sorted(os.listdir(fromdir))
        with open(tofile, 'wb') as output:
            for eachpart in partfiles:
                filepath = os.path.join(fromdir, eachpart)
                with open(filepath, 'rb') as fileobj:
                    # Copy in readsize-byte pieces until read() returns b''.
                    for chunk in iter(lambda: fileobj.read(readsize), b''):
                        output.write(chunk)
    
    
    # Script entry point for the joiner.
    #   join -help                 print the usage line and stop
    #   join <from dir> <to file>  take both paths from the command line
    #   anything else              ask for both paths interactively
    if __name__ == '__main__':
        if len(sys.argv) == 2 and sys.argv[1] == '-help':
            print('using join [from dir nme] [to file name]')
        else:
            if len(sys.argv) != 3:
                fromdir = input('Enter the from dir')
                tofile = input('Enter the to file')
            else:
                fromdir = sys.argv[1]
                tofile = sys.argv[2]

            # Everything below stays inside the else branch: in help mode the
            # two paths are never assigned, so running it there would raise
            # NameError.
            fromdir, tofile = map(os.path.abspath, [fromdir, tofile])
            print('joining')

            try:
                join(fromdir, tofile)
            except Exception as exc:
                # Top-level boundary: report the cause, but let
                # KeyboardInterrupt and SystemExit propagate.
                print("Error during joining file: {0}".format(exc))
            else:
                print("joining completed")
    
  • 相关阅读:
    环境配置文件 ① /etc/profile、② ~/.bash_profile、③ ~/.bashrc、④ /etc/bashrc
    RHEL 7.0已发布CentOS 7即将到来
    《上海交通大学饮水思源paper(论文)板实用手册(第二版)》出炉
    SCI论文投稿Cover Letter的写作
    grub.cfg —— Window、Fedora、CentOS
    SCI新手成长策略
    计算机类SCI前三区期刊
    SCI期刊——导航
    SCI收录的外文期刊(计算机类)
    《嵌入式开发》——三次作业
  • 原文地址:https://www.cnblogs.com/someoneHan/p/6246703.html
Copyright © 2011-2022 走看看