zoukankan      html  css  js  c++  java
  • python递归解压文件夹中所有压缩包

    1. 简述

        递归解压文件夹中的所有压缩包到指定文件夹,方便快速搜索文件和整理移动文件。

    2. 环境配置

        python解压rar文件需要安装依赖库 (python-unrar)

        Windows:

    1. 在 RARLab 官方下载安装库文件 http://www.rarlab.com/rar/UnRARDLL.exe
    2. 默认路径为 C:\Program Files (x86)\UnrarDLL
    3. 添加环境变量 UNRAR_LIB_PATH 键值 C:\Program Files (x86)\UnrarDLL\x64\UnRAR64.dll,如果是32位就是 C:\Program Files (x86)\UnrarDLL\UnRAR.dll

        Linux:

    1. 下载库文件 https://www.rarlab.com/rar/unrarsrc-5.6.8.tar.gz
    2. $ make lib
      $ make install-lib
    3. 添加环境变量  export UNRAR_LIB_PATH=/usr/lib/libunrar.so

    3. 实现

        代码实现

      1 #!/usr/bin/env python3
      2 # .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz
      3 import os
      4 import zlib
      5 import unrar
      6 import shutil
      7 import zipfile
      8 import tarfile
      9 from time import sleep
     10 # from unrar import rarfile
     11 
     12 filepath = "./dirname"  #relative path
     13 
     class BaseTool(object):
         """Helpers for recognising and extracting zip, rar and tar archives.

         Extraction problems are appended to ``error.txt`` in the current
         working directory instead of aborting the run.
         """

         def __init__(self, path):
             """Remember the directory to process and the known suffixes.

             :param path: directory whose archives should be extracted
             """
             self.path = path
             # Compound suffixes are listed before the shorter ones they end
             # with (".tar.tgz" before ".tgz") so the longest one is reported.
             self.compress = [".tar.gz", ".tar.bz2", ".tar.bz", ".tar.tgz",
                              ".tar", ".tgz", ".zip", ".rar"]

         def iszip(self, file):
             """Return the archive suffix ``file`` ends with, or None."""
             for suffix in self.compress:
                 if file.endswith(suffix):
                     return suffix
             return None

         def zip_to_path(self, file):
             """Return ``file`` without its trailing archive suffix.

             Only the suffix at the very end is removed, so directory or file
             names that merely contain ".zip", ".tar", ... stay intact. A path
             without a known suffix is returned unchanged.
             """
             suffix = self.iszip(file)
             if suffix is None:
                 return file
             return file[:-len(suffix)]

         def error_record(self, info):
             """Append ``info`` as one line to ``error.txt``."""
             # UTF-8 so that non-ASCII archive names can always be logged.
             with open("error.txt", "a+", encoding="utf-8") as log:
                 log.write(info + "\n")

         def un_zip(self, src, dst):
             """Extract the zip archive ``src``.

             :param src: path of the archive, e.g. ``aa/asdf.zip``
             :param dst: path of the archive inside the output tree, e.g.
                 ``unzip/aa/asdf.zip``; the content lands in ``unzip/aa/asdf``
             """
             uz_path = self.zip_to_path(dst)
             try:
                 # The context manager closes the archive even if extraction
                 # fails half-way.
                 with zipfile.ZipFile(src) as zip_file:
                     os.makedirs(uz_path, exist_ok=True)
                     zip_file.extractall(uz_path)
             except zipfile.BadZipfile:
                 # Named like a zip file but not one: skip it, but leave a trace.
                 self.error_record("bad zip file : " + src)
             except zlib.error:
                 print("zlib error : "+src)
                 self.error_record("zlib error : "+src)
             except Exception as e:
                 # Same best-effort policy as un_rar / un_tar: log and go on.
                 print(e)
                 self.error_record(str(e)+src)

         def un_rar(self, src, dst):
             """Extract the rar archive ``src`` into ``dst`` minus its suffix.

             :param src: path of the archive
             :param dst: path of the archive inside the output tree
             """
             # Imported here because a bare ``import unrar`` may not make the
             # ``rarfile`` submodule available as an attribute.
             from unrar import rarfile
             try:
                 rar = rarfile.RarFile(src)
                 rar.extractall(self.zip_to_path(dst))
             except rarfile.BadRarFile:
                 self.error_record("bad rar file : " + src)
             except Exception as e:
                 print(e)
                 self.error_record(str(e)+src)

         def un_tar(self, src, dst):
             """Extract the (optionally compressed) tar archive ``src``.

             :param src: path of the archive
             :param dst: path of the archive inside the output tree
             """
             uz_path = self.zip_to_path(dst)
             try:
                 with tarfile.open(src) as tar:
                     if hasattr(tarfile, "data_filter"):
                         # Archives are untrusted input: where the interpreter
                         # supports extraction filters, refuse members that
                         # would escape the destination directory.
                         tar.extractall(path=uz_path, filter="data")
                     else:
                         tar.extractall(path=uz_path)
             except tarfile.ReadError:
                 self.error_record("bad tar file : " + src)
             except Exception as e:
                 print(e)
                 self.error_record(str(e)+src)
     72 
     73 
     class UnZip(BaseTool):
         """Mirror ``path`` into ``./unzip/`` and extract every archive found.

         ``main_unzip`` extracts the archives of the source tree into the
         mirror and copies all other files; ``recursive_unzip`` then extracts,
         in place, the archives that were packed inside other archives.
         """

         # Suffixes that are handed to ``un_tar``.
         TAR_SUFFIXES = (".tar.gz", ".tar.bz2", ".tar.bz", ".tar.tgz", ".tar", ".tgz")

         # Progress messages, keyed by (phase, archive format).
         _LABELS = {
             ("main", "zip"): "[+] main unzip : ",
             ("main", "rar"): "[+] main unrar : ",
             ("main", "tar"): "[+] main untar : ",
             ("child", "zip"): "[+] child unzip: ",
             ("child", "rar"): "[+] child unrar : ",
             ("child", "tar"): "[+] child untar : ",
         }

         def __init__(self, path):
             """
             :param path: relative path of the directory to process
             """
             # Hand the path (not the instance itself) to the base class.
             super(UnZip, self).__init__(path)
             self.path = path
             self.output = "./unzip/"
             self.current_path = os.getcwd()+"/"

         def _extract(self, src, dst, phase):
             """Extract ``src`` if it is an archive.

             :param src: file to inspect
             :param dst: path of that file inside the output tree
             :param phase: "main" or "child"; only selects the progress message
             :return: True if ``src`` has an archive suffix and extraction was
                 attempted, False otherwise
             """
             suffix = self.iszip(src)
             if suffix == ".zip":
                 fmt, extract = "zip", self.un_zip
             elif suffix == ".rar":
                 # Loads the submodule so that ``unrar.rarfile`` can be
                 # resolved by un_rar.
                 from unrar import rarfile  # noqa: F401
                 fmt, extract = "rar", self.un_rar
             elif suffix in self.TAR_SUFFIXES:
                 fmt, extract = "tar", self.un_tar
             else:
                 return False
             print(self._LABELS[phase, fmt] + src)
             extract(src, dst)
             return True

         def recursive_unzip(self, repath):
             """Extract, in place, every archive below ``repath``.

             Each archive is replaced by a directory named after it (suffix
             removed); that directory is then searched for further archives.
             """
             for root, _dirs, files in os.walk(repath):
                 for filename in files:
                     src = os.path.join(root, filename)
                     if not self._extract(src, src, "child"):
                         continue
                     extracted = self.zip_to_path(src)
                     # Delete the archive only once its content directory
                     # exists, so a failed extraction does not lose the data.
                     if os.path.isdir(extracted):
                         os.remove(src)
                         # os.walk listed the sub-directories of ``root``
                         # before this one was created, so descend explicitly.
                         self.recursive_unzip(extracted)
                     sleep(0.1)

         def main_unzip(self):
             """Extract or copy every file of ``self.path`` into ``self.output``.

             Archives are extracted, everything else is copied unchanged;
             afterwards nested archives are handled by ``recursive_unzip``.
             """
             for root, _dirs, files in os.walk(self.path):
                 if not files:
                     continue
                 # NOTE: os.path.join discards self.output when ``root`` is
                 # absolute, which is why self.path has to be relative.
                 zippath = os.path.join(self.output, root)
                 os.makedirs(zippath, exist_ok=True)
                 for filename in files:
                     src = os.path.join(root, filename)
                     dst = os.path.join(zippath, filename)
                     # An archive is extracted only; copying it as well would
                     # make recursive_unzip extract it a second time.
                     if self._extract(src, dst, "main"):
                         continue
                     try:
                         shutil.copyfile(src, dst)
                     except OSError as e:
                         print(str(e))
                         self.error_record(str(e))

             self.recursive_unzip(self.output+self.path)
    134 
    135 
    def main():
        """Extract everything below the module-level ``filepath``."""
        extractor = UnZip(filepath)  # filepath is a relative path
        extractor.main_unzip()


    if __name__ == '__main__':
        main()

     

    4. 多线程

      1 #!/usr/bin/env python3
      2 # .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz
      3 import os
      4 import zlib
      5 import unrar
      6 import shutil
      7 import zipfile
      8 import tarfile
      9 import argparse
     10 import time
     11 import threading
     12 from time import sleep
     13 from itertools import chain
     14 from unrar import rarfile
     15 
     16 
     17 filepath = "./filepath"  #relative path
     18 thread_num = 1
     19 
     class BaseTool(object):
         """Thread runner plus helpers for extracting zip, rar and tar archives.

         Extraction problems are appended to ``error.txt`` in the current
         working directory instead of aborting the run.
         """

         def __init__(self):
             super(BaseTool, self).__init__()
             # Compound suffixes are listed before the shorter ones they end
             # with (".tar.tgz" before ".tgz") so the longest one is reported.
             self.compress = [".tar.gz", ".tar.bz2", ".tar.bz", ".tar.tgz",
                              ".tar", ".tgz", ".zip", ".rar"]

         def run_threads(self, threads_number: int, target_function, *args, **kwargs) -> None:
             """Run ``target_function`` on ``threads_number`` threads and wait.

             Every thread calls ``target_function(running, *args, **kwargs)``
             where ``running`` is a shared ``threading.Event``. The event is
             set at start and cleared when a KeyboardInterrupt arrives while
             waiting, so workers should stop once it is no longer set.

             :param int threads_number: number of threads to start
             :param target_function: callable executed by every thread
             :param args: positional arguments passed after ``running``
             :param kwargs: keyword arguments passed to ``target_function``
             :return: None
             """
             threads = []
             threads_running = threading.Event()
             threads_running.set()

             start = time.time()
             for thread_id in range(int(threads_number)):
                 thread = threading.Thread(
                     target=target_function,
                     args=(threads_running,) + args,
                     kwargs=kwargs,
                     name="thread-{}".format(thread_id),
                 )
                 threads.append(thread)
                 thread.start()

             try:
                 # Poll with a timeout (instead of one blocking join) so the
                 # wait can be interrupted; every thread is watched, not only
                 # the one started last.
                 for thread in threads:
                     while thread.is_alive():
                         thread.join(1)
             except KeyboardInterrupt:
                 threads_running.clear()

             for thread in threads:
                 thread.join()

             print("Elapsed time: {} seconds".format(time.time() - start))

         def iszip(self, file):
             """Return the archive suffix ``file`` ends with, or None."""
             for suffix in self.compress:
                 if file.endswith(suffix):
                     return suffix
             return None

         def zip_to_path(self, file):
             """Return ``file`` without its trailing archive suffix.

             Only the suffix at the very end is removed, so directory or file
             names that merely contain ".zip", ".tar", ... stay intact. A path
             without a known suffix is returned unchanged.
             """
             suffix = self.iszip(file)
             if suffix is None:
                 return file
             return file[:-len(suffix)]

         def error_record(self, info):
             """Append ``info`` as one line to ``error.txt``."""
             # UTF-8 so that non-ASCII archive names can always be logged.
             with open("error.txt", "a+", encoding="utf-8") as log:
                 log.write(info + "\n")

         def remove(self, filepath):
             """Delete the archive ``filepath`` if its extraction path exists.

             Nothing happens when the path named after the archive (suffix
             removed) is missing or when the archive itself is already gone.
             """
             if not os.path.exists(self.zip_to_path(filepath)):
                 return
             try:
                 os.remove(filepath)
             except FileNotFoundError:
                 # Already deleted, e.g. by another worker thread.
                 pass

         def un_zip(self, src, dst):
             """Extract the zip archive ``src``.

             :param src: path of the archive, e.g. ``aa/asdf.zip``
             :param dst: path of the archive inside the output tree, e.g.
                 ``unzip/aa/asdf.zip``; the content lands in ``unzip/aa/asdf``
             :return: ``"PassRequired"`` when zipfile raised RuntimeError
                 (treated as a password-protected archive), otherwise None
             """
             uz_path = self.zip_to_path(dst)
             try:
                 # The context manager closes the archive even if extraction
                 # fails half-way.
                 with zipfile.ZipFile(src) as zip_file:
                     os.makedirs(uz_path, exist_ok=True)
                     zip_file.extractall(uz_path)
             except zipfile.BadZipfile:
                 # Named like a zip file but not one: skip it, but leave a trace.
                 self.error_record("bad zip file : " + src)
             except RuntimeError:
                 self.error_record("pass required : "+src)
                 return "PassRequired"
             except zlib.error:
                 print("zlib error : "+src)
                 self.error_record("zlib error : "+src)
             except Exception as e:
                 print(e)
                 self.error_record(str(e)+src)
             return None

         def un_rar(self, src, dst):
             """Extract the rar archive ``src`` into ``dst`` minus its suffix.

             :param src: path of the archive
             :param dst: path of the archive inside the output tree
             """
             try:
                 rar = unrar.rarfile.RarFile(src)
                 rar.extractall(self.zip_to_path(dst))
             except unrar.rarfile.BadRarFile:
                 self.error_record("bad rar file : " + src)
             except Exception as e:
                 print(e)
                 self.error_record(str(e)+src)

         def un_tar(self, src, dst):
             """Extract the (optionally compressed) tar archive ``src``.

             :param src: path of the archive
             :param dst: path of the archive inside the output tree
             """
             uz_path = self.zip_to_path(dst)
             try:
                 with tarfile.open(src) as tar:
                     if hasattr(tarfile, "data_filter"):
                         # Archives are untrusted input: where the interpreter
                         # supports extraction filters, refuse members that
                         # would escape the destination directory.
                         tar.extractall(path=uz_path, filter="data")
                     else:
                         tar.extractall(path=uz_path)
             except tarfile.ReadError:
                 self.error_record("bad tar file : " + src)
             except Exception as e:
                 print(e)
                 self.error_record(str(e)+src)
    127 
    128 
    class LockedIterator(object):
        """Iterator wrapper whose items can be fetched from several threads.

        Fetching the next item from the wrapped iterator is serialised with
        a lock. String items are returned with surrounding whitespace
        stripped; tuple items are expected to be triples of strings and are
        returned with each of the three fields stripped.
        """

        def __init__(self, it):
            """
            :param it: any iterable; it is consumed lazily
            """
            self.lock = threading.Lock()
            self.it = iter(it)

        def __iter__(self):
            return self

        def __next__(self):
            """Return the next item; raise StopIteration when exhausted."""
            # Only advancing the shared iterator needs the lock; the ``with``
            # block releases it even when StopIteration is raised.
            with self.lock:
                item = next(self.it)

            if isinstance(item, tuple):
                return (item[0].strip(), item[1].strip(), item[2].strip())
            if isinstance(item, str):
                return item.strip()
            return item

        # Python 2 style spelling, kept for callers that use ``obj.next()``.
        next = __next__
    150 
    151 
    class UnZip(BaseTool):
        """Multi-threaded variant: mirror ``path`` into ``./unzip/`` and
        extract every archive found, including archives nested in archives.

        Work items are ``(src, dst, flag)`` triples handed to the worker
        threads through a ``LockedIterator``; ``flag`` is "main" for files of
        the source tree and "child" for archives found inside the output.
        """

        # Suffixes that are handed to ``un_tar``.
        TAR_SUFFIXES = (".tar.gz", ".tar.bz2", ".tar.bz", ".tar.tgz", ".tar", ".tgz")

        # Verbose progress messages, keyed by (flag, archive format).
        _LABELS = {
            ("main", "zip"): "[+] main unzip : ",
            ("main", "rar"): "[+] main unrar : ",
            ("main", "tar"): "[+] main untar : ",
            ("child", "zip"): "[+] child unzip: ",
            ("child", "rar"): "[+] child unrar : ",
            ("child", "tar"): "[+] child untar : ",
        }

        def __init__(self, path):
            """
            :param path: relative path of the directory to process

            The command line is parsed here; ``-v`` / ``--verbose`` enables a
            progress line per archive.
            """
            super(UnZip, self).__init__()
            self.path = path
            self.threads = thread_num
            self.output = "./unzip/"
            self.current_path = os.getcwd()+"/"
            self.parser = argparse.ArgumentParser()
            self.parser.add_argument("-v","--verbose", action="store_true", help="./zipperpro.py -v")
            self.args = self.parser.parse_args()

        def run(self):
            """Process ``self.path``."""
            self.main_unzip(self.path)

        def _extractor(self, src):
            """Return ``(format, extract_method)`` for ``src``.

            Returns None when ``src`` does not carry an archive suffix.
            """
            suffix = self.iszip(src)
            if suffix == ".zip":
                return "zip", self.un_zip
            if suffix == ".rar":
                return "rar", self.un_rar
            if suffix in self.TAR_SUFFIXES:
                return "tar", self.un_tar
            return None

        def recursive_unzip(self, repath):
            """Extract, in place, every archive below ``repath``.

            This is also called from worker threads after each extraction,
            in which case a nested set of threads handles the new directory.
            """
            task_list = []
            for (root, dirs, files) in os.walk(repath):
                for filename in files:
                    # The file name is used exactly as reported by os.walk;
                    # stripping "." or "/" from it would point at a different
                    # file for names such as ".hidden.zip".
                    src = os.path.join(root, filename)
                    # Other files are ignored by the "child" branch of
                    # do_unzip anyway, so they are not queued at all.
                    if self.iszip(src):
                        task_list.append((src, src, "child"))
            if not task_list:
                # Nothing to extract: do not start threads for an empty queue.
                return
            data = LockedIterator(chain(task_list))
            print("[+] child unzip ...")
            self.run_threads(self.threads, self.do_unzip, data)

        def main_unzip(self, mainpath):
            """Extract or copy every file below ``mainpath`` into the output.

            Files whose target already exists in the output tree are skipped.
            Afterwards nested archives are handled by ``recursive_unzip``.

            :param mainpath: relative path of the directory to process
            """
            task_list = []
            print("Initialization......")
            for (root, dirs, files) in os.walk(mainpath):
                if not files:
                    continue
                # NOTE: os.path.join discards self.output when ``root`` is
                # absolute, which is why the path has to be relative.
                zippath = os.path.join(self.output, root)
                os.makedirs(zippath, exist_ok=True)
                for filename in files:
                    src = os.path.join(root, filename)
                    dst = os.path.join(zippath, filename)
                    if not os.path.exists(self.zip_to_path(dst)):
                        task_list.append((src, dst, "main"))
            data = LockedIterator(chain(task_list))
            print("[+] main unzip ...")
            self.run_threads(self.threads, self.do_unzip, data)
            # Continue in the mirror of the tree that was just processed.
            self.recursive_unzip(os.path.join(self.output, mainpath))

        def do_unzip(self, running, data):
            """Worker loop: process tasks until none are left or ``running``
            is cleared.

            :param running: ``threading.Event`` supplied by ``run_threads``
            :param data: shared task source whose ``next()`` returns
                ``(src, dst, flag)`` and raises StopIteration when empty
            """
            while running.is_set():
                try:
                    (src, dst, flag) = data.next()
                except StopIteration:
                    break
                if flag == "main":
                    self._unzip_main(src, dst)
                elif flag == "child":
                    self._unzip_child(src)

        def _unzip_main(self, src, dst):
            """Extract the source-tree file ``src``, or copy it if it is no archive."""
            found = self._extractor(src)
            if found is None:
                try:
                    shutil.copyfile(src,dst)
                except OSError as e:
                    print(str(e))
                    self.error_record(str(e))
                return
            fmt, extract = found
            if self.args.verbose:
                print(self._LABELS["main", fmt] + src)
            extract(src, dst)

        def _unzip_child(self, src):
            """Extract the nested archive ``src`` in place and descend into it."""
            found = self._extractor(src)
            if found is None:
                return
            fmt, extract = found
            if self.args.verbose:
                print(self._LABELS["child", fmt] + src)
            # Only un_zip reports "PassRequired"; such an archive is kept.
            if extract(src, src) != "PassRequired":
                self.remove(src)
                extracted = self.zip_to_path(src)
                if os.path.isdir(extracted):
                    self.recursive_unzip(extracted)
            sleep(0.1)
    247 
    248 
    def main():
        """Extract everything below the module-level ``filepath``."""
        extractor = UnZip(filepath)
        extractor.run()


    if __name__ == '__main__':
        main()

    5. 问题

    • 中文压缩包乱码,中文路径解压出错
    • rar解压遇加密文件卡死

    https://github.com/Gitmaninc/SmallTooools/tree/master/unzip-tool

  • 相关阅读:
    继承关系·
    对象第复制operator=
    关于类拷贝造函数
    静态数据成员与静态成员函数
    linux新内核的时钟机制代码
    RTC系统
    Android关机闹钟实现
    更改printk打印级别
    vncserver 配置全屏显示
    vnc里鼠标拖动终端就会产生ctrl+c终端
  • 原文地址:https://www.cnblogs.com/trojan-z/p/10043408.html
Copyright © 2011-2022 走看看