zoukankan      html  css  js  c++  java
  • python递归解压文件夹中所有压缩包

    1. 简述

        递归解压文件夹中的所有压缩包到指定文件夹,方便快速搜索文件和整理移动文件。

    2. 环境配置

        python解压rar文件需要安装依赖库(python-unrar)

        Windows:

    1. 在 RARLab 官方下载安装库文件 http://www.rarlab.com/rar/UnRARDLL.exe
    2. 默认路径为 C:\Program Files (x86)\UnrarDLL
    3. 添加环境变量 UNRAR_LIB_PATH,键值为 C:\Program Files (x86)\UnrarDLL\x64\UnRAR64.dll,如果是32位就是 C:\Program Files (x86)\UnrarDLL\UnRAR.dll

        Linux:

    1. 下载库文件 https://www.rarlab.com/rar/unrarsrc-5.6.8.tar.gz
    2. $ make lib
      $ make install-lib
    3. 添加环境变量  export UNRAR_LIB_PATH=/usr/lib/libunrar.so

    3. 实现

        代码实现

      1 #!/usr/bin/env python3
      2 # .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz
      3 import os
      4 import zlib
      5 import unrar
      6 import shutil
      7 import zipfile
      8 import tarfile
      9 from time import sleep
     10 # from unrar import rarfile
     11 
     12 filepath = "./dirname"  #relative path
     13 
     14 class BaseTool(object):
     15     def __init__(self, path):
     16         self.path = path
     17         self.compress = [".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz",".zip",".rar"]
     18 
     19     def iszip(self,  file):
     20         for z in self.compress:
     21             if file.endswith(z):
     22                 return z
     23 
     24     def zip_to_path(self, file):
     25         for i in self.compress:
     26             file = file.replace(i,"")
     27         return file
     28 
     29     def error_record(self, info):
     30         with open("error.txt","a+") as r:
     31             r.write(info+"
    ")
     32 
     33     def un_zip(self, src, dst):
     34         """ src : aa/asdf.zip
     35             dst : unzip/aa/asdf.zip
     36         """
     37         try:
     38             zip_file = zipfile.ZipFile(src)
     39             uz_path = self.zip_to_path(dst)
     40             if not os.path.exists(uz_path):
     41                 os.makedirs(uz_path)
     42             for name in zip_file.namelist():
     43                 zip_file.extract(name, uz_path)
     44             zip_file.close()
     45         except zipfile.BadZipfile:
     46             pass
     47         except zlib.error:
     48             print("zlib error : "+src)
     49             self.error_record("zlib error : "+src)
     50 
     51     def un_rar(self, src, dst):
     52         try:
     53             rar = unrar.rarfile.RarFile(src)
     54             uz_path = self.zip_to_path(dst)
     55             rar.extractall(uz_path)
     56         except unrar.rarfile.BadRarFile:
     57             pass
     58         except Exception as e:
     59             print(e)
     60             self.error_record(str(e)+src)    
     61 
     62     def un_tar(self, src, dst):
     63         try:
     64             tar = tarfile.open(src)
     65             uz_path = self.zip_to_path(dst)
     66             tar.extractall(path = uz_path)
     67         except tarfile.ReadError:
     68             pass
     69         except Exception as e:
     70             print(e)
     71             self.error_record(str(e)+src)
     72 
     73 
     74 class UnZip(BaseTool):
     75     """ UnZip files """
     76     def __init__(self, path):
     77         super(UnZip, self).__init__(self)
     78         self.path = path
     79         self.output = "./unzip/"
     80         self.current_path = os.getcwd()+"/"
     81 
     82     def recursive_unzip(self, repath):
     83         """recursive unzip file
     84         """
     85         for (root, dirs, files) in os.walk(repath):
     86             for filename in files:
     87                 src = os.path.join(root,filename)
     88                 if self.iszip(src) == ".zip":
     89                     print("[+] child unzip: "+src)
     90                     self.un_zip(src, src)
     91                     os.remove(src)
     92                     self.recursive_unzip(self.zip_to_path(src))
     93                     sleep(0.1)
     94                 if self.iszip(src) == ".rar":
     95                     from unrar import rarfile
     96                     print("[+] child unrar : "+src)
     97                     self.un_rar(src,src) 
     98                     os.remove(src)
     99                     self.recursive_unzip(self.zip_to_path(src))
    100                     sleep(0.1)
    101                 if self.iszip(src) in (".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz"):
    102                     print("[+] child untar : "+src)
    103                     self.un_tar(src,src)
    104                     os.remove(src)
    105                     self.recursive_unzip(self.zip_to_path(src))
    106                     sleep(0.1)
    107 
    108     def main_unzip(self):
    109         for (root, dirs, files) in os.walk(self.path):
    110             for filename in files:
    111                 zippath = os.path.join(self.output,root)
    112                 if not os.path.exists(zippath):
    113                     os.makedirs(zippath)
    114                 src = os.path.join(root,filename)
    115                 dst = os.path.join(self.output,root,filename)
    116                 if self.iszip(src) == ".zip":
    117                     print("[+] main unzip : "+src)
    118                     self.un_zip(src,dst)
    119                 if self.iszip(src) == ".rar":
    120                     from unrar import rarfile
    121                     print("[+] main unrar : "+src)
    122                     self.un_rar(src,dst)
    123                 if self.iszip(src) in (".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz"):
    124                     print("[+] main untar : "+src)
    125                     self.un_tar(src,dst)
    126                 else:
    127                     try:
    128                         shutil.copyfile(src,dst)
    129                     except OSError as e:
    130                         print(str(e))
    131                         self.error_record(str(e))
    132                     
    133         self.recursive_unzip(self.output+self.path)
    134 
    135 
    136 def main():
    137     z = UnZip(filepath)   #relative path
    138     z.main_unzip()
    139 
    140 if __name__ == '__main__':
    141     main()

     

    4. 多线程

      1 #!/usr/bin/env python3
      2 # .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz
      3 import os
      4 import zlib
      5 import unrar
      6 import shutil
      7 import zipfile
      8 import tarfile
      9 import argparse
     10 import time
     11 import threading
     12 from time import sleep
     13 from itertools import chain
     14 from unrar import rarfile
     15 
     16 
     17 filepath = "./filepath"  #relative path
     18 thread_num = 1
     19 
     20 class BaseTool(object):
     21     def __init__(self):
     22         super(BaseTool, self).__init__()
     23         self.compress = [".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz",".zip",".rar"]
     24 
     25     def run_threads(self, threads_number: int, target_function: any, *args, **kwargs) -> None:
     26         """ Run function across specified number of threads
     27         :param int thread_number: number of threads that should be executed
     28         :param func target_function: function that should be executed accross specified number of threads
     29         :param any args: args passed to target_function
     30         :param any kwargs: kwargs passed to target function
     31         :return None
     32         """
     33 
     34         threads = []
     35         threads_running = threading.Event()
     36         threads_running.set()
     37 
     38         for thread_id in range(int(threads_number)):
     39             thread = threading.Thread(
     40                 target=target_function,
     41                 args=chain((threads_running,), args),
     42                 kwargs=kwargs,
     43                 name="thread-{}".format(thread_id),
     44             )
     45             threads.append(thread)
     46 
     47             # print("{} thread is starting...".format(thread.name))
     48             thread.start()
     49 
     50         start = time.time()
     51         try:
     52             while thread.isAlive():
     53                 thread.join(1)
     54 
     55         except KeyboardInterrupt:
     56             threads_running.clear()
     57 
     58         for thread in threads:
     59             thread.join()
     60             # print("{} thread is terminated.".format(thread.name))
     61 
     62         print("Elapsed time: {} seconds".format(time.time() - start))
     63 
     64     def iszip(self,  file):
     65         for z in self.compress:
     66             if file.endswith(z):
     67                 return z
     68 
     69     def zip_to_path(self, file):
     70         for i in self.compress:
     71             file = file.replace(i,"")
     72         return file
     73 
     74     def error_record(self, info):
     75         with open("error.txt","a+") as w:
     76             w.write(info+"
    ")
     77 
     78     def remove(self, filepath):
     79         if os.path.exists(self.zip_to_path(filepath)) and os.path.exists(filepath):
     80             os.remove(filepath)
     81 
     82     def un_zip(self, src, dst):
     83         """ src : aa/asdf.zip
     84             dst : unzip/aa/asdf.zip
     85         """
     86         try:
     87             zip_file = zipfile.ZipFile(src)
     88             uz_path = self.zip_to_path(dst)
     89             if not os.path.exists(uz_path):
     90                 os.makedirs(uz_path)
     91             for name in zip_file.namelist():
     92                 zip_file.extract(name, uz_path)
     93             zip_file.close()
     94         except zipfile.BadZipfile:
     95             pass
     96         except RuntimeError:
     97             self.error_record("pass required : "+src)
     98             return "PassRequired"
     99         except zlib.error:
    100             print("zlib error : "+src)
    101             self.error_record("zlib error : "+src)
    102         except Exception as e:
    103             print(e)
    104             self.error_record(str(e)+src)  
    105 
    106     def un_rar(self, src, dst):
    107         try:
    108             rar = unrar.rarfile.RarFile(src)
    109             uz_path = self.zip_to_path(dst)
    110             rar.extractall(uz_path)
    111         except unrar.rarfile.BadRarFile:
    112             pass
    113         except Exception as e:
    114             print(e)
    115             self.error_record(str(e)+src)    
    116 
    117     def un_tar(self, src, dst):
    118         try:
    119             tar = tarfile.open(src)
    120             uz_path = self.zip_to_path(dst)
    121             tar.extractall(path = uz_path)
    122         except tarfile.ReadError:
    123             pass
    124         except Exception as e:
    125             print(e)
    126             self.error_record(str(e)+src)
    127 
    128 
    129 class LockedIterator(object):
    130     def __init__(self, it):
    131         self.lock = threading.Lock()
    132         self.it = it.__iter__()
    133 
    134     def __iter__(self):
    135         return self
    136 
    137     def next(self):
    138         self.lock.acquire()
    139         try:
    140             item = next(self.it)
    141 
    142             if type(item) is tuple:
    143                 return (item[0].strip(), item[1].strip(), item[2].strip())
    144             elif type(item) is str:
    145                 return item.strip()
    146 
    147             return item
    148         finally:
    149             self.lock.release()
    150 
    151 
    152 class UnZip(BaseTool):
    153     """ UnZip files """
    154     def __init__(self, path):
    155         super(UnZip, self).__init__()
    156         self.path = path
    157         self.threads = thread_num
    158         self.output = "./unzip/"
    159         self.current_path = os.getcwd()+"/"
    160         self.parser = argparse.ArgumentParser()
    161         self.parser.add_argument("-v","--verbose", action="store_true", help="./zipperpro.py -v")
    162         self.args = self.parser.parse_args()
    163 
    164     def run(self):
    165         self.main_unzip(self.path)
    166 
    167     def recursive_unzip(self, repath):
    168         """recursive unzip file
    169         """
    170         task_list = []
    171         for (root, dirs, files) in os.walk(repath):
    172             for filename in files:
    173                 filename = filename.strip("./")
    174                 src = os.path.join("./"+root,filename)
    175                 data = (src, src, "child")
    176                 task_list.append(data)
    177         data = LockedIterator(chain(task_list))
    178         print("[+] child unzip ...")
    179         self.run_threads(self.threads, self.do_unzip, data)
    180                 
    181     def main_unzip(self, mainpath):
    182         task_list = []
    183         print("Initialization......")
    184         for (root, dirs, files) in os.walk(mainpath):
    185             for filename in files:
    186                 zippath = os.path.join(self.output,root)
    187                 if not os.path.exists(zippath):
    188                     os.makedirs(zippath)
    189                 src = os.path.join(root,filename)
    190                 dst = os.path.join(self.output,root,filename)
    191                 if not os.path.exists(self.zip_to_path(dst)):
    192                     data = ((src, dst, "main"))
    193                     task_list.append(data)
    194         data = LockedIterator(chain(task_list))
    195         print("[+] main unzip ...")
    196         self.run_threads(self.threads, self.do_unzip, data)
    197         self.recursive_unzip(self.output+self.path)
    198 
    199     def do_unzip(self, running, data):
    200         while running.is_set():
    201             try:
    202                 (src, dst, flag) = data.next()
    203                 if flag == "main":
    204                     if self.iszip(src) == ".zip":
    205                         if self.args.verbose:
    206                             print("[+] main unzip : "+src)
    207                         self.un_zip(src,dst)
    208                     elif self.iszip(src) == ".rar":
    209                         if self.args.verbose:
    210                             print("[+] main unrar : "+src)
    211                         self.un_rar(src,dst)
    212                     elif self.iszip(src) in (".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz"):
    213                         if self.args.verbose:
    214                             print("[+] main untar : "+src)
    215                         self.un_tar(src,dst)
    216                     else:
    217                         try:
    218                             shutil.copyfile(src,dst)
    219                         except OSError as e:
    220                             print(str(e))
    221                             self.error_record(str(e))
    222                 elif flag == "child":
    223                     if self.iszip(src) == ".zip":
    224                         if self.args.verbose:
    225                             print("[+] child unzip: "+src)
    226                         if not self.un_zip(src, src) == "PassRequired":
    227                             self.remove(src)
    228                             self.recursive_unzip(self.zip_to_path(src))
    229                         sleep(0.1)
    230                     elif self.iszip(src) == ".rar":
    231                         if self.args.verbose:
    232                             print("[+] child unrar : "+src)
    233                         self.un_rar(src,src) 
    234                         self.remove(src)
    235                         self.recursive_unzip(self.zip_to_path(src))
    236                         sleep(0.1)
    237                     elif self.iszip(src) in (".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz"):
    238                         if self.args.verbose:
    239                             print("[+] child untar : "+src)
    240                         self.un_tar(src,src)
    241                         self.remove(src)
    242                         self.recursive_unzip(self.zip_to_path(src))
    243                         sleep(0.1)
    244                     
    245             except StopIteration:
    246                 break
    247 
    248 
    249 def main():
    250     z = UnZip(filepath) 
    251     z.run()
    252     
    253 
    254 
    255 if __name__ == '__main__':
    256     main()

    5. 问题

    • 中文压缩包乱码,中文路径解压出错
    • rar解压遇加密文件卡死

    https://github.com/Gitmaninc/SmallTooools/tree/master/unzip-tool

  • 相关阅读:
    Android——继续深造——从安装Android Studio 2.0开始(详)
    PHP——安装wampserver丢失MSVCR110.dll
    Marza Gift for GDC 2016
    Retrieve OpenGL Context from Qt 5.5 on OSX
    Space Time Varying Color Palette
    Screen Space Depth Varying Glow based on Heat Diffusion
    Visualization of Detail Point Set by Local Algebraic Sphere Fitting
    Glass Dragon
    Jump Flood Algorithms for Centroidal Voronoi Tessellation
    京都之行
  • 原文地址:https://www.cnblogs.com/trojan-z/p/10043408.html
Copyright © 2011-2022 走看看