zoukankan      html  css  js  c++  java
  • Python 并发统计 S3 目录大小

    import os
    import subprocess
    from multiprocessing.pool import Pool

    import boto3
    
    # Ask S3 for the bucket listing once, at import time, and keep only the
    # bucket names; the __main__ section fans work out over bucket_list.
    client = boto3.client('s3')
    response = client.list_buckets()
    buckets = response['Buckets']

    # Each entry of response['Buckets'] is a mapping carrying a 'Name' key.
    bucket_list = [entry['Name'] for entry in buckets]
    
    
    # List the second-level "directories" of a bucket. Plain objects at the
    # top level are included as well; filter them out here if not wanted.
    def get_catalog_list(bucket='bucket'):
        """Return the s3:// URI of every top-level entry in ``bucket``.

        Runs ``aws s3 ls s3://<bucket>`` and parses its standard output.
        A line of the form ``PRE <name>/`` is a prefix ("directory"); any
        other non-empty line is treated as an object listing of the form
        ``<date> <time> <size> <name>``.

        The command is started from an argument list rather than a shell
        string, so the bucket name is never interpreted by a shell. Its exit
        status is not checked: a failing command simply contributes whatever
        it wrote to stdout.

        Args:
            bucket: Name of the S3 bucket to list.

        Returns:
            A list of ``s3://<bucket>/<key>`` strings, one per listed entry.
            The list is also printed to stdout.
        """
        target = "s3://{bucket}".format(bucket=bucket)
        result = subprocess.run(["aws", "s3", "ls", target],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

        s3_key_list = []
        for line in result.stdout.split('\n'):
            entry = line.lstrip()
            if not entry:
                continue
            if entry.startswith('PRE '):
                # Prefix line: drop the 4-character "PRE " marker.
                key = entry[4:]
            else:
                # Object line: the name is the 4th field and may itself
                # contain spaces, so split at most three times.
                fields = entry.split(None, 3)
                if len(fields) < 4:
                    continue
                key = fields[3]
            s3_key_list.append("s3://{bucket}/{key}".format(bucket=bucket, key=key))

        print(s3_key_list)
        return s3_key_list
    
    
    # Compute how much data is stored under a catalog (prefix).
    def get_catalog_size(catalog='s3://bucket/key/'):
        """Return the total size, in whole MiB, of everything under ``catalog``.

        Runs ``aws s3 ls <catalog> --recursive`` and adds up the third
        whitespace-separated field (the size in bytes) of every output line.

        The command is started from an argument list rather than a shell
        string, so catalogs containing spaces or shell metacharacters are
        passed through intact. Its exit status is not checked: a failing
        command simply contributes whatever it wrote to stdout.

        Args:
            catalog: ``s3://`` URI of the prefix (or object) to measure.

        Returns:
            The summed size in bytes divided by 1024 * 1024, rounded down,
            as an int.

        Raises:
            ValueError: If the size field of a listing line is not an integer.
        """
        cmd = ["aws", "s3", "ls", catalog, "--recursive"]
        print(" ".join(cmd))
        result = subprocess.run(cmd,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

        total_bytes = 0
        for line in result.stdout.split('\n'):
            fields = line.split()
            # Skip blank or truncated lines that carry no size column.
            if len(fields) < 3:
                continue
            total_bytes += int(fields[2])

        # Integer floor division keeps the result exact for very large totals.
        return total_bytes // (1024 * 1024)
    
    
    def save(bucket='bucket'):
        """Append one ``<catalog>,<size>`` line per entry of ``bucket`` to a file.

        The output file is ``<bucket>.txt`` in the current working directory.
        It is opened in append mode, so repeated runs add to the existing
        content instead of replacing it. Each line is also printed to stdout.

        Args:
            bucket: Name of the S3 bucket to report on.
        """
        report_path = bucket + '.txt'
        with open(report_path, "a") as report:
            for catalog in get_catalog_list(bucket):
                size = get_catalog_size(catalog)
                # One record per line: "<s3 uri>,<size>" terminated by a newline.
                record = "{catalog},{size}\n".format(catalog=catalog, size=size)
                print(record)
                report.write(record)
    
    
    if __name__ == '__main__':
        # Process the buckets in parallel, one task per bucket, using a pool
        # of 8 worker processes.
        p = Pool(8)
        pending = [(bucket, p.apply_async(save, args=(bucket,)))
                   for bucket in bucket_list]

        # close() has to come before join(): join() may only be called on a
        # pool that no longer accepts new work.
        p.close()
        p.join()

        # apply_async() keeps a worker's exception inside its result object;
        # fetch every result so a failed bucket is reported, not lost.
        for bucket, result in pending:
            try:
                result.get()
            except Exception as exc:
                print("failed to process bucket {}: {!r}".format(bucket, exc))
  • 相关阅读:
    3372 选学霸
    3556 科技庄园
    1025 选菜
    UVA 437 The Tower of Babylon巴比伦塔
    3641 上帝选人
    tyvj P1175 机器人
    1692 子集和的目标值
    1689 建造高塔
    NOI2002 贪吃的九头龙
    NYOJ110 剑客决斗
  • 原文地址:https://www.cnblogs.com/wangbin2188/p/12652959.html
Copyright © 2011-2022 走看看