zoukankan      html  css  js  c++  java
  • bzip2 zip 压缩后体积比 0.8:1

     1、

    对.bz2 后缀文件 跳过不处理

    2、逐行同字段的json文件,压缩后大小为原文件的12.81%

    测试文件可近似视为逐行 JSON 文本数据;没有进行多文件重复测试,也没有统计耗时;

    {"uid":50013896,"uuid":"f32feacf-5f83-4866-8dfe-41bff794b8d4","ip":"666298884","site":0,"source":0,"address":"http://www.ijntv.cn/inews/55821723.html","engine":0,"referer":"","keyword":"","browser":11,"language":0,"screen_color":34,"screen_size":0,"system":14,"platform":61,"operator":2,"country":1,"province":0,"city":0,"flash":"0","java":"0","request_time":1547395198,"create_date":"2019-01-13 23:59:58"}
    {"uid":50015357,"uuid":"388b3676-8835-49b4-827b-5c1f3ddf6bc8","ip":"1973056862","site":0,"source":0,"address":"http://www.ijntv.cn/inews/55218551.html","engine":0,"referer":"","keyword":"","browser":11,"language":0,"screen_color":34,"screen_size":0,"system":14,"platform":61,"operator":2,"country":1,"province":0,"city":0,"flash":"0","java":"0","request_time":1547395198,"create_date":"2019-01-13 23:59:58"}
    {"uid":50016991,"uuid":"dbd44846-4b4a-4b26-aad2-8a70a7a31c74","ip":"2004569145","site":0,"source":0,"address":"http://www.ijntv.cn/inews/VVZv_q1-Hpas_pCYVW1sfg.html","engine":0,"referer":"","keyword":"","browser":11,"language":0,"screen_color":34,"screen_size":0,"system":14,"platform":61,"operator":1,"country":1,"province":6,"city":77,"flash":"0","java":"0","request_time":1547395198,"create_date":"2019-01-13 23:59:58"}
    {"uid":50001228,"uuid":"1b4908cd-1306-40e7-bd4e-df0372bcc749","ip":"3740751066","site":0,"source":0,"address":"http://www.ijntv.cn/inews/CKHzIMoRfJUYOkAwNZTfMg.html","engine":0,"referer":"","keyword":"","browser":11,"language":0,"screen_color":34,"screen_size":0,"system":14,"platform":61,"operator":1,"country":1,"province":14,"city":197,"flash":"0","java":"0","request_time":1547395199,"create_date":"2019-01-13 23:59:59"}
    
    {"ad_slots_id":1002,"uuid":"a369a303-1d70-49eb-9e73-7a2a8f028626","industry_pid":0,"industry_id":0,"ip":"1700604567","site":72,"address":"https://info.b2b168.com/s168-47325051.html","create_date":"2019-01-13 23:59:59","ad_id":"50012715","uid":"50012715","keyword":"u8bbeu5907","pageinfo":""}
    {"ad_slots_id":1002,"uuid":"a369a303-1d70-49eb-9e73-7a2a8f028626","industry_pid":0,"industry_id":0,"ip":"1700604567","site":72,"address":"https://info.b2b168.com/s168-47325051.html","create_date":"2019-01-13 23:59:59","ad_id":"50015314","uid":"50015314","keyword":"u5b81u6ce2u6536u94f6u8f6fu4ef6","pageinfo":""}
    {"ad_slots_id":1001,"uuid":"5eb7efec-9eb1-4493-9739-e466035606b4","industry_pid":0,"industry_id":0,"ip":"2029060375","site":70,"address":"http://www.jqw.com/Businfo/1688002049073.htm","create_date":"2019-01-13 23:59:59","ad_id":"50020536","uid":"50020536","keyword":"Supu53e3u7ea2u8272u53f7u63a8u8350","pageinfo":"u7545u9500u7684u56feu96c6u53f7u8fbd2011J606u63a8u8350 |u8ba2u8d2du56feu96c6u53f7u8fbd2011J606_u4f9bu6c42u5546u673a_u91d1u6cc9u7f51#|^#|^http://www.jqw.com/Businfo/1688002049073.htm"}
    {"ad_slots_id":1001,"uuid":"5eb7efec-9eb1-4493-9739-e466035606b4","industry_pid":0,"industry_id":0,"ip":"2029060375","site":70,"address":"http://www.jqw.com/Businfo/1688002049073.htm","create_date":"2019-01-13 23:59:59","ad_id":"34064333","uid":"34064333","keyword":"u8f6fu4ef6u8ba2u5236","pageinfo":""}
    {"ad_slots_id":1001,"uuid":"5eb7efec-9eb1-4493-9739-e466035606b4","industry_pid":0,"industry_id":0,"ip":"2029060375","site":70,"address":"http://www.jqw.com/Businfo/1688002049073.htm","create_date":"2019-01-13 23:59:59","ad_id":"50014483","uid":"50014483","keyword":"u5c71u6cc9u6c34u6279u53d1","pageinfo":""}
    {"ad_slots_id":1001,"uuid":"5eb7efec-9eb1-4493-9739-e466035606b4","industry_pid":0,"industry_id":0,"ip":"2029060375","site":70,"address":"http://www.jqw.com/Businfo/1688002049073.htm","create_date":"2019-01-13 23:59:59","ad_id":"34022975","uid":"34022975","keyword":"u718au638cu53f7","pageinfo":""}
    

      

      

    137M -rw-r--r-- 1 root root 137M Jan 10 11:45 visit-2019-01-10
    20M -rw-r--r-- 1 root root 20M Jan 10 11:48 visit-2019-01-10.zip

     bzip2  visit-2019-01-10

    16M -rw-r--r-- 1 root root 16M Jan 10 11:45 visit-2019-01-10.bz2
    20M -rw-r--r-- 1 root root 20M Jan 10 11:48 visit-2019-01-10.zip

    默认情况下,bzip2 theFile 会删除原文件,结果文件命名为 theFile.bz2

    压缩后的体积为zip的0.8

    bzip2 -9 visit-2019-01-03-u

    890M -rw-r--r-- 1 root root 890M Jan 10 11:59 visit-2019-01-03-u
    65M -rw-r--r-- 1 root root 65M Jan 10 11:59 visit-2019-01-03-u.bz2
    87M -rw-r--r-- 1 root root 87M Jan 10 12:00 visit-2019-01-03-u.zip

    压缩后的体积为zip的0.7475,为原始文件的0.0730

    用压缩后的文件覆盖原文件

    """Compress dated log files in a directory in place with ``bzip2 -9``.

    Usage: python <script> <targetDir> <passFeature>

    targetDir   -- directory that holds the log files.
    passFeature -- substring; every matched path that contains it is skipped
                   (e.g. ``2019-01``).
    """
    import sys, glob, os

    targetDir, passFeature = sys.argv[1], sys.argv[2]
    # Only names shaped like "<a>-<b>-<c>" (e.g. visit-2019-01-10) are candidates.
    file_feature = '*-*-*'
    # os.path.join keeps the pattern correct whether or not targetDir ends in "/".
    targetGlob = os.path.join(targetDir, file_feature)
    LocalFiles = glob.glob(targetGlob)
    for i in LocalFiles:
        if passFeature in i:
            continue
        # bzip2 rejects input that already carries the .bz2 suffix, so there is
        # no point in spawning a process for it.
        if i.endswith('.bz2'):
            continue
        # Without -k, bzip2 replaces the input file with <name>.bz2.
        cmd = 'cd {};bzip2 -9 {}'.format(targetDir, i)
        print(cmd)
        status = os.system(cmd)
        if status != 0:
            # Report the failure but keep going with the remaining files.
            sys.stderr.write('command failed (status {}): {}\n'.format(status, cmd))


    [root@a data]# tree testBiz2Py/
    testBiz2Py/
    ├── 2-23-3
    ├── 2-23-a
    ├── 2-23-b
    ├── a
    └── b

    0 directories, 5 files
    [root@a data]# python bzip2Action/biz2SaveCost.py /data/testBiz2Py/ b
    cd /data/testBiz2Py/;bzip2 -9 /data/testBiz2Py/2-23-a
    cd /data/testBiz2Py/;bzip2 -9 /data/testBiz2Py/2-23-3
    [root@a data]# tree testBiz2Py/
    testBiz2Py/
    ├── 2-23-3.bz2
    ├── 2-23-a.bz2
    ├── 2-23-b
    ├── a
    └── b

    0 directories, 5 files

     cd /data;du --max-depth=2 -h ./;python bzip2Action/biz2SaveCost.py /data/visitlog/ 2019-01


    压缩前

    [root@a data]# du --max-depth=2 -h ./
    141G ./unionlog
    8.0K ./bzip2Action
    21G ./visitlog
    169G ./
    [root@a data]# tree visitlog/
    visitlog/
    ├── visit-2018-09-18
    ├── visit-2018-09-19
    ├── visit-2018-09-20

    [root@b ~]# cd /data;du --max-depth=2 -h ./;python bzip2Action/biz2SaveCost.py /data/visitlog/ 2019-01

    19G ./visitlog
    104G ./unionlog


    1.1T ./
    cd /data/visitlog/;bzip2 -9 /data/visitlog/visit-2018-09-19
    cd /data/visitlog/;bzip2 -9 /data/visitlog/visit-2018-09-25

    [root@c ~]# cd /data;du --max-depth=2 -h ./;python bzip2Action/biz2SaveCost.py /data/visitlog/ 2019-01

    21G ./visitlog
    141G ./unionlog


    940G ./
    cd /data/visitlog/;bzip2 -9 /data/visitlog/visit-2018-11-24
    cd /data/visitlog/;bzip2 -9 /data/visitlog/visit-2018-11-01
    cd /data/visitlog/;bzip2 -9 /data/visitlog/visit-2018-11-19
    cd /data/visitlog/;bzip2 -9 /data/visitlog/visit-2018-10-22

    统计压缩速度

    单个文件的平均速度

    总数据量的平均速度

    注意增加计算压缩率的功能代码

    # -*- coding: utf-8 -*-
    """Measure per-file and overall throughput (MB/s) of compressing log files.

    Usage: python <script> <targetDir> <passFeature>

    targetDir   -- directory that holds the log files.
    passFeature -- substring; every matched path that contains it is skipped.

    NOTE: the real ``os.system(cmd)`` call is commented out below and replaced
    by a random sleep of up to 10 seconds per file, so the figures printed by
    this version are synthetic until the call is re-enabled.
    """
    import sys, glob, os, time
    import random

    targetDir, passFeature = sys.argv[1], sys.argv[2]
    file_feature = '*-*-*'
    # os.path.join keeps the pattern correct whether or not targetDir ends in "/".
    targetGlob = os.path.join(targetDir, file_feature)
    LocalFiles = glob.glob(targetGlob)
    allMB, allSeconds, singleSeconds = 0, 0, []
    for i in LocalFiles:
        if passFeature in i:
            continue
        # Already-compressed files are rejected by bzip2 almost instantly and
        # would show up as absurdly fast outliers in the statistics.
        if i.endswith('.bz2'):
            continue

        # Enter the source directory; bzip2 replaces the original file.
        cmd = 'cd {};bzip2 -9 {}'.format(targetDir, i)

        # Float division so sizes are not truncated to whole MB on Python 2.
        fileMB = os.stat(i).st_size / 1024.0 / 1024.0
        t_start = time.time()
        print(cmd)
        # os.system(cmd)
        t = random.random() * 10
        time.sleep(t)
        t_end = time.time()
        fileSeconds = t_end - t_start
        allMB += fileMB
        allSeconds += fileSeconds
        if fileSeconds > 0:
            singleSeconds.append(fileMB / fileSeconds)

    # Per-file speeds, sorted from slowest to fastest.
    singleSeconds = sorted(singleSeconds)
    # Avoid ZeroDivisionError when no file was selected.
    averageSpeed = allMB / allSeconds if allSeconds else 0.0
    print('averageSpeed(MB/s):', averageSpeed)
    print('singleSeconds(MB/s):', singleSeconds)


    压缩:主要消耗cpu,计算密集型


    压缩后

    [root@b data]# cd /data;du --max-depth=2 -h ./;

    8.0K ./bzip2Action
    4.6G ./visitlog
    104G ./unionlog

    1016G ./
    [root@b data]#

    压缩前后比值19G:4.6G =1: 0.2421052631578947,

    a节点
    4.9G    ./visitlog

    21G:4.9G= 1:0.21904761904761902



    c节点

    4.9G ./visitlog

    同a节点

    [root@a data]# tree visitlog/ -h
    visitlog/
    ├── [6.2M] visit-2018-09-18.bz2
    ├── [8.4M] visit-2018-09-19.bz2
    ├── [8.3M] visit-2018-09-20.bz2
    ├── [8.8M] visit-2018-09-21.bz2
    ├── [8.7M] visit-2018-09-22.bz2
    ├── [7.5M] visit-2018-09-23.bz2
    ├── [7.4M] visit-2018-09-24.bz2
    ├── [8.8M] visit-2018-09-25.bz2
    ├── [9.3M] visit-2018-09-26.bz2
    ├── [9.6M] visit-2018-09-27.bz2
    ├── [ 12M] visit-2018-09-28.bz2
    ├── [ 15M] visit-2018-09-29.bz2
    ├── [ 15M] visit-2018-09-30.bz2
    ├── [ 13M] visit-2018-10-01.bz2
    ├── [ 13M] visit-2018-10-02.bz2
    ├── [ 14M] visit-2018-10-03.bz2
    ├── [ 14M] visit-2018-10-04.bz2
    ├── [ 15M] visit-2018-10-05.bz2
    ├── [ 15M] visit-2018-10-06.bz2
    ├── [ 15M] visit-2018-10-07.bz2
    ├── [ 17M] visit-2018-10-08.bz2
    ├── [ 16M] visit-2018-10-09.bz2
    ├── [ 17M] visit-2018-10-10.bz2
    ├── [ 15M] visit-2018-10-11.bz2
    ├── [ 16M] visit-2018-10-12.bz2
    ├── [ 16M] visit-2018-10-13.bz2
    ├── [ 23M] visit-2018-10-14.bz2
    ├── [ 28M] visit-2018-10-15.bz2
    ├── [ 25M] visit-2018-10-16.bz2
    ├── [ 21M] visit-2018-10-17.bz2
    ├── [ 23M] visit-2018-10-18.bz2
    ├── [ 21M] visit-2018-10-19.bz2
    ├── [ 21M] visit-2018-10-20.bz2
    ├── [ 24M] visit-2018-10-21.bz2
    ├── [ 18M] visit-2018-10-22.bz2
    ├── [ 20M] visit-2018-10-23.bz2
    ├── [ 20M] visit-2018-10-24.bz2
    ├── [ 20M] visit-2018-10-25.bz2
    ├── [ 21M] visit-2018-10-26.bz2
    ├── [ 20M] visit-2018-10-27.bz2
    ├── [ 18M] visit-2018-10-28.bz2
    ├── [ 21M] visit-2018-10-29.bz2
    ├── [ 22M] visit-2018-10-30.bz2
    ├── [ 21M] visit-2018-10-31.bz2
    ├── [ 22M] visit-2018-11-01.bz2
    ├── [ 21M] visit-2018-11-02.bz2
    ├── [9.8M] visit-2018-11-03.bz2
    ├── [7.6M] visit-2018-11-04.bz2
    ├── [9.7M] visit-2018-11-05.bz2
    ├── [9.6M] visit-2018-11-06.bz2
    ├── [9.5M] visit-2018-11-07.bz2
    ├── [ 19M] visit-2018-11-08.bz2
    ├── [ 12M] visit-2018-11-09.bz2
    ├── [ 12M] visit-2018-11-10.bz2
    ├── [ 11M] visit-2018-11-11.bz2
    ├── [ 13M] visit-2018-11-12.bz2
    ├── [ 14M] visit-2018-11-13.bz2
    ├── [ 16M] visit-2018-11-14.bz2
    ├── [ 16M] visit-2018-11-15.bz2
    ├── [ 15M] visit-2018-11-16.bz2
    ├── [ 15M] visit-2018-11-17.bz2
    ├── [ 17M] visit-2018-11-18.bz2
    ├── [ 18M] visit-2018-11-19.bz2
    ├── [ 16M] visit-2018-11-20.bz2
    ├── [ 20M] visit-2018-11-21.bz2
    ├── [ 22M] visit-2018-11-22.bz2
    ├── [ 13M] visit-2018-11-23.bz2
    ├── [ 11M] visit-2018-11-24.bz2
    ├── [ 11M] visit-2018-11-25.bz2
    ├── [ 11M] visit-2018-11-26.bz2
    ├── [9.7M] visit-2018-11-27.bz2
    ├── [8.0M] visit-2018-11-28.bz2
    ├── [ 12M] visit-2018-11-29.bz2
    ├── [ 15M] visit-2018-11-30.bz2
    ├── [ 15M] visit-2018-12-01.bz2
    ├── [ 16M] visit-2018-12-02.bz2
    ├── [ 20M] visit-2018-12-03.bz2
    ├── [ 21M] visit-2018-12-04.bz2
    ├── [ 23M] visit-2018-12-05.bz2
    ├── [ 25M] visit-2018-12-06.bz2
    ├── [ 32M] visit-2018-12-07.bz2
    ├── [ 36M] visit-2018-12-08.bz2
    ├── [ 35M] visit-2018-12-09.bz2
    ├── [ 37M] visit-2018-12-10.bz2
    ├── [ 38M] visit-2018-12-11.bz2
    ├── [ 35M] visit-2018-12-12.bz2
    ├── [ 35M] visit-2018-12-13.bz2
    ├── [ 30M] visit-2018-12-14.bz2
    ├── [ 32M] visit-2018-12-15.bz2
    ├── [ 31M] visit-2018-12-16.bz2
    ├── [ 39M] visit-2018-12-17.bz2
    ├── [ 39M] visit-2018-12-18.bz2
    ├── [ 38M] visit-2018-12-19.bz2
    ├── [ 29M] visit-2018-12-20.bz2
    ├── [ 43M] visit-2018-12-21.bz2
    ├── [ 37M] visit-2018-12-22.bz2
    ├── [ 35M] visit-2018-12-23.bz2
    ├── [ 38M] visit-2018-12-24.bz2
    ├── [ 38M] visit-2018-12-25.bz2
    ├── [ 36M] visit-2018-12-26.bz2
    ├── [ 38M] visit-2018-12-27.bz2
    ├── [ 38M] visit-2018-12-28.bz2
    ├── [ 37M] visit-2018-12-29.bz2
    ├── [ 30M] visit-2018-12-30.bz2
    ├── [ 35M] visit-2018-12-31.bz2
    ├── [296M] visit-2019-01-01
    ├── [345M] visit-2019-01-02
    ├── [397M] visit-2019-01-03
    ├── [331M] visit-2019-01-04
    ├── [300M] visit-2019-01-05
    ├── [312M] visit-2019-01-06
    ├── [311M] visit-2019-01-07
    ├── [154M] visit-2019-01-08
    ├── [173M] visit-2019-01-09
    └── [176M] visit-2019-01-10

    0 directories, 115 files
    [root@a data]#

    [root@a tmp]# ll -ash
    total 32K
    4.0K drwxr-xr-x 2 root root 4.0K Jan 11 14:22 .
    4.0K drwxr-xr-x 17 root root 4.0K Jan 10 16:51 ..
    24K -rw-r--r-- 1 root root 21K Jan 11 14:22 a
    [root@a tmp]# bzip2 -9 a
    [root@a tmp]# ll -as
    total 12
    4 drwxr-xr-x 2 root root 4096 Jan 11 14:22 .
    4 drwxr-xr-x 17 root root 4096 Jan 10 16:51 ..
    4 -rw-r--r-- 1 root root 1036 Jan 11 14:22 a.bz2
    [root@a tmp]# bzip2 -9 a.bz2
    bzip2: Input file a.bz2 already has .bz2 suffix.
    [root@a tmp]# ll -as
    total 12
    4 drwxr-xr-x 2 root root 4096 Jan 11 14:22 .
    4 drwxr-xr-x 17 root root 4096 Jan 10 16:51 ..
    4 -rw-r--r-- 1 root root 1036 Jan 11 14:22 a.bz2
    [root@a tmp]#

     cd /data;du --max-depth=2 -h ./;python bzip2Action/biz2SaveCost.py /data/visitlog/ 2019-01

    2019年1月14日

    c

    6.0G ./visitlog
    20G ./unionlog

    b

    5.8G ./visitlog
    18G ./unionlog

    a

    20G     ./unionlog

    6.0G    ./visitlog


    27M -rw-r--r-- 1 nginx nginx 27M Dec 30 23:59 visit-2018-12-30.bz2
    36M -rw-r--r-- 1 nginx nginx 36M Dec 31 23:59 visit-2018-12-31.bz2
    312M -rw-r--r-- 1 nginx nginx 312M Jan 6 23:59 visit-2019-01-06
    312M -rw-r--r-- 1 nginx nginx 312M Jan 7 23:59 visit-2019-01-07

    44M -rw-r--r-- 1 nginx nginx 44M Dec 30 23:59 visit-2018-12-30.bz2
    53M -rw-r--r-- 1 nginx nginx 53M Dec 31 23:59 visit-2018-12-31.bz2

    882M -rw-r--r-- 1 nginx nginx 882M Jan 11 23:59 visit-2019-01-11
    745M -rw-r--r-- 1 nginx nginx 745M Jan 12 23:59 visit-2019-01-12
    707M -rw-r--r-- 1 nginx nginx 707M Jan 13 23:59 visit-2019-01-13
    232M -rw-r--r-- 1 nginx nginx 232M Jan 14 09:21 visit-2019-01-14

    压缩率计算

    因为每日都有新文件写入,处理前的数据没有记录,如果不解压还原数据的话,无法计算准确的压缩率

     压缩速度计算

     认为cpu、内存资源充足

    控制台输出的日志

    cd /data/unionlog/;bzip2 -9 /data/unionlog/visit-2018-11-09.bz2
    bzip2: Input file /data/unionlog/visit-2018-11-09.bz2 already has .bz2 suffix.
    ('averageSpeed(MB/s):', 4.211187493937172)
    ('singleSeconds(MB/s):', [3.9369898031816426, 3.953846125040358, 3.9544741312123928, 3.9555894807291088, 3.96099337092276, 3.983298697446923, 3.9966511209824667, 4.007815560864753, 4.013902687515588, 4.015872734532144, 4.015899236549791, 4.015963246206192, 4.01612198327753, 4.023731445780551, 4.025416758738823, 4.025951959772834, 4.030831979910141, 4.039693901910457, 4.0399486196050765, 4.040242824350764, 4.040648424669689, 4.041098180762507, 4.043051325648554, 4.051655360512291, 4.056658593948987, 4.059627164614112, 4.070020953590698, 4.073870225127285, 4.07503751826594, 4.075686989285653, 4.080265084217549, 4.082345972466677, 4.090936968718271, 4.0944335040477275, 4.099429160013611, 4.102229025161095, 4.104974537958556, 4.110126096413723, 4.118484472726296, 4.119251467116442, 4.121534548809426, 4.125553711713982, 4.12775144900931, 4.129621429296399, 4.129656881725015, 4.1315901550586105, 4.131830165781944, 4.143680130292085, 4.145293603443776, 4.146942161873823, 4.147313376948774, 4.148370367740056, 4.151411958798099, 4.153755223178981, 4.161263788273014, 4.164412810381955, 4.166850751469844, 4.167063598601332, 4.169355624609407, 4.176170448673875, 4.1940635910827355, 4.195109540816128, 4.2000814466148055, 4.200333163905996, 4.2022824476243406, 4.202418248410636, 4.20572094512217, 4.212585249380411, 4.218441487185745, 4.427734600215904, 4.837932137856126, 5.076886456535105, 6.319574088013213, 6.375565540330376, 7.40075797478448, 8.700273234442928, 9.955987719965876, 10.49359459267714, 10.496288104978296, 10.910080297989559, 13.4994372035219, 14.011910382913635, 14.077535801136763, 14.982532672419739, 15.577184977610813, 16.567526614277405, 16.72303989453991, 17.031745290872077, 19.135089160791104, 19.481129833087913, 20.071232748258293, 20.639159774908073, 28.797696550990196, 29.481318443179987, 32.960199161359675, 35.348744965782345, 35.914231265432065, 44.91248986354474, 56.068395022554554, 63.90561770409619, 67.3045079377858, 80.84762211431958, 
88.98706103023787, 175.14815255493826, 557.3579021970233])
    [root@a data]#
    

      

    cd /data/unionlog/;bzip2 -9 /data/unionlog/visit-2018-11-03.bz2
    bzip2: Input file /data/unionlog/visit-2018-11-03.bz2 already has .bz2 suffix.
    ('averageSpeed(MB/s):', 4.037645142862762)
    ('singleSeconds(MB/s):', [3.4115271073680473, 3.5042062998346606, 3.509713341704194, 3.525571499281982, 3.5898929553667154, 3.6505914130679624, 3.7138527066218354, 3.7231339152271996, 3.7267693810378284, 3.7292860405119153, 3.7299857191562316, 3.7326899795857953, 3.756952872366287, 3.757740179198384, 3.758101035864619, 3.762634699575258, 3.771730878546173, 3.7786267621034892, 3.796819445397061, 3.8048003527368794, 3.8085626615863237, 3.8112231318976035, 3.8156227214117053, 3.841775745310672, 3.848201931373685, 3.851350834838122, 3.8566428423319925, 3.857162507505528, 3.863292589421678, 3.863331261341491, 3.8643059756625355, 3.8917795293132476, 3.8927296353495002, 3.893436977500035, 3.8935765838449194, 3.8965081510857744, 3.90837814215203, 3.9189434690852534, 3.931661792967054, 3.9543185154898364, 3.962796230312998, 3.9670385630201, 4.002202845736961, 4.140499586487628, 4.2275292796865545, 4.606845720648893, 4.712329383339015, 4.723474167059724, 4.763673069508994, 4.8135033859300425, 4.842742592123715, 4.950956959538387, 4.964229453203472, 5.00257129469767, 7.159600281109767, 7.301770358234334, 7.7868991551617475, 8.339245078376065, 9.1340349502132, 9.325438851566286, 9.746514302246108, 10.417916214518563, 10.505980787495512, 10.672366971761184, 10.932940199850863, 10.976049990046109, 11.65977574461905, 11.955475170447237, 12.03807663466081, 12.591329733176506, 13.033587237840626, 13.06160835399656, 13.063173024665307, 13.130837040649459, 13.3820749715684, 13.770620023288442, 14.631663068222947, 15.281334268265432, 20.345541381319876, 23.357577176500726, 23.618352333724083, 26.66225279976816, 28.154498014416912, 28.77254085414158, 33.61942092676073, 38.447972182087994, 296.4583324183667, 1415.6438387404276])
    [root@b data]#
    

      

    cd /data/unionlog/;bzip2 -9 /data/unionlog/visit-2018-11-30.bz2
    bzip2: Input file /data/unionlog/visit-2018-11-30.bz2 already has .bz2 suffix.
    ('averageSpeed(MB/s):', 4.100344833546164)
    ('singleSeconds(MB/s):', [3.7181192973321773, 3.7257172341979174, 3.745482158633604, 3.7692114529613185, 3.7872264010472043, 3.7884661064039555, 3.8186428004503985, 3.82068338626644, 3.8231526245648015, 3.83125733853526, 3.8345807959000737, 3.8487431513676458, 3.8533984710523392, 3.8766502888766508, 3.8792671200198057, 3.8794958504143318, 3.8864832683027672, 3.8874083389735117, 3.890098049760509, 3.9004211061797065, 3.9012259669791716, 3.904081019180935, 3.909088169138795, 3.9100598586939057, 3.9182811647981137, 3.9223673146999176, 3.936911082605703, 3.938102928814517, 3.9431581709314845, 3.9469155257226864, 3.9477303083616584, 3.9510218414752734, 3.9544768734685007, 3.9561312758351868, 3.9603868364070123, 3.960529493355076, 3.973218434311659, 3.973952987812832, 3.9750079546493047, 3.9769556093199063, 3.990533382215301, 3.9908648419479373, 3.99253131026352, 3.9993173904820893, 4.000028933408353, 4.009540707394956, 4.0167451008623525, 4.01694265894807, 4.021467667067072, 4.025888190955974, 4.029591312996541, 4.034701091498445, 4.051077667021889, 4.051677223836611, 4.05476273834563, 4.063306221670503, 4.067358092550384, 4.068730730698932, 4.102108601845601, 4.106712519686551, 4.112994160199945, 4.123322845773183, 4.124306143488609, 4.14616216102037, 4.190121259265525, 4.2012301048613345, 4.231113928027722, 4.635490375664297, 4.672385583985039, 4.693718071514089, 4.723321575516211, 4.723548196405968, 4.786506177340032, 4.850884632133513, 4.859158112858001, 6.1953653787024, 9.018906346164437, 9.323122551505794, 10.900660226263645, 11.832227279482243, 12.166883663453696, 12.860438635558914, 13.911042528296319, 13.942537479159501, 14.251319697516088, 15.801254911294155, 18.116126384680413, 19.22717367957711, 20.43278105002856, 20.50204524305958, 21.19584266823886, 27.97025937406051, 28.08823466724362, 35.51835114999952, 36.066163946710745, 37.79058467225096, 46.753210686310574, 47.149060556499826, 55.56960474674869, 67.19344402304698, 67.72819158484143, 
88.15080202857988, 90.13568800307546, 161.22373765616393, 184.2411337594747])
    [root@c data]#
    

      

    由于统计时没有标识或过滤 .bz2 文件,认为压缩速度约为 4MB/s;

    # unionlog: assume that between the measurement on the morning of the 11th
    # and the one on the morning of the 14th the directory grew by four
    # calendar days of new, still uncompressed logs.
    # Estimated growth in GB: mean daily size in MB (882, 745, 707) * 4 / 1024.
    res = [20, 18, 20]     # directory sizes (GB) measured after compression
    ori = [141, 104, 141]  # directory sizes (GB) measured before compression
    # Float literal keeps the arithmetic exact on Python 2 as well as Python 3.
    cut = (882 + 745 + 707) / 3.0 * 4 / 1024
    # Subtract the estimated uncompressed growth before comparing with the
    # original size, so only the compressed data is counted.
    compression_ratio = [(after - cut) / float(before) for after, before in zip(res, ori)]
    print(compression_ratio)
    print('压缩率均值', sum(compression_ratio) / len(compression_ratio))


    [0.12029033687943262, 0.14385516826923078, 0.12029033687943262]
    压缩率均值 0.128145280676032

    # -*- coding: utf-8 -*-
    """Decompress ``.bz2`` log files in place and report speed and compression ratio.

    Usage: python <script> <targetDir> <passFeature>

    targetDir   -- directory that holds the ``.bz2`` files.
    passFeature -- substring; every matched path that contains it is skipped.

    For each processed file the script records the decompression speed
    (compressed MB per second) and the compression ratio (compressed size
    divided by decompressed size), then prints the totals once at the end.
    """
    import sys, glob, os, time

    targetDir, passFeature = sys.argv[1], sys.argv[2]
    file_feature = '*-*-*'
    bz2_suffix = '.bz2'
    # os.path.join keeps the pattern correct whether or not targetDir ends in "/".
    targetGlob = os.path.join(targetDir, file_feature)
    LocalFiles = glob.glob(targetGlob)
    allMBCompressed, allSeconds, singleSeconds = 0, 0, []
    singleCompressionRatio = []
    for i in LocalFiles:
        if not i.endswith(bz2_suffix):
            continue
        if passFeature in i:
            continue

        # Enter the source directory; without -k, bzip2 -d replaces the .bz2
        # file with the decompressed file.
        cmd = 'cd {};bzip2 -d {}'.format(targetDir, i)
        print(cmd)
        # Size must be read before the command runs, because the .bz2 file is
        # gone afterwards. Float division avoids truncation on Python 2.
        fileMBCompressed = os.stat(i).st_size / 1024.0 / 1024.0
        t_start = time.time()
        os.system(cmd)
        t_end = time.time()
        try:
            # Slice the suffix off: str.strip('.bz2') would strip any of the
            # characters '.', 'b', 'z', '2' from both ends and mangle names
            # such as visit-2018-12-22.
            decompressedPath = i[:-len(bz2_suffix)]
            fileMBDecompressed = os.stat(decompressedPath).st_size / 1024.0 / 1024.0
            fileSeconds = t_end - t_start
            fileSpeed = fileMBCompressed / fileSeconds
            fileRatio = fileMBCompressed / fileMBDecompressed
        except (OSError, ZeroDivisionError) as e:
            # Decompression failed (no output file) or a degenerate measurement;
            # report it and leave the totals untouched.
            print(e)
            continue
        allMBCompressed += fileMBCompressed
        allSeconds += fileSeconds
        singleSeconds.append(fileSpeed)
        singleCompressionRatio.append(fileRatio)

    # Per-file speeds, sorted from slowest to fastest.
    singleSeconds = sorted(singleSeconds)

    # Avoid ZeroDivisionError when no file was processed.
    averageSpeed = allMBCompressed / allSeconds if allSeconds else 0.0
    print('averageSpeed(MB/s):', averageSpeed)
    print('singleSeconds(MB/s):', singleSeconds)

    print('singleCompressionRatio:', singleCompressionRatio)
    # Only the arithmetic mean of the per-file ratios is reported (no harmonic mean).
    if singleCompressionRatio:
        print('arithmeticAverageSingleCompressionRatio:', sum(singleCompressionRatio) / len(singleCompressionRatio))
    cd /data/unionlog/;bzip2 -9 /data/unionlog/visit-2018-11-03.bz2
    bzip2: Input file /data/unionlog/visit-2018-11-03.bz2 already has .bz2 suffix.
    ('averageSpeed(MB/s):', 4.037645142862762)
    ('singleSeconds(MB/s):', [3.4115271073680473, 3.5042062998346606, 3.509713341704194, 3.525571499281982, 3.5898929553667154, 3.6505914130679624, 3.7138527066218354, 3.7231339152271996, 3.7267693810378284, 3.7292860405119153, 3.7299857191562316, 3.7326899795857953, 3.756952872366287, 3.757740179198384, 3.758101035864619, 3.762634699575258, 3.771730878546173, 3.7786267621034892, 3.796819445397061, 3.8048003527368794, 3.8085626615863237, 3.8112231318976035, 3.8156227214117053, 3.841775745310672, 3.848201931373685, 3.851350834838122, 3.8566428423319925, 3.857162507505528, 3.863292589421678, 3.863331261341491, 3.8643059756625355, 3.8917795293132476, 3.8927296353495002, 3.893436977500035, 3.8935765838449194, 3.8965081510857744, 3.90837814215203, 3.9189434690852534, 3.931661792967054, 3.9543185154898364, 3.962796230312998, 3.9670385630201, 4.002202845736961, 4.140499586487628, 4.2275292796865545, 4.606845720648893, 4.712329383339015, 4.723474167059724, 4.763673069508994, 4.8135033859300425, 4.842742592123715, 4.950956959538387, 4.964229453203472, 5.00257129469767, 7.159600281109767, 7.301770358234334, 7.7868991551617475, 8.339245078376065, 9.1340349502132, 9.325438851566286, 9.746514302246108, 10.417916214518563, 10.505980787495512, 10.672366971761184, 10.932940199850863, 10.976049990046109, 11.65977574461905, 11.955475170447237, 12.03807663466081, 12.591329733176506, 13.033587237840626, 13.06160835399656, 13.063173024665307, 13.130837040649459, 13.3820749715684, 13.770620023288442, 14.631663068222947, 15.281334268265432, 20.345541381319876, 23.357577176500726, 23.618352333724083, 26.66225279976816, 28.154498014416912, 28.77254085414158, 33.61942092676073, 38.447972182087994, 296.4583324183667, 1415.6438387404276])
    

      


    bzip2, a block-sorting file compressor. Version 1.0.6, 6-Sept-2010.

    usage: bzip2 [flags and input files in any order]

    -h --help print this message
    -d --decompress force decompression
    -z --compress force compression
    -k --keep keep (don't delete) input files
    -f --force overwrite existing output files
    -t --test test compressed file integrity
    -c --stdout output to standard out
    -q --quiet suppress noncritical error messages
    -v --verbose be verbose (a 2nd -v gives more)
    -L --license display software version & license
    -V --version display software version & license
    -s --small use less memory (at most 2500k)
    -1 .. -9 set block size to 100k .. 900k
    --fast alias for -1
    --best alias for -9

    If invoked as `bzip2', default action is to compress.
    as `bunzip2', default action is to decompress.
    as `bzcat', default action is to decompress to stdout.

    If no file names are given, bzip2 compresses or decompresses
    from standard input to standard output. You can combine
    short flags, so `-v -4' means the same as -v4 or -4v, &c.








  • 相关阅读:
    JAVA 读取excel文件成List<Entity>
    JAVA 下载单个文件
    js替换指定位置字符串
    Java学习——继承
    Java——单例设计模式
    Java学习——static关键字
    个人成长阶段
    Android_xml背景色的值
    Android开发_关于点击事件
    Android开发_关于中英文切换
  • 原文地址:https://www.cnblogs.com/rsapaper/p/10249573.html
Copyright © 2011-2022 走看看