zoukankan      html  css  js  c++  java
  • backup-analysis-barcode-distribute-recv-py

    analysis_barcode.py

    ---------

      1 # -*- coding:utf-8 -*-
      2 
      3 # python3
      4 
    import re
    import sys
    from collections import Counter
      7 
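    # Analyze the barcodes that were distributed (sent down) and received back:
    # 1. how many were distributed
    # 2. how many were received
    # 3. how many were distributed but never received
    # 4. how many were received without having been distributed

    # Distribution record format:
    # barcode=1234567890, machineNo=8, allowToFillCode=1, ctime=2020-07-22 07:27:39

    # Received record format:
    # time=2020-07-22 06:18:19, barcode=1234567890, machineNo=2, ...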
     19 
     20 distribute_file_name_pattern = "RecordBarcodeDistribute-2020-{}.txt"
     21 recv_file_name_pattern = "RecordRecvData-2020-{}.txt"
     22 
     23 
     24 def read_distributed_info(date):
     25     '''
     26     处理下发的数据
     27     返回文件行数, 和 设备号和向该设备下发过的条码列表的字典
     28     (rowCount, {"1": ["barcode0", "barcode1", ...], ...})
     29     '''
     30 
     31     def parse_distribute_row_info(row):
     32         matchInfo = re.match(r'^barcode=(.{10}), machineNo=(d+), allowToFillCode=(d)', row)
     33         if matchInfo is None:
     34             return None
     35         else:
     36             return ( matchInfo.group(1), matchInfo.group(2), matchInfo.group(3))
     37 
     38     def append_to(rowInfo, resultDict):
     39         barcode, machineNo, _ = rowInfo
     40         if machineNo not in resultDict:
     41             resultDict[machineNo] = []
     42         resultDict[machineNo].append(barcode)
     43 
     44     infos = {}
     45     rowCount = 0
     46 
     47     fileName = distribute_file_name_pattern.format(date)
     48     with open(fileName, "r") as distributeFile:
     49         for row in distributeFile:
     50             rowCount += 1
     51 
     52             parseResult = parse_distribute_row_info(row)
     53             if parseResult is None:
     54                 print("[WARN] not matched distribute info:", row)
     55             elif parseResult[2] == "1":
     56                 append_to(parseResult, infos)
     57             # else ignore
     58 
     59     return (rowCount, infos)
     60 
     61 
     62 
     63 def read_received_info(date):
     64     """
     65     处理回收的数据
     66     返回文件行数, 和 设备号和该设备返回的信息
     67     (rowCount, {"1", [{"time": "yyyy-MM-dd HH:mm:ss",
     68                        "barcode": "barcode-value"},
     69                       ...],
     70                 ...
     71                 } )
     72     """
     73 
     74     def parse_recv_row_info(row):
     75         matchInfo = re.match(r'^time=(.{19}), barcode=([^,]+), machineNo=(d+)', row)
     76         if matchInfo is None:
     77             return None
     78         else: 
     79             return {"machineNo": matchInfo.group(3),
     80                     "barcode" : matchInfo.group(2),
     81                     "time": matchInfo.group(1)}
     82 
     83     def append_to(rowInfo, resultDict):
     84         machineNo = rowInfo["machineNo"]
     85         if machineNo not in resultDict:
     86             resultDict[machineNo] = []
     87         resultDict[machineNo].append({"barcode" : rowInfo["barcode"], "time" : rowInfo["time"]})
     88 
     89 
     90     infos = {}
     91     rowCount = 0
     92 
     93     fileName = recv_file_name_pattern.format(date)
     94     with open(fileName, "r") as recvFile:
     95         for row in recvFile:
     96             rowCount += 1
     97 
     98             parseResult = parse_recv_row_info(row)
     99             if parseResult is None:
    100                 print("[WARN] not matched recv info:", row)
    101             else:
    102                 append_to(parseResult, infos)
    103 
    104     return (rowCount, infos)
    105 
    106 
    107 
    def calculate_data_count(data):
        """Return the total number of elements across all lists in *data*.

        Args:
            data: an iterable of sized containers (the script passes the
                ``.values()`` view of a machine-number -> list dictionary).

        Returns:
            The sum of ``len()`` of every container; ``0`` for empty input.
        """
        return sum(len(a_list) for a_list in data)
    113 
    def calculate_count_by_machine(distributeInfo, recvInfo):
        """Build per-machine distributed/received counts and their difference.

        Args:
            distributeInfo: dict mapping machine number -> list of distributed
                barcodes.
            recvInfo: dict mapping machine number -> list of received records.

        Returns:
            A list with one dict per machine that appears in either input,
            ordered by sorting the machine-number keys (plain comparison, so
            string keys sort as text). Each dict has the keys ``"machineNo"``,
            ``"distCount"``, ``"recvCount"`` and ``"diff"``
            (``recvCount - distCount``). A machine missing from one input counts
            as 0 there.
        """
        machineNos = sorted(set(distributeInfo) | set(recvInfo))

        result = []
        for machineNo in machineNos:
            # Use the function's own arguments; never a module-level variable.
            distCount = len(distributeInfo.get(machineNo, ()))
            recvCount = len(recvInfo.get(machineNo, ()))
            result.append({"machineNo": machineNo,
                           "distCount": distCount,
                           "recvCount": recvCount,
                           "diff": recvCount - distCount})

        return result
    130 
    131 
    def get_repeated_recv_barcodes(recvInfo):
        """Split received records into de-duplicated records and repeats.

        Within one machine, a record is a repeat when an earlier record of that
        machine has the same ``"barcode"`` and the same ``"time"``.

        Args:
            recvInfo: dict mapping machine number -> list of records, each a dict
                with ``"barcode"`` and ``"time"`` keys (values must be hashable,
                e.g. strings).

        Returns:
            ``(deDuplicationInfo, repeatedInfo)``:
            ``deDuplicationInfo`` has the same keys as *recvInfo*, each mapped to
            the first occurrence of every distinct record in original order;
            ``repeatedInfo`` is a list of ``(machineNo, barcode, time)`` tuples,
            one per repeated record.
        """
        repeatedInfo = []
        deDuplicationInfo = {}

        for machineNo, items in recvInfo.items():
            uniqueItems = []
            # Set of (barcode, time) pairs already kept for this machine, so each
            # membership test is constant-time instead of a scan of the list.
            seen = set()
            for item in items:
                key = (item["barcode"], item["time"])
                if key in seen:
                    repeatedInfo.append((machineNo, item["barcode"], item["time"]))
                else:
                    seen.add(key)
                    uniqueItems.append(item)
            deDuplicationInfo[machineNo] = uniqueItems

        return (deDuplicationInfo, repeatedInfo)
    153 
    154 
    def get_recv_but_not_distributed_barcodes(distributeInfo, recvInfo):
        """Find received barcodes that have no matching distribution.

        Matching is done per machine and one-to-one: each distributed barcode can
        account for at most one received record with the same barcode. Neither
        input is modified.

        Args:
            distributeInfo: dict mapping machine number -> list of distributed
                barcodes.
            recvInfo: dict mapping machine number -> list of records, each a dict
                with a ``"barcode"`` key.

        Returns:
            dict mapping machine number -> list of unmatched received barcodes,
            in received order. Machines with no unmatched barcode are omitted.
            A machine that appears only in *recvInfo* has all of its barcodes
            reported.
        """
        result = {}

        for machineNo, items in recvInfo.items():
            # Multiset of distributed barcodes still available for matching;
            # a machine with no distribution at all simply has none.
            remaining = Counter(distributeInfo.get(machineNo, ()))
            for item in items:
                barcode = item["barcode"]
                if remaining[barcode] > 0:
                    remaining[barcode] -= 1
                else:
                    result.setdefault(machineNo, []).append(barcode)

        return result
    172 
    173 
    def get_distributed_but_not_recv_barcodes(distributeInfo, recvInfo):
        """Find distributed barcodes that have no matching received record.

        Matching is done per machine and one-to-one: each received record can
        account for at most one distributed barcode with the same value. Neither
        input is modified.

        Args:
            distributeInfo: dict mapping machine number -> list of distributed
                barcodes.
            recvInfo: dict mapping machine number -> list of records, each a dict
                with a ``"barcode"`` key.

        Returns:
            dict mapping machine number -> list of unmatched distributed
            barcodes, in distribution order. Machines with no unmatched barcode
            are omitted. A machine that appears only in *distributeInfo* has all
            of its barcodes reported.
        """
        result = {}

        for machineNo, barcodes in distributeInfo.items():
            # Multiset of received barcodes still available for matching;
            # a machine that returned nothing simply has none.
            remaining = Counter(
                item["barcode"] for item in recvInfo.get(machineNo, ()))
            for barcode in barcodes:
                if remaining[barcode] > 0:
                    remaining[barcode] -= 1
                else:
                    result.setdefault(machineNo, []).append(barcode)

        return result
    212 
    213 
    214 
    if __name__ == "__main__":
        # The single command-line argument selects which day's files to analyze,
        # given as month-day (for example "04-01").
        if len(sys.argv) == 1:
            print("no input, stopped.")
            sys.exit(0)

        date = sys.argv[1]

        # Read both log files.
        disFileRowCount, distributedInfo = read_distributed_info(date)
        recvFileRowCount, recvInfo = read_received_info(date)

        # Total number of lines in each file.
        print("distributed file lines count:", disFileRowCount)
        print("received    file lines count:", recvFileRowCount)

        # Total number of distributed and received records.
        print("\ndistributed count:", calculate_data_count(distributedInfo.values()))
        print("received    count:", calculate_data_count(recvInfo.values()))

        # Per-machine distributed/received counts and their difference.
        print("\ncount by machine:")
        print("machine | distributed-count | recv-count | recv-count - distributed-count")
        for machineCountInfo in calculate_count_by_machine(distributedInfo, recvInfo):
            print("{machineNo:2} | {distCount:4} | {recvCount:4} | {diff:4}".format(**machineCountInfo))

        # Which received records are repeats, plus the de-duplicated records.
        print("\nrepeated recv barcode:\nmachine | barcode | time")
        deDuplicationRecvInfo, repeatInfo = get_repeated_recv_barcodes(recvInfo)
        for item in repeatInfo:
            print("{:2} | {:10} | {}".format(*item))
        print("\ncount:", len(repeatInfo))

        # Per-machine comparison again, this time after removing repeats,
        # followed by a totals row.
        print("\ncount by machine after de duplication:")
        print("machine | distributed-count | recv-count | recv-count - distributed-count")
        diffCountInfo = {"distCount" : 0, "recvCount": 0 , "diff" : 0}
        for machineCountInfo in calculate_count_by_machine(distributedInfo, deDuplicationRecvInfo):
            diffCountInfo["distCount"] += machineCountInfo["distCount"]
            diffCountInfo["recvCount"] += machineCountInfo["recvCount"]
            diffCountInfo["diff"] += machineCountInfo["diff"]
            print("{machineNo:5} | {distCount:4} | {recvCount:4} | {diff:4}".format(**machineCountInfo))
        print("count | {distCount:4} | {recvCount:4} | {diff:4}".format(**diffCountInfo))

        # Barcodes that were received without having been distributed.
        print("\nreceived but not distributed barcodes:")
        recvNonDistributedCount = 0
        for k, v in get_recv_but_not_distributed_barcodes(distributedInfo, deDuplicationRecvInfo).items():
            print(k, ": count:", len(v))
            recvNonDistributedCount += len(v)
            for barcode in v:
                print("    [{}]".format(barcode))
        print("count: ", recvNonDistributedCount)

        # Barcodes that were distributed but never received.
        print("\ndistributed but not received barcodes:")
        distributedNonRecvCount = 0
        for k, v in get_distributed_but_not_recv_barcodes(distributedInfo, deDuplicationRecvInfo).items():
            print(k, ": count:", len(v))
            distributedNonRecvCount += len(v)
            for barcode in v:
                print("    [{}]".format(barcode))
        print("count: ", distributedNonRecvCount)

    --------- THE END ---------

  • 相关阅读:
    教你如何有效防止DDos攻击?
    IsNumeric 判断字符串是否为数字(使用Val函数实现),这个函数相当于Java的IsNaN函数
    Delphi全角转半角
    SetLocalTime API函数设置本地时间(DateTimeToSystemTime函数,把TDateTime转换成TSystemTime)
    StyleBook皮肤控件的使用
    Qt 学习之路 2(19):事件的接受与忽略(当重写事件回调函数时,时刻注意是否需要通过调用父类的同名函数来确保原有实现仍能进行!有好几个例子。为什么要这么做?而不是自己去手动调用这两个函数呢?因为我们无法确认父类中的这个处理函数有没有额外的操作)
    C# 8.0、.NET Framework 4.8与NET Standard 2.1的一个说明
    基于PaaS和SaaS研发平台体系
    Redis 当成数据库在使用和可靠的分布式锁,Redlock 真的可行么?
    NET Core + Ocelot + IdentityServer4 + Consul
  • 原文地址:https://www.cnblogs.com/shadow-abyss/p/13383746.html
Copyright © 2011-2022 走看看