zoukankan      html  css  js  c++  java
  • 找出n个数中重复最多的10个数

    题目很清晰,直接上 Python 代码。思路:用一个字典统计每个数出现的次数,同时维护一个最多含 10 个元素的候选字典;候选超过 10 个时,剔除其中出现次数最少的一项。

    import pandas as pd
    import copy
    
    class BenchMark:
        """Record of a minimum count (``MIN``) and the number it belongs to (``data``).

        ``MIN == 10000`` together with ``data == 0`` is the "nothing recorded yet"
        state; FindTop10 compares against this 10000 value to detect it.
        """

        def __init__(self):
            # A fresh instance starts in exactly the state Reset() produces.
            self.Reset()

        def Reset(self):
            """Put the record back into its initial, empty state."""
            self.MIN, self.data = 10000, 0
    
    # number -> how many times it has been seen so far (every distinct number)
    dictCounts = dict()
    # number -> count, for the current top-10 candidates only
    dictTop10_D2C = dict()

    # Scratch record filled in while scanning the candidates for their minimum,
    # and a snapshot of the record from the most recent eviction.
    BENCH_MARK, LAST_BENCH_MARK = BenchMark(), BenchMark()

    # Instrumentation only: calls to FindTop10 / candidate entries scanned.
    run_count1 = run_count2 = 0
    
    def FindTop10(data):
        """Feed one number into the running "10 most frequent numbers" state.

        Updates the module-level state in place:

        * ``dictCounts[data]`` is incremented (created at 1 on first sight).
        * ``dictTop10_D2C`` is kept at no more than 10 entries; when an insert
          makes it 11, the entry with the smallest count is evicted (the first
          such entry in dict iteration order on ties).
        * ``BENCH_MARK`` / ``LAST_BENCH_MARK`` record the evicted number and its
          count; ``LAST_BENCH_MARK.MIN`` is then used as a cheap lower bound to
          skip numbers that cannot enter the candidates.
        * ``run_count1`` counts calls, ``run_count2`` counts candidate entries
          examined during eviction scans.

        Args:
            data: the number to count; it must be hashable because it is used
                as a dict key.

        Returns:
            None.
        """
        global LAST_BENCH_MARK, run_count1, run_count2

        count = dictCounts.get(data, 0) + 1
        dictCounts[data] = count

        # just record run times
        run_count1 += 1

        # MIN == 10000 is BenchMark's "no eviction recorded" state, so the
        # shortcut only applies once a real eviction has set a lower bound.
        if LAST_BENCH_MARK.MIN != 10000 and count < LAST_BENCH_MARK.MIN:
            return

        dictTop10_D2C[data] = count
        if len(dictTop10_D2C) <= 10:
            return

        # just record run times: one unit per candidate examined
        run_count2 += len(dictTop10_D2C)

        # Pick the weakest candidate directly instead of scanning against the
        # 10000 sentinel: the sentinel-based scan found nothing once every
        # candidate count reached 10000, and then popped the bogus key 0.
        weakest = min(dictTop10_D2C, key=dictTop10_D2C.get)
        BENCH_MARK.MIN = dictTop10_D2C[weakest]
        BENCH_MARK.data = weakest
        LAST_BENCH_MARK = copy.deepcopy(BENCH_MARK)
        dictTop10_D2C.pop(weakest)
    
    def PrintData2Count(aDict):
        """Print each entry of ``aDict`` on its own line as ``key:value``.

        Keys are rendered with one decimal place and values as integers, in the
        dict's iteration order.
        """
        for number, occurrences in aDict.items():
            print('%.1f:%d' % (number, occurrences))
    
    if __name__ == '__main__':
        # CSV file whose 'LastPx' column supplies the numbers to be counted.
        csv_path = 'D:/data/ctp_data/rb/201709/rb1801_20170905.csv'
        frame = pd.read_csv(csv_path)

        # Stream the column through the counter one value at a time.
        for data in frame['LastPx']:
            FindTop10(data)

        # Full frequency table first, then the surviving top-10 candidates.
        PrintData2Count(dictCounts)
        print("==============dictCounts length:", len(dictCounts))
        PrintData2Count(dictTop10_D2C)

        # Instrumentation: number of calls and of candidate entries scanned.
        print("run_count1:%d,run_count2:%d" %(run_count1,run_count2))

    运行结果如下:

    ……(前面的输出省略)

    4121.0:206
    4123.0:278
    4124.0:180
    4122.0:244
    4125.0:118
    4126.0:34
    4127.0:4
    4081.0:1366
    4080.0:1073
    4077.0:1072
    4078.0:1091
    4079.0:800
    4076.0:874
    4075.0:886
    4074.0:1108
    4071.0:719
    4073.0:1281
    4072.0:1049
    4070.0:567
    4069.0:442
    4068.0:290
    4067.0:199
    4066.0:204
    4065.0:109
    4064.0:60
    4063.0:80
    4062.0:57
    4061.0:70
    4060.0:70
    4059.0:32
    4057.0:6
    4058.0:22
    4129.0:6
    4137.0:2
    4135.0:2
    4133.0:2
    ==============dictCounts length: 75
    4109.0:2080
    4108.0:2047
    4095.0:3009
    4096.0:2785
    4094.0:2265
    4099.0:2573
    4098.0:2702
    4097.0:2491
    4100.0:2147
    4107.0:1809
    run_count1:70684,run_count2:19679

  • 相关阅读:
    taro 填坑之路(一)taro 项目回顾
    Redux遵循的三个原则是什么?
    解释一下 Flux
    MVC框架的主要问题是什么?
    与 ES5 相比,React 的 ES6 语法有何不同?
    你了解 Virtual DOM 吗?解释一下它的工作原理
    DOM 事件有哪些阶段?谈谈对事件代理的理解
    CSS:用Less实现栅格系统
    .NET:国际化和本地化
    自定义工作流 之 模型设计与实现
  • 原文地址:https://www.cnblogs.com/xuyuan77/p/7612941.html
Copyright © 2011-2022 走看看