zoukankan      html  css  js  c++  java
  • 统计日志的不同条数

    import collections
    import itertools
    import multiprocessing
    import bz2
    
    class MapReduce(object):
        """Small map/reduce driver built on a ``multiprocessing.Pool``.

        ``map_func`` is called once per input item and must return an iterable
        of ``(key, value)`` pairs.  All pairs are grouped by key, and
        ``reduce_func`` is then called once per ``(key, [values])`` tuple.
        Both callables are dispatched through ``Pool.map``, so they (and the
        data they exchange) have to be picklable.

        The instance owns a worker pool; call :meth:`close` or use the object
        as a context manager to release the worker processes when done.
        """

        def __init__(self, map_func, reduce_func, num_workers=None):
            """Store the two callables and start the worker pool.

            ``num_workers`` is passed straight to ``multiprocessing.Pool``;
            ``None`` lets the pool choose its default size.
            """
            self.map_func = map_func
            self.reduce_func = reduce_func
            self.pool = multiprocessing.Pool(num_workers)

        def close(self):
            """Stop accepting work and wait for the worker processes to exit."""
            self.pool.close()
            self.pool.join()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            self.close()
            # Never swallow an exception raised inside the ``with`` body.
            return False

        def partition(self, mapped_values):
            """Group ``(key, value)`` pairs by key.

            Returns the ``items()`` view of a dict mapping each key to the
            list of values seen for it, in order of arrival.
            """
            partition_data = collections.defaultdict(list)
            for key, value in mapped_values:
                partition_data[key].append(value)
            return partition_data.items()

        def __call__(self, inputs, chunksize=1):
            """Run the map phase, group the results, then run the reduce phase.

            ``inputs`` is an iterable of items for ``map_func``; ``chunksize``
            is forwarded to the map-phase ``Pool.map`` call.  Returns the list
            of ``reduce_func`` results, one per distinct key.
            """
            map_responses = self.pool.map(
                self.map_func, inputs, chunksize=chunksize
            )
            # Each response is itself an iterable of pairs; flatten them into
            # one stream without unpacking the whole list as call arguments.
            partitioned_data = self.partition(
                itertools.chain.from_iterable(map_responses)
            )
            reduce_values = self.pool.map(self.reduce_func, partitioned_data)
            return reduce_values
    
    
    def mapper_match(one_file):
        """Map one bz2-compressed log file to a list of ``(field, 1)`` pairs.

        Every line is split on whitespace.  A line is counted when its fourth
        field equals ``'web'`` and its sixth field equals ``'0'``; the pair
        emitted is ``(fifth_field, 1)`` (presumably a cookie id -- confirm
        against the log format).  Lines with fewer than six fields are skipped.

        Returns the list of emitted pairs (empty when nothing matches).
        """
        output = []
        # Text mode so fields compare equal to the str literals below; in
        # binary mode every field is ``bytes`` and nothing would ever match.
        # NOTE(review): UTF-8 with replacement is an assumption about the logs.
        with bz2.open(one_file, "rt", encoding="utf-8", errors="replace") as log:
            for line in log:
                fields = line.split()
                if len(fields) < 6:
                    # Malformed or blank line: ignore it instead of crashing.
                    continue
                if fields[3] == 'web' and fields[5] == '0':
                    output.append((fields[4], 1))
        return output
    
    def reduce_match(item):
        """Collapse one ``(key, values)`` group into ``(key, sum(values))``."""
        key, hits = item
        total = sum(hits)
        return key, total
    
    def mapper_count(item):
        """Turn a ``(key, count)`` pair into the one-element list ``[(count, 1)]``.

        The first element of ``item`` is ignored; only the count is re-emitted
        as a new key with a value of 1.
        """
        _ignored, total = item
        pair = (total, 1)
        return [pair]
    
    def reducer_count(item):
        """Collapse one ``(freq, values)`` group into ``(freq, sum(values))``."""
        frequency, ones = item
        how_many = sum(ones)
        return frequency, how_many
    
    import glob
    import operator
    
    # Placeholder: replace with the glob pattern of the bz2 log files to
    # analyse (for example 'logs/*.bz2').
    input_files='sssssss'


    def main():
        """Print how many keys occur with each frequency, most common first.

        Stage 1 counts matching log lines per key across all input files.
        Stage 2 counts how many keys share each per-key total.  Each output
        line is ``frequency number_of_keys``.
        """
        # Expand the pattern into file names.  Handing the raw string to the
        # pool would make it iterate over single characters instead of files.
        file_names = glob.glob(input_files)

        match_job = MapReduce(mapper_match, reduce_match)
        try:
            cookie_freq = match_job(file_names)
        finally:
            # Release the worker processes as soon as the stage is done.
            match_job.pool.close()
            match_job.pool.join()

        # The map function of the second stage must be mapper_count: it turns
        # (key, count) into [(count, 1)].  reducer_count would try to sum()
        # an integer here.
        count_job = MapReduce(mapper_count, reducer_count)
        try:
            freq_histogram = count_job(cookie_freq)
        finally:
            count_job.pool.close()
            count_job.pool.join()

        freq_histogram.sort(key=operator.itemgetter(1), reverse=True)
        for key, value in freq_histogram:
            print(key, value)


    # Guard the entry point: with the "spawn" start method the worker
    # processes re-import this module, and unguarded top-level code would
    # start new pools recursively.
    if __name__ == '__main__':
        main()
    

      

  • 相关阅读:
    Individual Project
    最后的作业
    Reading Task 2 —— by12061154Joy
    Code Review —— by12061154Joy
    Pair Project —— Elevator Scheduler
    《移山之道》Reading Task——by12061154Joy
    Individual Project
    qa_model
    个人阅读作业2
    Personal Reading Assignment 2 -读推荐文章有感以及项目开发目前总结
  • 原文地址:https://www.cnblogs.com/1204guo/p/9166823.html
Copyright © 2011-2022 走看看