zoukankan      html  css  js  c++  java
  • 处理按第一列聚合第二列的代码示例

    #coding=gbk
    import os
    import sys
    import argparse
    import commands
    import logging
    
    
    # Tag every log record with the invoking user. When `whoami` exits with a
    # non-zero status its output is an error message rather than a user name,
    # so fall back to a fixed placeholder instead of logging that text.
    status, username = commands.getstatusoutput('whoami')
    if status != 0:
        username = 'unknown'

    # Write the log as a hidden '.log<script name>' file next to the script.
    # Prefixing '.log' to the whole of __file__ breaks as soon as __file__
    # carries a directory part (e.g. 'dir/x.py' -> '.logdir/x.py'), so only
    # the base name is prefixed and the directory part is kept in front.
    logging.basicConfig(level = logging.DEBUG,
            format=username + ' %(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
            datefmt = '%Y-%m-%d %H:%M:%S',
            filename = os.path.join(os.path.dirname(__file__),
                                    '.log' + os.path.basename(__file__)),
            filemode = 'a')
    
    def perror_and_exit(message, status = -1):
        """Report a fatal error and terminate the process.

        The message is written to stderr (followed by a newline), recorded in
        the log with a ' sys exit' suffix, and then the interpreter exits.

        Args:
            message: human-readable error text.
            status: exit status passed to sys.exit(); defaults to -1.

        Raises:
            SystemExit: always, carrying `status`.
        """
        sys.stderr.write(message + '\n')
        # Pass the message as a logging argument so the suffix is separated
        # from it by a space instead of being glued onto its last word.
        logging.info('%s sys exit', message)
        sys.exit(status)
    
    # Running total of handle_keywords() invocations.
    count = 0
    def handle_keywords(userid, keywords, fw):
        """Placeholder hook called once per group of keywords.

        At present it only increments the module-level `count`; the
        `userid`, `keywords` and `fw` arguments are accepted but not used.
        Returns None.
        """
        global count
        count = count + 1
    
    def _group_by_first_column(lines):
        """Group consecutive lines that share the same first field.

        Every line is split on whitespace; the first field is the key and the
        second field is the value (any further fields are ignored). Adjacent
        lines with an equal key are merged into a single (key, values) pair,
        so a key that reappears after a different key starts a new group.

        Blank lines are skipped silently. Non-blank lines with fewer than two
        fields are skipped with a logged warning.

        Args:
            lines: iterable of text lines, e.g. an open file.

        Yields:
            (key, values) tuples, where `values` is a non-empty list of the
            second-column strings in input order.
        """
        current_key = None
        values = []
        for raw in lines:
            fields = raw.split()
            if len(fields) < 2:
                if fields:
                    logging.warning('skipping malformed line: %r', raw)
                continue
            key, value = fields[0], fields[1]
            if values and key != current_key:
                yield current_key, values
                # Rebind rather than clear so the list just yielded is not
                # mutated behind the consumer's back.
                values = []
            current_key = key
            values.append(value)
        # Flush the trailing group; nothing is emitted for empty input.
        if values:
            yield current_key, values

    if __name__ == "__main__":
        # Usage: python <script> keyword|idea
        directory = "/home/junfeng"
        usage = "Usage:python %s keyword or idea" % __file__
        if len(sys.argv) != 2:
            perror_and_exit(usage)
        mode = sys.argv[1]
        if mode == "keyword":
            filename = os.path.join(directory, "userid_showword.active.tradeid5401.20130329.txt")
            print(filename)
            keywords = []
            # Open the input first so a missing input file does not truncate
            # the output file; both handles are closed when the block exits.
            with open(filename) as fr, open("userid_showwords_brand.txt", "w") as fw:
                for userid, keywords in _group_by_first_column(fr):
                    handle_keywords(userid, keywords, fw)
            # Keywords of the last group processed ([] when there was none).
            print(keywords)
            # `count` is a module-level name at this point, so it is read
            # directly without a `global` declaration.
            print(count)
        elif mode == "idea":
            filename = os.path.join(directory, "userid_title_desc1_desc2.active.tradeid5401.20130329.txt")
            with open(filename) as fr:
                for line in fr:
                    # Fields are parsed but no processing is implemented yet.
                    fields = line.split()
        else:
            # Reject unknown modes instead of exiting silently with success.
            perror_and_exit(usage)
    


  • 相关阅读:
    next_permutation
    P1087 FBI树
    P4047 [JSOI2010]部落划分
    买礼物
    P2121 拆地毯
    Nebula Graph 在大规模数据量级下的实践和定制化开发
    深入了解kafka系列-消费者
    一分钟教你搭建WebRTC流媒体服务器Janus-gateway
    什么是"前端工程化"?
    斗鱼Juno 监控中心的设计与实现
  • 原文地址:https://www.cnblogs.com/xinyuyuanm/p/3003864.html
Copyright © 2011-2022 走看看