zoukankan      html  css  js  c++  java
  • mongodb MapReduce 多个条件分组(group by)

    from:https://my.oschina.net/chiyong/blog/289138

    MongoDB 没有传统数据库那样的 group by 语句,分组可以通过 MapReduce 来实现。这种 MR 与 Hadoop 的 MR 类似。下面看看 MongoDB 的分组实现。

    现在有一张表,它的数据格式如下:

     "_id" : ObjectId("53b224e0a1ae72328a57702c"), 
     "title" : "SECJ0118E", 
     "criteria" : "未找到对应的错误码", 
     "actual" : "1", 
     "effect" : "可能引起重大问题", 
     "suggestion" : "请专家提供意见", 
     "severity" : "Normal", 
     "status" : "NotOK", 
     "rtype" : "FormLoginExte", 
     "comment" : "[8/2/12 17:28:21:231 GMT+08:00] 0000001e FormLoginExte E SECJ0118E: Authentication error during authentication for user rpt", 
     "category" : "logs", 
     "time" : "0008-02-12 17:28:21" 


     "_id" : ObjectId("53b224e0a1ae72328a577052"), 
     "title" : "", 
     "criteria" : "未找到对应的错误码", 
     "actual" : "1", 
     "effect" : "可能引起重大问题", 
     "suggestion" : "请专家提供意见", 
     "severity" : "Normal", 
     "status" : "NotOK", 
     "rtype" : "servlet", 
     "comment" : "[8/2/12 19:04:41:891 CST] 0000000b servlet E com.ibm.ws.webcontainer.servlet.ServletWrapper init Uncaught.init.exception.thrown.by.servlet", 
     "category" : "logs", 
     "time" : "0008-02-12 19:04:41" 


     "_id" : ObjectId("53b224e0a1ae72328a576fdc"), 
     "title" : "系统资源设置[processes]", 
     "criteria" : "unlimited", 
     "actual" : "unlimited", 
     "effect" : "如果对用户资源做了limits限制,有可能造成应用运行失败或系统性能下降。", 
     "suggestion" : "建议修改/etc/security/limits,编辑root相关参数部分都为-1。", 
     "severity" : "None", 
     "status" : "OK", 
     "rtype" : "系统参数设置检查", 
     "comment" : "", 
     "category" : "params" 

    1:单个条件分组 

     (1) 现在我们根据title进行分组 并且统计每个组的数量 

    // Group the documents of the "check_result" collection by title and
    // count how many documents fall into each group.
    db.runCommand({
      mapreduce: "check_result",
      // The first argument of emit is the grouping key (here: the title);
      // every document contributes one {count: 1} value to its group.
      map: function Map() {
        emit(this.title, { count: 1 });
      },
      // values is an array of the {count: ...} objects collected for key.
      // The result has the same shape as the emitted values.
      reduce: function Reduce(key, values) {
        // Declared locally so the accumulator does not leak into a global.
        var total = 0;
        // Index loop instead of for...in: values is an array.
        for (var i = 0; i < values.length; i++) {
          total += values[i].count;
        }
        return { count: total };
      },
      // Pass the reduced value through unchanged.
      finalize: function Finalize(key, reduced) {
        return reduced;
      },
      // Return the result inline instead of writing it to a collection.
      out: { inline: 1 }
    });

    结果如下(结果中的_id键就是要分组的title 。value是分组后的值): 
     { "_id" : "" , "value" : { "count" : 113.0}} 
    { "_id" : "/tmp是否设置了t标志位" , "value" : { "count" : 21.0}} 
    { "_id" : "ASYN0080W" , "value" : { "count" : 120.0}} 
    { "_id" : "AppServer的JVM堆最大值" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的JVM堆最小值" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的JVM标准输出日志切换周期" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的JVM标准输出日志回滚类型" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的JVM标准错误日志切换周期" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的JVM标准错误日志回滚类型" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的WebContainer线程池最大值" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的WebContainer线程池最小值" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的通用JVM参数" , "value" : { "count" : 6.0}} 
    { "_id" : "AppServer的通用JVM参数-SystemGC" , "value" : { "count" : 6.0}} 
    { "_id" : "Audit是否打开" , "value" : { "count" : 21.0}} 
    { "_id" : "CWPKI0041W" , "value" : { "count" : 65.0}} 
    { "_id" : "CWPMC0017W" , "value" : { "count" : 7.0}} 
    { "_id" : "CWSAA0037W" , "value" : { "count" : 13.0}} 
    { "_id" : "Could not invoke an operation on object" , "value" : { "count" : 21.0}} 
    { "_id" : "DCSV0000W" , "value" : { "count" : 4.0}} 
    { "_id" : "DCSV1115W" , "value" : { "count" : 137.0}} 

    2:多个条件分组 


    (1) 现在我们根据title,status,severity进行分组 并且统计每个组的数量 
    // Group the documents of the "check_result" collection by the combination
    // of title, status and severity, and count the documents in each group.
    db.runCommand({
      mapreduce: "check_result",
      // The first argument of emit is the grouping key; using an object as the
      // key groups by several fields at once. Every document contributes one
      // {count: 1} value to its group.
      map: function Map() {
        emit(
          { title: this.title, status: this.status, severity: this.severity },
          { count: 1 }
        );
      },
      // values is an array of the {count: ...} objects collected for key.
      // The result has the same shape as the emitted values.
      reduce: function Reduce(key, values) {
        // Declared locally so the accumulator does not leak into a global.
        var total = 0;
        // Index loop instead of for...in: values is an array.
        for (var i = 0; i < values.length; i++) {
          total += values[i].count;
        }
        return { count: total };
      },
      // Pass the reduced value through unchanged.
      finalize: function Finalize(key, reduced) {
        return reduced;
      },
      // Return the result inline instead of writing it to a collection.
      out: { inline: 1 }
    });


    格式化后的输出结果如下(注:下面列出的 _id 中只显示了 title 和 status 两个字段):

    { "_id" : { "title" : "" , "status" : "NotOK"} , "value" : { "count" : 113.0}} 
    { "_id" : { "title" : "/tmp是否设置了t标志位" , "status" : "NotOK"} , "value" : { "count" : 21.0}} 
    { "_id" : { "title" : "ASYN0080W" , "status" : "NotOK"} , "value" : { "count" : 120.0}} 
    { "_id" : { "title" : "AppServer的JVM堆最大值" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的JVM堆最小值" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的JVM标准输出日志切换周期" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的JVM标准输出日志回滚类型" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的JVM标准错误日志切换周期" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的JVM标准错误日志回滚类型" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的WebContainer线程池最大值" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的WebContainer线程池最小值" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的通用JVM参数" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "AppServer的通用JVM参数-SystemGC" , "status" : "NotOK"} , "value" : { "count" : 6.0}} 
    { "_id" : { "title" : "Audit是否打开" , "status" : "NotOK"} , "value" : { "count" : 21.0}} 
    { "_id" : { "title" : "CWPKI0041W" , "status" : "NotOK"} , "value" : { "count" : 65.0}} 
    { "_id" : { "title" : "CWPMC0017W" , "status" : "NotOK"} , "value" : { "count" : 7.0}} 
    { "_id" : { "title" : "CWSAA0037W" , "status" : "NotOK"} , "value" : { "count" : 13.0}} 
    { "_id" : { "title" : "Could not invoke an operation on object" , "status" : "NotOK"} , "value" : { "count" : 21.0}} 
    { "_id" : { "title" : "DCSV0000W" , "status" : "NotOK"} , "value" : { "count" : 4.0}} 
    { "_id" : { "title" : "DCSV1115W" , "status" : "NotOK"} , "value" : { "count" : 137.0}} 

  • 相关阅读:
    redis下载安装及php配置redis
    php--小数点问题
    php--0与空的判断
    php--判断是否是手机端
    php--ip的处理
    mysql--sql_mode报错整理
    mysql-建表、添加字段、修改字段、添加索引SQL语句写法
    Python-多任务复制文件夹
    Python学习笔记(十一)——赋值、深拷贝与浅拷贝
    Python学习笔记(十)—JSON格式的处理
  • 原文地址:https://www.cnblogs.com/94cool/p/6435481.html
Copyright © 2011-2022 走看看