zoukankan      html  css  js  c++  java
  • mongo14-----group,aggregate,mapReduce

    group,aggregate,mapReduce
    
    分组统计: group()
    简单聚合: aggregate()
    强大统计: mapReduce()
    
    
    // Shape of the single argument accepted by group().
    db.collection.group(document)
    document:{
                key:{key1:1,key2:1},  // which fields to group by
                cond:{},  // filter condition applied to the rows
                reduce: function(curr,result) {  // aggregation run after grouping: curr is one row, result is the computed result
                },
                initial:{},  // initial contents of result
                finalize:function() {  // function run last, once reduce has finished for a whole group
                }
              }
              
    #计算每个栏目下(cat_id)的商品数 count()操作
    select  cat_id,count(*) from goods group by cat_id;  //mysql操作
    
    use shop
    // Count the products in each category (cat_id).
    db.goods.group(
    {
        key:{cat_id:1},   // the field to group by
        cond:{},          // no condition: every row is taken
        reduce:function(curr,result) {// how reduce runs: each row arrives as curr, and every group shares one result variable
            result.cnt += 1;       // result.cnt is the number of rows in the group; each group has its own result
        },
        initial:{cnt:0}
        }
    ):
    [
        {
            "cat_id" : 4.0,
            "cnt" : 3.0
        },
        {
            "cat_id" : 8.0,
            "cnt" : 3.0
        },
        {
            "cat_id" : null,
            "cnt" : 2.0
        }
    ]
    
    
    
    
    #查询每个栏目下价格高于3500元的商品数量
    use shop
    // Count, per category, the products whose shop_price is above 3500.
    db.goods.group(
    {
        key:{cat_id:1},   // group by cat_id
        // NOTE(review): the sample output that follows also lists shop_price, which this key
        // does not select — confirm whether key was meant to be {cat_id:1,shop_price:1}.
        cond:{shop_price:{$gt:3500}},
        reduce:function(curr,result) {
            result.cnt += 1;
        },
        initial:{cnt:0}
    }
    ):
    [
        {
            "cat_id" : 3.0,
            "shop_price" : 5999.0,
            "cnt" : 1.0
        },
        {
            "cat_id" : 5.0,
            "shop_price" : 3700.0,
            "cnt" : 1.0
        }
    ]
    
    
    
    
    #查询每个栏目下价格大于3000元的商品个数
    {
        key:{cat_id:1},
        cond:{},
        reduce: function(curr,result) {
            result.total += 1;
        },
        initial:{total:0}
    }:
    [
        {
            "cat_id" : 4.0,
            "total" : 3.0
        },
        {
            "cat_id" : 8.0,
            "total" : 3.0
        },    
        {
            "cat_id" : null,
            "total" : 2.0
        }
    ]
    
    
    
    #计算每个栏目下的商品库存量 sum()操作
    select  sum(goods_number) from goods group by cat_id;
    
    use shop
    // Sum goods_number (stock) per category.
    db.goods.group(
    {
        key:{cat_id:1},
        cond:{},
        reduce: function(curr,result) {
                // Adding undefined to a number gives NaN; presumably that is why the null
                // category in the sample output shows NaN (rows without goods_number).
                result.total += curr.goods_number;
        },
        initial:{total:0}
    }
    ):
    [
        {
            "cat_id" : 4.0,
            "total" : 3.0
        },
        {
            "cat_id" : 8.0,
            "total" : 61.0
        },
        {
            "cat_id" : null,
            "total" : NaN
        }
    ]
    
    
    
    #查询每个栏目最贵的商品价格, max()操作
    select  max(shop_price) from goods group by cat_id;
    
    use shop
    // Find the highest shop_price in each category.
    db.goods.group(
    {
        key:{cat_id:1},
        cond:{},
        reduce:function(curr , result) {
            // keep the largest shop_price seen so far in this group
            if(curr.shop_price > result.max) {
                result.max = curr.shop_price;
            }
        },
        initial:{max:0}   // starts from 0, so a group stays at 0 unless some price is greater than 0
    }
    ):
    
    
    
    #查询每个栏目下商品的平均价格
    select cat_id,avg(shop_price) from goods group by cat_id;
    
    use shop
    // Average shop_price per category: accumulate sum and count, then divide in finalize.
    db.goods.group(
    {
        key:{cat_id:1},          // plays the role of GROUP BY
        cond:{},                       // plays the role of WHERE
        reduce:function(curr , result) {             // plays the role of the sum/avg aggregate functions
            result.cnt += 1;
            result.sum += curr.shop_price;
        },
        initial:{sum:0,cnt:0},                // applied once on entering a group
        finalize:function(result) {      // applied once on leaving a group: the callback run after the group is done
            result.avg = result.sum/result.cnt;
        }
    }
    ):
    [
        {
            "cat_id" : 4.0,
            "sum" : 6891.0,
            "cnt" : 3.0,
            "avg" : 2297.0
        },
        {
            "cat_id" : 8.0,
            "sum" : 226.0,
            "cnt" : 3.0,
            "avg" : 75.3333333333333
        },
        {
            "cat_id" : null,
            "sum" : NaN,
            "cnt" : 2.0,
            "avg" : NaN
        }
    ]
    
    
    
    
    
    注意: 
    1:group需要我们手写聚合函数的业务逻辑
    2:group 不支持集群shard cluster, 无法分布式运算
    
    3:分布式可以用 aggregate() (version2.2) , 
    或者mapReduce() (version2.4)
    
    GROUP BY        $group
    HAVING            $match
    SELECT            $project
    ORDER BY        $sort
    LIMIT            $limit
    SUM()            $sum
    COUNT()            $sum
    
    
    #查询每个栏目下的商品数量
    select count(*) from goods group by cat_id;
    
    // Count the products in each category with the aggregation pipeline.
    db.goods.aggregate(
    [
        {
            $group:{
                _id:"$cat_id",     // group by cat_id
                total:{$sum:1}     // add 1 for every document in the group, i.e. count them
            }
        }     
    ]
    ):
    {
        "_id" : null,
        "total" : 2.0
    }
    {
        "_id" : 14.0,
        "total" : 2.0
    }
    {
        "_id" : 2.0,
        "total" : 1.0
    }
    {
        "_id" : 13.0,
        "total" : 2.0
    }
    
    
    
    #查询goods下有多少条商品,select count(*) from goods
    // Pipeline only: grouping on _id:null puts every document in one group, so total is the overall count.
    [
    {$group:{_id:null,total:{$sum:1}}}
    ];
    {
        "_id" : null,
        "total" : 33.0
    }
    
    
    
    #查询每个栏目下 价格大于3000元的商品个数
    use shop
    // Count, per category, the products priced above 3000.
    db.goods.aggregate(
    [
        {$match:{shop_price:{$gt:3000}}},          // filter first
        {$group:{_id:"$cat_id",total:{$sum:1}}}    // then count what is left in each category
    ]
    ):
    {
        "_id" : 5.0,
        "total" : 1.0
    }
    {
        "_id" : 3.0,
        "total" : 2.0
    }
    
    
    
    
    #查询每个栏目下 价格大于3000元的商品个数
    #并筛选出"满足条件的商品个数" 大于等于2的栏目 
    select cat_id,count(*) as cnt from goods where shop_price>3000 group by cat_id having cnt>=2
    
    
    use shop
    // Count products priced above 3000 per category, keeping only categories with at least 2 of them.
    db.goods.aggregate(
    [
        {$match:{shop_price:{$gt:3000}}},    // a $match placed before $group acts like WHERE
        {$group:{_id:"$cat_id",total:{$sum:1}}},
        {$match:{total:{$gte:2}}}             // a $match placed after $group acts like HAVING
    ]
    ):
    {
        "_id" : 3.0,
        "total" : 2.0
    }
    
    
    
    
    
    #查询每个栏目下的库存量
    use shop
    // Total stock per category.
    db.goods.aggregate(
    [
        {$group:{_id:"$cat_id" , total:{$sum:"$goods_number"}}},   // group by cat_id and sum goods_number
    ]
    ):
    {
        "_id" : 5.0,
        "total" : 8.0
    }
    {
        "_id" : 15.0,
        "total" : 2.0
    }
    
    
    
    
    #查询每个栏目下的库存量,并按库存量排序
    use shop
    // Total stock per category, ordered by that total.
    db.goods.aggregate(
    [
    {$group:{_id:"$cat_id" , total:{$sum:"$goods_number"}}},
    {$sort:{total:1}}         // 1 means ascending
    ]
    )
    
    
    
    #查询每个栏目下的库存量,按库存量升序排序,并只取前3个
    use shop
    // Total stock per category, ascending, first three groups only.
    db.goods.aggregate(
    [
        {$group:{_id:"$cat_id" , total:{$sum:"$goods_number"}}},
        {$sort:{total:1}},
        {$limit:3}       // keep the first 3
    ]
    ):
    {
        "_id" : null,
        "total" : 0
    }
    {
        "_id" : 2.0,
        "total" : 0.0
    }
    {
        "_id" : 15.0,
        "total" : 2.0
    }
    
    
    
    #查询每个栏目的商品平均价格,并按平均价格由高到低排序
    select cat_id ,avg(shop_price) as pj from goods group by cat_id order by pj desc limit 3
    
    use shop
    // Average shop_price per category, highest average first, top three only.
    db.goods.aggregate(
    [
        {$group:{_id:"$cat_id" , avg:{$avg:"$shop_price"}}},     // group by cat_id and average shop_price
        {$sort:{avg:-1}},
        {$limit:3}       
    ]
    ):
    {
        "_id" : 5.0,
        "avg" : 3700.0
    }
    {
        "_id" : 4.0,
        "avg" : 2297.0
    }
    {
        "_id" : 3.0,
        "avg" : 1746.06666666667
    }
    mapReduce 随着"大数据"概念而流行,mapReduce的真正强项在于分布式。
    其实mapReduce的概念非常简单,比aggregate要简单,从功能上说,相当于RDBMS(传统数据库)的 group 操作。
    
    当数据非常大时,像google,有N多数据中心,数据并不都在地球的同一端,用group就力所不及了:group不支持分布式,而单台服务器的运算能力必然是有限的.
    
    而mapReduce支持分布式——并不是它的算法更好,而是它能让大量的服务器同时工作,用蛮力来统计.mapReduce做的事情和group、aggregate一样,只不过支持分布式。
    
    mapReduce的工作过程:1.map-->映射,2.reduce-->归约
    
    1.map: 先在全世界的机器上找(在分布式集群上找),把属于同一个组的数据,映射到一个数组上,例如 cat_id: [23,2,6,7]。 2.reduce: 把数组(同一组)的数据,进行运算.
    
    
    
    
    
    #用mapReduce计算每个栏目的库存总量
    
    //map函数(进行映射工作,映射成一个二维数组)
    var map = function() {
        // The emitted key is cat_id, so the values end up grouped per category.
        var categoryId = this.cat_id;
        var stock = this.goods_number;
        emit(categoryId, stock);
    };
    
    /*
    {
        cat_id1:[goods_number1,goods_number2,goods_number3.....],
        cat_id2:[goods_number1,goods_number2,goods_number3.....],
        cat_id3:[goods_number1,goods_number2,goods_number3.....]
    }
    */
    
    var reduce = function(cat_id,numbers) {
        // Collapse one category's array of goods_number values into their sum.
        // Array.sum is a helper that mongo adds to the JavaScript Array object.
        var total = Array.sum(numbers);
        return total;
    };
    
    /*
    [
        {_id:cat_id1, value:goods_number1+goods_number2+goods_number3.....},
        {_id:cat_id2, value:goods_number1+goods_number2+goods_number3.....},
        {_id:cat_id3, value:goods_number1+goods_number2+goods_number3.....}
    ]
    */
    
    db.goods.mapReduce(map,reduce,{out:'res'});    // out: the computed results are stored in the res collection
    // an extra collection named res now exists
    show tables
    db.res.find():
    {
        "_id" : null,
        "value" : NaN
    }
    {
        "_id" : 2.0,
        "value" : 0.0
    }
    {
        "_id" : 3.0,
        "value" : 203.0
    }
    {
        "_id" : 4.0,
        "value" : 3.0
    }
    {
        "_id" : 15.0,
        "value" : 2.0
    }
    
    // List all the methods available on Array:
    for (var k in Array){
            print(k)
    }:
    contains
    unique
    shuffle
    tojson
    fetchRefs
    sum
    avg
    stdDev
    
    
    
    
    #用mapReduce计算每个栏目下商品的平均价格
    // Average shop_price per category.
    // MongoDB may call reduce more than once for the same key, feeding it values that
    // earlier reduce calls returned. Averaging inside reduce would then produce an
    // average of averages, so map emits {sum, cnt} partials, reduce only adds them up,
    // and the division happens once per key in finalize.
    var map = function() {
        // Same shape as the value reduce returns, so both can be mixed in one values array.
        emit(this.cat_id, {sum: this.shop_price, cnt: 1});
    }
    var reduce = function(cat_id,values) {
        var acc = {sum: 0, cnt: 0};
        for (var i = 0; i < values.length; i++) {
            acc.sum += values[i].sum;
            acc.cnt += values[i].cnt;
        }
        return acc;
    }
    db.goods.mapReduce(map,reduce,{
        out:'res',
        // Runs once per key on the fully reduced value, including keys with a single
        // document for which reduce is never called.
        finalize:function(cat_id,acc) {
            return acc.sum / acc.cnt;
        }
    });
    :
    {
        "_id" : null,
        "value" : NaN
    }
    {
        "_id" : 2.0,
        "value" : 823.33
    }
    {
        "_id" : 3.0,
        "value" : 1746.06666666667
    }
    var map = function() {
        // jing / wei are presumably longitude / latitude — confirm against the data set.
        // Documents with a negative value in either field emit nothing.
        var hasNegative = this.jing < 0 || this.wei < 0;
        if (!hasNegative) {
            // Snap each coordinate down to a multiple of 5 to form the block key.
            var cell = [
                5 * Math.floor(this.jing / 5),
                5 * Math.floor(this.wei / 5)
            ];
            emit(cell.join(":"), 1);
        }
    };
    var reduce = function(block,values) {
        // Every emit carried a 1, so the sum is the number of documents in the block.
        var count = Array.sum(values);
        return count;
    };
    // Run the map/reduce pair over goods; the out option names 'res' as the output collection.
    db.goods.mapReduce(map,reduce,{out:'res'});
  • 相关阅读:
    黑马前端2020就业Web全套课-2020.4月最新版
    什么是Redis雪崩、穿透和击穿? 全面掌握Redis
    ElasticStack高级搜索入门到项目实战,Elasticsearch全文检索
    阿里云盘邀请码+软件下载
    Intellij IDEA超实用设置汇总,高效便捷敲代码
    双11的亿级高并发架构,是怎么设计的?
    TensorFlow 卷积神经网络实用指南 | iBooker·ApacheCN
    TensorFlow 入门 | iBooker·ApacheCN
    TensorFlow 2.0 快速入门指南 | iBooker·ApacheCN
    深度学习快速参考 | iBooker·ApacheCN
  • 原文地址:https://www.cnblogs.com/yaowen/p/8178050.html
Copyright © 2011-2022 走看看