MongoDB有3个方法实现数据的分析与统计,分别是group分组统计、aggregate简单聚合、mapReduce强大统计。group 函数需要自己实现逻辑,所以它很灵活,但是有一个缺点:不支持分片(shard)集群,也就是不支持数据的分布式运算,支持shard分片的方法在下面的章节中再分析。下面先说一下 group 函数的语法构成:
db.collection.group(document)
document格式:
{
key:{key1:1,key2:1},
cond:{},
reduce:function(curr,result){
},
initial:{},
finalize:function(){
}
}
说明:
key:分组字段,是对谁进行分组,相当于关系型数据库中的group by。
cond:查询条件,相当于where。
reduce:聚合函数,是自己的实现逻辑,也就是在分组的结果中做什么。
initial:初始化,即每个分组的统计结果result的初始值,可以理解为进入分组先做什么。
finalize:统计一组后的回调函数,理解为跳出分组后做什么,在reduce后执行。
下面用具体例子说明。
1、准备数据:将下面31条数据写入shop库的goods表
[{"goods_id":1,"cat_id":4,"goods_name":"KD876","goods_number":1,"click_count":7,"shop_price":1388.00,"add_time":1240902890},{"goods_id":4,"cat_id":8,"goods_name":"\u8bfa\u57fa\u4e9aN85\u539f\u88c5\u5145\u7535\u5668","goods_number":17,"click_count":0,"shop_price":58.00,"add_time":1241422402},{"goods_id":3,"cat_id":8,"goods_name":"\u8bfa\u57fa\u4e9a\u539f\u88c55800\u8033\u673a","goods_number":24,"click_count":3,"shop_price":68.00,"add_time":1241422082},{"goods_id":5,"cat_id":11,"goods_name":"\u7d22\u7231\u539f\u88c5M2\u5361\u8bfb\u5361\u5668","goods_number":8,"click_count":3,"shop_price":20.00,"add_time":1241422518},{"goods_id":6,"cat_id":11,"goods_name":"\u80dc\u521bKINGMAX\u5185\u5b58\u5361","goods_number":15,"click_count":0,"shop_price":42.00,"add_time":1241422573},{"goods_id":7,"cat_id":8,"goods_name":"\u8bfa\u57fa\u4e9aN85\u539f\u88c5\u7acb\u4f53\u58f0\u8033\u673aHS-82","goods_number":20,"click_count":0,"shop_price":100.00,"add_time":1241422785},{"goods_id":8,"cat_id":3,"goods_name":"\u98de\u5229\u6d669@9v","goods_number":1,"click_count":9,"shop_price":399.00,"add_time":1241425512},{"goods_id":9,"cat_id":3,"goods_name":"\u8bfa\u57fa\u4e9aE66","goods_number":4,"click_count":20,"shop_price":2298.00,"add_time":1241511871},{"goods_id":10,"cat_id":3,"goods_name":"\u7d22\u7231C702c","goods_number":7,"click_count":11,"shop_price":1328.00,"add_time":1241965622},{"goods_id":11,"cat_id":3,"goods_name":"\u7d22\u7231C702c","goods_number":1,"click_count":0,"shop_price":1300.00,"add_time":1241966951},{"goods_id":12,"cat_id":3,"goods_name":"\u6469\u6258\u7f57\u62c9A810","goods_number":8,"click_count":13,"shop_price":983.00,"add_time":1245297652}]
[{"goods_id":13,"cat_id":3,"goods_name":"\u8bfa\u57fa\u4e9a5320 XpressMusic","goods_number":8,"click_count":13,"shop_price":1311.00,"add_time":1241967762},{"goods_id":14,"cat_id":4,"goods_name":"\u8bfa\u57fa\u4e9a5800XM","goods_number":1,"click_count":6,"shop_price":2625.00,"add_time":1241968492},{"goods_id":15,"cat_id":3,"goods_name":"\u6469\u6258\u7f57\u62c9A810","goods_number":3,"click_count":8,"shop_price":788.00,"add_time":1241968703},{"goods_id":16,"cat_id":2,"goods_name":"\u6052\u57fa\u4f1f\u4e1aG101","goods_number":0,"click_count":3,"shop_price":823.33,"add_time":1241968949},{"goods_id":17,"cat_id":3,"goods_name":"\u590f\u65b0N7","goods_number":1,"click_count":2,"shop_price":2300.00,"add_time":1241969394},{"goods_id":18,"cat_id":4,"goods_name":"\u590f\u65b0T5","goods_number":1,"click_count":0,"shop_price":2878.00,"add_time":1241969533},{"goods_id":19,"cat_id":3,"goods_name":"\u4e09\u661fSGH-F258","goods_number":12,"click_count":7,"shop_price":858.00,"add_time":1241970139},{"goods_id":20,"cat_id":3,"goods_name":"\u4e09\u661fBC01","goods_number":12,"click_count":14,"shop_price":280.00,"add_time":1241970417},{"goods_id":21,"cat_id":3,"goods_name":"\u91d1\u7acb A30","goods_number":40,"click_count":4,"shop_price":2000.00,"add_time":1241970634},{"goods_id":22,"cat_id":3,"goods_name":"\u591a\u666e\u8fbeTouch HD","goods_number":1,"click_count":15,"shop_price":5999.00,"add_time":1241971076}]
[{"goods_id":23,"cat_id":5,"goods_name":"\u8bfa\u57fa\u4e9aN96","goods_number":8,"click_count":17,"shop_price":3700.00,"add_time":1241971488},{"goods_id":24,"cat_id":3,"goods_name":"P806","goods_number":100,"click_count":35,"shop_price":2000.00,"add_time":1241971981},{"goods_id":25,"cat_id":13,"goods_name":"\u5c0f\u7075\u901a/\u56fa\u8bdd50\u5143\u5145\u503c\u5361","goods_number":2,"click_count":0,"shop_price":48.00,"add_time":1241972709},{"goods_id":26,"cat_id":13,"goods_name":"\u5c0f\u7075\u901a/\u56fa\u8bdd20\u5143\u5145\u503c\u5361","goods_number":2,"click_count":0,"shop_price":19.00,"add_time":1241972789},{"goods_id":27,"cat_id":15,"goods_name":"\u8054\u901a100\u5143\u5145\u503c\u5361","goods_number":2,"click_count":0,"shop_price":95.00,"add_time":1241972894},{"goods_id":28,"cat_id":15,"goods_name":"\u8054\u901a50\u5143\u5145\u503c\u5361","goods_number":0,"click_count":0,"shop_price":45.00,"add_time":1241972976},{"goods_id":29,"cat_id":14,"goods_name":"\u79fb\u52a8100\u5143\u5145\u503c\u5361","goods_number":0,"click_count":0,"shop_price":90.00,"add_time":1241973022},{"goods_id":30,"cat_id":14,"goods_name":"\u79fb\u52a820\u5143\u5145\u503c\u5361","goods_number":9,"click_count":1,"shop_price":18.00,"add_time":1241973114},{"goods_id":31,"cat_id":3,"goods_name":"\u6469\u6258\u7f57\u62c9E8 ","goods_number":1,"click_count":5,"shop_price":1337.00,"add_time":1242110412},{"goods_id":32,"cat_id":3,"goods_name":"\u8bfa\u57fa\u4e9aN85","goods_number":4,"click_count":9,"shop_price":3010.00,"add_time":1242110760}]
2、示例:查询每个栏目下的商品数量
> db.goods.group({ ... key:{cat_id:1}, ... cond:{}, ... reduce:function(curr,result){ ... result.cnt+=1; ... }, ... initial:{cnt:0} ... })
结果:
[ { "cat_id" : 4, "cnt" : 3 }, { "cat_id" : 8, "cnt" : 3 }, { "cat_id" : 11, "cnt" : 2 }, { "cat_id" : 3, "cnt" : 15 }, { "cat_id" : 2, "cnt" : 1 }, { "cat_id" : 5, "cnt" : 1 }, { "cat_id" : 13, "cnt" : 2 }, { "cat_id" : 15, "cnt" : 2 }, { "cat_id" : 14, "cnt" : 2 } ]
3、示例:查询每个栏目下价格高于50元的商品数量
> db.goods.group({ ... key:{cat_id:1}, ... cond:{shop_price:{$gt:50}}, ... reduce:function(curr,result){ ... result.cnt+=1; ... }, ... initial:{cnt:0} ... })
结果:
[ { "cat_id" : 4, "cnt" : 3 }, { "cat_id" : 8, "cnt" : 3 }, { "cat_id" : 3, "cnt" : 15 }, { "cat_id" : 2, "cnt" : 1 }, { "cat_id" : 5, "cnt" : 1 }, { "cat_id" : 15, "cnt" : 1 }, { "cat_id" : 14, "cnt" : 1 } ]
4、示例:查询每个栏目下的商品库存量
> db.goods.group({ ... key:{cat_id:1}, ... cond:{}, ... reduce:function(curr,result){ ... result.num+=curr.goods_number ... }, ... initial:{ ... num:0 ... } ... } ... )
结果:
[ { "cat_id" : 4, "num" : 3 }, { "cat_id" : 8, "num" : 61 }, { "cat_id" : 11, "num" : 23 }, { "cat_id" : 3, "num" : 203 }, { "cat_id" : 2, "num" : 0 }, { "cat_id" : 5, "num" : 8 }, { "cat_id" : 13, "num" : 4 }, { "cat_id" : 15, "num" : 2 }, { "cat_id" : 14, "num" : 9 } ]
5、示例:查询每个栏目下最贵的商品价格
> db.goods.group({ ... key:{cat_id:1}, ... cond:{}, ... reduce:function(curr,result){ ... if(result.max<curr.shop_price){ ... result.max=curr.shop_price; ... } ... }, ... initial:{max:0} ... })
结果:
[ { "cat_id" : 4, "max" : 2878 }, { "cat_id" : 8, "max" : 100 }, { "cat_id" : 11, "max" : 42 }, { "cat_id" : 3, "max" : 5999 }, { "cat_id" : 2, "max" : 823.33 }, { "cat_id" : 5, "max" : 3700 }, { "cat_id" : 13, "max" : 48 }, { "cat_id" : 15, "max" : 95 }, { "cat_id" : 14, "max" : 90 } ]
6、示例:查询每个栏目下商品的平均价格
> db.goods.group({ ... key:{cat_id:1}, ... cond:{}, ... reduce:function(curr,result){ ... result.cnt+=1; ... result.sum+=curr.shop_price; ... }, ... initial:{sum:0,cnt:0}, ... finalize:function(result){ ... result.avg=result.sum/result.cnt; ... } ... })
结果:
[ { "cat_id" : 4, "sum" : 6891, "cnt" : 3, "avg" : 2297 }, { "cat_id" : 8, "sum" : 226, "cnt" : 3, "avg" : 75.33333333333333 }, { "cat_id" : 11, "sum" : 62, "cnt" : 2, "avg" : 31 }, { "cat_id" : 3, "sum" : 26191, "cnt" : 15, "avg" : 1746.0666666666666 }, { "cat_id" : 2, "sum" : 823.33, "cnt" : 1, "avg" : 823.33 }, { "cat_id" : 5, "sum" : 3700, "cnt" : 1, "avg" : 3700 }, { "cat_id" : 13, "sum" : 67, "cnt" : 2, "avg" : 33.5 }, { "cat_id" : 15, "sum" : 140, "cnt" : 2, "avg" : 70 }, { "cat_id" : 14, "sum" : 108, "cnt" : 2, "avg" : 54 } ]
可以看出,最后返回的json格式的键包括了分组的字段和result中用到的字段。在initial中初始化的键,reduce中可以用,结果中也可以返回。聚合函数reduce有两个参数:一个是curr,可以根据当前遍历数据的键取值;一个是result,可以取初始的值进行运算,并不断地修改这个值。回调函数finalize有一个参数result,接收reduce计算的结果。