zoukankan      html  css  js  c++  java
  • 【Mongodb】聚合运算

    Mongodb高手课笔记

    聚合查询

    Mongodb可以进行一系列的聚合查询,从而计算出用户需要的结果
    整个聚合运算的过程称为管道Pipeline,它是由多个步骤stage组成的.就像是过滤器,经过每多次过滤得到计算结果.

    聚合运算的基本格式

    pipeline = [$stage1, $stage2, ...$stageN];
    db.<COLLECTION>.aggregate(
          pipeline,
          { options }
    );
    

    常见步骤:

    步骤 作用 SQL等价运算符
    $match 过滤 WHERE
    $project 投影 AS
    $sort 排序 ORDER BY
    $group 分组 GROUP BY
    $skip $limit 结果限制 SKIP/LIMIT
    $lookup 左外连接 LEFT OUTER JOIN
    $unwind 展开数组 N/A
    $graphLookup 图搜索 N/A
    $facet $bucket 分面搜索 N/A

    常见步骤中的运算符:

    $match 
    •$eq $gt $gte $lt $lte
    •$and $or $not $in
    •$geoWithin $intersect
    
    $project
    •选择需要的或排除不需要的字段
    • $map $reduce $filter
    •$range
    •$multiply $divide $substract $add
    •$year $month $dayOfMonth $hour $minute $second
    
    $group
    •$sum $avg
    •$push $addToSet
    •$first $last $max $min
    

    MQL 常用步骤与 SQL 对比

    SELECT
    FIRST_NAME AS `名`,
    LAST_NAME AS `姓`
    FROM Users
    WHERE GENDER = '男'
    SKIP 100
    LIMIT 20
    
    db.users.aggregate([
    {$match: {gender: ’’男”}},
    {$skip: 100},
    {$limit: 20},
    {$project: {
    '名': '$first_name',
    '姓': '$last_name'
    }}
    ]);
    
    SELECT DEPARTMENT,
    COUNT(NULL) AS EMP_QTY
    FROM Users
    WHERE GENDER = '女'
    GROUP BY DEPARTMENT HAVING
    COUNT(*) < 10
    
    db.users.aggregate([
    {$match: {gender: '女'}},
    {$group: {
          _id: '$DEPARTMENT',
          emp_qty: {$sum: 1}
    }},
    {$match: {emp_qty: {$lt: 10}}}
    ]
    

    MQL 特有步骤

    $unwind

    如查询出一条订单记录
    db.orders.findOne();

    {
    	"_id" : ObjectId("5dbe7a545368f69de2b4d36e"),
    	"street" : "493 Hilll Curve",
    	"city" : "Champlinberg",
    	"state" : "Texas",
    	"country" : "Malaysia",
    	"zip" : "24344-1715",
    	"phone" : "425.956.7743 x4621",
    	"name" : "Destinee Schneider",
    	"userId" : 3573,
    	"orderDate" : ISODate("2019-03-26T03:20:08.805Z"),
    	"status" : "created",
    	"shippingFee" : NumberDecimal("8.00"),
    	"orderLines" : [
    		{
    			"product" : "Refined Fresh Tuna",
    			"sku" : "2057",
    			"qty" : 25,
    			"price" : NumberDecimal("56.00"),
    			"cost" : NumberDecimal("46.48")
    		},
    		{
    			"product" : "Refined Concrete Ball",
    			"sku" : "1738",
    			"qty" : 61,
    			"price" : NumberDecimal("47.00"),
    			"cost" : NumberDecimal("47")
    		},
    		{
    			"product" : "Rustic Granite Towels",
    			"sku" : "500",
    			"qty" : 62,
    			"price" : NumberDecimal("74.00"),
    			"cost" : NumberDecimal("62.16")
    		},
    		{
    			"product" : "Refined Rubber Salad",
    			"sku" : "1400",
    			"qty" : 73,
    			"price" : NumberDecimal("93.00"),
    			"cost" : NumberDecimal("87.42")
    		},
    		{
    			"product" : "Intelligent Wooden Towels",
    			"sku" : "5674",
    			"qty" : 72,
    			"price" : NumberDecimal("84.00"),
    			"cost" : NumberDecimal("68.88")
    		},
    		{
    			"product" : "Refined Steel Bacon",
    			"sku" : "5009",
    			"qty" : 8,
    			"price" : NumberDecimal("53.00"),
    			"cost" : NumberDecimal("50.35")
    		}
    	],
    	"total" : NumberDecimal("407")
    }
    
    

    如果使用$unwind,可以把Bson数据里面的数组展开,这个功能是不是很强大呢,可以用来做报表之类的

    db.orders.aggregate([{$limit: 1},{ $unwind: "$orderLines"}]);
    

    结果

    { "_id" : ObjectId("5dbe7a545368f69de2b4d36e"), "street" : "493 Hilll Curve", "city" : "Champlinberg", "state" : "Texas", "country" : "Malaysia", "zip" : "24344-1715", "phone" : "425.956.7743 x4621", "name" : "Destinee Schneider", "userId" : 3573, "orderDate" : ISODate("2019-03-26T03:20:08.805Z"), "status" : "created", "shippingFee" : NumberDecimal("8.00"), "orderLines" : { "product" : "Refined Fresh Tuna", "sku" : "2057", "qty" : 25, "price" : NumberDecimal("56.00"), "cost" : NumberDecimal("46.48") }, "total" : NumberDecimal("407") }
    { "_id" : ObjectId("5dbe7a545368f69de2b4d36e"), "street" : "493 Hilll Curve", "city" : "Champlinberg", "state" : "Texas", "country" : "Malaysia", "zip" : "24344-1715", "phone" : "425.956.7743 x4621", "name" : "Destinee Schneider", "userId" : 3573, "orderDate" : ISODate("2019-03-26T03:20:08.805Z"), "status" : "created", "shippingFee" : NumberDecimal("8.00"), "orderLines" : { "product" : "Refined Concrete Ball", "sku" : "1738", "qty" : 61, "price" : NumberDecimal("47.00"), "cost" : NumberDecimal("47") }, "total" : NumberDecimal("407") }
    { "_id" : ObjectId("5dbe7a545368f69de2b4d36e"), "street" : "493 Hilll Curve", "city" : "Champlinberg", "state" : "Texas", "country" : "Malaysia", "zip" : "24344-1715", "phone" : "425.956.7743 x4621", "name" : "Destinee Schneider", "userId" : 3573, "orderDate" : ISODate("2019-03-26T03:20:08.805Z"), "status" : "created", "shippingFee" : NumberDecimal("8.00"), "orderLines" : { "product" : "Rustic Granite Towels", "sku" : "500", "qty" : 62, "price" : NumberDecimal("74.00"), "cost" : NumberDecimal("62.16") }, "total" : NumberDecimal("407") }
    { "_id" : ObjectId("5dbe7a545368f69de2b4d36e"), "street" : "493 Hilll Curve", "city" : "Champlinberg", "state" : "Texas", "country" : "Malaysia", "zip" : "24344-1715", "phone" : "425.956.7743 x4621", "name" : "Destinee Schneider", "userId" : 3573, "orderDate" : ISODate("2019-03-26T03:20:08.805Z"), "status" : "created", "shippingFee" : NumberDecimal("8.00"), "orderLines" : { "product" : "Refined Rubber Salad", "sku" : "1400", "qty" : 73, "price" : NumberDecimal("93.00"), "cost" : NumberDecimal("87.42") }, "total" : NumberDecimal("407") }
    { "_id" : ObjectId("5dbe7a545368f69de2b4d36e"), "street" : "493 Hilll Curve", "city" : "Champlinberg", "state" : "Texas", "country" : "Malaysia", "zip" : "24344-1715", "phone" : "425.956.7743 x4621", "name" : "Destinee Schneider", "userId" : 3573, "orderDate" : ISODate("2019-03-26T03:20:08.805Z"), "status" : "created", "shippingFee" : NumberDecimal("8.00"), "orderLines" : { "product" : "Intelligent Wooden Towels", "sku" : "5674", "qty" : 72, "price" : NumberDecimal("84.00"), "cost" : NumberDecimal("68.88") }, "total" : NumberDecimal("407") }
    { "_id" : ObjectId("5dbe7a545368f69de2b4d36e"), "street" : "493 Hilll Curve", "city" : "Champlinberg", "state" : "Texas", "country" : "Malaysia", "zip" : "24344-1715", "phone" : "425.956.7743 x4621", "name" : "Destinee Schneider", "userId" : 3573, "orderDate" : ISODate("2019-03-26T03:20:08.805Z"), "status" : "created", "shippingFee" : NumberDecimal("8.00"), "orderLines" : { "product" : "Refined Steel Bacon", "sku" : "5009", "qty" : 8, "price" : NumberDecimal("53.00"), "cost" : NumberDecimal("50.35") }, "total" : NumberDecimal("407") }
    
    

    $bucket

    $bucket可以用来做目录,比如淘宝筛选栏里面会有价格区间

    db.products.aggregate([{
    $bucket:{
          groupBy: "$price",
          boundaries: [0,10,20,30,40],
          default: "Other",
          output:{"count":{$sum:1}}
    }
    }])
    
    
    

    db.products.aggregate([{
    $facet:{
          price:{
          $bucket:{…}
    },
    year:{
          $bucket:{…}
          }
    }
    }])
    

    聚合运算实战

    聚合实验数据模型

    dump
    链接: https://pan.baidu.com/s/1lMtn_OgE6wLQEyK4J39NAg 提取码: t7r2

    {
    	"_id" : ObjectId("5dbe7a545368f69de2b4d36e"),
    	"street" : "493 Hilll Curve",
    	"city" : "Champlinberg",
    	"state" : "Texas",
    	"country" : "Malaysia",
    	"zip" : "24344-1715",
    	"phone" : "425.956.7743 x4621",
    	"name" : "Destinee Schneider",
    	"userId" : 3573,
    	"orderDate" : ISODate("2019-03-26T03:20:08.805Z"),
    	"status" : "created",
    	"shippingFee" : NumberDecimal("8.00"),
    	"orderLines" : [
    		{
    			"product" : "Refined Fresh Tuna",
    			"sku" : "2057",
    			"qty" : 25,
    			"price" : NumberDecimal("56.00"),
    			"cost" : NumberDecimal("46.48")
    		},
    		{
    			"product" : "Refined Concrete Ball",
    			"sku" : "1738",
    			"qty" : 61,
    			"price" : NumberDecimal("47.00"),
    			"cost" : NumberDecimal("47")
    		},
    		{
    			"product" : "Rustic Granite Towels",
    			"sku" : "500",
    			"qty" : 62,
    			"price" : NumberDecimal("74.00"),
    			"cost" : NumberDecimal("62.16")
    		},
    		{
    			"product" : "Refined Rubber Salad",
    			"sku" : "1400",
    			"qty" : 73,
    			"price" : NumberDecimal("93.00"),
    			"cost" : NumberDecimal("87.42")
    		},
    		{
    			"product" : "Intelligent Wooden Towels",
    			"sku" : "5674",
    			"qty" : 72,
    			"price" : NumberDecimal("84.00"),
    			"cost" : NumberDecimal("68.88")
    		},
    		{
    			"product" : "Refined Steel Bacon",
    			"sku" : "5009",
    			"qty" : 8,
    			"price" : NumberDecimal("53.00"),
    			"cost" : NumberDecimal("50.35")
    		}
    	],
    	"total" : NumberDecimal("407")
    }
    

    聚合操作:

    1. 场景: 查找所有订单的总金额
    db.orders.aggregate([{$group: {_id: null, total: {$sum: "$total"}}}]);
    

    结果

    { "_id" : null, "total" : NumberDecimal("44019609") }
    
    1. 查询2019年第一季度(1月1日~3月31日)已完成订单(completed)的订单总金额和订单总数
    db.orders.aggregate([
        {
            $match: { 
                status: "completed",  
                orderDate: { 
                    $gte: ISODate("2019-01-01"), 
                    $lte:  ISODate("2019-03-31") 
                } 
            }
        },
        {
            $group: {
                _id: null, 
                total: {$sum: "$total"}, 
                shippingFee: {$sum: "$shippingFee"}, 
                count: {$sum: 1 }  
            }
        },
        {
            $project: {
                grandTotal: {
                    $add: ["$total", "$shippingFee" ]
                },
                count: 1,
                _id: 0
            }
        }
    ]);
    

    结果

    { "count" : 5818, "grandTotal" : NumberDecimal("2612553.00") }
    
  • 相关阅读:
    Scrapy学习-7-数据存储至数据库
    Scrapy学习-6-JSON数据处理
    Scrapy学习-5-下载图片实例
    Scrapy学习-4-Items类&Pipelines类
    Scrapy学习-3-Request回调巧用
    Scrapy学习-2-xpath&css使用
    iTunes.exe 在win7系统中运行出错解决办法
    [转]手机浏览器的User-Agent汇总
    apple mobile device服务无法启动,错误1053 解决
    [转]EF 4.1 Code First
  • 原文地址:https://www.cnblogs.com/amberbar/p/13203284.html
Copyright © 2011-2022 走看看