zoukankan      html  css  js  c++  java
  • mongodb聚合操作$使用例子

    $操作符使用

    $操作符一般用作
    

    == index a ==

    1. $addField 增加一个字段

    使用场景一:查询的结果中增加字段

    数据类型如下

    {
       "_id" : ObjectId("610cd5a0702aac3e62a77a13"),
       "name" : "zhang",
       "intro" : "a"
    }
    
    /* 2 */
    {
       "_id" : ObjectId("610cd5a4702aac3e62a77a14"),
       "name" : "zhang",
       "intro" : "b"
    }
    
    /* 3 */
    {
       "_id" : ObjectId("610cd5a6702aac3e62a77a15"),
       "name" : "zhang",
       "intro" : "b"
    }
    
    

    demo1:返回的结果中增加一个字段,保存执行的值

    match_dict = {"$match":{"name": "zhang"}}
    add_dict = {"$addFields": {"intro_list":"$intro"}}
    ret = db["test"].aggregate([match_dict,add_dict])
    print(list(ret))
    # 结果
    [{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'zhang', 'intro': 'a', 'intro_list': 'a'}, {'_id': ObjectId('610cd5a4702aac3e62a77a14'), 'name': 'zhang', 'intro': 'b', 'intro_list': 'b'}, {'_id': ObjectId('610cd5a6702aac3e62a77a15'), 'name': 'zhang', 'intro': 'b', 'intro_list': 'b'}]
    
    

    demo2:将一个值类型转换后,保存到增加的字段

    match_dict = {"$match": {"name": "zhang"}}
    # 将查询结果的值进行转换,保存到一个指定的字段中,多用于lookup时候。
    add_dict = {"$addFields": {"str_id": {"$toString":"$_id"}}}
    ret = db["test"].aggregate([match_dict, add_dict])
    # 结果
    [{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'zhang', 'intro': 'a', 'str_id': '610cd5a0702aac3e62a77a13'}, {'_id': ObjectId('610cd5a4702aac3e62a77a14'), 'name': 'zhang', 'intro': 'b', 'str_id': '610cd5a4702aac3e62a77a14'}, {'_id': ObjectId('610cd5a6702aac3e62a77a15'), 'name': 'zhang', 'intro': 'b', 'str_id': '610cd5a6702aac3e62a77a15'}]
    
    

    ObjectId转换为str类型

    # 以前都是查出结果后,循环str将id从Object转换为str,现在可以一步到位
    add_dict = {"$addFields": {"_id": {"$toString":"$_id"}}}
    # 结果为
    [{'_id': '610cd5a0702aac3e62a77a13', 'name': 'zhang', 'intro': 'a'}, {'_id': '610cd5a4702aac3e62a77a14', 'name': 'zhang', 'intro': 'b'}, {'_id': '610cd5a6702aac3e62a77a15', 'name': 'zhang', 'intro': 'b'}]
    
    

    数据类型如下

    /* 1 */
    {
        "_id" : ObjectId("610cd5a0702aac3e62a77a13"),
        "name" : "china",
        "city_gdp" : [ 
            {
                "city" : "beijing",
                "gdp" : 100
            }, 
            {
                "city" : "shanghai",
                "gdp" : 200
            }, 
            {
                "city" : "tianjin",
                "gdp" : 300
            }, 
            {
                "city" : "chongqing",
                "gdp" : 400
            }
        ]
    }
    
    

    demo3:求gdp的总和和平均值

    match_dict = {"$match": {"name": "china"}}
    # 增加两个字段:保存总和和平均值
    add_dict = {"$addFields": {"total": {"$sum":"$city_gdp.gdp"},"avg":{"$avg":"$city_gdp.gdp"}}}
    ret = db["test"].aggregate([match_dict, add_dict])
    print(list(ret))
    # 结果
    [{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'china', 'city_gdp': [{'city': 'beijing', 'gdp': 100}, {'city': 'shanghai', 'gdp': 200}, {'city': 'tianjin', 'gdp': 300}, {'city': 'chongqing', 'gdp': 400}], 'total': 1000, 'avg': 250.0}]
    
    

    使用场景二:给原始数据增加字段---类似于修改表结构

    数据结构如下

    /* 2 */
    {
        "_id" : ObjectId("610e97f48e28e168e81a2e62"),
        "company" : "apple",
        "computer" : 15000,
        "iphone" : 6000
    }
    
    /* 3 */
    {
        "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
        "company" : "huawei",
        "computer" : 10000,
        "iphone" : 8000
    }
    

    每条数据增加一个字段,名字为pad,默认初始值为3000

    update_dict = {"$addFields": {"pad": 3000}}
    # 注意:必须使用列表包裹更新条件,否则报错
    ret = db["test"].update_many({}, [update_dict])
    # 结果
    /* 2 */
    {
        "_id" : ObjectId("610e97f48e28e168e81a2e62"),
        "company" : "apple",
        "computer" : 15000,
        "iphone" : 6000,
        "pad" : 3000
    }
    
    /* 3 */
    {
        "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
        "company" : "huawei",
        "computer" : 10000,
        "iphone" : 8000,
        "pad" : 3000
    }
    

    2.$addToSet

    使用方式一:作为列表修改器,一般和update方法搭配使用。
    数据类型如下

    {
        "_id" : ObjectId("610e91708e28e168e81a1c2f"),
        "name" : "xian",
        "foods" : [ 
            "肉夹馍"
        ]
    }
    

    给foods列表中添加数据,达到去重效果

    ret = db["test"].update_one(filter={"name": "xian"}, update={"$addToSet": {"foods": "糖醋面皮"}})
    # 结果---插入相同的值会直接去重,但是上面的写法缺点是每次只能添加一个元素
    {
        "_id" : ObjectId("610e91708e28e168e81a1c2f"),
        "name" : "xian",
        "foods" : [ 
            "肉夹馍", 
            "糖醋面皮"
        ]
    }
    
    # 添加一组元素 --- 配合$each使用
    ret = db["test"].update_one(filter={"name": "xian"}, update={"$addToSet": {"foods": {"$each":["麻辣烫夹馍","东关烧烤"]}}})
    # 结果
    {
        "_id" : ObjectId("610e91708e28e168e81a1c2f"),
        "name" : "xian",
        "foods" : [ 
            "肉夹馍", 
            "糖醋面皮", 
            "麻辣烫夹馍", 
            "东关烧烤"
        ]
    }
    

    使用方式二:去重的话,一般和group搭配使用

    数据类型如下

    
    {
        "_id" : ObjectId("610e8d0d8e28e168e81a1009"),
        "name" : "zhang",
        "intro" : "a"
    }
    
    /* 2 */
    {
        "_id" : ObjectId("610e8d148e28e168e81a1023"),
        "name" : "zhang",
        "intro" : "a"
    }
    
    /* 3 */
    {
        "_id" : ObjectId("610e8d1b8e28e168e81a103b"),
        "name" : "zhang",
        "intro" : "b"
    }
    
    

    查询intro值并去重

    # 需要说明的是,$addToSet去重一般会和分组group一起使用
    match_dict = {"$match": {"name": "zhang"}}
    group_dict = {"$group": {"_id": None, "intro_list": {"$addToSet": "$intro"}}}
    # 结果
    [{'_id': None, 'intro_list': ['b', 'a']}]
    

    3. $and

    作用:逻辑与

    4. $avg

    作用:求平均

    5. $add

    作用:数学运算

    数据类型如下

    /* 2 */
    {
        "_id" : ObjectId("610e97f48e28e168e81a2e62"),
        "company" : "apple",
        "computer" : 15000,
        "iphone" : 6000
    }
    
    /* 3 */
    {
        "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
        "company" : "huawei",
        "computer" : 10000,
        "iphone" : 8000
    }
    
    
    match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
    # project也可以增加字段,$add将不同字段的值进行相加
    project_dict = {"$project": {"company": 1, "computer": 1, "iphone": 1, "total": {"$add": ["$computer", "$iphone"]}}}
    ret = db["test"].aggregate([match_dict, project_dict])
    print(list(ret))
    
    # 结果
    [{'_id': ObjectId('610e97f48e28e168e81a2e62'), 'company': 'apple', 'computer': 15000, 'iphone': 6000, 'total': 21000}, {'_id': ObjectId('610e97fe8e28e168e81a2e87'), 'company': 'huawei', 'computer': 10000, 'iphone': 8000, 'total': 18000}]
    
    
    

    6. $all

    作用:用于匹配数组元素

    7. $abs

    作用:求绝对值

    数据

    {
        "_id" : ObjectId("610ea57f8e28e168e81a5370"),
        "name" : "haerbin",
        "temp" : -10
    }
    

    返回绝对值后的数据

    match_dict = {"$match": {"name": "haerbin"}}
    # 利用project可以增加字段的性质,替换_id的值
    project = {"$project": {"name": 1, "temp": 1, "abs_temp": {"$abs": "$temp"}, "_id": {"$toString": "$_id"}}}
    ret = db["test"].aggregate([match_dict, project])
    # 结果
    [{'name': 'haerbin', 'temp': -10, 'abs_temp': 10, '_id': '610ea57f8e28e168e81a5370'}]
    
    

    同样适用于find

    # 完美解决find查询后处理_id类型转换的麻烦
    ret = db["test"].find_one({"name": "haerbin"},{"name": 1, "temp": 1, "abs_temp": {"$abs": "$temp"}, "_id": {"$toString": "$_id"}})
    # 结果:
    {'name': 'haerbin', 'temp': -10, 'abs_temp': 10, '_id': '610ea57f8e28e168e81a5370'}
    

    8. $arrayElemAt

    作用:返回数组内指定下标的元素

    数据类型如下:

    {
        "_id" : ObjectId("610e91708e28e168e81a1c2f"),
        "name" : "xian",
        "foods" : [ 
            "肉夹馍", 
            "糖醋面皮", 
            "麻辣烫夹馍", 
            "东关烧烤"
        ],
        "pad" : 3000
    }
    

    返回foods下标为0的元素

    match_dict = {"$match": {"name": "xian"}}
    # 取foods列表中的第一个元素
    project = {"$project": {"foods": 1, "pad": 1, "_id": {"$toString": "$_id"}, "foods_num1": {"$arrayElemAt":["$foods", 0]}}}
    ret = db["test"].aggregate([match_dict, project])
    # 结果
    [{'foods': ['肉夹馍', '糖醋面皮', '麻辣烫夹馍', '东关烧烤'], 'pad': 3000, '_id': '610e91708e28e168e81a1c2f', 'foods_num1': '肉夹馍'}]
    
    

    9. $arrayToObject

    作用:将数组内的key-value元素转换为对象

    使用场景一:将数据的字段A和字段B的值组成字典格式返回

    数据类型如下

    {
        "_id" : ObjectId("610e97f48e28e168e81a2e62"),
        "company" : "apple",
        "computer" : 15000,
        "iphone" : 6000,
        "pad" : 3000
    }
    
    /* 3 */
    {
        "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
        "company" : "huawei",
        "computer" : 10000,
        "iphone" : 8000,
        "pad" : 3000
    }
    
    

    输出,{"hawei":8000},{"apple":"6000"}

    match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
    # push不能和project搭配使用。
    group_dict = {"$group": {"_id": None, "list": {"$push": {"k": "$company", "v": "$pad"}}}}
    # project = {"$project": {"computer": 1, "iphone": 1, "_id": {"$toString": "$_id"}, "pad": 1,"list": {"$push": {"k": "$company", "v": "$pad"}}}}
    ret = db["test"].aggregate([match_dict, group_dict])
    # 结果 
    [{'_id': None, 'list': [{'k': 'apple', 'v': 3000}, {'k': 'huawei', 'v': 3000}]}]
    
    match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
    group_dict = {"$group": {"_id": None, "list": {"$push": {"k": "$company", "v": "$iphone"}}}}
    project = {"$project": {"return_dict": {"$arrayToObject": "$list"}}}
    ret = db["test"].aggregate([match_dict, group_dict, project])
    # 结果
    [{'_id': None, 'return_dict': {'apple': 6000, 'huawei': 8000}}]
    

    arrayToObject能够识别的类型如下

    类型一:
    [ [ “item”, “abc123”], [ “qty”, 25 ] ]
    类型二:
    [ { “k”: “item”, “v”: “abc123”}, { “k”: “qty”, “v”: 25 } ]
    

    https://blog.csdn.net/qq_38923792/article/details/110390327

    10. $accumulator

    作用: 自定义累加器
    TODO

    11. $allElementsTrue

    作用:判断集合中是否包含指定元素,如果数组中有null,0,undefine则返回false,空数组及其他值返回True。

    数据类型如下

     5 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe472"),
        "s_id" : 1,
        "responses" : [ 
            true
        ]
    }
    
    /* 6 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe473"),
        "s_id" : 2,
        "responses" : [ 
            true, 
            false
        ]
    }
    
    /* 7 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe474"),
        "s_id" : 3,
        "responses" : []
    }
    
    /* 8 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe475"),
        "s_id" : 4,
        "responses" : [ 
            1, 
            true, 
            "seven"
        ]
    }
    
    /* 9 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe476"),
        "s_id" : 5,
        "responses" : [ 
            0
        ]
    }
    
    /* 10 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe477"),
        "s_id" : 6,
        "responses" : [ 
            []
        ]
    }
    
    /* 11 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe478"),
        "s_id" : 7,
        "responses" : [ 
            [ 
                0
            ]
        ]
    }
    
    /* 12 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe479"),
        "s_id" : 8,
        "responses" : [ 
            [ 
                true
            ]
        ]
    }
    
    /* 13 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe47a"),
        "s_id" : 9,
        "responses" : [ 
            null
        ]
    }
    
    /* 14 */
    {
        "_id" : ObjectId("610fbcacaa6673116fbbe47b"),
        "s_id" : 10,
        "responses" : [ 
            false
        ]
    

    判断response是否包含None,0,undefine的元素
    当前版本:MONGO_VERSION=4.4.6
    需要版本:MONGO_VERSION=5.0+

    temp_dict = {"$project": {"responses": 1, "isAllTrue": {"$allElementsTrue": ["$responses"] }, "_id": 0 } }
    ret = db["test"].aggregate([temp_dict])
    报错:pymongo.errors.OperationFailure: $allElementsTrue's argument must be an array, but is missing, full error: {'operationTime': Timestamp(1628423017, 1), 'ok': 0.0, 'errmsg': "$allElementsTrue's argument must be an array, but is missing", 'code': 17040, 'codeName': 'Location17040', '$clusterTime': {'clusterTime': Timestamp(1628423017, 1), 'signature': {'hash': b'x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00', 'keyId': 0}}}
    
    

    12. $acos

    作用:返回一个值的反余弦值

    13. $avg

    作用:求平均值

    使用场景一:和project搭配使用

    数据样式

    /* 2 */
    {
        "_id" : ObjectId("610e97f48e28e168e81a2e62"),
        "company" : "apple",
        "computer" : 15000,
        "iphone" : 6000,
        "pad" : 3000
    }
    
    /* 3 */
    {
        "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
        "company" : "huawei",
        "computer" : 10000,
        "iphone" : 8000,
        "pad" : 3000
    }
    
    

    求computer + iphone + pad 加起来的平均值

    match_dict = {"$match": {"$or": [{"company": "huawei"}, {"company": "apple"}]}}
    # 对多个数求平均值
    project = {"$project": {"_id": {"$toString": "$_id"}, "avg_three": {"$avg": ["$computer", "$iphone", "$pad"]}}}
    ret = db["test"].aggregate([match_dict, project])
    print(list(ret))"avg_three": {"$avg": ["$computer", "$iphone", "$ipad"]}}}}
    ret = db["test"].aggregate([match_dict, project])
    # 结果
    [{'_id': '610e97f48e28e168e81a2e62', 'avg_three': 8000.0}, {'_id': '610e97fe8e28e168e81a2e87', 'avg_three': 7000.0}]
    
    

    使用场景二:和group搭配使用

    数据格式如下

    { "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
    { "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
    { "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
    { "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
    { "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:12:00Z") }
    

    官方例子代码

    db.sales.aggregate(
       [
         {
           $group:
             {
               _id: "$item",
               avgAmount: { $avg: { $multiply: [ "$price", "$quantity" ] } },
               avgQuantity: { $avg: "$quantity" }
             }
         }
       ]
    )
    
    #  结果
    { "_id" : "xyz", "avgAmount" : 37.5, "avgQuantity" : 7.5 }
    { "_id" : "jkl", "avgAmount" : 20, "avgQuantity" : 1 }
    { "_id" : "abc", "avgAmount" : 60, "avgQuantity" : 6 }
    

    == index b ==

    1. $bucket

    作用:分段统计数据

    使用方式一:单独使用

    数据格式如下

    // Newly added document
    { 
        "name" : "t1", 
        "price" : 100.0, 
        "_id" : ObjectId("6110e53447e9154a103dc257")
    }
    // Newly added document
    { 
        "name" : "t2", 
        "price" : 200.0, 
        "_id" : ObjectId("6110e53847e9154a103dc258")
    }
    // Newly added document
    { 
        "name" : "t3", 
        "price" : 300.0, 
        "_id" : ObjectId("6110e53d47e9154a103dc259")
    }
    // Newly added document
    { 
        "name" : "t4", 
        "price" : 400.0, 
        "_id" : ObjectId("6110e54247e9154a103dc25a")
    }
    // Newly added document
    { 
        "name" : "t5", 
        "price" : 500.0, 
        "_id" : ObjectId("6110e54747e9154a103dc25b")
    }
    

    分组 --- 一般group是按照字段进行分组,$bucket可以按照字段值进行判断分组

    match = {"$match":{"_id":{"$ne":""}}}
    bucket =  {
        "$bucket": {
          # 分组
          "groupBy": "$price", 
          # 边界,200<=x<400 有几个边界条件,就会分几组
          "boundaries": [ 200, 400 ], 
          #符合边界条件的为一组,组名为$price的值, 不符合边界条件的为一组,组名为Other
          "default": "Other", 
          # 输出格式
          "output": {
            "count": { "$sum": 1 }, 
            "titles" : { "$push": "$name"} 
          }
        
      }
      }
    # 结果
    [{'_id': 200, 'count': 2, 'titles': ['t2', 't3']}, 
    {'_id': 'Other', 'count': 3, 'titles': ['t1', 't4', 't5']}]
    
    
    
    bucket =  {
        "$bucket": {
          # 分组
          "groupBy": "$price", 
          # 边界,t1:200<=x<300, t2:300<=x<500 
          "boundaries": [ 100, 300, 500 ], 
          # 符合边界条件的为一组,组名为$price的值, 不符合边界条件的为一组,组名为Other
          "default": "Other", 
          # 输出格式
          "output": {
            "count": { "$sum": 1 }, 
            "titles" : { "$push": "$name"} 
          }
        
      }
      }
    # 结果
    [{'_id': 100, 'count': 2, 'titles': ['t1', 't2']}, {'_id': 300, 'count': 2, 'titles': ['t3', 't4']}, {'_id': 'Other', 'count': 1, 'titles': ['t5']}]
    

    使用方式二:搭配facet使用

    https://docs.mongodb.com/v4.4/reference/operator/aggregation/bucket/#mongodb-pipeline-pipe.-bucket

    2. $bucketauto

    作用:根据指定的表达式将传入的文档分类到特定数量的组(称为bucket)中。Bucket边界将自动确定,以便将文档平均分配到指定数量的Bucket中。

    https://docs.mongodb.com/v4.4/reference/operator/aggregation/bucketAuto/#mongodb-pipeline-pipe.-bucketAuto

    3. $binarySize

    作用:返回指定字符串或二进制数据的字节大小

    例如:可以存储照片的大小

    == index c ==

    1. $ceil

    作用:

    数据格式如下

    { 
        "_id" : ObjectId("6110e53447e9154a103dc257"), 
        "name" : "t1", 
        "price" : -2.1
    }
    { 
        "_id" : ObjectId("6110e53847e9154a103dc258"), 
        "name" : "t2", 
        "price" : 0.0
    }
    { 
        "_id" : ObjectId("6110e53d47e9154a103dc259"), 
        "name" : "t3", 
        "price" : 3.2
    }
    { 
        "_id" : ObjectId("6110e54247e9154a103dc25a"), 
        "name" : "t4", 
        "price" : -2.6
    }
    { 
        "_id" : ObjectId("6110e54747e9154a103dc25b"), 
        "name" : "t5", 
        "price" : 1.0
    }
    // Newly added document
    { 
        "name" : "t6", 
        "price" : 3.6, 
        "_id" : ObjectId("6114be69605d7c02aa34eac8")
    }
    

    代码

    match = {"$match":{"_id":{"$ne":""}}}
    project =  {
        "$project": { "price":1,"name":1,"new_price":{"$ceil":"$price"}}
      }
    
    ret= db["test"]["test"].aggregate([match,project])
    # 结果:0还是0,负数返回比自己大的最近负整数,正数返回比自己大的最近正整数
    [{'_id': ObjectId('6110e53447e9154a103dc257'), 'name': 't1', 'price': -2.1, 'new_price': -2.0}, 
    {'_id': ObjectId('6110e53847e9154a103dc258'), 'name': 't2', 'price': 0.0, 'new_price': 0.0},
    {'_id': ObjectId('6110e53d47e9154a103dc259'), 'name': 't3', 'price': 3.2, 'new_price': 4.0}, 
    {'_id': ObjectId('6110e54247e9154a103dc25a'), 'name': 't4', 'price': -2.6, 'new_price': -2.0}, 
    {'_id': ObjectId('6110e54747e9154a103dc25b'), 'name': 't5', 'price': 1.0, 'new_price': 1.0}, 
    {'_id': ObjectId('6114be69605d7c02aa34eac8'), 'name': 't6', 'price': 3.6, 'new_price': 4.0}]
    print(list(ret))
    

    2. $cmp

    作用:比较两个数的大小

    数据格式如下:

    { 
        "_id" : ObjectId("6110e53447e9154a103dc257"), 
        "name" : "t1", 
        "price" : -2.1
    }
    { 
        "_id" : ObjectId("6110e53847e9154a103dc258"), 
        "name" : "t2", 
        "price" : 0.0
    }
    { 
        "_id" : ObjectId("6110e53d47e9154a103dc259"), 
        "name" : "t3", 
        "price" : 3.2
    }
    { 
        "_id" : ObjectId("6110e54247e9154a103dc25a"), 
        "name" : "t4", 
        "price" : -2.6
    }
    { 
        "_id" : ObjectId("6110e54747e9154a103dc25b"), 
        "name" : "t5", 
        "price" : 1.0
    }
    // Newly added document
    { 
        "name" : "t6", 
        "price" : 3.6, 
        "_id" : ObjectId("6114be69605d7c02aa34eac8")
    }
    

    需求:返回price比0大和比0小的数据

    代码

    match = {"$match":{"_id":{"$ne":""}}}
    project =  {
        # 和0进行比较
        "$project": { "price":1,"name":1,"cmp_to_zero":{"$cmp":["$price",0]}}
      }
    
    ret= db["test"]["test"].aggregate([match,project])
    # 结果---cmp_to_zero 为0表示和0相等,-1表示小于0, 1表示大于0
    [{'_id': ObjectId('6110e53447e9154a103dc257'), 'name': 't1', 'price': -2.1, 'cmp_to_zero': -1}, 
    {'_id': ObjectId('6110e53847e9154a103dc258'), 'name': 't2', 'price': 0.0, 'cmp_to_zero': 0}, 
    {'_id': ObjectId('6110e53d47e9154a103dc259'), 'name': 't3', 'price': 3.2, 'cmp_to_zero': 1}, 
    {'_id': ObjectId('6110e54247e9154a103dc25a'), 'name': 't4', 'price': -2.6, 'cmp_to_zero': -1}, 
    {'_id': ObjectId('6110e54747e9154a103dc25b'), 'name': 't5', 'price': 1.0, 'cmp_to_zero': 1}, 
    {'_id': ObjectId('6114be69605d7c02aa34eac8'), 'name': 't6', 'price': 3.6, 'cmp_to_zero': 1}]
    
    

    3. $concat

    作用:字符串拼接

    数据格式如下

    { 
        "_id" : ObjectId("6110e53447e9154a103dc257"), 
        "city" : "shantou", 
        "contry" : "china", 
        "province" : "guangdong", 
        "street" : "test_street"
    }
    

    拼接完整的地址

    代码

    match = {"$match":{"_id":{"$ne":""}}}
    project =  {
        "$project": { "province":1,"contry":1,"city":1,"street":1,"detail":{"$concat":["$contry","/","$province","/","$city","/","$street"]}}
      }
    
    ret= db["test"]["test"].aggregate([match,project])
    # 结果
    [
    {'_id': ObjectId('6110e53447e9154a103dc257'), 'city': 'shantou', 'contry': 'china', 'province': 'guangdong', 'street': 'test_street', 'detail': 'china/guangdong/shantou/test_street'}
    ]
    

    4.$concatArrays

    作用:将多个列表拼接成一个列表

    数据格式如下

    { 
        "_id" : ObjectId("6110e53447e9154a103dc257"), 
        "east_city" : [
            "shanghai", 
            "xiamen"
        ], 
        "north_city" : [
            "beijing", 
            "tianjin"
        ], 
        "south_city" : [
            "nanjing", 
            "shenzhen"
        ], 
        "west_city" : [
            "shengdu", 
            "wulumuqi"
        ], 
        "middle_city" : [
    
        ]
    }
    
    

    将东西南北城市列表合成一个列表返回

    代码

    match = {"$match":{"_id":{"$ne":""}}}
    project =  {
        "$project": { "city_list":{"$concatArrays":["$east_city","$west_city","$north_city","$south_city","$middle_city"]}}
      }
    
    ret= db["test"]["test"].aggregate([match,project])
    # 结果
    [{'_id': ObjectId('6110e53447e9154a103dc257'), 'city_list': ['shanghai', 'xiamen', 'shengdu', 'wulumuqi', 'beijing', 'tianjin', 'nanjing', 'shenzhen']}]
    print(list(ret))
    

    5.$cond

    作用:返回if或者else里的返回值---条件匹配

    数据格式如下:

    // Newly added document
    { 
        "name" : "t1", 
        "price" : 0.0, 
        "_id" : ObjectId("6114d086605d7c02aa34ead9")
    }
    // Newly added document
    { 
        "name" : "t2", 
        "price" : 10.0, 
        "_id" : ObjectId("6114d08f605d7c02aa34eada")
    }
    // Newly added document
    { 
        "name" : "t3", 
        "price" : 30.0, 
        "_id" : ObjectId("6114d096605d7c02aa34eadb")
    }
    // Newly added document
    { 
        "name" : "t4", 
        "price" : 50.0, 
        "_id" : ObjectId("6114d09f605d7c02aa34eadc")
    }
    // Newly added document
    { 
        "name" : "t5", 
        "price" : 80.0, 
        "_id" : ObjectId("6114d0a4605d7c02aa34eadd")
    }
    // Newly added document
    { 
        "name" : "t6", 
        "price" : 100.0, 
        "_id" : ObjectId("6114d0aa605d7c02aa34eade")
    }
    

    price 小于100返回1000,大于50返回2000

    代码

    match = {"$match":{"_id":{"$ne":""}}}
    project =  {
        # 满足 price和100的比较,
        "$project": {"_id":0,"price":1, "price_result":{"$cond":{"if":{"$gte":["$price",100]},"then":2000,"else":1000}}}
      }
    ret= db["test"]["test"].aggregate([match,project])
    # 
    [{'price': 0.0, 'price_result': 1000}, 
    {'price': 10.0, 'price_result': 1000}, 
    {'price': 30.0, 'price_result': 1000}, 
    {'price': 50.0, 'price_result': 1000}, 
    {'price': 80.0, 'price_result': 1000}, 
    {'price': 100.0, 'price_result': 2000}]
    print(list(ret))
    
    

    6. $convert

    作用:字段类型转换

    准备数据时候,发现mongo不能直接插入decimal类型的数据,只能插入decimal128类型,插入数据代码

    from decimal import Decimal
    from bson.decimal128 import Decimal128
    data = {
        "name":"zhang",
        "age":10,
        "weight":75.5,
        "birthday": datetime.datetime.now(),
        "income": Decimal128(Decimal("8500.23")),
        "has_car":False
    }
    ret = db["test"]["test"].insert_one(data)
    print(ret.inserted_id)
    

    数据格式如下:

    { 
        "_id" : ObjectId("6114e20b661341258c35c08c"), 
        "name" : "zhang", 
        "age" : NumberInt(10), 
        "weight" : 75.5, 
        "birthday" : ISODate("2021-08-12T16:55:39.754+0000"), 
        "income" : NumberDecimal("8500.23"), 
        "has_car" : false
    }
    

    语法如下

    {
       $convert:
          {
             input: <expression>, 输入的值
             to: <type expression>, 输出的值
             onError: <expression>,  // 出错操作 
             onNull: <expression>    // 空值操作
          }
    }
    

    代码

    match = {"$match":{"_id":{"$ne":""}}}
    
    project =  {
        "$project": {
            "name":{"$convert":{"input":"$name","to":"bool","onError":"error","onError":None}},
            "age":{"$convert":{"input":"$age","to":"string","onError":"error","onError":None}},
            "weight":{"$convert":{"input":"$weight","to":"int","onError":"error","onError":None}},
            "birthday":{"$convert":{"input":"$birthday","to":"bool","onError":"error","onError":None}},
            "income":{"$convert":{"input":"$income","to":"decimal","onError":"error","onError":None}},
            "has_car":{"$convert":{"input":"$has_car","to":"bool","onError":"error","onError":None}},
            # $toString 可替代
            "_id":{"$convert":{"input":"$_id","to":"string","onError":"error","onError":None}},
            }
      }
    
    
    ret= db["test"]["test"].aggregate([match,project])
    # 结果
    [
    {
    'name': True, 
    'age': '10',
    'weight': 75, 
    'birthday': True, 
    'income': Decimal128('8500.23'), 
    'has_car': False, 
    '_id': '6114e20b661341258c35c08c'
    }
    ]
    print(list(ret))
    

    具体见:https://docs.mongodb.com/v4.4/reference/operator/aggregation/convert/

    === index d===

    1. $dateFromParts

    作用:根据日期的组成属性构造并返回Date对象。

    2. $dataToString

    作用:将时间类型的数据转成指定格式的字符串

    数据格式如下

    {
        "_id" : ObjectId("6114e20b661341258c35c08c"),
        "name" : "zhang",
        "age" : 10,
        "weight" : 75.5,
        "birthday" : ISODate("2021-08-12T16:55:39.754Z"),
        "income" : NumberDecimal("8500.23"),
        "has_car" : false
    }
    

    birthday转换成不同格式的字符串

    match_dict = {"$match": {"name": {"$ne": None}}}
    
    project = {
        "$project": {
            "birthday": 1,
             # 年月日时分秒 
            "format_1": {"$dateToString": {"format": "%Y-%m-%d %H:%M:%S", "date": "$birthday"}},
             # 这一年的第几天
            "format_2": {"$dateToString": {"format": "%j", "date": "$birthday"}},
             # 这一天是周几,1-7
            "format_3": {"$dateToString": {"format": "%u", "date": "$birthday"}},
             # 这一年的第几个周
            "format_4": {"$dateToString": {"format": "%U", "date": "$birthday"}},
    
        }
    }
    ret = db["test"].aggregate([match_dict, project])
    # 结果
    [
    {
    '_id': ObjectId('6114e20b661341258c35c08c'), 'birthday': datetime.datetime(2021, 8, 12, 16, 55, 39, 754000), 
    'format_1': '2021-08-12 16:55:39', 
    'format_2': '224', 
    'format_3': '4', 
    'format_4': '32'
    }
    ]
    print(list(ret))
    

    == index e ==

    1. $exp

    作用:计算e的n次幂
    https://docs.mongodb.com/v4.4/reference/operator/aggregation/exp/#mongodb-expression-exp.-exp

    2. $eq

    作用:判断两个值是否相等

    数据格式如下

    {
        "_id" : ObjectId("6114e20b661341258c35c08c"),
        "name" : "zhang",
        "age" : 10,
        "weight" : 75.5,
        "birthday" : ISODate("2021-08-12T16:55:39.754Z"),
        "income" : NumberDecimal("8500.23"),
        "has_car" : false
    }
    

    代码

    match_dict = {"$match": {"name": {"$ne": None}}}
    
    project = {
        "$project": {
            "age": 1, "age_10_flag": {"$eq": ["$age", 10]}
        }
    }
    ret = db["test"].aggregate([match_dict, project])
    # 结果
    [
    {'_id': ObjectId('6114e20b661341258c35c08c'), 'age': 10, 'age_10_flag': True}
    ]
    print(list(ret))
    
    

    == index f ==

    1. $facet

    作用:一次性执行多条aggragate语句:符合查询神器

    2. $first

    作用: 从分组结果中取第一条---必须和分组搭配使用

    db = MongoClient(["39.108.63.181:37017","39.108.63.181:47017","39.108.63.181:57017"],replicaset="rs",connect=True)
    
    group_dict = {"$group":{"_id":"$item","earlist_time":{"$first":"$date"}}}
    
    ret = db["test"]["test"].aggregate([group_dict])
    [
    {'_id': 'jkl', 'earlist_time': datetime.datetime(2014, 2, 3, 9, 0)}, 
    {'_id': 'xyz', 'earlist_time': datetime.datetime(2014, 2, 3, 9, 5)}, 
    {'_id': 'abc', 'earlist_time': datetime.datetime(2014, 1, 1, 8, 0)}
    ]
    

    == index f ==

    1. graphLookup

    作用:类似于自关联查询

    数据

    // Newly added document
    { 
        "name" : "china", 
        "_id" : ObjectId("61275f3f1d2b613cc23063ae")
    }
    // Newly added document
    { 
        "name" : "shanxi", 
        "up" : "china", 
        "_id" : ObjectId("61275f511d2b613cc23063af")
    }
    // Newly added document
    { 
        "name" : "xian", 
        "up" : "shanxi", 
        "_id" : ObjectId("61275f721d2b613cc23063b0")
    }
    // Newly added document
    { 
        "name" : "gaoxin", 
        "up" : "xian", 
        "_id" : ObjectId("61275f7c1d2b613cc23063b1")
    }
    
    
    

    == index R ==

    1. $replaceRoot

    作用:替换文档,提升文档显示级别,pymongo只识别$replaceWith(replaceRoot的别名)

    数据:

    // Newly added document
    { 
        "name" : "zhang", 
        "age" : 10.0, 
        "hobby" : {
            "school" : "study", 
            "home" : "game"
        }, 
        "_id" : ObjectId("61274f451d2b613cc230639e")
    }
    

    需要显示:

    {"school" : "study",  "home" : "game"}
    

    代码

    ret = db["test"]["test_aps"].aggregate([{"$match":{"_id":{"$ne":""}}},{"$replaceWith":{"new_dict":"$hobby"}}])
    # 结果
    [{'new_dict': {'school': 'study', 'home': 'game'}}]
    

    配合match,concat使用

    ret = db["test"]["test_aps"].aggregate([
        {"$match":{"_id":{"$ne":""}}},
        {"$replaceWith":{"new_dict":"$hobby"}},
        {"$replaceWith":{"new_dict":{"concat_new_dict":{"$concat":["$new_dict.school","-100-100-","$new_dict.home"]}}}}
        
        ])
    # 结果
    [{'new_dict': {'concat_new_dict': 'study-100-100-game'}}]
    

    2. $redact

    作用:修订/校验,意思是对文档内容进行过滤,选择一些过滤或保留的信息,常常和$cond配合一起使用

    数据:

    // Newly added document
    { 
        "_id" : 1.0, 
        "title" : "123 Department Report", 
        "tags" : [
            "G", 
            "STLW"
        ], 
        "year" : 2014.0, 
        "subsections" : [
            {
                "subtitle" : "Section 1: Overview", 
                "tags" : [
                    "SI", 
                    "G"
                ], 
                "content" : "Section 1: This is the content of section 1."
            }, 
            {
                "subtitle" : "Section 2: Analysis", 
                "tags" : [
                    "STLW"
                ], 
                "content" : "Section 2: This is the content of section 2."
            }, 
            {
                "subtitle" : "Section 3: Budgeting", 
                "tags" : [
                    "TK"
                ], 
                "content" : {
                    "text" : "Section 3: This is the content of section3.", 
                    "tags" : [
                        "HCS"
                    ]
                }
            }
        ]
    }
    

    如果想返回,subsections的tags包含"Tk"或"G"的文档,相当于返回部分文档
    代码

    
    
  • 相关阅读:
    再回首Java第九天
    再回首Java第八天
    再回首Java一周记
    再回首Java第六天
    再回首Java第五天
    再回首Java第四天
    再回首Java第三天
    再回首Java第二天
    再回首Java第一天
    关于i++和++i理解
  • 原文地址:https://www.cnblogs.com/meloncodezhang/p/15113079.html
Copyright © 2011-2022 走看看