zoukankan      html  css  js  c++  java
  • MongoDB 复合索引的试验

    根据典型碰到的场景,来做几个实验:
    这里创建了个loans collection。简化只有100条数据。这个是借贷的表有 _id, userId, status(借贷状态), amount(金额).

    看完 这个实验后, 你会明白了 {userId:1, status:1}, vs {status:1,userId:1} 的差别

    PS:这个case 里面其实status 区分度不高,不应该建立的,这里只是作为实例展示。

    总结:

    • 建立复合索引时,把区分度高的/使用频率高的字段放在前面(作为 leading field)
    • 如果需要减少索引以节省memory/提高修改数据的性能的话,可以保留区分度高,常用的,去除区分度不高,不常用的索引。

    实验如下:

    db.loans.count()
    100

    db.loans.find({ "userId" : "59e022d33f239800129c61c7", "status" : "repayed", }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "$and" : [
    {
    "status" : {
    "$eq" : "repayed"
    }
    },
    {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    }
    ]
    },
    "winningPlan" : {
    "stage" : "COLLSCAN",
    "filter" : {
    "$and" : [
    {
    "status" : {
    "$eq" : "repayed"
    }
    },
    {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    }
    ]
    },
    "direction" : "forward"
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    注意上面 COLLSCAN 全表扫描了。因为没有索引。
    next 我们分别建立几个索引

    step 1 先建立 {userId:1, status:1}

    db.loans.createIndex({userId:1, status:1})
    {
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 1,
    "numIndexesAfter" : 2,
    "ok" : 1
    }

    db.loans.find({ "userId" : "59e022d33f239800129c61c7", "status" : "repayed", }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "$and" : [
    {
    "status" : {
    "$eq" : "repayed"
    }
    },
    {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    }
    ]
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1,
    "status" : 1
    },
    "indexName" : "userId_1_status_1",
    "multiKeyPaths" : {
    "userId" : [ ],
    "status" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ],
    "status" : [
    "[\"repayed\", \"repayed\"]"
    ]
    }
    }
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    如愿命中 {userId:1, status:1} 作为 winning plan

    step2 再建立个典型的索引 userId

    db.loans.createIndex({userId:1})
    {
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 2,
    "numIndexesAfter" : 3,
    "ok" : 1
    }

    db.loans.find({ "userId" : "59e022d33f239800129c61c7", "status" : "repayed", }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "$and" : [
    {
    "status" : {
    "$eq" : "repayed"
    }
    },
    {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    }
    ]
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1,
    "status" : 1
    },
    "indexName" : "userId_1_status_1",
    "multiKeyPaths" : {
    "userId" : [ ],
    "status" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ],
    "status" : [
    "[\"repayed\", \"repayed\"]"
    ]
    }
    }
    },
    "rejectedPlans" : [
    {
    "stage" : "FETCH",
    "filter" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1
    },
    "indexName" : "userId_1",
    "multiKeyPaths" : {
    "userId" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ]
    }
    }
    }
    ]
    },
    "ok" : 1
    }

    留意到 DB 检测到 {userId:1, status:1} 为更优执行的方案

    db.loans.find({ "userId" : "59e022d33f239800129c61c7" }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1
    },
    "indexName" : "userId_1",
    "multiKeyPaths" : {
    "userId" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ]
    }
    }
    },
    "rejectedPlans" : [
    {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1,
    "status" : 1
    },
    "indexName" : "userId_1_status_1",
    "multiKeyPaths" : {
    "userId" : [ ],
    "status" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ],
    "status" : [
    "[MinKey, MaxKey]"
    ]
    }
    }
    }
    ]
    },
    "ok" : 1
    }

    留意到 DB 检测到 {userId:1} 为更优执行的方案,嗯~,如我们所料

    db.loans.find({ "status" : "repayed" }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "winningPlan" : {
    "stage" : "COLLSCAN",
    "filter" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "direction" : "forward"
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    ***有趣的部分: status 不命中索引, 全表扫描 ***
    接下来,我加了个sort

    db.loans.find({ "userId" : "59e022d33f239800129c61c7" }).sort({status:1}).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1,
    "status" : 1
    },
    "indexName" : "userId_1_status_1",
    "multiKeyPaths" : {
    "userId" : [ ],
    "status" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ],
    "status" : [
    "[MinKey, MaxKey]"
    ]
    }
    }
    },
    "rejectedPlans" : [
    {
    "stage" : "SORT",
    "sortPattern" : {
    "status" : 1
    },
    "inputStage" : {
    "stage" : "SORT_KEY_GENERATOR",
    "inputStage" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1
    },
    "indexName" : "userId_1",
    "multiKeyPaths" : {
    "userId" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ]
    }
    }
    }
    }
    }
    ]
    },
    "ok" : 1
    }

    ***有趣的部分: 加了 sort({status:1}) 之后,命中了复合索引 {userId:1, status:1},直接利用索引顺序完成排序;只用 {userId:1} 再做内存 SORT 的方案被拒绝 ***

    db.loans.find({ "status" : "repayed","userId" : "59e022d33f239800129c61c7", }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "$and" : [
    {
    "status" : {
    "$eq" : "repayed"
    }
    },
    {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    }
    ]
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1,
    "status" : 1
    },
    "indexName" : "userId_1_status_1",
    "multiKeyPaths" : {
    "userId" : [ ],
    "status" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ],
    "status" : [
    "[\"repayed\", \"repayed\"]"
    ]
    }
    }
    },
    "rejectedPlans" : [
    {
    "stage" : "FETCH",
    "filter" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1
    },
    "indexName" : "userId_1",
    "multiKeyPaths" : {
    "userId" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ]
    }
    }
    }
    ]
    },
    "ok" : 1
    }

    命中索引, 跟 query 的各个字段顺序不相关,如我们猜测

    有趣部分再来, 我们删掉索引{userId:1}

    db.loans.dropIndex({"userId":1})
    { "nIndexesWas" : 3, "ok" : 1 }

    db.loans.find({"userId" : "59e022d33f239800129c61c7", }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1,
    "status" : 1
    },
    "indexName" : "userId_1_status_1",
    "multiKeyPaths" : {
    "userId" : [ ],
    "status" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[\"59e022d33f239800129c61c7\", \"59e022d33f239800129c61c7\"]"
    ],
    "status" : [
    "[MinKey, MaxKey]"
    ]
    }
    }
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    删掉 {userId:1} 之后,只按 userId 查询仍然命中复合索引 {userId:1, status:1},因为 userId 是它的 leading field

    而下面只按 status 查询就没有命中复合索引,这是因为 status 不是 leading field

    db.loans.find({ "status" : "repayed" }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "winningPlan" : {
    "stage" : "COLLSCAN",
    "filter" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "direction" : "forward"
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    再换个角度sort 一遍, 与前面query & sort 互换 ,之前是

    db.loans.find({ "userId" : "59e022d33f239800129c61c7" }).sort({status:1})
    看看有啥不一样?

    db.loans.find({ "status" : "repayed" }).sort({userId:1}).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "filter" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "userId" : 1,
    "status" : 1
    },
    "indexName" : "userId_1_status_1",
    "multiKeyPaths" : {
    "userId" : [ ],
    "status" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "userId" : [
    "[MinKey, MaxKey]"
    ],
    "status" : [
    "[MinKey, MaxKey]"
    ]
    }
    }
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    如猜测,命中索引(注意 indexBounds 都是 [MinKey, MaxKey]:这里是扫描整个索引来获得 userId 的顺序,status 的过滤是在 FETCH 阶段完成的)

    再来玩一玩,确认下 leading field 的试验:

    db.loans.dropIndex("userId_1_status_1")
    { "nIndexesWas" : 2, "ok" : 1 }

    db.loans.getIndexes()
    [
    {
    "v" : 2,
    "key" : {
    "_id" : 1
    },
    "name" : "_id_",
    "ns" : "cashLoan.loans"
    }
    ]

    db.loans.createIndex({status:1, userId:1})
    {
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 1,
    "numIndexesAfter" : 2,
    "ok" : 1
    }

    db.loans.getIndexes()
    [
    {
    "v" : 2,
    "key" : {
    "_id" : 1
    },
    "name" : "_id_",
    "ns" : "cashLoan.loans"
    },
    {
    "v" : 2,
    "key" : {
    "status" : 1,
    "userId" : 1
    },
    "name" : "status_1_userId_1",
    "ns" : "cashLoan.loans"
    }
    ]

    db.loans.find({ "status" : "repayed" }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "status" : {
    "$eq" : "repayed"
    }
    },
    "winningPlan" : {
    "stage" : "FETCH",
    "inputStage" : {
    "stage" : "IXSCAN",
    "keyPattern" : {
    "status" : 1,
    "userId" : 1
    },
    "indexName" : "status_1_userId_1",
    "multiKeyPaths" : {
    "status" : [ ],
    "userId" : [ ]
    },
    "direction" : "forward",
    "indexBounds" : {
    "status" : [
    "[\"repayed\", \"repayed\"]"
    ],
    "userId" : [
    "[MinKey, MaxKey]"
    ]
    }
    }
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    在有 status_1_userId_1 这个索引的前提下,去查 leading field -- status: xx 会命中索引

    db.loans.getIndexes()
    [
    {
    "v" : 2,
    "key" : {
    "_id" : 1
    },
    "name" : "_id_",
    "ns" : "cashLoan.loans"
    },
    {
    "v" : 2,
    "key" : {
    "status" : 1,
    "userId" : 1
    },
    "name" : "status_1_userId_1",
    "ns" : "cashLoan.loans"
    }
    ]

    db.loans.find({"userId" : "59e022d33f239800129c61c7", }).explain()
    {
    "queryPlanner" : {
    "namespace" : "cashLoan.loans",
    "parsedQuery" : {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    },
    "winningPlan" : {
    "stage" : "COLLSCAN",
    "filter" : {
    "userId" : {
    "$eq" : "59e022d33f239800129c61c7"
    }
    },
    "direction" : "forward"
    },
    "rejectedPlans" : [ ]
    },
    "ok" : 1
    }

    在有 status_1_userId_1 这个索引的前提下,去查非 leading field -- userId: xx 没命中,全表扫描

    所以,使用复合索引时要注意:区分度高的/使用频率高的字段应该放在前面,作为 leading fields。

  • 相关阅读:
    c++实验9 图及图的操作实验
    Html/CSS 示例演练 图书馆后台界面
    c++实验8 哈夫曼编码-译码器
    Html/CSS 初步介绍html和css部分重要标签
    c++实验7 二叉树
    JavaWeb -学生信息管理实践(JDBC+web+三层架构+DBUtil构造思路)
    java 实验6 图形用户界面设计试验(2)
    操作表与操作表数据行
    数据表操作练习
    数据库的介绍:
  • 原文地址:https://www.cnblogs.com/no7dw/p/13628801.html
Copyright © 2011-2022 走看看