zoukankan      html  css  js  c++  java
  • mongodb复杂查询示例

    以oracle的测试schema为例,emp和dept表,原始数据如下

    SQL> select * from emp;
    
         EMPNO ENAME      JOB              MGR HIREDATE              SAL       COMM     DEPTNO
    ---------- ---------- --------- ---------- -------------- ---------- ---------- ----------
          7369 SMITH      CLERK           7902 17-12月-80            800                    20
          7499 ALLEN      SALESMAN        7698 20-2月 -81           1600        300         30
          7521 WARD       SALESMAN        7698 22-2月 -81           1250        500         30
          7566 JONES      MANAGER         7839 02-4月 -81           2975                    20
          7654 MARTIN     SALESMAN        7698 28-9月 -81           1250       1400         30
          7698 BLAKE      MANAGER         7839 01-5月 -81           2850                    30
          7782 CLARK      MANAGER         7839 09-6月 -81           2450                    10
          7839 KING       PRESIDENT            17-11月-81           5000                    10
          7844 TURNER     SALESMAN        7698 08-9月 -81           1500          0         30
          7900 JAMES      CLERK           7698 03-12月-81            950                    30
          7902 FORD       ANALYST         7566 03-12月-81           3000                    20
          7934 MILLER     CLERK           7782 23-1月 -82           1300                    10
    
    已选择12行。
    
    SQL> select * from dept;
    
        DEPTNO DNAME          LOC
    ---------- -------------- -------------
            10 ACCOUNTING     NEW YORK
            20 RESEARCH       DALLAS
            30 SALES          CHICAGO
            40 OPERATIONS     BOSTON

    完成如下查询,

    a 列出每个部门的名称及其人数 
    b 列出工资最高的头三名员工姓名及其工资 
    c 列出工资比上司高的员工姓名 
    d 列出平均工资高于公司总平均工资的部门名称

    首先,需要建立mongodb集合,一开始,我采用完全模拟emp和dept表的方式,发现很多情况下都需要join,而mongodb并不支持join,做起来比较迂回。于是决定,把所有数据都放在同一个集合(emp_dept)内,新加元素TABLE用来表示是来源于哪个table的数据。以下是初始化emp_dept集合的ruby代码

    #!/usr/bin/env ruby
    # 20140307, init_emp_dept.rb
    ###
    ###
    
    
    require "rubygems"
    require "mongo"
    
    mongoconn = Mongo::MongoClient.new("localhost", 27017)
    db = mongoconn.db("test")
    
    # 7369 SMITH      CLERK           7902 17-12月-80            800                    20
    # 7499 ALLEN      SALESMAN        7698 20-2月 -81           1600        300         30
    # 7521 WARD       SALESMAN        7698 22-2月 -81           1250        500         30
    # 7566 JONES      MANAGER         7839 02-4月 -81           2975                    20
    # 7654 MARTIN     SALESMAN        7698 28-9月 -81           1250       1400         30
    # 7698 BLAKE      MANAGER         7839 01-5月 -81           2850                    30
    # 7782 CLARK      MANAGER         7839 09-6月 -81           2450                    10
    # 7839 KING       PRESIDENT            17-11月-81           5000                    10
    # 7844 TURNER     SALESMAN        7698 08-9月 -81           1500          0         30
    # 7900 JAMES      CLERK           7698 03-12月-81            950                    30
    # 7902 FORD       ANALYST         7566 03-12月-81           3000                    20
    # 7934 MILLER     CLERK           7782 23-1月 -82           1300                    10
    
    coll = db.collection("emp_dept")
    coll.insert({ TABLE:"EMP", EMPNO:7369, ENAME:"SMITH", JOB:"CLERK", MGR:7902, HIREDATE:"19801217", SAL:800, DEPTNO:20 })
    coll.insert({ TABLE:"EMP", EMPNO:7499, ENAME:"ALLEN", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810220", SAL:1600, COMM:300, DEPTNO:30 })
    coll.insert({ TABLE:"EMP", EMPNO:7521, ENAME:"WARD", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810222", SAL:1250, COMM:500, DEPTNO:30 })
    coll.insert({ TABLE:"EMP", EMPNO:7566, ENAME:"JONES", JOB:"MANAGER", MGR:7839, HIREDATE:"19810402", SAL:2975, DEPTNO:20 })
    coll.insert({ TABLE:"EMP", EMPNO:7654, ENAME:"MARTIN", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810928", SAL:1250, COMM:1400, DEPTNO:30 })
    coll.insert({ TABLE:"EMP", EMPNO:7698, ENAME:"BLAKE", JOB:"MANAGER", MGR:7839, HIREDATE:"19810501", SAL:2850, DEPTNO:30 })
    coll.insert({ TABLE:"EMP", EMPNO:7782, ENAME:"CLARK", JOB:"MANAGER", MGR:7839, HIREDATE:"19810609", SAL:2450, DEPTNO:10 })
    coll.insert({ TABLE:"EMP", EMPNO:7839, ENAME:"KING", JOB:"PRESIDENT", HIREDATE:"19811117", SAL:5000, DEPTNO:10 })
    coll.insert({ TABLE:"EMP", EMPNO:7844, ENAME:"TURNER", JOB:"SALESMAN", MGR:7698, HIREDATE:"19810908", SAL:1500, COMM:0, DEPTNO:30 })
    coll.insert({ TABLE:"EMP", EMPNO:7900, ENAME:"JAMES", JOB:"CLERK", MGR:7698, HIREDATE:"19811203", SAL:950, DEPTNO:30 })
    coll.insert({ TABLE:"EMP", EMPNO:7902, ENAME:"FORD", JOB:"ANALYST", MGR:7566, HIREDATE:"19811203", SAL:3000, DEPTNO:20 })
    coll.insert({ TABLE:"EMP", EMPNO:7934, ENAME:"MILLER", JOB:"CLERK", MGR:7782, HIREDATE:"19820123", SAL:1300, DEPTNO:10 })
    
    # coll.find.each { |doc| puts doc.inspect }
    
    # 10 ACCOUNTING     NEW YORK
    # 20 RESEARCH       DALLAS
    # 30 SALES          CHICAGO
    # 40 OPERATIONS     BOSTON
    
    coll.insert({ TABLE:"DEPT", DEPTNO:10, DNAME:"ACCOUNTING", LOC:"NEW YORK"})
    coll.insert({ TABLE:"DEPT", DEPTNO:20, DNAME:"RESEARCH", LOC:"DALLAS"})
    coll.insert({ TABLE:"DEPT", DEPTNO:30, DNAME:"SALES", LOC:"CHICAGO"})
    coll.insert({ TABLE:"DEPT", DEPTNO:40, DNAME:"OPERATIONS", LOC:"BOSTON"})
    
    
    coll.find.each { |doc| puts doc.inspect }
    
    # db.drop_collection("emp_dept")

    贴一下输出,便于看清楚集合的结构

    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000001'), "TABLE"=>"EMP", "EMPNO"=>7369, "ENAME"=>"SMITH", "JOB"=>"CLERK", "MGR"=>7902, "HIREDATE"=>"19801217", "SAL"=>800, "DEPTNO"=>20}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000002'), "TABLE"=>"EMP", "EMPNO"=>7499, "ENAME"=>"ALLEN", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810220", "SAL"=>1600, "COMM"=>300, "DEPTNO"=>30}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000003'), "TABLE"=>"EMP", "EMPNO"=>7521, "ENAME"=>"WARD", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810222", "SAL"=>1250, "COMM"=>500, "DEPTNO"=>30}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000004'), "TABLE"=>"EMP", "EMPNO"=>7566, "ENAME"=>"JONES", "JOB"=>"MANAGER", "MGR"=>7839, "HIREDATE"=>"19810402", "SAL"=>2975, "DEPTNO"=>20}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000005'), "TABLE"=>"EMP", "EMPNO"=>7654, "ENAME"=>"MARTIN", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810928", "SAL"=>1250, "COMM"=>1400, "DEPTNO"=>30}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000006'), "TABLE"=>"EMP", "EMPNO"=>7698, "ENAME"=>"BLAKE", "JOB"=>"MANAGER", "MGR"=>7839, "HIREDATE"=>"19810501", "SAL"=>2850, "DEPTNO"=>30}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000007'), "TABLE"=>"EMP", "EMPNO"=>7782, "ENAME"=>"CLARK", "JOB"=>"MANAGER", "MGR"=>7839, "HIREDATE"=>"19810609", "SAL"=>2450, "DEPTNO"=>10}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000008'), "TABLE"=>"EMP", "EMPNO"=>7839, "ENAME"=>"KING", "JOB"=>"PRESIDENT", "HIREDATE"=>"19811117", "SAL"=>5000, "DEPTNO"=>10}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000009'), "TABLE"=>"EMP", "EMPNO"=>7844, "ENAME"=>"TURNER", "JOB"=>"SALESMAN", "MGR"=>7698, "HIREDATE"=>"19810908", "SAL"=>1500, "COMM"=>0, "DEPTNO"=>30}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000a'), "TABLE"=>"EMP", "EMPNO"=>7900, "ENAME"=>"JAMES", "JOB"=>"CLERK", "MGR"=>7698, "HIREDATE"=>"19811203", "SAL"=>950, "DEPTNO"=>30}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000b'), "TABLE"=>"EMP", "EMPNO"=>7902, "ENAME"=>"FORD", "JOB"=>"ANALYST", "MGR"=>7566, "HIREDATE"=>"19811203", "SAL"=>3000, "DEPTNO"=>20}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000c'), "TABLE"=>"EMP", "EMPNO"=>7934, "ENAME"=>"MILLER", "JOB"=>"CLERK", "MGR"=>7782, "HIREDATE"=>"19820123", "SAL"=>1300, "DEPTNO"=>10}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000d'), "TABLE"=>"DEPT", "DEPTNO"=>10, "DNAME"=>"ACCOUNTING", "LOC"=>"NEW YORK"}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000e'), "TABLE"=>"DEPT", "DEPTNO"=>20, "DNAME"=>"RESEARCH", "LOC"=>"DALLAS"}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af00000f'), "TABLE"=>"DEPT", "DEPTNO"=>30, "DNAME"=>"SALES", "LOC"=>"CHICAGO"}
    {"_id"=>BSON::ObjectId('531c2a59cf7c7c45af000010'), "TABLE"=>"DEPT", "DEPTNO"=>40, "DNAME"=>"OPERATIONS", "LOC"=>"BOSTON"}

    对于问题a,mongodb的查询如下

    /* 20140309
     * a 列出每个部门的名称及其人数
     */
    db.emp_dept.group({key: {"DEPTNO":true},
      initial: {"DNAME":"", "csum":0},
      reduce: function(obj, prev){
          if (obj.TABLE == "DEPT"){
              prev.DNAME = prev.DNAME + obj.DNAME
          } else if (obj.TABLE == "EMP"){
              prev.csum ++;
          }
    }})

    输出如下

    [
        {
            "DEPTNO" : 20,
            "DNAME" : "RESEARCH",
            "csum" : 3
        },
        {
            "DEPTNO" : 30,
            "DNAME" : "SALES",
            "csum" : 6
        },
        {
            "DEPTNO" : 10,
            "DNAME" : "ACCOUNTING",
            "csum" : 3
        },
        {
            "DEPTNO" : 40,
            "DNAME" : "OPERATIONS",
            "csum" : 0
        }
    ]

    问题b,比较简单,代码如下

    /* 20140309
     * b 列出工资最高的头三名员工姓名及其工资
     */
    db.emp_dept.find({"TABLE":"EMP"},{"ENAME":1,"SAL":1,"_id":0}).sort({"SAL":-1}).limit(3)

    输出如下

    { "ENAME" : "KING", "SAL" : 5000 }
    { "ENAME" : "FORD", "SAL" : 3000 }
    { "ENAME" : "JONES", "SAL" : 2975 }

    问题c,简单一点的解法可以这么做

    /* 20140309
     * c 列出工资比上司高的员工姓名
     */
    // solution 1
    db.emp_dept.find({"$where": function(){
        if (this["TABLE"] == "EMP") {
            if (this["MGR"] != null){
                var mgr = db.emp_dept.findOne({"TABLE":"EMP", "EMPNO":this["MGR"]})
                if (this["SAL"] > mgr["SAL"]){
                    return true;
                }else {
                    return false;
                }
            }
        }
    }}, {"EMPNO":1, "ENAME":1, "_id":0})

    输出如下

    { "EMPNO" : 7902, "ENAME" : "FORD" }

    如果希望能够在查出这些员工的同时,也能列出其上司和其上司的工资,可能就需要用map-reduce,如果用map-reduce的话,写法就有很多种了,我是这样写的

    /* 20140309
     * c 列出工资比上司高的员工姓名
     */
    // solution 2
    get_mgr_sal = function() { 
        if (this["TABLE"] == "EMP") {
            if (this["MGR"] != null){
                var mgr = db.emp_dept.findOne({"TABLE":"EMP", "EMPNO":this["MGR"]});
                if (this["SAL"] > mgr["SAL"]){
                    emit({"EMPNO":this["EMPNO"], "ENAME":this["ENAME"]}, {"SAL" : this["SAL"], 
                        "MGR_EMPNO":mgr["EMPNO"], "MGR_NAME":mgr["ENAME"], "MGR_SAL":mgr["SAL"]}); 
                }
            }
        }
    }; 
    
    get_mgr_sal = function() { 
        if (this["TABLE"] == "EMP") {
            if (this["MGR"] != null){
                var mgr = db.emp_dept.findOne({"TABLE":"EMP", "EMPNO":this["MGR"]})
                emit({"EMPNO":this["EMPNO"], "ENAME":this["ENAME"]}, {"SAL" : this["SAL"], 
                    "MGR_EMPNO":mgr["EMPNO"], "MGR_NAME":mgr["ENAME"], "MGR_SAL":mgr["SAL"]}); 
            }
        }
    }; 
    
    higher_than_mgr = function(key, emits) { 
        return emits;
    };
    
    filter_result = function(key, reduced){
        if (reduced["SAL"] > reduced["MGR_SAL"]){
            return reduced;
        }
    }
    
    db.runCommand({"mapreduce" : "emp_dept", "map" : get_mgr_sal, "reduce" : higher_than_mgr,
        "out":"result", "finalize":filter_result})
    
    db.result.find({"value":{"$ne":null}})

    输出如下

    { "_id" : { "EMPNO" : 7902, "ENAME" : "FORD" }, "value" : { "SAL" : 3000, "MGR_EMPNO" : 7566, "MGR_NAME" : "JONES", "MGR_SAL" : 2975 } }

    虽然已经有了结果,但是我感觉map-reduce这么用,感觉还不是很舒服,但也没有想出更好的写法。

    问题d,我只想出了用map-reduce的解法

    /* 20140309
     * d 列出平均工资高于公司总平均工资的部门名称
     */
    map_avg_sal_dept = function() {
        if (this["TABLE"] == "EMP"){
            var dept = db.emp_dept.findOne({"TABLE":"DEPT", "DEPTNO":this["DEPTNO"]});
            var key = {"DEPTNO":dept["DEPTNO"], "DNAME":dept["DNAME"]};
            var value = {"SAL_DEPT":this["SAL"], "DEPT_COUNT":1};
            emit(key, value)
        }
    }
    
    red_avg_sal_dept = function(key, value) {
        var all_sal_dept = 0.0;
        var all_dept_count = 0;
        for (var i in value){
            all_sal_dept += value[i]["SAL_DEPT"];
            all_dept_count += value[i]["DEPT_COUNT"];
        }
        return {"AVG_SAL_DEPT":all_sal_dept/all_dept_count};
    }
    
    get_avg_sal = function(){
        var all_sal = 0.0;
        var all_count = 0;
        db.emp_dept.find({"TABLE":"EMP"}).forEach(
            function (e) {
                all_sal += e["SAL"];
                all_count ++;
            });
        return all_sal/all_count;
    }
    
    filter_avg_sal_dept = function(key, reduced) {
        if (reduced["AVG_SAL_DEPT"] > avg_sal){
            return reduced;
        }
    }
    
    db.runCommand({"mapreduce" : "emp_dept", "map" : map_avg_sal_dept, "reduce" : red_avg_sal_dept,
        "out":"result", "scope":{avg_sal:get_avg_sal()}, "finalize":filter_avg_sal_dept})
    
    db.result.find({"value":{"$ne":null}})

    输出如下

    { "_id" : { "DEPTNO" : 10, "DNAME" : "ACCOUNTING" }, "value" : { "AVG_SAL_DEPT" : 2916.6666666666665 } }
    { "_id" : { "DEPTNO" : 20, "DNAME" : "RESEARCH" }, "value" : { "AVG_SAL_DEPT" : 2258.3333333333335 } }

    总体看起来,用mongodb处理结构化数据,还是显得不那么直观,解法也显得很迂回。当然,如果适应了的话,写这些代码可能会变快,但是比起SQL对数据的抽象程度和友好性来说,mongodb还是差了不少,如果能有mongodb的类sql接口,那就会更好了。

  • 相关阅读:
    2021微软Power BI 每月功能更新系列——Power BI 6月版本功能完整解读 ​
    Power Automate实例应用
    微软Power BI 每月功能更新系列——2021年4月版本功能更新全面解读
    Power Apps 应用:构建一套简易的五一休假报备管理系统(一)
    如何在Azure上使用 Python 创建ADF
    微软Power BI 每月功能更新系列——2021年3月版本功能更新全面解读
    Power BI 制作技巧 — 按钮动画效果
    连接到Power BI Premium或 Azure Analysis Services时,如何更改Excel中的用户帐户
    在 Power BI 中连接到 SAP HANA 数据库
    2021微软Power BI 每月功能更新系列——Power BI 2月版本功能完整解读
  • 原文地址:https://www.cnblogs.com/valleylord/p/3590227.html
Copyright © 2011-2022 走看看