zoukankan      html  css  js  c++  java
  • Hive案例05-学生成绩表综合案例

    1. 数据说明

    (1) student表

    hive> select * from student;
    
    # 学生ID    学生姓名     性别    年龄    所在系
    # sid       sname       sex     age     dept
      95002     Aiden       female  19      IS
      95017     Jacob       female  18      IS
      95018     Ethan       female  19      IS
      95013     Matthew     male    21      CS
      95014     Nicholas    female  19      CS
    ......
    

    (2) course表

    hive> select * from course;
    
    # 课程ID    课程名称
    # cid       cname
      1         Chinese
      2         Math
      3         English
      4         Physics
      5         Chemistry
      6         Biology
    

    (3) sc表

    hive> select * from sc;
    
    # 学生ID    课程ID  成绩
    # sid       cid     score
      95001     1       81
      95001     2       85
      95001     3       88
      95001     4       70
      95002     2       90
    ......
    

    2. SQL查询练习题目

    (1) 基本练习

    <1> 查询全体学生的学号与姓名

    -- List the student number and name of every student.
    select
        sid,
        sname
    from student;
    

    <2> 查询选修了课程的学生姓名

    -- The exercise asks for the names of students who take at least one
    -- course. sc only stores sid, so join it to student to get sname.
    -- distinct removes the repeated rows produced by students with
    -- several courses.
    select distinct s.sname
    from student s
    join sc
        on s.sid = sc.sid;
    

    (2) hive的group by和聚合函数

    <1> 查询学生的总人数

    -- Headcount: number of rows in student.
    select count(*)
    from student;
    

    <2> 计算1号课程的学生平均成绩

    -- Average score for course 1; the filter leaves a single cid group.
    select
        cid,
        avg(score)
    from sc
    where cid = '1'
    group by cid;
    

    <3> 查询各科成绩平均分

    -- Average score per course, labelled with the course name.
    -- The derived table aggregates sc by cid first; course then supplies cname.
    select
        c.cname,
        tmp.avg_score
    from course c
    inner join (
        select
            cid,
            avg(score) as avg_score
        from sc
        group by cid
    ) tmp
        on c.cid = tmp.cid;
    
    /*
    Chinese     83.66666666666667
    Math        88.66666666666667
    English     81.46153846153847
    Physics     83.125
    Chemistry   85.0
    Biology     89.45454545454545
    */
    

    <4> 查询1号课程的最高分数

    -- Highest score recorded for course 1.
    select
        cid,
        max(score) as max_score
    from sc
    where cid = '1'
    group by cid;
    
    /*
    1   98
    */
    

    <5> 求各个课程号及相应的选课人数

    -- Number of distinct students enrolled in each course.
    select
        cid,
        count(distinct sid) as count_sid
    from sc
    group by cid;
    
    /*
    1   15
    2   15
    3   13
    4   16
    5   12
    6   11
    */
    

    <6> 查询选修了3门以上的课程的学生学号

    -- Students enrolled in more than three distinct courses.
    select
        sid,
        count(distinct cid) as count_cid
    from sc
    group by sid
    having count(distinct cid) > 3;
    
    /*
    95001   4
    95002   4
    95004   4
    95005   4
    95006   6
    95007   4
    95011   4
    95012   4
    95013   4
    95015   4
    95018   4
    95019   5
    95022   4
    */
    

    (3) hive的order by/sort by/distribute by

    <1> 查询学生信息,结果按学号全局有序

    -- order by yields a single, totally ordered result, ascending by sid.
    select *
    from student
    order by sid asc;
    

    <2> 查询学生信息,结果区分性别按年龄有序

    -- Use two reducers so the two sexes can be spread over separate reducers.
    -- The default of -1 lets Hive choose the reducer count itself.
    set mapred.reduce.tasks=2;

    -- distribute by sends all rows of one sex to the same reducer, and
    -- sort by orders rows only within a reducer. Sorting on sex first keeps
    -- each sex contiguous even if both values hash to the same reducer.
    select * from student
    distribute by sex
    sort by sex, age;
    
    /*
    95009   Alexande    female  18  MA
    95017   Jacob       female  18  IS
    95008   Zachary     female  18  CS
    95014   Nicholas    female  19  CS
    95019   Jack        female  19  IS
    95018   Ethan       female  19  IS
    95002   Aiden       female  19  IS
    95007   Jaden       female  19  MA
    95012   Andrew      female  20  CS
    95003   Michael     female  22  MA
    95021   Connor      male    17  MA
    95005   Tyler       male    18  MA
    95011   Noah        male    18  MA
    95015   Jackson     male    18  MA
    95010   Caden       male    19  CS
    95004   Ryan        male    19  IS
    95022   Logan       male    20  MA
    95001   Caleb       male    20  CS
    95020   Joshua      male    21  IS
    95013   Matthew     male    21  CS
    95016   Brayden     male    21  MA
    95006   Dylan       male    23  CS
    */
    

    (4) join查询

    <1> 查询每个学生及其选修课程的情况

    -- Each student's name paired with the name of every course they take;
    -- sc links student to course.
    select
        s.sname,
        c.cname
    from student s
    inner join sc
        on s.sid = sc.sid
    inner join course c
        on c.cid = sc.cid;
    
    /*
    Caleb   Chinese
    Caleb   Math
    Caleb   English
    Caleb   Physics
    Aiden   Math
    Aiden   English
    Aiden   Physics
    Aiden   Chemistry
    Michael Chinese
    Michael English
    ......
    */
    

    <2> 查询学生的得分情况

    -- Score of each student in each course they take, with student and
    -- course shown by name.
    select
        s.sname,
        c.cname,
        sc.score
    from student s
    inner join sc
        on s.sid = sc.sid
    inner join course c
        on c.cid = sc.cid;
    
    /*
    Noah    Chinese 81
    Noah    Math    91
    Noah    English 81
    Noah    Physics 86
    Andrew  Chinese 81
    Andrew  English 78
    Andrew  Physics 85
    Andrew  Biology 98
    ......
    */
    

    <3> 查询选修2号课程且成绩在90分以上的所有学生

    -- Students who take course 2 with a score strictly above 90.
    select
        s.sname,
        sc.cid,
        sc.score
    from student s
    inner join sc
        on s.sid = sc.sid
    where sc.cid = '2'
      and sc.score > 90;
    
    /*
    Ryan        2   92
    Tyler       2   92
    Caden       2   98
    Noah        2   91
    Nicholas    2   100
    Brayden     2   99
    Ethan       2   100
    Joshua      2   99
    Connor      2   93
    */
    

    <4> 查询所有学生的信息,如果在成绩表中有成绩,则输出成绩表中的课程号

    -- Every student, plus the cid of each course they have a score for.
    -- The outer join keeps students without any sc row (cid is then NULL).
    select
        s.*,
        sc.cid
    from student s
    left outer join sc
        on s.sid = sc.sid;
    
    /*
    ......
    95015   Jackson male    18  MA  1
    95015   Jackson male    18  MA  3
    95015   Jackson male    18  MA  4
    95015   Jackson male    18  MA  6
    95016   Brayden male    21  MA  1
    95016   Brayden male    21  MA  2
    95016   Brayden male    21  MA  4
    */
    

    (5) LEFT SEMI JOIN

    查询与"Jackson"在同一个系学习的学生

    -- Students in the same department as Jackson. The semi join only tests
    -- for a matching dept in tmp, so tmp columns are not part of the output.
    select
        student.sname,
        student.dept
    from student
    left semi join (
        select dept
        from student
        where sname = 'Jackson'
    ) tmp
        on student.dept = tmp.dept;
    
    /*
    Michael     MA
    Tyler       MA
    Jaden       MA
    Connor      MA
    Logan       MA
    Noah        MA
    Alexande    MA
    Jackson     MA
    Brayden     MA
    */
  • 相关阅读:
    集中式(SVN)和分布式(Git)版本控制系统的简单比较
    Mac 提示安装包已损坏
    React 获取 url 参数 —— this.props.match
    编写一个 Chrome 浏览器扩展程序
    webpack 配置学习笔记
    Python 进阶学习笔记
    Python 入门学习笔记
    (转)Unity3d各种坑
    unity3d 网页游戏客户端工程构建方案
    (转)在Unity3D的网络游戏中实现资源动态加载
  • 原文地址:https://www.cnblogs.com/beiyi888/p/9599871.html
Copyright © 2011-2022 走看看