zoukankan      html  css  js  c++  java
  • hive多表联合查询(GroupLens->Users,Movies,Ratings表)

    hive (UserMovieRating)> create table if not exists Users(
                          > UserID int comment 'user id',
                          > Gender string comment 'user sex',
                          > Age int comment '1:Under 18,18:18-24,25:25-34,35:35-44,45:45-49,50:50-55,56:56+',
                          > Occupation int comment '0-20 represents different jobs',
                          > ZipCode string comment 'your home zip code')
                          > row format delimited fields terminated by ' '
                          > stored as textfile;
    OK
    Time taken: 0.249 seconds
    hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/users.txt' overwrite into table Users;
    Copying data from file:/home/landen/MahoutTest/users.txt
    Copying file: file:/home/landen/MahoutTest/users.txt
    Loading data to table usermovierating.users
    Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/users
    Table usermovierating.users stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 110208, raw_data_size: 0]
    OK
    Time taken: 0.745 seconds
    hive (UserMovieRating)> select * from Users limit 10;
    OK
    userid    gender    age    occupation    zipcode
    1    F    1    10    48067
    2    M    56    16    70072
    3    M    25    15    55117
    4    M    45    7    02460
    5    M    25    20    55455
    6    F    50    9    55117
    7    M    35    1    06810
    8    M    25    12    11413
    9    M    25    17    61614
    10    F    35    1    95370
    Time taken: 0.096 seconds

    hive (UserMovieRating)> create table if not exists Movies(            
                          > MovieID int comment 'movie id',               
                          > MovieName string comment 'movie name',        
                          > ReleasedDate int comment 'released year',     
                          > MovieType string comment 'movie type')        
                          > row format delimited fields terminated by ' '
                          > stored as textfile;                           
    OK
    Time taken: 0.183 seconds
    hive (UserMovieRating)> show tables;
    OK
    tab_name
    movies
    users
    Time taken: 0.083 seconds

    hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/Processedmovies.txt' overwrite into table Movies;
    Copying data from file:/home/landen/MahoutTest/Processedmovies.txt
    Copying file: file:/home/landen/MahoutTest/Processedmovies.txt
    Loading data to table usermovierating.movies
    Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/movies
    Table usermovierating.movies stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 155900, raw_data_size: 0]
    OK
    Time taken: 0.695 seconds
    hive (UserMovieRating)> select * from Movies limit 10;
    OK
    movieid    moviename    releaseddate    movietype
    1    Toy Story    1995    Animation,Children's,Comedy
    2    Jumanji    1995    Adventure,Children's,Fantasy
    3    Grumpier Old Men    1995    Comedy,Romance
    4    Waiting to Exhale    1995    Comedy,Drama
    5    Father of the Bride Part II    1995    Comedy
    6    Heat    1995    Action,Crime,Thriller
    7    Sabrina    1995    Comedy,Romance
    8    Tom and Huck    1995    Adventure,Children's
    9    Sudden Death    1995    Action
    10    GoldenEye    1995    Action,Adventure,Thriller
    Time taken: 0.095 seconds

    hive (UserMovieRating)> create table if not exists Rating(                      
                          > UserID int comment 'user id',                           
                          > MovieID int comment 'movie id(1-3952)',                 
                          > Rating int comment 'Ratings are made on a 5-star scale',
                          > RatingTime string comment 'user rates time')            
                          > row format delimited fields terminated by ' '          
                          > stored as textfile;                                     
    OK
    Time taken: 1.3 seconds
    hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/ProcessedRating.txt' overwrite into table Rating;
    Copying data from file:/home/landen/MahoutTest/ProcessedRating.txt
    Copying file: file:/home/landen/MahoutTest/ProcessedRating.txt
    Loading data to table usermovierating.rating
    Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/rating
    Table usermovierating.rating stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 21593504, raw_data_size: 0]
    OK
    Time taken: 3.293 seconds
    hive (UserMovieRating)> select * from Rating limit 10;
    OK
    userid    movieid    rating    ratingtime
    1    1193    5    978300760
    1    661    3    978302109
    1    914    3    978301968
    1    3408    4    978300275
    1    2355    5    978824291
    1    1197    3    978302268
    1    1287    5    978302039
    1    2804    5    978300719
    1    594    4    978302268
    1    919    4    978301368
    Time taken: 0.658 seconds
    hive (UserMovieRating)> describe users;
    OK
    col_name    data_type    comment
    userid    int    user id
    gender    string    user sex
    age    int    1:Under 18,18:18-24,25:25-34,35:35-44,45:45-49,50:50-55,56:56+
    occupation    int    0-20 represents different jobs
    zipcode    string    your home zip code
    Time taken: 0.514 seconds
    hive (UserMovieRating)> describe movies;
    OK
    col_name    data_type    comment
    movieid    int    movie id
    moviename    string    movie name
    releaseddate    int    released year
    movietype    string    movie type
    Time taken: 0.085 seconds
    hive (UserMovieRating)> describe ratings;
    FAILED: SemanticException [Error 10001]: Table not found ratings
    hive (UserMovieRating)> describe rating;
    OK
    col_name    data_type    comment
    userid    int    user id
    movieid    int    movie id(1-3952)
    rating    int    Ratings are made on a 5-star scale
    ratingtime    string    user rates time
    Time taken: 0.121 seconds
    Users,Movies,Rating三表联合查询:

    hive (UserMovieRating)> select u.userid,u.occupation,m.moviename,r.rating
                                       > from rating r   
                                       > join users u on r.userid = u.userid
                                       > join movies m on r.movieid = m.movieid;

  • 相关阅读:
    split函数的修改,通过存储过程实现整表所有列的拆分
    在本机上安装zabbix,来监控服务器 三
    我第一篇博客
    渗透测试常用工具-网络嗅探
    渗透测试常用工具-Metasploit
    渗透测试常用工具-ADMsnmp进行snmp分析
    渗透测试常用工具-amap服务枚举
    渗透测试常用工具-端口扫描
    渗透测试常用工具-目标识别
    Meterpreter(后渗透命令)笔记
  • 原文地址:https://www.cnblogs.com/likai198981/p/3461462.html
Copyright © 2011-2022 走看看