  • Hive SQL Basic Operations

    Creating a Table

    Start the Hive CLI:
    hive

    View a local file
    # Local shell commands can be run from the Hive CLI by placing them between ! and ;
    !cat data/text.txt;

    File contents:
    user1,Funny Story,1343182026191
    user2,Cool Deal,1343182133839
    user4,Interesting Post,1343182154633
    user5,Yet Another Blog,13431839394

    Create the table:
    CREATE TABLE posts (user STRING, post STRING, time BIGINT)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;

    hive> show tables;
    hive> describe posts;

    Loading Data

    Load the file into the table:
    LOAD DATA LOCAL INPATH 'text.txt'
    OVERWRITE INTO TABLE posts;

    -- Load data from an HDFS location
    hive> LOAD DATA INPATH '/training/hive/user-posts.txt'
    > OVERWRITE INTO TABLE posts;

    -- Load data from the local file system
    hive> LOAD DATA LOCAL INPATH 'data/user-posts.txt'
    > OVERWRITE INTO TABLE posts;
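
    OVERWRITE replaces whatever the table already contains; leaving it out appends new rows instead. A small sketch:

    -- Append instead of replacing existing rows
    LOAD DATA LOCAL INPATH 'data/user-posts.txt'
    INTO TABLE posts;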

    If HDFS already holds many files in the same format, you can create an external table directly over the existing files:

    hive> CREATE EXTERNAL TABLE posts
    > (user STRING, post STRING, time BIGINT)
    > ROW FORMAT DELIMITED
    > FIELDS TERMINATED BY ','
    > STORED AS TEXTFILE
    > LOCATION '/training/hive/';
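
    The LOCATION clause only points the table at an HDFS directory; the data has to be there already. As a hedged sketch (data/user-posts.txt is the sample file used above, and the target directory is the one named in LOCATION):

    hdfs dfs -mkdir -p /training/hive
    hdfs dfs -put data/user-posts.txt /training/hive/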


    Check whether the loaded data is in HDFS:
    hdfs dfs -cat /user/hive/warehouse/posts/text.txt

    Querying Data


    select count(1) from posts;
    select * from posts where user="user2";
    select * from posts where time<=1343182133839 limit 2;
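
    Aggregations work the same way; as a small sketch against the posts table defined above, count the posts per user:

    select user, count(1) as post_count
    from posts
    group by user;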

    DROP TABLE posts;
    Dropping the table also removes its data from HDFS; verify with:
    hdfs dfs -ls /user/hive/warehouse/

    Partitioned Tables

    To improve query performance you can partition a table. Once a table is partitioned, a partition must be specified whenever data is loaded into it.

    CREATE TABLE posts1 (user STRING, post STRING, time BIGINT)
    PARTITIONED BY(country STRING)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;

    describe posts1;

    A partition must be specified when loading data:
    LOAD DATA LOCAL INPATH 'text.txt'
    OVERWRITE INTO TABLE posts1 PARTITION(country='US');

    LOAD DATA LOCAL INPATH 'text.txt'
    OVERWRITE INTO TABLE posts1 PARTITION(country='AUSTRALIA');

    Show the existing partitions (on the partitioned table posts1):
    show partitions posts1;
    hdfs dfs -ls -R /user/hive/warehouse/posts1
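
    Each partition is stored as its own subdirectory under the table directory, named column=value. With the two loads above the layout should look roughly like this (a sketch; exact file names may differ):

    /user/hive/warehouse/posts1/country=AUSTRALIA/text.txt
    /user/hive/warehouse/posts1/country=US/text.txt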

    Query a partition (the partition column can be used like any other column in the WHERE clause):
    select * from posts1 where country='US' limit 10;

    Joins

    CREATE TABLE posts_likes (user STRING, post STRING, likes_count INT);
    INSERT OVERWRITE TABLE posts_likes
    SELECT p.user, p.post, l.count
    FROM posts p JOIN likes l ON (p.user = l.user);
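
    The join queries here reference a likes table that is never created in this post. As a hedged sketch, it might be defined and loaded like this (columns chosen to match the l.user and l.count references; the file name user-likes.txt is hypothetical):

    CREATE TABLE likes (user STRING, count INT, time BIGINT)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;

    LOAD DATA LOCAL INPATH 'data/user-likes.txt'
    OVERWRITE INTO TABLE likes;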

    Outer Joins
    SELECT p.*, l.*
    FROM posts p LEFT OUTER JOIN likes l ON (p.user = l.user)
    limit 10;
    SELECT p.*, l.*
    FROM posts p RIGHT OUTER JOIN likes l ON (p.user = l.user)
    limit 10;
    SELECT p.*, l.*
    FROM posts p FULL OUTER JOIN likes l ON (p.user = l.user)
    limit 10;


    JDBC Program

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class HiveCreateTable {
        private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

        public static void main(String[] args) throws SQLException, ClassNotFoundException {
            // Register the Hive JDBC driver
            Class.forName(driverName);

            // Open a connection to the userdb database
            Connection con = DriverManager.getConnection(
                    "jdbc:hive://localhost:10000/userdb", "", "");

            // Create a statement and run the DDL (no trailing semicolon over JDBC)
            Statement stmt = con.createStatement();
            stmt.execute("CREATE TABLE IF NOT EXISTS employee "
                    + "(eid INT, name STRING, salary STRING, designation STRING) "
                    + "COMMENT 'Employee details' "
                    + "ROW FORMAT DELIMITED "
                    + "FIELDS TERMINATED BY '\\t' "
                    + "LINES TERMINATED BY '\\n' "
                    + "STORED AS TEXTFILE");

            System.out.println("Table employee created.");
            con.close();
        }
    }

    $ javac HiveCreateTable.java
    $ java HiveCreateTable
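
    The driver class and URL above are for the old HiveServer1 Thrift interface. On current Hive releases that expose HiveServer2, the driver and URL scheme differ; a hedged sketch of the same connection:

    // HiveServer2 uses org.apache.hive.jdbc.HiveDriver and the jdbc:hive2:// scheme
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    Connection con = DriverManager.getConnection(
            "jdbc:hive2://localhost:10000/userdb", "", "");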

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class HiveQLWhere {
        private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

        public static void main(String[] args) throws SQLException, ClassNotFoundException {
            // Register the Hive JDBC driver
            Class.forName(driverName);

            // Open a connection to the userdb database
            Connection con = DriverManager.getConnection(
                    "jdbc:hive://localhost:10000/userdb", "", "");

            // Create a statement and run the query
            Statement stmt = con.createStatement();
            ResultSet res = stmt.executeQuery("SELECT * FROM employee WHERE salary > 30000");

            System.out.println("Result:");
            System.out.println(" ID \t Name \t Salary \t Designation ");

            // The employee table created above has four columns
            while (res.next()) {
                System.out.println(res.getInt(1) + " " + res.getString(2) + " "
                        + res.getDouble(3) + " " + res.getString(4));
            }
            con.close();
        }
    }

    $ javac HiveQLWhere.java
    $ java HiveQLWhere
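
    Compilation only needs the JDK, but running either program requires the Hive JDBC driver and its dependencies on the classpath. A hedged sketch, assuming HIVE_HOME and HADOOP_HOME point at the local installations:

    $ javac HiveQLWhere.java
    $ java -cp ".:$HIVE_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*" HiveQLWhere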

  • Original article: https://www.cnblogs.com/starcrm/p/7451939.html