zoukankan      html  css  js  c++  java
  • sparksql hive作为数据源

    根据官方文档的说法,需要把 hive-site.xml、core-site.xml、hdfs-site.xml 拷贝到 Spark 的 conf 目录下,并保证 MySQL 服务已经启动。

    java

     1 public class Demo {
     2     private static SparkSession session = SparkSession.builder().appName("demo").enableHiveSupport()
     3             .config("spark.sql.warehouse.dir", "/user/hive/warehouse").getOrCreate();
     4 
     5     public static void main(String[] args) {
     6         session.sql("drop table if exists students_info");
     7         session.sql("create table if not exists students_info(name string,age int) "
     8                 + "row format delimited fields terminated by '	' 
    ");
     9 
    10         // 将数据导入学生信息表
    11         session.sql(
    12                 "load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info");
    13 
    14         session.sql("drop table if exists students_score");
    15         session.sql("create table if not exists students_score(name string,score int)  
    "
    16                 + "row format delimited fields terminated by '	' 
    ");
    17 
    18         // 将数据导入学生成绩表
    19         session.sql(
    20                 "load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score");
    21 
    22         // 查询
    23         Dataset<Row> dataset = session.sql(
    24                 "select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score>80");
    25 
    26         // 将dataset中的数据保存到hive中
    27         session.sql("drop table if exists students_result");
    28         dataset.write().saveAsTable("students_result");
    29 
    30         // 将hive中的表转成dataset,查看数据是否成功保存
    31         Dataset<Row> table = session.table("students_result");
    32         table.show();
    33 
    34         session.stop();
    35 
    36     }
    37 }

    scala

     /**
      * Demonstrates using Hive as a Spark SQL data source.
      *
      * The program (re)creates two Hive tables, loads a local tab-delimited text
      * file into each, joins them on `name`, saves the rows with a score above 90
      * into a third Hive table, and finally reads that table back and prints it.
      *
      * Requires `org.apache.spark.sql.SparkSession` to be imported.
      */
     object Demo {
       /**
        * Runs the load / join / save / verify pipeline.
        *
        * @param args command-line arguments (unused)
        */
       def main(args: Array[String]): Unit = {
         val session = SparkSession.builder().appName("demo").enableHiveSupport().config("spark.sql.warehouse.dir", "/user/hive/warehouse").getOrCreate()

         try {
           // Recreate the student info table; fields in the data file are separated by a tab.
           session.sql("drop table if exists students_info")
           session.sql("create table if not exists students_info(name string,age int) \n row format delimited fields terminated by '\t'")

           session.sql("load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info")

           // Recreate the student score table with the same tab-delimited layout.
           session.sql("drop table if exists students_score")
           session.sql("create table if not exists students_score(name string,score int) \n row format delimited fields terminated by '\t'")

           session.sql("load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score")

           // Save to Hive; drop any previous result first because saveAsTable is
           // called without an explicit save mode.
           session.sql("drop table if exists students_result")
           session.sql("select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score >90").write.saveAsTable("students_result")

           // Check that the data was saved.
           val df = session.table("students_result")
           df.show()
         } finally {
           // Always release the session, even when one of the statements above fails.
           session.stop()
         }
       }
     }
  • 相关阅读:
    开源数学库
    ZendFramework综述
    MySQL临时表
    GMP for PHP
    ubuntu的LAMP环境搭建
    linux与windows区别
    js对象与打印对象
    MySQL 多库操作
    ubuntu文件系统综述
    波形声音整理资料(for Project1)
  • 原文地址:https://www.cnblogs.com/tele-share/p/10397525.html
Copyright © 2011-2022 走看看