zoukankan      html  css  js  c++  java
  • Scala_Load csv data to hive via spark2.1_via pass parameters_HiveAllType

    prepare CSV data

    col_TINYINT,col_SMALLINT,col_BIGINT,col_INT,col_FLOAT,col_DOUBLE,col_DECIMAL,col_TIMESTAMP,col_DATE,col_INTERVAL,col_STRING,col_VARCHAR,col_CHAR,col_BOOLEAN,col_BINARY
    100Y,100S,100L,100,1.11,2.22,4.44,2013-01-01 01:50:50,2013-01-01,2 SECONDS,"""123""",ab,a,TRUE,111

    create CSV file

    [Dev root @ sd-9c1f-2eac /tmp/pl62716]
    # vi CsvLoadToHive.csv
    col_TINYINT,col_SMALLINT,col_BIGINT,col_INT,col_FLOAT,col_DOUBLE,col_DECIMAL,col_TIMESTAMP,col_DATE,col_INTERVAL,col_STRING,col_VARCHAR,col_CHAR,col_BOOLEAN,col_BINARY
    100Y,100S,100L,100,1.11,2.22,4.44,2013-01-01 01:50:50,2013-01-01,2 SECONDS,"""123""",ab,a,TRUE,111

    scala test code

    package com.liupu
    import org.apache.spark.{ SparkContext, SparkConf }
    import org.apache.spark.sql.hive.HiveContext
    import org.apache.spark.sql.hive.orc._
    object LoadCsv3 {
      /** Loads a CSV file into a DataFrame, persists it as ORC, and registers
        * an external Hive table over the ORC output directory.
        *
        * Expected arguments:
        *   args(0) - path of the source CSV file
        *   args(1) - target directory for the ORC output
        *   args(2) - name of the external Hive table to create
        */
      def main(args: Array[String]): Unit = {
        // Fail fast with a usage message instead of an opaque ArrayIndexOutOfBoundsException.
        require(args.length >= 3,
          "usage: LoadCsv3 <sourceCsvPath> <targetOrcPath> <hiveTableName>")
        val sourceCsvPath = args(0)
        val targetPath = args(1)
        val hiveTableName = args(2)

        // Pass an explicit SparkConf so spark-submit settings are picked up
        // (the SparkConf import was previously unused).
        val sc = new SparkContext(new SparkConf())
        val hiveContext = new HiveContext(sc)
        try {
          val df = hiveContext.read
            .format("com.databricks.spark.csv")
            .option("header", "true")
            .option("inferSchema", "true")
            .load(sourceCsvPath)
          val selectedData = df.select(
            "col_TINYINT", "col_SMALLINT", "col_BIGINT", "col_INT", "col_FLOAT",
            "col_DOUBLE", "col_DECIMAL", "col_TIMESTAMP", "col_DATE", "col_INTERVAL",
            "col_STRING", "col_VARCHAR", "col_CHAR", "col_BOOLEAN", "col_BINARY")
          // "header" is a CSV-reader option and has no meaning for the ORC writer,
          // so it has been dropped here.
          selectedData.write.format("orc").save(targetPath)
          // Hive DDL fixes: VARCHAR and CHAR require an explicit length, and
          // INTERVAL is not a storable Hive column type -- it is kept as STRING
          // (the sample data carries values like "2 SECONDS" as text anyway).
          hiveContext.sql(
            s"""create external table $hiveTableName(
               |  col_TINYINT TINYINT, col_SMALLINT SMALLINT, col_BIGINT BIGINT,
               |  col_INT INT, col_FLOAT FLOAT, col_DOUBLE DOUBLE,
               |  col_DECIMAL DECIMAL(10,2), col_TIMESTAMP TIMESTAMP, col_DATE DATE,
               |  col_INTERVAL STRING, col_STRING STRING, col_VARCHAR VARCHAR(255),
               |  col_CHAR CHAR(10), col_BOOLEAN BOOLEAN, col_BINARY BINARY
               |) stored as orc location '$targetPath'""".stripMargin)
          hiveContext.sql("show tables").collect().foreach(println)
        } finally {
          // Always release the SparkContext, even if the load or DDL fails.
          sc.stop()
        }
      }
    }

    spark test

  • 相关阅读:
    vsCode 使用 PHP Intelephense插件函数跳转跟踪
    acme.sh 生成证书一直卡在Getting domain auth token for each domain
    用命令行执行php脚本输出乱码
    js检测是否是手机平台
    个人知识结构索引
    离线环境下安装ansible,借助有网环境下pip工具
    zabbix之微信告警(python版):微信个人报警,微信企业号告警脚本
    使用ansible结合FTP部署zabbix_agent
    datax的可视化-datax-web
    Finereport决策报表
  • 原文地址:https://www.cnblogs.com/liupuLearning/p/6567822.html
Copyright © 2011-2022 走看看