zoukankan      html  css  js  c++  java
  • SparkSQL读写外部数据源-text文件和table数据源的读写

    /** Demo: round-trip JSON data through Parquet and exercise schema merging. */
    object ParquetFileTest {
      def main(args: Array[String]): Unit = {
        val spark =
          SparkSession.builder().appName("ParquetFileTest").getOrCreate()

        // Step 1: load the JSON source that will be converted to Parquet.
        val jsonDF = spark.read.json(s"${BASE_PATH}/people.json")
        jsonDF.show()

        // Write as Parquet; supported codecs include gzip, lzo and snappy.
        jsonDF.write.option("compression", "snappy").parquet(s"${BASE_PATH}/parquet")

        // Step 2: read the Parquet files back and display them.
        spark.read.parquet(s"${BASE_PATH}/parquet").show()

        // Step 3: Parquet schema merging.
        // Merging can also be enabled globally via spark.sql.parquet.mergeSchema = true.
        jsonDF.toDF("age", "first_name").write.parquet(s"${BASE_PATH}/parquet_schema_change")
        spark.read.parquet(s"${BASE_PATH}/parquet_schema_change").show()

        // Reading both directories with mergeSchema unions the two schemas.
        spark.read
          .option("mergeSchema", "true")
          .parquet(s"${BASE_PATH}/parquet", s"${BASE_PATH}/parquet_schema_change")
          .show()

        spark.stop()
      }
    }
    

      

    /** Demo: convert JSON data to ORC and read it back. */
    object OrcFileTest {
      def main(args: Array[String]): Unit = {
        val spark =
          SparkSession.builder().appName("OrcFileTest").getOrCreate()

        // Step 1: load the JSON source that will be converted to ORC.
        val jsonDF = spark.read.json(s"${BASE_PATH}/people.json")
        jsonDF.show()

        // Write as snappy-compressed ORC, then read it back and display it.
        jsonDF.write.option("compression", "snappy").orc(s"${BASE_PATH}/orc")
        spark.read.orc(s"${BASE_PATH}/orc").show()

        spark.stop()
      }
    }
    

      

  • 相关阅读:
    软件设计文档
    java基础路线与详细知识点
    hdu 2203 亲和串 kmp
    UVALive 6915 J
    UVALive 6911 F
    UVALive 6906 A
    hdu 3746 Cyclic Nacklace KMP
    hdu 1686 Oulipo kmp算法
    hdu1711 Number Sequence kmp应用
    hdu4749 kmp应用
  • 原文地址:https://www.cnblogs.com/tesla-turing/p/11489093.html
Copyright © 2011-2022 走看看