  • SparkSQL reading and writing external data sources: text files and table data sources

    import org.apache.spark.sql.SparkSession

    object ParquetFileTest {
      // assumed location of the sample data; adjust to your environment
      val BASE_PATH = "file:///tmp/spark-data"

      def main(args: Array[String]): Unit = {
        val spark = SparkSession
          .builder()
          .appName("ParquetFileTest")
          .master("local[*]") // run locally; drop this when submitting to a cluster
          .getOrCreate()

        // 1: convert the JSON file data into Parquet files
        val df = spark.read.json(s"${BASE_PATH}/people.json")
        df.show()

        // supported codecs include gzip, lzo and snappy
        df.write.option("compression", "snappy").parquet(s"${BASE_PATH}/parquet")

        // 2: read the Parquet files back
        val parquetDF = spark.read.parquet(s"${BASE_PATH}/parquet")
        parquetDF.show()

        // 3: Parquet schema merge
        // (can also be enabled globally via spark.sql.parquet.mergeSchema = true)
        // write the same data with renamed columns so the two directories differ in schema
        df.toDF("age", "first_name").write.parquet(s"${BASE_PATH}/parquet_schema_change")
        val changedDF = spark.read.parquet(s"${BASE_PATH}/parquet_schema_change")
        changedDF.show()

        // reading both directories with mergeSchema=true unions the two schemas
        val schemaMergeDF = spark.read.option("mergeSchema", "true").parquet(s"${BASE_PATH}/parquet",
          s"${BASE_PATH}/parquet_schema_change")
        schemaMergeDF.show()

        spark.stop()
      }
    }
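
    The per-write compression option and the per-read mergeSchema option above can also be set once for the whole session. A minimal sketch of the session-level equivalents, reusing the BASE_PATH assumed above (spark.sql.parquet.compression.codec and spark.sql.parquet.mergeSchema are standard Spark SQL configs):

        // set session-wide defaults instead of per-operation options
        spark.conf.set("spark.sql.parquet.compression.codec", "gzip") // default is snappy
        spark.conf.set("spark.sql.parquet.mergeSchema", "true")

        // no option(...) calls needed now
        val mergedDF = spark.read.parquet(
          s"${BASE_PATH}/parquet",
          s"${BASE_PATH}/parquet_schema_change")
        mergedDF.show() // contains columns from both schemas, with nulls where a column is absent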
    

      

    import org.apache.spark.sql.SparkSession

    object OrcFileTest {
      // assumed location of the sample data; adjust to your environment
      val BASE_PATH = "file:///tmp/spark-data"

      def main(args: Array[String]): Unit = {
        val spark = SparkSession
          .builder()
          .appName("OrcFileTest")
          .master("local[*]") // run locally; drop this when submitting to a cluster
          .getOrCreate()

        // 1: convert the JSON file data into ORC files
        val df = spark.read.json(s"${BASE_PATH}/people.json")
        df.show()

        // ORC likewise supports codecs such as snappy, zlib and lzo
        df.write.option("compression", "snappy").orc(s"${BASE_PATH}/orc")

        // read the ORC files back
        val orcFileDF = spark.read.orc(s"${BASE_PATH}/orc")
        orcFileDF.show()

        spark.stop()
      }
    }
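
    The section title also mentions the text and table data sources, which the examples above do not exercise. Below is a minimal sketch of both; people.txt and the table name people_tbl are hypothetical, and BASE_PATH is the same assumed sample-data directory as above:

        import org.apache.spark.sql.SparkSession

        object TextAndTableTest {
          // assumed location of the sample data; adjust to your environment
          val BASE_PATH = "file:///tmp/spark-data"

          def main(args: Array[String]): Unit = {
            val spark = SparkSession
              .builder()
              .appName("TextAndTableTest")
              .master("local[*]")
              .getOrCreate()

            // text data source: each input line becomes one row with a single
            // string column named "value"
            val textDF = spark.read.text(s"${BASE_PATH}/people.txt")
            textDF.show()
            // writing text requires exactly one string column
            textDF.write.text(s"${BASE_PATH}/text_out")

            // table data source: save a DataFrame as a managed table in the
            // session catalog (under spark-warehouse by default) and read it back
            val df = spark.read.json(s"${BASE_PATH}/people.json")
            df.write.saveAsTable("people_tbl") // hypothetical table name
            val tableDF = spark.read.table("people_tbl")
            tableDF.show()

            spark.stop()
          }
        }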
    

      

  • Original article: https://www.cnblogs.com/tesla-turing/p/11489093.html