zoukankan      html  css  js  c++  java
  • spark parquet 从hdfs 上读 和写

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.DataFrame;
    import org.apache.spark.sql.SQLContext;
    import org.apache.spark.sql.SaveMode;
    
    /**
     * @author Administrator
     *
     */
    /**
     * Demonstrates Spark SQL's generic load/save API against HDFS.
     *
     * <p>Reads a Parquet file (Parquet is self-describing, so the schema is
     * carried with the data), writes a projection of it back out, then reads
     * the written result and prints it.
     *
     * @author Administrator
     */
    public class GenericLoadSave {
    
    	public static void main(String[] args) {
    		SparkConf conf = new SparkConf() 
    				.setAppName("GenericLoadSave")
    				.setMaster("local");
    		JavaSparkContext sc = new JavaSparkContext(conf);
    		try {
    			SQLContext sqlContext = new SQLContext(sc);
    
    			// Parquet files embed their own schema, so no explicit schema
    			// (and no format) needs to be supplied to read().load().
    			DataFrame usersDF = sqlContext.read().load("hdfs://hadoop1:9000/input/users.parquet");
    
    			// When no format is specified on write(), Spark's default output
    			// format is Parquet. Overwrite replaces any existing output dir.
    			usersDF.select("name", "favorite_color").write().mode(SaveMode.Overwrite).save("hdfs://hadoop1:9000/output/namesAndFavColors_scala");   
    
    			// Read back what was just written and display it to verify.
    			DataFrame pDF = sqlContext.read().parquet("hdfs://hadoop1:9000/output/namesAndFavColors_scala");
    			pDF.show();
    		} finally {
    			// Always stop the context so executors and resources are released,
    			// even if one of the HDFS operations above throws.
    			sc.stop();
    		}
    	}
    	
    }
    

  • 相关阅读:
    lcx
    交换网络中存在的攻击及加固方法概括
    Hello world~
    CCSPSECURE1 安全理论
    SQL注入经验总结
    Access Control List
    初探java集合框架图
    深入浅出分析LinkedHashMap
    红黑树实现分析
    深入浅出的分析TreeMap
  • 原文地址:https://www.cnblogs.com/TendToBigData/p/10501300.html
Copyright © 2011-2022 走看看