zoukankan      html  css  js  c++  java
  • java实现spark常用算子之collect


    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.Function;

    import java.util.Arrays;
    import java.util.List;

    /**
     * Demonstrates the Spark {@code collect} action.
     *
     * <p>{@code collect} pulls the partitions held on the cluster's worker nodes
     * back to the driver machine; with a large dataset this can easily cause an
     * {@code OutOfMemoryError} on the driver. Note that {@code foreach} executes
     * on the worker nodes, whereas iterating over a {@code collect()}ed list
     * executes on the driver.
     */
    public class CollectOpeartor {

        public static void main(String[] args) {
            SparkConf conf = new SparkConf().setMaster("local").setAppName("collect");
            // FIX: the original never stopped the context. JavaSparkContext is
            // Closeable, so try-with-resources guarantees shutdown on exit.
            try (JavaSparkContext sc = new JavaSparkContext(conf)) {
                List<String> names = Arrays.asList("w1", "w2", "w3", "w4", "w5");

                JavaRDD<String> nameRdd = sc.parallelize(names);

                // Prefix every element with "001"; the map runs on the workers.
                JavaRDD<String> tempRdd = nameRdd.map(new Function<String, String>() {
                    @Override
                    public String call(String s) throws Exception {
                        return "001" + s;
                    }
                });

                // collect() materializes the whole RDD in driver memory.
                List<String> result = tempRdd.collect();

                // FIX: this is normal output, not an error — use System.out,
                // not System.err as the original did.
                for (String value : result) {
                    System.out.println(value);
                }
            }
        }
    }

    微信扫描下图二维码加入博主知识星球,获取更多大数据、人工智能、算法等免费学习资料哦!

  • 相关阅读:
    mysql复制那点事
    全排列问题
    56. Merge Interval
    2. Add Two Numbers
    20. Valid Parentheses
    121. Best Time to Buy and Sell Stock
    120. Triangle
    96. Unique Binary Search Trees
    91. Decode Ways
    72. Edit Distance
  • 原文地址:https://www.cnblogs.com/guokai870510826/p/11598603.html
Copyright © 2011-2022 走看看