zoukankan      html  css  js  c++  java
  • Java 实现 Spark 常用算子之 mapPartitions


    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.api.java.function.VoidFunction;
    import java.util.*;

    /**
     * Demonstrates the Spark {@code mapPartitions} operator.
     *
     * <p>{@code mapPartitions} works at partition granularity: the supplied function is
     * called with an iterator over all the elements of one partition, rather than once
     * per element.
     */
    public class MapPartitionsOperator {

        /**
         * Builds a small two-partition RDD of names, maps each name to its score with
         * {@code mapPartitions}, and prints the resulting scores twice: once per element
         * with {@code foreach} and once per partition with {@code foreachPartition}.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            SparkConf conf = new SparkConf().setMaster("local").setAppName("mapPartitions");

            // JavaSparkContext is Closeable; try-with-resources guarantees the context is
            // shut down even if one of the jobs below throws.
            try (JavaSparkContext sc = new JavaSparkContext(conf)) {
                List<String> names = Arrays.asList("w1", "w2", "w3", "w4");
                // Split the four names across two partitions.
                JavaRDD<String> nameRdd = sc.parallelize(names, 2);

                // Lookup table captured by the anonymous function below.
                final Map<String, Integer> scoreMap = new HashMap<>();
                scoreMap.put("w1", 1);
                scoreMap.put("w2", 2);
                scoreMap.put("w3", 3);
                scoreMap.put("w4", 4);

                JavaRDD<Integer> result = nameRdd.mapPartitions(
                        new FlatMapFunction<Iterator<String>, Integer>() {
                            private static final long serialVersionUID = 1L;

                            /**
                             * Translates every name of one partition into its score.
                             *
                             * @param iterator the names contained in the partition
                             * @return an iterator over the scores, in the same order
                             */
                            @Override
                            public Iterator<Integer> call(Iterator<String> iterator) throws Exception {
                                List<Integer> list = new ArrayList<>();
                                while (iterator.hasNext()) {
                                    String name = iterator.next();
                                    // Every name in the RDD has an entry in scoreMap, so the
                                    // unboxing here does not see null.
                                    int score = scoreMap.get(name);
                                    list.add(score);
                                }
                                return list.iterator();
                            }
                        });

                // Print each score individually.
                result.foreach(new VoidFunction<Integer>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public void call(Integer integer) throws Exception {
                        System.err.println("mapPartitions算子:" + integer);
                    }
                });

                // Print the scores again, this time walking each partition's iterator.
                result.foreachPartition(new VoidFunction<Iterator<Integer>>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public void call(Iterator<Integer> integerIterator) throws Exception {
                        while (integerIterator.hasNext()) {
                            System.err.println("mapPartitions算子遍历:" + integerIterator.next());
                        }
                    }
                });
            }
        }
    }

    微信扫描下图二维码加入博主知识星球,获取更多大数据、人工智能、算法等免费学习资料哦!

  • 相关阅读:
    OSG中的示例程序简介(转)
    空间点到直线垂足坐标的解算方法 (转)
    OpenscenGraph中控制swapbuffer的方法(用于多机大屏幕同步显示机制)
    吏治 ? 官治 ?
    C++中使用union的几点思考(转)
    一个穷人移民美国三年的生活经历(转)
    展望99股市:谁是重组大黑马?(转)
    mysql 在一个实例运行情况下再搭建一个实例
    在CentOS下安装crontab服务
    Zabbix监控之迁移zabbix server
  • 原文地址:https://www.cnblogs.com/guokai870510826/p/11598933.html
Copyright © 2011-2022 走看看