zoukankan      html  css  js  c++  java
  • Java Spark转换算子join、leftOuterJoin、rightOuterJoin、fullOuterJoin

    /**
     * # _*_ coding:utf-8 _*_
     * # Author:xiaoshubiao
     * # Time : 2020/5/14 8:33
     **/
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.Function2;
    import scala.Tuple2;
    
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    /**
     * Small local Spark demo that prints the results of the four pair-RDD join
     * flavours: {@code join}, {@code leftOuterJoin}, {@code rightOuterJoin} and
     * {@code fullOuterJoin}.
     *
     * <p>Two string lists are turned into key/value RDDs. Every key of the first
     * list is paired with the value {@code 1}, every key of the second list with
     * the value {@code 2}. The lists share the keys "a", "b" and "c"; "d" and "e"
     * appear only in the first list, "f" and "h" only in the second.
     */
    public class union_test {

        /**
         * Builds the two pair RDDs, runs each join variant and prints every
         * resulting tuple prefixed with the name of the join that produced it.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("union_test");

            // try-with-resources closes the context (and so stops the local
            // Spark instance) even if one of the jobs below throws.
            try (JavaSparkContext sc = new JavaSparkContext(conf)) {
                List<String> leftKeys = Arrays.asList("a", "b", "c", "d", "e");
                List<String> rightKeys = Arrays.asList("a", "b", "c", "f", "h");

                // Both source RDDs are split into 2 partitions.
                JavaRDD<String> leftRdd = sc.parallelize(leftKeys, 2);
                JavaRDD<String> rightRdd = sc.parallelize(rightKeys, 2);

                // Fully parameterized pair RDDs instead of raw types, so the
                // join results are statically typed as well.
                JavaPairRDD<String, Integer> leftPairs = leftRdd.mapToPair(x -> new Tuple2<>(x, 1));
                JavaPairRDD<String, Integer> rightPairs = rightRdd.mapToPair(x -> new Tuple2<>(x, 2));

                // Inner join: only keys present on both sides.
                leftPairs.join(rightPairs).collect()
                        .forEach(x -> System.out.println("join" + x));

                // Left outer join: every left key; the right value is optional.
                leftPairs.leftOuterJoin(rightPairs).collect()
                        .forEach(x -> System.out.println("leftOuterJoin" + x));

                // Right outer join: every right key; the left value is optional.
                leftPairs.rightOuterJoin(rightPairs).collect()
                        .forEach(x -> System.out.println("rightOuterJoin" + x));

                // Full outer join: every key from either side; both values optional.
                leftPairs.fullOuterJoin(rightPairs).collect()
                        .forEach(x -> System.out.println("fullOuterJoin" + x));
            }
        }
    }
  • 相关阅读:
    信号量Semaphore
    进程锁Lock
    创建多进程Process
    什么是进程?什么是线程?进程和线程之间的区别是什么?
    Git命令
    xss攻击问题以及如何防范
    ORM跨表查询问题
    for循环将字典添加到列表中出现覆盖前面数据的问题
    Linux源码的目录结构
    嵌入式中 MMU的功能
  • 原文地址:https://www.cnblogs.com/7749ha/p/12888272.html
Copyright © 2011-2022 走看看