zoukankan      html  css  js  c++  java
  • 使用java开发spark的wordcount程序(多种实现)

    package spark;
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.SparkSession;
    import scala.Tuple2;
    import java.util.Arrays;
    import java.util.List;
    
    /**
     * Word-count example written against Spark's Java API.
     *
     * <p>Reads a text file, splits every line on tab characters, counts how often
     * each resulting token occurs, and prints one {@code token:count} line per
     * distinct token.
     *
     * <p>Originally created by kkxwz on 2018/5/24.
     */
    public class WordCountApp {

        /** Input file used when no path is supplied on the command line. */
        private static final String DEFAULT_INPUT_PATH = "/Users/zl/data/sparksqldata/hello.txt";

        /**
         * Runs the word count with master {@code local[2]}.
         *
         * @param args optional; {@code args[0]} is the path of the text file to read.
         *             When absent, {@link #DEFAULT_INPUT_PATH} is used.
         */
        public static void main(String[] args) {
            String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

            // Before Spark 2.0 the entry point was a JavaSparkContext:
            //     SparkConf sparkConf = new SparkConf().setAppName("WordCountApp").setMaster("local[2]");
            //     JavaSparkContext spark = new JavaSparkContext(sparkConf);
            //     JavaRDD<String> lines = spark.textFile(inputPath);

            // Spark 2.0 and later (recommended): go through SparkSession.
            SparkSession spark = SparkSession.builder()
                    .master("local[2]")
                    .appName("WordCountApp")
                    .getOrCreate();

            try {
                JavaRDD<String> lines = spark.read().textFile(inputPath).javaRDD();

                // Tokens are tab-separated; String.split treats the argument as a regex,
                // and "\t" matches a single tab character.
                JavaRDD<String> words = lines.flatMap(line -> Arrays.asList(line.split("\t")).iterator());

                JavaPairRDD<String, Integer> counts = words
                        .mapToPair(word -> new Tuple2<>(word, 1))
                        .reduceByKey(Integer::sum);

                // Output option 1: print from inside the RDD action.
                // NOTE(review): the println runs wherever the tasks run; with a local
                // master that is presumably this JVM's stdout - confirm before using
                // this on a cluster.
                counts.foreach(count -> System.out.println(count._1() + ":" + count._2()));

                // Output option 2: collect the result to the driver and print it there.
                //     List<Tuple2<String, Integer>> output = counts.collect();
                //
                //     for (Tuple2<String, Integer> tuple : output) {
                //         System.out.println(tuple._1() + ":" + tuple._2());
                //     }
            } finally {
                // Stop the session even when reading the file or running the job throws.
                spark.stop();
            }
        }

    }
    
    // PS:
    //   1、jdk版本至少为1.8
    //   2、最好关联源码,查看返回类型学习!!!
    

      

  • 相关阅读:
    Security headers quick reference Learn more about headers that can keep your site safe and quickly look up the most important details.
    Missing dollar riddle
    Where Did the Other Dollar Go, Jeff?
    proteus 与 keil 联调
    cisco router nat
    router dhcp and dns listen
    配置802.1x在交换机的端口验证设置
    ASAv931安装&初始化及ASDM管理
    S5700与Cisco ACS做802.1x认证
    playwright
  • 原文地址:https://www.cnblogs.com/kkxwz/p/9083796.html
Copyright © 2011-2022 走看看