zoukankan      html  css  js  c++  java
  • Spark入门案例


    Scala版

    import org.apache.spark.{SparkConf, SparkContext}
    
    object WordCountScala {
      /**
       * Minimal Spark word-count example.
       *
       * Splits each input line on single spaces, maps every token to a
       * `(word, 1)` pair, sums the counts per word, and prints each
       * resulting `(word, count)` tuple to stdout.
       */
      def main(args: Array[String]): Unit = {
        // local[1]: run the driver and a single executor thread in-process.
        val conf: SparkConf = new SparkConf().setAppName("WordCountScala").setMaster("local[1]")
        val sc: SparkContext = new SparkContext(conf)
        try {
          val data = Array("hello world", "simple app is good", "good world")
          val result: Array[(String, Int)] = sc.parallelize(data)
            .flatMap(_.split(" "))
            .map((_, 1))
            .reduceByKey(_ + _)
            .collect()
          result.foreach(println)
        } finally {
          // Always stop the context so driver resources (threads, UI port,
          // temp dirs) are released even if the job throws.
          sc.stop()
        }
      }
    }
    

    Java版

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;
    import scala.Tuple2;
    
    import java.util.Arrays;
    import java.util.List;
    
    
    public class WordCountJava {
        /**
         * Minimal Spark word-count example (Java API).
         *
         * Splits each input line on single spaces, pairs every token with 1,
         * sums the counts per word, and prints each (word, count) tuple.
         */
        public static void main(String[] args) {
            // local[1]: run the driver and a single executor thread in-process.
            SparkConf conf = new SparkConf().setAppName("WordCountJava").setMaster("local[1]");
            // JavaSparkContext implements Closeable; try-with-resources guarantees
            // the context is stopped and driver resources released even on failure.
            try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
                List<String> data = Arrays.asList("hello world", "simple app is good", "good world");
                List<Tuple2<String, Integer>> result = jsc.parallelize(data)
                        .flatMap(s -> Arrays.asList(s.split(" ")).iterator())
                        .mapToPair(v -> new Tuple2<>(v, 1))
                        .reduceByKey(Integer::sum)
                        .collect();
                result.forEach(System.out::println);
            }
        }
    }
    

    计算结果

    (is,1)
    (app,1)
    (simple,1)
    (hello,1)
    (good,2)
    (world,2)
    

    可以看出在Spark中,Scala的语法显然要比Java简洁许多,毕竟Spark是用Scala写的,更加纯粹的函数式编程,建议尽可能优先采用Scala学习与使用Spark。



    尊重写作权利,转载请注明出处 ^_^
  • 相关阅读:
    CentOS7安装mysql-8
    zabbix监控规划及实施
    集群技术
    自动化脚本-配置LVS(DR模式)
    Pacemaker+ISCSI实现Apache高可用-配置
    创建集群corosync
    我的第一个python程序——猜数字
    质量报告
    新需求测试与回归测试
    冒烟测试
  • 原文地址:https://www.cnblogs.com/convict/p/14828084.html
Copyright © 2011-2022 走看看