zoukankan      html  css  js  c++  java
  • spark入门: wordcount-java

    wordcount-java:

    pom.xml文件如下:

    <!--
      Maven dependencies for the Spark word-count example.
      All Spark artifacts are pinned to 1.3.0 and carry the _2.10 suffix,
      which by Spark's artifact naming denotes the Scala 2.10 build; keep
      the version and suffix identical across every Spark artifact.
      NOTE(review): the WordCount3 class in this file only imports core
      Spark API classes and scala.Tuple2; the sql/hive/streaming/kafka
      artifacts are presumably for other examples in the project - confirm
      before trimming.
    -->
    <dependencies>
        <!-- JUnit, test scope only (not on the runtime classpath). -->
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        <!-- Spark core: SparkConf, JavaSparkContext, JavaRDD, JavaPairRDD. -->
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-core_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
        <!-- Spark SQL module. -->
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-sql_2.10</artifactId>
          <version>1.3.0</version>
          </dependency>
        <!-- Spark Hive integration module. -->
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-hive_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
        <!-- Spark Streaming module. -->
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-streaming_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
        <!-- Hadoop client libraries, version 2.4.1. -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>2.4.1</version>
        </dependency>
        <!-- Kafka connector for Spark Streaming. -->
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-streaming-kafka_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
      </dependencies>
    package cn.spark.study.core;
    
    import java.util.Arrays;
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.api.java.function.Function2;
    import org.apache.spark.api.java.function.PairFunction;
    import org.apache.spark.api.java.function.VoidFunction;
    
    import scala.Tuple2;
    
    public class WordCount3 {
    	public static void main(String[] args) {
    		SparkConf conf=new SparkConf().setAppName("WorldCountLocal").setMaster("local");
    		JavaSparkContext sc=new JavaSparkContext(conf);
    		JavaRDD<String> lines=sc.textFile("C:\Users\wanglonglong\Desktop\word.txt");
    		JavaRDD<String> words=lines.flatMap(new FlatMapFunction<String, String>() {
    
    			@Override
    			public Iterable<String> call(String t) throws Exception {
    				// TODO Auto-generated method stub
    				return Arrays.asList(t.split(" "));
    			}
    		});
    		JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
    			
    			private static final long serialVersionUID=1;
    			@Override
    			public Tuple2<String, Integer> call(String word) throws Exception {
    				return new Tuple2<String, Integer>(word,1);
    			}
    		});
    		JavaPairRDD<String, Integer> wordCounts = pairs.reduceByKey(
    		        new Function2<Integer, Integer, Integer>() {
    		            private static final long serialVersionUID = 1L;
    		            public Integer call(Integer v1, Integer v2) throws Exception {
    		                return v1 + v2;
    		            }
    		        });
    		wordCounts.foreach(new VoidFunction<Tuple2<String,Integer>>() {
    		       private static final long serialVersionUID = 1L;
    		       public void call(Tuple2<String, Integer> wordCount) throws Exception {
    		           System.out.println("("+wordCount._1 + "," + wordCount._2 + " )");
    		       }
    		   });
    		   sc.close();
    			
    	}
    	
    }
    

     

  • 相关阅读:
    『数学』--数论--组合数+卢卡斯定理+扩展卢卡斯定理
    Lucene高亮
    Linux 计划任务
    Lucene.net(4.8.0) 学习问题记录二: 分词器Analyzer中的TokenStream和AttributeSource
    Asp.net Core 异步调用 Task await async 的梳理
    Asp.net core 中的依赖注入
    Lucene.net(4.8.0) 学习问题记录一:分词器Analyzer的构造和内部成员ReuseStategy
    Git 使用篇二:小组协作开发
    Git 使用篇二:搭建远程服务器
    Git 使用篇一:初步使用GitHub,下载安装git,并上传项目
  • 原文地址:https://www.cnblogs.com/wakerwang/p/9478516.html
Copyright © 2011-2022 走看看