zoukankan      html  css  js  c++  java
  • spark入门: wordcount-java

    wordcount-java:

    pom.xml文件如下:

    <dependencies>
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-core_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-sql_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-hive_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-streaming_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>2.4.1</version>
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-streaming-kafka_2.10</artifactId>
          <version>1.3.0</version>
        </dependency>
      </dependencies>
    package cn.spark.study.core;
    
    import java.util.Arrays;
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.api.java.function.Function2;
    import org.apache.spark.api.java.function.PairFunction;
    import org.apache.spark.api.java.function.VoidFunction;
    
    import scala.Tuple2;
    
    public class WordCount3 {
    	public static void main(String[] args) {
    		SparkConf conf=new SparkConf().setAppName("WorldCountLocal").setMaster("local");
    		JavaSparkContext sc=new JavaSparkContext(conf);
    		JavaRDD<String> lines=sc.textFile("C:\Users\wanglonglong\Desktop\word.txt");
    		JavaRDD<String> words=lines.flatMap(new FlatMapFunction<String, String>() {
    
    			@Override
    			public Iterable<String> call(String t) throws Exception {
    				// TODO Auto-generated method stub
    				return Arrays.asList(t.split(" "));
    			}
    		});
    		JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
    			
    			private static final long serialVersionUID=1;
    			@Override
    			public Tuple2<String, Integer> call(String word) throws Exception {
    				return new Tuple2<String, Integer>(word,1);
    			}
    		});
    		JavaPairRDD<String, Integer> wordCounts = pairs.reduceByKey(
    		        new Function2<Integer, Integer, Integer>() {
    		            private static final long serialVersionUID = 1L;
    		            public Integer call(Integer v1, Integer v2) throws Exception {
    		                return v1 + v2;
    		            }
    		        });
    		wordCounts.foreach(new VoidFunction<Tuple2<String,Integer>>() {
    		       private static final long serialVersionUID = 1L;
    		       public void call(Tuple2<String, Integer> wordCount) throws Exception {
    		           System.out.println("("+wordCount._1 + "," + wordCount._2 + " )");
    		       }
    		   });
    		   sc.close();
    			
    	}
    	
    }
    

     

  • 相关阅读:
    最新pear安装
    php垃圾收集机制
    strstr的实现
    PHP 快速生成目录树
    php 去掉字符串
    php批量生成mysql触发器定义语句
    HTML的知识点讲解(HTML版本)
    mysql数据库怎么使用,mysql的使用方法
    sublime text3Emmet:HTML/CSS代码快速编写神器
    图片滚动插件jquery bxslider
  • 原文地址:https://www.cnblogs.com/wakerwang/p/9478516.html
Copyright © 2011-2022 走看看