zoukankan      html  css  js  c++  java
  • Spark first example

    This code counts how many times each word occurs in a text file.

    package geo1.op1;
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.api.java.function.Function2;
    import org.apache.spark.api.java.function.PairFunction;
    import scala.Tuple2;
    
    import java.util.Arrays;
    
    /**
     * Minimal Spark job that counts how often each word occurs in a text file
     * and saves the resulting (word, count) pairs as text in an output folder.
     *
     * <p>Usage: {@code WordCount [inputPath [outputFolder]]}. Any argument that is
     * omitted falls back to the corresponding built-in default path.
     *
     * <p>NOTE(review): {@code @Override} is still left off the anonymous function
     * implementations, as in the original; enable it once the build is confirmed
     * to target Java 6 or later.
     */
    public class WordCount {
        /** Input file that is read when no first argument is supplied. */
        private static final String DEFAULT_INPUT_PATH = "/home/steve/Documents/words.txt";

        /** Output folder that is written when no second argument is supplied. */
        private static final String DEFAULT_OUTPUT_FOLDER = "/home/steve/Documents/ans";

        /**
         * Splits one line of text into its words.
         *
         * <p>Words are separated by any run of whitespace. Blank lines produce no
         * words at all, so the empty string is never emitted as a "word" (a plain
         * {@code split(" ")} would emit it for blank lines and repeated spaces).
         */
        private static final FlatMapFunction<String, String> WORDS_EXTRACTOR = new FlatMapFunction<String, String>() {
            public Iterable<String> call(String s) throws Exception {
                String trimmed = s.trim();
                if (trimmed.length() == 0) {
                    // "".split(...) would return a single empty token; emit nothing instead.
                    return java.util.Collections.<String>emptyList();
                }
                // Trimming first guarantees there is no leading empty token.
                return Arrays.asList(trimmed.split("\\s+"));
            }
        };

        /** Maps each word to a (word, 1) pair so the pairs can be summed per word. */
        private static final PairFunction<String, String, Integer> WORDS_MAPPER = new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String s) throws Exception {
                return new Tuple2<String, Integer>(s, 1);
            }
        };

        /** Adds two partial counts that belong to the same word. */
        private static final Function2<Integer, Integer, Integer> WORDS_REDUCER = new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer a, Integer b) throws Exception {
                return a + b;
            }
        };

        /**
         * Runs the word count on a local Spark master.
         *
         * @param args optional: {@code args[0]} is the input file path and
         *             {@code args[1]} is the output folder; missing entries fall
         *             back to the default paths
         */
        public static void main(String[] args) {
            String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
            String outputFolder = args.length > 1 ? args[1] : DEFAULT_OUTPUT_FOLDER;

            SparkConf conf = new SparkConf().setAppName(
                    "org.sparkexample.WordCount").setMaster("local");
            JavaSparkContext context = new JavaSparkContext(conf);
            try {
                JavaRDD<String> file = context.textFile(inputPath);
                JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
                JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
                JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);

                counter.saveAsTextFile(outputFolder);
            } finally {
                // Stop the context even when the job fails so it is not leaked.
                context.stop();
            }
        }
    }

    pom.xml

    <!-- Maven build descriptor for the word-count example: declares the project
         coordinates and the libraries it is compiled against. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
    
      <!-- Project coordinates; groupId.artifactId matches the Java package geo1.op1. -->
      <groupId>geo1</groupId>
      <artifactId>op1</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <packaging>jar</packaging>
    
      <name>op1</name>
      <url>http://maven.apache.org</url>
    
      <properties>
        <!-- Source files are read as UTF-8 regardless of the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      </properties>
    
      <dependencies>
        <!-- JUnit is limited to the test scope. -->
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        <!-- Spark core 1.2.0; the _2.10 suffix is the Scala binary version the
             artifact was built for. Needed for the org.apache.spark imports
             used by WordCount. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>1.2.0</version>
        </dependency>
      </dependencies>
    </project>
  • 相关阅读:
    ubuntu下安装oracle
    网站框架策划时的小技巧--页面原型篇
    中国电商价格欺诈何时休?
    系统升级日记(4):如何快速的修改Infopath中的各种URL
    系统升级日记(3)- 升级SharePoint解决方案和Infopath
    系统升级日记(2)- 升级到SharePoint Server 2013
    系统升级日记(1)- 升级到SQL Server 2012
    【译】《C# Tips -- Write Better C#》
    [.NET] 一步步打造一个简单的 MVC 电商网站
    反骨仔的 2016 年度全文目录索引
  • 原文地址:https://www.cnblogs.com/phoenix13suns/p/4285752.html
Copyright © 2011-2022 走看看