zoukankan      html  css  js  c++  java
  • Spark first example

    This code counts how many times each distinct word occurs in a text file.

    package geo1.op1;
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.api.java.function.Function2;
    import org.apache.spark.api.java.function.PairFunction;
    import scala.Tuple2;
    
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    /**
     * Minimal Spark job that counts how many times each word occurs in a text file.
     *
     * <p>Pipeline: read the input as lines, split every line into words, pair each
     * word with the count 1, sum the counts per word, and save the resulting
     * (word, count) pairs as text under the output folder.
     *
     * <p>Usage: {@code WordCount [inputPath [outputFolder]]}. Either argument may be
     * omitted, in which case the built-in default path is used.
     *
     * <p>{@code @Override} is intentionally left off the anonymous classes so the
     * file still compiles at Java 5 source level, which rejects the annotation on
     * interface method implementations.
     */
    public class WordCount {
        /** Input file used when no first argument is given. */
        private static final String DEFAULT_INPUT_PATH = "/home/steve/Documents/words.txt";

        /** Output folder used when no second argument is given. */
        private static final String DEFAULT_OUTPUT_FOLDER = "/home/steve/Documents/ans";

        /**
         * Splits one line into its words.
         *
         * <p>Splits on runs of whitespace and drops empty tokens, so blank lines,
         * leading indentation and repeated separators do not produce an
         * empty-string "word" in the result.
         */
        private static final FlatMapFunction<String, String> WORDS_EXTRACTOR = new FlatMapFunction<String, String>() {
            public Iterable<String> call(String s) throws Exception {
                List<String> words = new ArrayList<String>();
                for (String token : s.split("\\s+")) {
                    // A leading separator (or an empty line) yields one empty token.
                    if (token.length() > 0) {
                        words.add(token);
                    }
                }
                return words;
            }
        };

        /** Maps a word to the pair (word, 1) so the counts can be summed per key. */
        private static final PairFunction<String, String, Integer> WORDS_MAPPER = new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String s) throws Exception {
                return new Tuple2<String, Integer>(s, 1);
            }
        };

        /** Adds two partial counts that belong to the same word. */
        private static final Function2<Integer, Integer, Integer> WORDS_REDUCER = new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer a, Integer b) throws Exception {
                return a + b;
            }
        };

        /**
         * Runs the word count with master "local".
         *
         * @param args optional overrides: {@code args[0]} is the input file path,
         *             {@code args[1]} is the output folder
         */
        public static void main(String[] args) {
            String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
            String outputFolder = args.length > 1 ? args[1] : DEFAULT_OUTPUT_FOLDER;

            SparkConf conf = new SparkConf()
                    .setAppName("org.sparkexample.WordCount")
                    .setMaster("local");
            JavaSparkContext context = new JavaSparkContext(conf);
            try {
                JavaRDD<String> file = context.textFile(inputPath);
                JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
                JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
                JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);

                counter.saveAsTextFile(outputFolder);
            } finally {
                // Stop the context even when the job fails, so it is not left running.
                context.stop();
            }
        }
    }

    pom.xml

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <!-- Maven build descriptor for the word-count example; the project is packaged as a jar. -->
      <modelVersion>4.0.0</modelVersion>
    
      <!-- Project coordinates: groupId, artifactId and version identify the built artifact. -->
      <groupId>geo1</groupId>
      <artifactId>op1</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <packaging>jar</packaging>
    
      <name>op1</name>
      <url>http://maven.apache.org</url>
    
      <properties>
        <!-- Source files are read as UTF-8 during the build. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      </properties>
    
      <dependencies>
        <!-- JUnit 3.8.1, available on the test classpath only. -->
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        <!-- Spark core 1.2.0; the _2.10 suffix presumably selects the Scala 2.10 build (confirm). -->
        <!-- NOTE(review): no compiler source/target level is configured in this POM. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>1.2.0</version>
        </dependency>
      </dependencies>
    </project>
  • 相关阅读:
    CSS3 transform 属性(2D,3D旋转)
    django中视图函数中装饰器
    第55课 经典问题解析四
    第52课 C++中的抽象类和接口
    机器学习中的数学(5)-强大的矩阵奇异值分解(SVD)及其应用
    对称矩阵、Hermite矩阵、正交矩阵、酉矩阵、奇异矩阵、正规矩阵、幂等矩阵、合同矩阵、正定矩阵
    机器学习算法之决策树
    python读写csv时中文乱码问题解决办法
    shiro工作过程
    Nginx在windows上安装 及 Nginx的配置及优化
  • 原文地址:https://www.cnblogs.com/phoenix13suns/p/4285752.html
Copyright © 2011-2022 走看看