package sanjin;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

import java.util.Arrays;
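
/**
 * Counts word occurrences across all text files under an input directory and
 * writes the resulting (word, count) pairs to an output directory.
 *
 * Usage: JavaWordCount inputPath outputPath
 */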
public class JavaWordCount {
    public static void main(String[] args) {
        String inputPath = args[0];
        String outputPath = args[1];
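
        // Create (or reuse) a SparkSession; the app name is derived from this
        // class's name via the current stack trace.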
        SparkSession sparkSession = SparkSession.builder()
                .appName(Thread.currentThread().getStackTrace()[1].getClassName())
                .getOrCreate();
        JavaSparkContext sc = new JavaSparkContext(sparkSession.sparkContext());
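
        // wholeTextFiles yields one (file path, file contents) pair per file under inputPath.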
        JavaPairRDD<String, String> textFiles = sc.wholeTextFiles(inputPath);
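
        // Split each file's contents on whitespace, map every word to (word, 1),
        // then sum the counts per word.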
        JavaPairRDD<String, Integer> counts = textFiles
                .flatMap(s -> Arrays.asList(s._2.split("\\s+")).iterator())
                .mapToPair(word -> new Tuple2<>(word, 1))
                .reduceByKey((x, y) -> x + y);
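
        // Persist the (word, count) pairs as text files under outputPath, then shut down.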
        counts.saveAsTextFile(outputPath);
        sparkSession.stop();
    }
}