[学习笔记]
/* Without the setting below, startup fails with:
   java.lang.IllegalArgumentException: System memory 259522560 must be at least 4.718592E8(470M). Please use a larger heap size.
   That is, there is not enough memory, so the SparkContext cannot be started. */
conf.set("spark.testing.memory", "2000000000");
/* Create the Java Spark context from the configuration above. */
JavaSparkContext sc = new JavaSparkContext(conf);
/* Loading the input this way (from an hdfs:// URL) also works: */
// JavaRDD<String> text = sc.textFile("hdfs://localhost:9000/README.txt");
/* The input file contains:
o1abc 45
o1abc 77
o1abc o1abc */
JavaRDD<String> text = sc.textFile("E://temp//input//friend.txt");
/* Collect the lines of the file into a List so they can be printed. */
List<String> strList = text.collect();
/* Output:
str:o1abc 45
str:o1abc 77
str:o1abc o1abc */
for (String str : strList) {
System.out.println("str:" + str);
}
/* Interface FlatMapFunction<T,R> declares: Iterable<R> call(T t).
   (Note: in later versions the return type is different.) */
JavaRDD<String> words = text.flatMap(new FlatMapFunction<String, String>() {
    /* Logs the incoming line, then splits it on single spaces.
       A List can be returned here because java.lang.Iterable is a
       super-interface of List. */
    public Iterable<String> call(String line) throws Exception {
        System.out.println("flatMap once, line is " + line);
        return Arrays.asList(line.split(" "));
    }
});
/* Collect the words produced by the flatMap step into a List. */
List<String> wordsList = words.collect();
/* Output:
flatMap once, line is o1abc 45
flatMap once, line is o1abc 77
flatMap once, line is o1abc o1abc
word:o1abc
word:45
word:o1abc
word:77
word:o1abc
word:o1abc */
for (String word : wordsList) {
System.out.println("word:" + word);
}
/* http://spark.apache.org/docs/latest/
Interface PairFunction<T,K,V>
A function that returns key-value pairs (Tuple2<K, V>), and can be used to construct PairRDDs.
scala.Tuple2<K,V> call(T t)
*/
/*
flatMap once, line is o1abc 45(这句说明前面语句再次被执行)
in tuple2 word: o1abc
文章转载原文:https://blog.csdn.net/qq_44596980/article/details/93385009