作业:用 Scala 统计一个文件夹下所有文件中每个单词出现的总次数
package scala
/**
 * Counts how often each word occurs across the regular files located directly
 * inside a directory (sub-directories are not descended into), then prints
 * every (word, count) pair.
 */
object WordCounter {
  // Imports are scoped to this object.
  import scala.io.Source
  import java.io._

  /** Running tally: word -> number of occurrences seen so far. */
  var map = Map.empty[String, Int]

  // One or more of these characters separate words:
  // whitespace and , : . ! ? * / - = + ( ) > <
  // The backslashes are doubled because this is an ordinary string literal, and
  // '-' is escaped so it is a literal hyphen rather than a character range.
  private val Delimiters = "[,:.!\\s?*/\\-=+()><]+"

  /**
   * Scans a directory and prints the word counts.
   *
   * @param args optional; args(0) is the directory to scan, defaulting to "D:/workspace"
   */
  def main(args: Array[String]): Unit = {
    val root = args.headOption.getOrElse("D:/workspace")
    scanDir(new File(root))
    map.foreach(println)
  }

  /**
   * Feeds every regular file directly inside `dir` to [[readFile]] and prints its path.
   *
   * @param dir directory to scan; nothing happens if it cannot be listed
   */
  def scanDir(dir: File): Unit = {
    // File.listFiles returns null when `dir` is not a directory or cannot be read.
    val entries = Option(dir.listFiles).getOrElse(Array.empty[File])
    entries.filter(_.isFile).foreach { file =>
      readFile(file)
      println(file)
    }
  }

  /**
   * Reads `file` line by line and adds its words to the tally.
   *
   * @param file text file to read, decoded with the platform default charset
   */
  def readFile(file: File): Unit = {
    val source = Source.fromFile(file)
    // Always release the file handle, even if reading fails part-way through.
    try source.getLines().foreach(count)
    finally source.close()
  }

  /**
   * Splits `line` into words and increments the tally for each one.
   *
   * @param line a single line of text
   */
  def count(line: String): Unit = {
    // split can yield an empty leading token when the line starts with a delimiter.
    for (word <- line.split(Delimiters) if word.nonEmpty) {
      map += (word -> (map.getOrElse(word, 0) + 1))
    }
  }
}
DT大数据梦工厂大数据门徒3000第三讲 课堂笔记
scala函数式编程(简单易读)重中之重
spark当中的计算都是用scala函数式编程来做
高阶函数:函数的参数本身也可以是函数(与Java最大的区别之一)
spark基于集合
函数/变量同是一等公民
定义函数,并把函数赋值给变量:
/** Prints `name` on its own line. */
def fun1(name: String): Unit = println(name)

// `fun1 _` (the method name, a space, then an underscore) converts the method
// into a function value that can be stored in a variable.
var fun1_v = fun1 _
fun1("Spark")
fun1_v("Scala")
//匿名函数:
格式:val 变量名 = (参数) => 函数体
// An anonymous function bound to a value so that it can be invoked by name.
val fun2: String => Unit = content => println(content)
fun2.apply("Hadoop")
高阶函数:
// A function value that prints whatever string it receives.
val hiScala = (content: String) => println(content)

/**
 * Higher-order function: applies `func` to `content`.
 *
 * @param func    the function to invoke
 * @param content the argument handed to `func`
 */
def bigData(func: (String) => Unit, content: String): Unit = func(content)

bigData(hiScala, "Spark")

val array = Array(1,2,3,4,5,6,7,8,9)
// map builds a new array by applying the function to every element.
array.map(item => 2 * item)
// foreach rather than map: println returns Unit, so map would only build a
// throwaway Array[Unit].
array.foreach(item => println(item))
函数的返回值是个函数
/**
 * Returns a function that prints `content`, a space, and then the message it is given.
 * Defined once only: two definitions with the same name and signature do not
 * compile outside the REPL.
 *
 * @param content text captured by the returned function
 * @return a function from a message to Unit
 */
def func_Return(content: String): String => Unit =
  (message: String) => println(content+" "+message)

// Calling func_Return only builds the function; nothing is printed until
// `returned` is applied to a message.
val returned = func_Return("Spark")
高阶函数具有类型推断的功能(参数类型和括号可以逐步省略)
/**
 * Higher-order function: applies `func` to `name`.
 *
 * @param func the function to invoke
 * @param name the argument handed to `func`
 */
def spark(func: (String) => Unit, name: String): Unit = func(name)

// The parameter type is inferred from spark's signature, so it can be omitted ...
spark((name) => println(name), "Scala")
// ... as can the parentheses around a single parameter ...
spark(name => println(name), "Scala")
// ... and finally the method itself can be passed. Writing `name => println`
// here would ignore `name` and print an empty line instead.
spark(println, "Scala")
// Placeholder syntax: `2*_` is shorthand for `item => 2 * item`.
array.map(2*_)
// The next three lines all print every doubled element; they differ only in
// how println is passed. First: an explicit placeholder argument.
array.map(2*_).foreach(println(_))
// Second: the bare method name.
array.map(2*_).foreach(println)
// Third: the method name followed by a trailing underscore.
array.map(2*_).foreach(println _)
// Keep only the doubled values greater than 10 before printing them.
array.map(2*_).filter(_ >10).foreach(println)
闭包:外层函数返回之后,它所返回的内部函数仍然能够访问外层函数的变量(如下面的 content)
/**
 * Returns a function that prints `content`, then ": ", then the message it is given.
 * `content` is captured when the function is created, which makes the result a closure.
 * Note: within this scope the method name hides the root `scala` package.
 */
def scala(content: String) = {
  val prefix = content + ": "
  (message: String) => println(prefix + message)
}

val funcResult = scala("Spark")
funcResult.apply("Flink")
// Currying: turning a function of several arguments into a chain of
// single-argument functions.

/** Plain two-argument addition. */
def sum(x: Int, y: Int): Int = x + y
sum(1, 2)

/** Hand-written curried form: takes `x` and returns a function awaiting `y`. */
def sum_Curring(x: Int): Int => Int = y => x + y
sum_Curring(1)(2)

/** Curried form using multiple parameter lists. */
def sum_Curring_Better(x: Int)(y: Int): Int = x + y
sum_Curring_Better(1)(3)
// Combine 1..100 from the left with addition; the result is 5050.
(1 to 100).reduceLeft(_+_)
val list = List("Scala", "Spark", "Flink")
// Prefix every element, producing a new List[String].
val cal = list.map("The content is : " + _)
// foreach rather than map: println returns Unit, so map would only build a
// throwaway List[Unit].
list.foreach(println)
// Split every sentence on spaces and flatten the pieces into a single list.
cal.flatMap(_.split(" "))
cal.flatMap(_.split(" ")).foreach(println)
// Pair the elements of both lists position by position.
list.zip(List(10,6,5))