zoukankan      html  css  js  c++  java
  • scala Wordcount

    
    
    package my.bigdata.scala08

    import scala.collection.mutable
    import scala.collection.mutable.ArrayBuffer
    import scala.io.Source


    /** Word-count examples, each demonstrating a different Scala technique.
      *
      * Every `scalaWC*` method reads a text file, counts how often each
      * whitespace-separated word occurs, and prints the result to standard
      * output. They differ only in how the counting is expressed.
      *
      * Summary of the approaches:
      *  1. Explicit loop + a map declared outside the loop + `getOrElse`.
      *  2. Pure collection transformations (`groupBy`, `foldLeft`, `reduceLeft`).
      *  3. Any traversal method (`foreach`, `foldLeft`, ...) combined with an
      *     outer mutable map can implement word count as well.
      *
      * Created by lq on 2017/8/7.
      */
    object Task2 {

      /** File that is read when a method is called without an explicit path. */
      private val DefaultInput = "myfile.txt"

      /** Splits a line on runs of whitespace.
        *
        * `String.split` returns an empty first element for blank lines and for
        * lines that start with whitespace; those empty tokens are dropped so
        * that the empty string is never counted as a word.
        */
      private def tokenize(line: String): Array[String] =
        line.split("\\s+").filter(_.nonEmpty)

      /** Opens `path`, passes its line iterator to `use`, and always closes the file.
        *
        * The iterator is only valid while the file is open, so `use` must
        * consume it completely before returning.
        */
      private def withLines[A](path: String)(use: Iterator[String] => A): A = {
        val source = Source.fromFile(path)
        try use(source.getLines()) finally source.close()
      }

      /** Basic: `java.util.Scanner` + an immutable map held in a `var` + `getOrElse`.
        *
        * Prints `(word,count)` after every update, then the final map.
        *
        * @param path file to read; defaults to `myfile.txt`
        */
      def scalaWC0(path: String = DefaultInput): Unit = {
        val in = new java.util.Scanner(new java.io.File(path))
        val words = new ArrayBuffer[String]
        try {
          // Scanner.next() already yields whitespace-delimited tokens,
          // so no further splitting is required.
          while (in.hasNext()) {
            words += in.next()
          }
        } finally in.close()

        var counts = Map[String, Int]()
        for (word <- words) {
          val updated = counts.getOrElse(word, 0) + 1
          counts = counts + (word -> updated)
          // Trace the count that was just stored for this word.
          println(word -> updated)
        }
        print(counts)
      }

      /** Basic: nested loops + an outer mutable `HashMap` + `getOrElse`.
        *
        * Prints one `(word,count)` tuple per line.
        *
        * @param path file to read; defaults to `myfile.txt`
        */
      def scalaWC1(path: String = DefaultInput): Unit = {
        val resMap = new mutable.HashMap[String, Long]()
        withLines(path) { lines =>
          for (line <- lines; word <- tokenize(line)) {
            resMap.put(word, resMap.getOrElse(word, 0L) + 1L)
          }
        }
        resMap.foreach(println(_))
      }

      /** Traversal + an outer mutable map, using `flatMap` and `foreach`.
        *
        * `foreach` is used rather than `map` because `Iterator.map` is lazy:
        * a mapped iterator that is never consumed would never update the map.
        *
        * @param path file to read; defaults to `myfile.txt`
        */
      def scalaWC5(path: String = DefaultInput): Unit = {
        val resMap = new mutable.HashMap[String, Long]()
        withLines(path) { lines =>
          lines.flatMap(tokenize(_)).foreach { word =>
            resMap += ((word, resMap.getOrElse(word, 0L) + 1L))
          }
        }
        println(resMap)
      }

      /** Traversal + an outer mutable map, using `foldLeft` purely as a traversal.
        *
        * The fold's accumulator is the outer map itself, so the accumulator
        * parameter is ignored and the outer map is updated directly.
        *
        * @param path file to read; defaults to `myfile.txt`
        */
      def scalaWC4(path: String = DefaultInput): Unit = {
        val resMap = new mutable.HashMap[String, Long]()
        withLines(path) { lines =>
          lines.flatMap(line => tokenize(line)).foldLeft(resMap) { (_, word: String) =>
            resMap += ((word, resMap.getOrElse(word, 0L) + 1L))
          }
        }
        println(resMap)
      }

      /** Advanced: no outer variable.
        *
        * `foldLeft` is seeded with a fresh map and given a two-argument
        * function that updates the accumulator map with each word and returns
        * it; the fold as a whole yields the finished map.
        *
        * @param path file to read; defaults to `myfile.txt`
        */
      def scalaWC41(path: String = DefaultInput): Unit = {
        val res = withLines(path) { lines =>
          lines.flatMap(tokenize(_)).foldLeft(mutable.Map[String, Long]()) { (m, word: String) =>
            m += ((word, m.getOrElse(word, 0L) + 1L))
          }
        }
        println(res)
      }

      /** Advanced: no outer variable; `groupBy` then `foldLeft` to sum each group.
        *
        * @param path file to read; defaults to `myfile.txt`
        */
      def scalaWC2(path: String = DefaultInput): Unit = {
        val res = withLines(path) { lines =>
          // toList materialises every word while the file is still open.
          lines.flatMap(tokenize(_)).toList.map((_, 1)).groupBy(_._1)
            .map(group => (group._1, group._2.foldLeft(0)((sum, pair) => sum + pair._2)))
        }
        println(res)
      }

      /** Advanced: no outer variable; `groupBy` then `reduceLeft` to sum each group.
        *
        * `reduceLeft` is safe here because `groupBy` never produces an empty group.
        *
        * @param path file to read; defaults to `myfile.txt`
        */
      def scalaWC3(path: String = DefaultInput): Unit = {
        val res = withLines(path) { lines =>
          // toList materialises every word while the file is still open.
          lines.flatMap(tokenize(_)).toList.map((_, 1)).groupBy(_._1)
            .map(group => group._2.reduceLeft((x, y) => (x._1, x._2 + y._2)))
        }
        println(res)
      }

      /** Entry point: runs the `foldLeft`-based word count.
        *
        * @param args optional; `args(0)` is the file to read, otherwise
        *             `myfile.txt` is used
        */
      def main(args: Array[String]): Unit = {
        scalaWC41(args.headOption.getOrElse(DefaultInput))
      }
    }
     
    
    
  • 相关阅读:
    sqlserver中判断表或临时表是否存在
    Delphi 简单方法搜索定位TreeView项
    hdu 2010 水仙花数
    hdu 1061 Rightmost Digit
    hdu 2041 超级楼梯
    hdu 2012 素数判定
    hdu 1425 sort
    hdu 1071 The area
    hdu 1005 Number Sequence
    hdu 1021 Fibonacci Again
  • 原文地址:https://www.cnblogs.com/rocky-AGE-24/p/7301847.html
Copyright © 2011-2022 走看看