zoukankan      html  css  js  c++  java
  • scala实现wordcount方法-商品标签统计-气温统计

    scala实现单词统计
    ---------------------
        import scala.io.Source
    
        /**
          * Created by Administrator on 2018/5/7.
          *
          * Word count over a local text file: reads every line, splits it into
          * words, counts the occurrences of each word and prints one
          * `(word,count)` pair per line.
          */
        object WCApp {
            /** Location of the text file whose words are counted. */
            private val InputPath = "d:/mr/word.txt"

            /**
              * Counts how often each word occurs in the given lines.
              *
              * Words are separated by runs of whitespace. Empty tokens (produced by
              * blank lines or leading whitespace) are not counted as words.
              *
              * @param lines the lines of text to analyse
              * @return a map from each distinct word to its number of occurrences
              */
            def countWords(lines: Seq[String]): Map[String, Int] =
                lines
                    .flatMap(_.split("\\s+"))
                    .filter(_.nonEmpty)
                    .groupBy(identity)
                    // Strict `map` instead of `mapValues`, which only builds a lazy view.
                    .map { case (word, occurrences) => word -> occurrences.size }

            /**
              * Loads the input file, counts its words and prints the result.
              *
              * @param args command-line arguments (unused)
              */
            def main(args: Array[String]): Unit = {
                // 1. Load the file; close the handle even if reading fails.
                val src = Source.fromFile(InputPath)
                val lines = try src.getLines().toList finally src.close()

                // 2. Count the words and print the counts (not the raw input lines).
                countWords(lines).foreach(println)
            }

        }
    
    
    
    scala实现单词统计2
    ---------------------
        import scala.io.Source
    
        /**
          * Created by Administrator on 2018/5/7.
          *
          * Word count over a local text file, second variant: every word is paired
          * with a count of 1 and the pairs of each word are merged with `reduce`.
          */
        object WCApp2 {
            /** Location of the text file whose words are counted. */
            private val InputPath = "d:/mr/word.txt"

            /**
              * Counts how often each word occurs in the given lines by reducing
              * `(word, 1)` pairs.
              *
              * Words are separated by runs of whitespace. Empty tokens (produced by
              * blank lines or leading whitespace) are not counted as words.
              *
              * @param lines the lines of text to analyse
              * @return a map from each distinct word to its number of occurrences
              */
            def countWords(lines: Seq[String]): Map[String, Int] = {
                // Flatten the lines into words and tag each word with a count of one.
                val pairs = lines
                    .flatMap(_.split("\\s+"))
                    .filter(_.nonEmpty)
                    .map(word => (word, 1))

                // Group by word: {hello -> [(hello,1),(hello,1),...]}, then merge each
                // group into a single (word, total) pair. Groups produced by groupBy
                // are never empty, so `reduce` is safe here.
                pairs.groupBy(_._1).map { case (_, group) =>
                    group.reduce((a, b) => (a._1, a._2 + b._2))
                }
            }

            /**
              * Loads the input file, counts its words and prints one `(word,count)`
              * pair per line.
              *
              * @param args command-line arguments (unused)
              */
            def main(args: Array[String]): Unit = {
                // 1. Load the file; close the handle even if reading fails.
                val src = Source.fromFile(InputPath)
                val lines = try src.getLines().toList finally src.close()

                // 2. Count and print.
                countWords(lines).foreach(println)
            }

        }
    
    
    
    
    Bitmap实现气温TopN统计
    ------------------------
        import scala.io.Source
    
        /**
          * Per-year top-N temperature query implemented with a bitmap.
          *
          * Each year's temperatures are recorded as bits in a byte array (bit `t`
          * set means temperature `t` was observed); the N highest temperatures are
          * then read back by scanning the bitmap from its highest bit downwards.
          * Because a bitmap stores each value once, duplicate temperatures are
          * reported only once.
          */
        object TempTopN2_Bitmap {
            /** Location of the temperature file; each line is "<year> <temperature>". */
            private val InputPath = "d:/mr/temp.dat"

            /**
              * Returns the `n` highest distinct positive temperatures, in descending
              * order, joined by commas.
              *
              * Only temperatures greater than zero are recorded. The bitmap is sized
              * from the largest temperature, so no value can fall outside of it.
              *
              * @param temps the temperatures observed in one year
              * @param n     how many of the highest temperatures to return
              * @return e.g. "45,40,38"; the empty string when there is no positive
              *         temperature
              */
            def topTemps(temps: Seq[Int], n: Int = 3): String = {
                val positives = temps.filter(_ > 0)
                if (positives.isEmpty) ""
                else {
                    // One bit per possible temperature value: byte t / 8, bit t % 8.
                    val bitmap = new Array[Byte](positives.max / 8 + 1)
                    positives.foreach { t =>
                        bitmap(t / 8) = (bitmap(t / 8) | (1 << (t % 8))).toByte
                    }

                    // Lazily walk the set bits from the highest temperature down, so
                    // the scan stops as soon as `n` values have been produced.
                    val descending = for {
                        byteIdx <- (bitmap.length - 1 to 0 by -1).iterator
                        bit <- (7 to 0 by -1).iterator
                        if (bitmap(byteIdx) & (1 << bit)) != 0
                    } yield 8 * byteIdx + bit

                    descending.take(n).mkString(",")
                }
            }

            /**
              * Computes the top-N temperatures of every year found in the given lines.
              *
              * Blank lines are skipped. A non-blank line that does not start with two
              * whitespace-separated integers makes the parsing throw.
              *
              * @param lines input lines of the form "<year> <temperature>"
              * @param n     how many of the highest temperatures to keep per year
              * @return `(year, topTemperatures)` pairs sorted by ascending year
              */
            def topByYear(lines: Seq[String], n: Int = 3): List[(Int, String)] = {
                // Extract (year, temperature) from every non-blank line.
                val records = lines.map(_.trim).filter(_.nonEmpty).map { line =>
                    val fields = line.split("\\s+")
                    (fields(0).toInt, fields(1).toInt)
                }

                // Group by year, aggregate each group through the bitmap, sort by year.
                records
                    .groupBy(_._1)
                    .map { case (year, ofYear) => year -> topTemps(ofYear.map(_._2), n) }
                    .toList
                    .sortBy(_._1)
            }

            /**
              * Loads the temperature file and prints the top-3 temperatures per year.
              *
              * @param args command-line arguments (unused)
              */
            def main(args: Array[String]): Unit = {
                // Load the file; close the handle even if reading fails.
                val f = Source.fromFile(InputPath)
                val lines = try f.getLines().toList finally f.close()

                topByYear(lines).foreach(println(_))
            }
        }
    
    scala实现商品评论标签统计
    ---------------------
        1.TagUtil.java
            package com.oldboy.scala.util;
    
            import com.alibaba.fastjson.JSON;
            import com.alibaba.fastjson.JSONArray;
            import com.alibaba.fastjson.JSONObject;
    
            import java.util.ArrayList;
            import java.util.List;
    
            /**
             * Tag utility class.
             */
            public class TagUtil {
                /**
                 * Extracts the list of comment tags from a JSON document.
                 *
                 * The tags are read from the "values" array of the first element of
                 * the top-level "extInfoList" array. Any missing level yields an
                 * empty list instead of a NullPointerException.
                 *
                 * @param json the JSON text describing one comment record
                 * @return the tags in document order; never null, possibly empty
                 */
                public static List<String> extractTags(String json){
                    // Collected tags
                    List<String> tags = new ArrayList<String>() ;

                    // Parse the text into a JSON object.
                    // NOTE(review): fastjson appears to return null for null/blank
                    // input - guard against it rather than dereferencing blindly.
                    JSONObject root = JSON.parseObject(json) ;
                    if(root == null){
                        return tags ;
                    }

                    // Only the first entry of "extInfoList" is inspected.
                    JSONArray extInfoList = root.getJSONArray("extInfoList");
                    if(extInfoList == null || extInfoList.size() == 0){
                        return tags ;
                    }

                    // The first entry may itself be a JSON null.
                    JSONObject first = extInfoList.getJSONObject(0);
                    if(first == null){
                        return tags ;
                    }

                    JSONArray values = first.getJSONArray("values") ;
                    if(values == null){
                        return tags ;
                    }

                    for(int i = 0 ; i < values.size() ; i ++){
                        tags.add(values.getString(i));
                    }
                    return tags ;
                }
            }
    
        2.TaggenDemo.scala
            import javax.swing.text.html.HTML.Tag
    
            import com.oldboy.scala.util.TagUtil
    
            import scala.io.Source
    
            /**
              * Tag generation statistics: for every business, finds its most
              * frequently used comment tags and prints the businesses ordered by the
              * count of their most frequent tag.
              */
            object TaggenDemo {

                /**
                  * Aggregates `(busid, tag)` pairs into the top tags per business.
                  *
                  * Ties are broken alphabetically (by tag within a business, by
                  * business id between businesses) so the output is deterministic.
                  *
                  * @param pairs one `(busid, tag)` pair per tag occurrence
                  * @param n     maximum number of tags kept per business
                  * @return for each business its `(tag, count)` list in descending
                  *         count order, with the businesses themselves ordered by
                  *         descending count of their most frequent tag
                  */
                def topTags(pairs: Seq[(String, String)], n: Int = 5): List[(String, List[(String, Int)])] = {
                    // Count identical (busid, tag) pairs and reshape to (busid, (tag, cnt)).
                    // `toList` comes first: mapping the Map directly would key the
                    // result by busid and silently drop all but one tag per business.
                    val counted = pairs.groupBy(identity).toList.map {
                        case ((busid, tag), hits) => (busid, (tag, hits.size))
                    }

                    // Regroup by business and keep each business's n most frequent tags.
                    val perBusiness = counted.groupBy(_._1).toList.map { case (busid, entries) =>
                        val ranked = entries
                            .map(_._2)
                            .sortBy { case (tag, cnt) => (-cnt, tag) }
                            .take(n)
                        (busid, ranked)
                    }

                    // Order businesses by the count of their most frequent tag, descending.
                    perBusiness.sortBy { case (busid, tags) =>
                        (-tags.headOption.map(_._2).getOrElse(0), busid)
                    }
                }

                /**
                  * Loads the tag file, extracts the tags of every line and prints one
                  * `busid==>(tag,count);(tag,count);...` line per business.
                  *
                  * @param args command-line arguments (unused)
                  */
                def main(args: Array[String]): Unit = {
                    // Explicit `.asScala` conversions instead of the deprecated
                    // implicit JavaConversions.
                    import scala.collection.JavaConverters._

                    // 1. Load the file; close the handle even if reading fails.
                    val file = Source.fromFile("d:/mr/temptags.txt")
                    val lines = try file.getLines().toList finally file.close()

                    // 2. Each line is "<busid>\t<json>"; flatten it into (busid, tag)
                    //    pairs. Lines without a tab-separated JSON field are skipped.
                    val pairs = lines.flatMap { line =>
                        line.split("\t") match {
                            case Array(busid, json, _*) =>
                                TagUtil.extractTags(json).asScala.toList.map(tag => (busid, tag))
                            case _ => Nil
                        }
                    }

                    // 3. Aggregate and print.
                    topTags(pairs).foreach { case (busid, tags) =>
                        println(busid + "==>" + tags.mkString(";"))
                    }
                }
            }
  • 相关阅读:
    Druid 使用 Kafka 将数据载入到 Kafka
    Druid 使用 Kafka 数据加载教程——下载和启动 Kafka
    Druid 集群方式部署 —— 启动服务
    Druid 集群方式部署 —— 端口调整
    Druid 集群方式部署 —— 配置调整
    Druid 集群方式部署 —— 配置 Zookeeper 连接
    Druid 集群方式部署 —— 元数据和深度存储
    Druid 集群方式部署 —— 从独立服务器部署上合并到集群的硬件配置
    Druid 集群方式部署 —— 选择硬件
    Druid 独立服务器方式部署文档
  • 原文地址:https://www.cnblogs.com/zyde/p/9004770.html
Copyright © 2011-2022 走看看