单词统计
hello nihao
shi jie
shijie
hello jie
en en en
hao hao
hao en
scala> val lst = io.Source.fromFile("e:/1.txt").getLines.toList.flatMap(_.split(" +")).groupBy(s=>s).mapValues(_.length)
lst: scala.collection.immutable.Map[String,Int] = Map(shi -> 1, en -> 4, jie -> 2, shijie -> 1, hao -> 3, hello -> 2, nihao -> 1)
scala> lst.foreach(println)
(shi,1)
(en,4)
(jie,2)
(shijie,1)
(hao,3)
(hello,2)
(nihao,1)
分解
//读取文件
scala> val it1 = io.Source.fromFile("e:/1.txt")
it1: scala.io.BufferedSource = non-empty iterator
//获取所有行的迭代器
scala> val it2 = it1.getLines
it2: Iterator[String] = non-empty iterator
//将其转为列表
scala> val lst1 = it2.toList
lst1: List[String] = List(hello nihao, shi jie, shijie, hello jie, "en en en ", hao hao, hao en)
//压平,生成字符串类型的列表,而不是数组类型的列表
scala> val lst2 = lst1.flatMap(_.split(" +"))
lst2: List[String] = List(hello, nihao, shi, jie, shijie, hello, jie, en, en, en, hao, hao, hao, en)
//按元素进行分组
scala> val map1 = lst2.groupBy(s=>s)
map1: scala.collection.immutable.Map[String,List[String]] =
Map(shi -> List(shi),
en -> List(en, en, en, en),
jie -> List(jie, jie),
shijie -> List(shijie),
hao -> List(hao, hao, hao),
hello -> List(hello, hello),
nihao -> List(nihao))
//对 v 进行转换
scala> val map2 = map1.mapValues(_.length)
map2: scala.collection.immutable.Map[String,Int] = Map(shi -> 1, en -> 4, jie -> 2, shijie -> 1, hao -> 3, hello -> 2, nihao -> 1)
最高气温
1971 42
1921 41
1902 -20
1993 19
1938 -12
1958 -10
1902 300
1918 45
1951 -22
1936 44
1955 -33
1995 -18
........
// Read every line of the temperature file; the source is closed as soon as the
// lines have been materialized so the file handle is not leaked.
val lst1 = {
  val source = io.Source.fromFile("e:/t.txt")
  try source.getLines().toList finally source.close()
}
// Parse each "year temperature" record into (year, temperature).
// Blank lines are skipped and each line is split only once, on any run of whitespace.
val lst2 = lst1.map(_.trim).filter(_.nonEmpty).map { line =>
  val fields = line.split("\\s+")
  (fields(0).toInt, fields(1).toInt)
}
// Highest temperature per year, ordered by year ascending.
val lst3 = lst2.groupBy(_._1)
  .map { case (year, records) => (year, records.map(_._2).max) }
  .toList
  .sortBy(_._1)
求最大值、最小值、平均值
// For every year compute (max, min, average) of its temperatures and print the
// results as (year, (max, min, average)) in ascending year order.
val statsByYear = lst2.groupBy(_._1).map { case (year, records) =>
  val temps = records.map(_._2) // project the temperatures once instead of three times
  (year, (temps.max, temps.min, temps.sum * 1.0 / temps.size))
}
statsByYear.toList.sortBy(_._1).foreach(println)
年份升序、温度降序
// Order by year ascending; records of the same year are ordered by temperature descending.
// The ordering is composed from the standard Int ordering and its reverse.
val yearAscTempDesc = Ordering.Tuple2(Ordering.Int, Ordering.Int.reverse)
lst2.sorted(yearAscTempDesc).foreach(println)
商家评价标签
import java.util.regex.Pattern
import scala.collection.mutable
import scala.collection.JavaConverters._

/*
 * Extract the review tags users gave to each merchant.
 * Requirements:
 *   - globally, order merchants by the count of their most frequent tag, descending;
 *   - per merchant, order the tag list by the number of occurrences of each tag, descending.
 */
object 作业 {

  def main(args: Array[String]): Unit = {
    // mm1()
    mm2()
  }

  /**
   * Approach 1: extract the tag string with a regular expression.
   *
   * Each input line is split on a single space; the first field is the merchant id
   * and the second is the JSON payload. Lines with fewer than two fields are skipped.
   * The captured text is the raw body of the "values" array, so it is kept exactly
   * as it appears in the JSON text.
   */
  def mm1(): Unit = {
    // Triple-quoted so the embedded double quotes and backslashes need no escaping;
    // compiled once instead of once per input line.
    val tagPattern =
      Pattern.compile(""".*"extInfoList":\[\{"title":"contentTags","values":\[(.+?)\]""")
    val source = scala.io.Source.fromFile("e:/temptags.txt", "utf8")
    try {
      val records = source.getLines().map(_.split(" ")).collect {
        case Array(merchantId, json, _*) =>
          val m = tagPattern.matcher(json)
          (merchantId, if (m.find()) m.group(1) else "") // no tags => empty string
      }
      printTopTags(records)
    } finally source.close()
  }

  /**
   * Approach 2: parse the JSON payload with JSONUtil.parseJson to obtain the tags.
   *
   * Each input line is split on a single space; the first field is the merchant id
   * and the second is the JSON payload. Lines with fewer than two fields are skipped.
   */
  def mm2(): Unit = {
    // Triple-quoted so the Windows backslashes are taken literally
    // (in a normal string literal "\t" would be a tab and "\s" is an invalid escape).
    val source = scala.io.Source.fromFile("""E:\studynode\徐\文件\temptags.txt""", "utf8")
    try {
      val records = source.getLines().map(_.split(" ")).collect {
        case Array(merchantId, json, _*) =>
          val tags = JSONUtil.parseJson(json)
          (merchantId, tags.asScala.mkString(",")) // no tags => empty string
      }
      printTopTags(records)
    } finally source.close()
  }

  /**
   * Shared ranking step of both approaches.
   *
   * Takes (merchantId, commaSeparatedTags) pairs, drops pairs without tags, counts
   * every tag per merchant, keeps each merchant's five most frequent tags, and prints
   * one (merchantId, List((tag, count), ...)) line per merchant, ordered by the count
   * of the merchant's most frequent tag, descending.
   *
   * The iterator is fully consumed before this method returns, so the caller may
   * close the underlying source afterwards.
   */
  private def printTopTags(records: Iterator[(String, String)]): Unit = {
    val tagsByMerchant = records
      .filter(_._2 != "") // drop records that carry no tags
      .toList
      .groupBy(_._1) // group by merchant id
      .map { case (merchantId, rows) =>
        val topTags = rows
          .flatMap(_._2.split(",")) // split the tag string into individual tags
          .groupBy(tag => tag) // group identical tags
          .map { case (tag, occurrences) => (tag, occurrences.size) } // (tag, count)
          .toList
          .sortBy(-_._2) // most frequent first
          .take(5) // keep the top five
        (merchantId, topTags)
      }
    // Global order: by the count of each merchant's most frequent tag, descending.
    // headOption avoids an exception if a merchant ends up with no tags at all.
    val ranked = tagsByMerchant.toList.sortBy { case (_, topTags) =>
      -topTags.headOption.fold(0)(_._2)
    }
    ranked.foreach(println)
  }
}
//方式2、pom中引入依赖
<dependencies>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
</dependencies>
//方式2、解析json工具类 import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import java.util.ArrayList; import java.util.List; public class JSONUtil { private JSONUtil(){} public static List<String> parseJson(String line) { List<String> list = new ArrayList<String>(); JSONObject jsonObject = JSON.parseObject(line); JSONArray extInfoList = jsonObject.getJSONArray("extInfoList"); if(extInfoList != null && extInfoList.size() != 0){ for (Object o : extInfoList) { JSONObject jo = (JSONObject)o; if(jo.get("title").equals("contentTags")){ JSONArray values = jo.getJSONArray("values"); for (Object value : values) { list.add(value.toString()); } } } } return list; } }