zoukankan      html  css  js  c++  java
  • Java8新特性——lambda表达式.(案例:词频统计)

    需求:读入一个文本文件,确定所有单词的使用频率并从高到低排序,打印出所有单词及其频率的排序列表

    先用传统方法解:

     1 package cn._1.wordfrequency;
     2 
     3 import java.util.HashSet;
     4 import java.util.Map;
     5 import java.util.Set;
     6 import java.util.TreeMap;
     7 import java.util.regex.Matcher;
     8 import java.util.regex.Pattern;
     9 
    10 /*
    11  * Functional Thinking by Neal Ford(O'Reilly).
    12  */
    /**
     * Counts how often each word occurs in a text, skipping a fixed set of
     * common stop words. This is the imperative ("traditional") variant, so it
     * deliberately avoids lambdas and streams.
     */
    public class Word {

        /**
         * Lower-case words that are excluded from the statistics.
         * Shared by all instances and unmodifiable. Names outside the existing
         * import list are fully qualified so the imports stay untouched.
         */
        private static final Set<String> NON_WORDS =
                java.util.Collections.unmodifiableSet(new HashSet<String>(java.util.Arrays.asList(
                        "the", "and", "of", "to", "a",
                        "i", "it", "in", "or", "is",
                        "as", "so", "but", "be")));

        /** A word is a maximal run of regex word characters; compiled once. */
        private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

        /**
         * Computes the frequency of every non-stop word in {@code words}.
         *
         * @param words the text to analyse; must not be {@code null}
         * @return a map from lower-cased word to its number of occurrences,
         *         iterated in natural (alphabetical) key order
         * @throws NullPointerException if {@code words} is {@code null}
         */
        public Map<String, Integer> wordFreq(String words) {
            java.util.Objects.requireNonNull(words, "words");
            TreeMap<String, Integer> wordMap = new TreeMap<>();
            Matcher m = WORD_PATTERN.matcher(words);
            while (m.find()) {
                // Locale.ROOT keeps the mapping stable: with a Turkish default
                // locale "I" would become a dotless "ı" and miss the stop list.
                String word = m.group().toLowerCase(java.util.Locale.ROOT);
                if (!NON_WORDS.contains(word)) {
                    Integer count = wordMap.get(word);
                    wordMap.put(word, count == null ? 1 : count + 1);
                }
            }
            return wordMap;
        }
    }

    再使用Java8的新特性解:

     1 package cn._1.wordfrequency;
     2 
     3 import java.util.ArrayList;
     4 import java.util.HashSet;
     5 import java.util.List;
     6 import java.util.Map;
     7 import java.util.Set;
     8 import java.util.TreeMap;
     9 import java.util.regex.Matcher;
    10 import java.util.regex.Pattern;
    11 
    12 /*
    13  * Functional Thinking by Neal Ford(O'Reilly).
    14  */
    /**
     * Counts how often each word occurs in a text, skipping a fixed set of
     * common stop words. This is the Java 8 variant built on lambdas and the
     * Stream API.
     */
    public class Word2 {

        /**
         * Lower-case words that are excluded from the statistics.
         * Shared by all instances and unmodifiable. Names outside the existing
         * import list are fully qualified so the imports stay untouched.
         */
        private static final Set<String> NON_WORDS =
                java.util.Collections.unmodifiableSet(new HashSet<String>(java.util.Arrays.asList(
                        "the", "and", "of", "to", "a",
                        "i", "it", "in", "or", "is",
                        "as", "so", "but", "be")));

        /** A word is a maximal run of regex word characters; compiled once. */
        private static final Pattern WORD_PATTERN = Pattern.compile("\\w+");

        /**
         * Collects every match of {@code pattern} in {@code words}, in order of
         * appearance.
         */
        private static List<String> regexToList(String words, Pattern pattern) {
            List<String> wordList = new ArrayList<>();
            Matcher m = pattern.matcher(words);
            while (m.find()) {
                wordList.add(m.group());
            }
            return wordList;
        }

        /**
         * Computes the frequency of every non-stop word in {@code words}.
         *
         * Pipeline: map lower-cases each word, filter drops the stop words, and
         * the grouping collector counts the remaining words into a TreeMap so
         * the result iterates in natural (alphabetical) key order.
         *
         * @param words the text to analyse; must not be {@code null}
         * @return a map from lower-cased word to its number of occurrences
         * @throws NullPointerException if {@code words} is {@code null}
         */
        public Map<String, Integer> wordFreq(String words) {
            java.util.Objects.requireNonNull(words, "words");
            TreeMap<String, Integer> wordMap = regexToList(words, WORD_PATTERN).stream()
                    // Locale.ROOT: a Turkish default locale would turn "I" into
                    // a dotless "ı" that no longer matches the stop list.
                    .map(w -> w.toLowerCase(java.util.Locale.ROOT))
                    .filter(w -> !NON_WORDS.contains(w))
                    // Let the collector build the map instead of mutating an
                    // outer map from forEach.
                    .collect(java.util.stream.Collectors.groupingBy(
                            w -> w,
                            TreeMap::new,
                            java.util.stream.Collectors.summingInt(w -> 1)));
            return wordMap;
        }
    }

    测试类:

     1 package cn._1.wordfrequency;
     2 
     3 import java.io.FileInputStream;
     4 import java.io.IOException;
     5 import java.util.ArrayList;
     6 import java.util.Collections;
     7 import java.util.Comparator;
     8 import java.util.List;
     9 import java.util.Map;
    10 import java.util.Map.Entry;
    11 
    12 public class Mmain {
    13 
    14     public static void main(String[] args) throws IOException {
    15         String str = readText("/home/yanshaochen/workspace/Functional_Thinking_Examples/mflie/sucai.txt");
    16         //调用老方法
    17         /*Map<String, Integer> map = new Word().wordFreq(str);*/
    18         //调用新方法:
    19         Map<String, Integer> map = new Word2().wordFreq(str);
    20         //自然排序:
    21         for (Entry<String, Integer> item : map.entrySet()) {
    22             System.out.println(item.getKey()+","+item.getValue());
    23         }
    24         //按照value进行排序(摘自网络):
    25         /*List<Map.Entry<String, Integer>> infoIds = new ArrayList<>(map.entrySet());
    26         Collections.sort(infoIds, new Comparator<Map.Entry<String, Integer>>() {
    27             public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {      
    28                 return (o2.getValue() - o1.getValue()); 
    29                 //return (o1.getKey()).toString().compareTo(o2.getKey());
    30                 }
    31             }); 
    32         for (Entry<String, Integer> item : infoIds) {
    33             System.out.println(item.getKey()+","+item.getValue());
    34         }*/
    35     }
    36 
    37     /*
    38      * IO流
    39      */
    40     private static String readText(String path) throws IOException {
    41         FileInputStream fis = new FileInputStream(path);
    42         byte[] bytes = new byte[1024];
    43         int data;
    44         String str ="";
    45         while((data = fis.read(bytes))!=-1){
    46             str += new String(bytes, 0, data);
    47         }
    48         fis.close();
    49         return str;
    50     }
    51 }
  • 相关阅读:
    诊断Oracle 服从成绩
    联机热备份失踪败后,怎样翻开数据库?
    Oracle 8.0.4 for Windows NT的装配
    Oracle常用数据字典
    怎样快速查出Oracle数据库中的锁等待
    Oracle不凡包
    Developer/2000 R2.1 中文版 在 Windows NT 上的安置
    Oracle中巧用FORMS_DDL
    Oracle 基本常识
    autorun的执行的命令行
  • 原文地址:https://www.cnblogs.com/tomasman/p/7067876.html
Copyright © 2011-2022 走看看