zoukankan      html  css  js  c++  java
  • 统计

    用户需求:英语的 26 个字母的频率在一本小说中是如何分布的?某类型文章中常出现的单词是什么?某作家最常用的词汇是什么?《哈利波特》中最常用的短语是什么,等等。

    要求:输出单个文件中的前 N 个最常出现的英语单词,并将结果输出到文本文件中

    package wordcont;
    
    
    import java.io.BufferedReader;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    import java.util.StringTokenizer;
    import java.util.TreeMap;
    import java.util.TreeSet;
    import wordcont.WordEntity;
     
    /**
     * Word-frequency reports for a text file, printed to standard output.
     */
    public class WordCont {

        /**
         * Counts the whitespace-separated, lower-cased tokens of a file and prints
         * every {@code token=count} entry in dictionary order.
         *
         * <p>Failures to open or read the file are reported with
         * {@code printStackTrace()} and are not rethrown.
         *
         * @param fileName path of the text file to read
         */
        public void displayWordCount(String fileName){
            // try-with-resources closes the reader on every exit path.
            try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
                TreeMap<String,Integer> tm = new TreeMap<String,Integer>();
                String line;

                while((line=reader.readLine())!=null){
                    for (String token : line.toLowerCase().split("\\s+")) {
                        String word = token.trim();
                        // A blank line or leading whitespace makes split() yield "";
                        // that is not a word and must not be counted.
                        if (word.isEmpty()) {
                            continue;
                        }
                        Integer seen = tm.get(word);
                        tm.put(word, seen == null ? 1 : seen + 1);
                    }
                }

                // The TreeMap iterates in key order, so entries come out sorted.
                System.out.println("按字典序输出为:");
                for (Map.Entry<String,Integer> entry : tm.entrySet()) {
                    System.out.println(entry);
                }

            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Prints the word or words that occur most often in a file, one per line,
         * each followed by its count. Words are delimited by comma, period,
         * exclamation mark, space and newline.
         *
         * <p>Only the header line is printed when the file contains no words.
         * A missing or unreadable file is reported with a message on standard
         * output and is not rethrown.
         *
         * <p>NOTE(review): unlike {@link #displayWordCount(String)}, tokens are not
         * lower-cased here, so "The" and "the" are counted separately — confirm
         * whether that is intended.
         *
         * @param fileName path of the text file to read
         */
        public void displayFrequencyWord(String fileName){
            try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
                StringBuilder sb = new StringBuilder();
                String s;
                while ((s = br.readLine()) != null) {
                    // readLine() strips the line terminator; put one back so the
                    // last word of a line is not fused with the first of the next.
                    sb.append(s).append('\n');
                }

                Map<String,Integer> map = new HashMap<String, Integer>();
                StringTokenizer st = new StringTokenizer(sb.toString(),",.! \n");
                while (st.hasMoreTokens()) {
                    String word = st.nextToken().trim();
                    // A token made only of characters trim() removes (e.g. a tab)
                    // is not a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    Integer seen = map.get(word);
                    map.put(word, seen == null ? 1 : seen + 1);
                }

                // WordEntity orders by descending count, so the TreeSet iterates
                // from the most frequent word downwards.
                Set<WordEntity> set = new TreeSet<WordEntity>();
                for (Map.Entry<String,Integer> entry : map.entrySet()) {
                    set.add(new WordEntity(entry.getKey(), entry.getValue()));
                }

                System.out.println("出现频率最高的单词:");
                Iterator<WordEntity> first = set.iterator();
                if (!first.hasNext()) {
                    // No words at all: nothing to report beyond the header.
                    return;
                }
                int highest = first.next().getCount();
                for (WordEntity w : set) {
                    // All words tied for the highest count sit at the front of the set.
                    if (w.getCount() != highest) {
                        break;
                    }
                    System.out.println(w.getKey() + " 出现的次数为: "+ w.getCount());
                }
            } catch (FileNotFoundException e) {
                System.out.println("文件未找到~!");
            } catch (IOException e) {
                System.out.println("文件读异常~!");
            }

        }

    }
    
    package wordcont;
     
    import wordcont.WordEntity;
     
    /**
     * A word paired with its occurrence count.
     *
     * <p>The natural ordering puts higher counts first and breaks ties by
     * ascending word, so a sorted collection of these iterates from the most
     * frequent word downwards.
     */
    public class WordEntity implements Comparable<WordEntity>{
        private final String key;
        private final Integer count;

        /**
         * @param key   the word
         * @param count how many times the word occurred
         */
        public WordEntity ( String key,Integer count) {
            this.key = key;
            this.count = count;
        }

        /**
         * Creates an entity whose word and count are both {@code null}.
         * Such an instance cannot be compared with {@link #compareTo}.
         */
        public WordEntity(){
            this(null, null);
        }

        /**
         * Orders by descending count, then by ascending word.
         *
         * @param o the entity to compare against
         * @return a negative value if this entity sorts before {@code o}, zero if
         *         both have the same count and word, a positive value otherwise
         * @throws NullPointerException if either entity has a null count, or the
         *         counts are equal and this entity's word is null
         */
        @Override
        public int compareTo(WordEntity o) {
            // Swapping the arguments gives descending order. Integer.compare
            // cannot overflow, unlike subtracting one count from the other.
            int byCount = Integer.compare(o.count.intValue(), count.intValue());
            return byCount != 0 ? byCount : key.compareTo(o.key);
        }

        /**
         * Two entities are equal when both word and count are equal, which keeps
         * {@code equals} consistent with {@link #compareTo}.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof WordEntity)) {
                return false;
            }
            WordEntity other = (WordEntity) obj;
            boolean sameKey = key == null ? other.key == null : key.equals(other.key);
            boolean sameCount = count == null ? other.count == null : count.equals(other.count);
            return sameKey && sameCount;
        }

        @Override
        public int hashCode() {
            int result = key == null ? 0 : key.hashCode();
            return 31 * result + (count == null ? 0 : count.hashCode());
        }

        @Override
        public String toString() {
            return key + " 出现的次数为:" + count;
        }

        /** @return the word */
        public String getKey() {
            return key;
        }

        /** @return the occurrence count */
        public Integer getCount() {
            return count;
        }
    }
    package wordcont;
     
    import java.util.Scanner;
     
    import wordcont.WordCont;
     
    /**
     * Console entry point: asks for a file path and prints both word reports for it.
     */
    public class Main {

        /**
         * Reads one line from standard input, trims it, and passes it as the file
         * path to {@code displayWordCount} and then {@code displayFrequencyWord}.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            System.out.println("输入文件路径:\n");
            String path = new Scanner(System.in).nextLine().trim();

            WordCont counter = new WordCont();
            counter.displayWordCount(path);
            counter.displayFrequencyWord(path);
        }

    }
  • 相关阅读:
    function与感叹号
    js中的|| 与 &&
    [转] html屏蔽右键、禁止复制
    ExtJS 5.1 WINDOW BLUR
    ExtJS 网页版执行工具
    Excel 随即获得一组数据集中的数据
    [转] Spring Data JPA Tutorial: Pagination
    Set up eclipse for Ext js and spket IDE plugin
    ExtJS Alias, xtype and widget
    ExtJS stores
  • 原文地址:https://www.cnblogs.com/zhangzhongkun/p/9787924.html
Copyright © 2011-2022 走看看