zoukankan      html  css  js  c++  java
  • 文本统计器(Java)

    1. 创建一个类,实现统计文本文件中各类字符和字符串的个数的功能,要求实现:

    a) 按字符统计,输出各个字符的数量

    b) 按单词统计,输出各个单词的数量

    2. 在b)的基础上实现一个类keywordIdentifier,读入一个Java程序源文件,输出各个关键字的个数(注释中出现的关键字不计入关键字个数)

    思考:统计1:如果文本文件只包含26个英文字母并且用空格分离,那么只需要使用数组就可以对字符计数,用空格分离得到字符串可以对字符串计数(是否区分大小写问题)。如果文本文件是英文,但是包含各种标点及其他符号,则使用集合key-value的形式来对字符计数,按字符依次读取,然后将26个字母以外的所有字符作为空格处理,再使用空格将其分离,最后对字符串统计计数。

    关键字统计:需要所有关键字集合,对每行判断是否为注释,再对非注释进行空格分离,对关键字计数(采用key-value形式)

    1.统计字符和单词 StringCounter.java

    import java.util.Map;
    import java.util.TreeMap;
    
    /**
     * Accumulates character and word frequencies over the text fed to it.
     *
     * <p>Counts are kept across calls, so a caller can pass a file line by line
     * and print the totals once at the end with {@link #output()}. Both tables
     * are {@link TreeMap}s, so the printed keys are in sorted order. Counting is
     * case-sensitive.
     */
    public class StringCounter {

        /** Occurrences of each character, keyed by the character as a string. */
        private Map<String, Integer> charCount = new TreeMap<String, Integer>();

        /** Occurrences of each word. */
        private Map<String, Integer> wordCount = new TreeMap<String, Integer>();

        /**
         * Adds every character of {@code s} to the character table.
         *
         * @param s text to scan; {@code null} is treated as empty text
         */
        public void charCounter(String s) {
            if (s == null) {
                return;
            }
            int i = 0;
            while (i < s.length()) {
                // Step by code point so a surrogate pair is counted as one
                // character instead of two meaningless halves.
                int width = Character.charCount(s.codePointAt(i));
                increment(charCount, s.substring(i, i + width));
                i += width;
            }
        }

        /**
         * Adds every word of {@code s} to the word table.
         *
         * <p>A word is a maximal run of regex word characters
         * ({@code [a-zA-Z_0-9]}); everything else is a separator.
         *
         * @param s text to scan; {@code null} is treated as empty text
         */
        public void wordCounter(String s) {
            if (s == null) {
                return;
            }
            // The backslash must be doubled inside a Java string literal.
            for (String word : s.split("\\W+")) {
                // split() yields an empty first token when the text is blank
                // or starts with a separator; that is not a word.
                if (word.length() > 0) {
                    increment(wordCount, word);
                }
            }
        }

        /** Prints the character table and then the word table to standard output. */
        public void output() {
            System.out.println(charCount);
            System.out.println(wordCount);
        }

        /** Adds one to the count stored under {@code key}, starting from zero. */
        private static void increment(Map<String, Integer> counts, String key) {
            Integer freq = counts.get(key);
            counts.put(key, freq == null ? 1 : freq + 1);
        }
    }

    2.测试 使用的文本文件为随意的英文文本

    import java.io.IOException;
    import java.io.RandomAccessFile;
    
    /**
     * Driver that feeds {@code string.txt} line by line into a
     * {@link StringCounter} and prints the resulting character and word counts.
     */
    public class CountTest {

        /**
         * Reads {@code string.txt} from the working directory and prints its
         * character and word frequencies.
         *
         * @param args unused
         * @throws IOException if the file cannot be opened or read
         */
        public static void main(String[] args) throws IOException {
            StringCounter sc = new StringCounter();

            RandomAccessFile file = new RandomAccessFile("string.txt", "r");
            try {
                String s;
                // readLine() returns null at end of file, so no separate
                // file-pointer bookkeeping is needed.
                // NOTE(review): RandomAccessFile.readLine() maps each byte to one
                // char, so only single-byte (ASCII/Latin-1) text is read faithfully.
                while ((s = file.readLine()) != null) {
                    sc.charCounter(s);
                    sc.wordCounter(s);
                }
            } finally {
                // Close the file even if reading or counting throws.
                file.close();
            }

            sc.output();
        }

    }

    3.关键词统计

      先将所有关键字放入map中,以关键字为键,次数0为值,再读文本统计个数

    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.util.Map;
    import java.util.TreeMap;
    
    /**
     * Counts Java keywords in source text supplied one line at a time.
     *
     * <p>Usage: call {@link #keyWord()} once to load the keyword list, pass each
     * source line in order to {@link #keyWordCounter(String)}, then print the
     * totals with {@link #output()}. Keywords inside {@code //} comments,
     * {@code /* ... *}{@code /} comments, string literals and character literals
     * are not counted.
     *
     * <p>Block-comment state is carried from one line to the next, so lines must
     * be supplied in file order and one instance should scan one file at a time.
     * Multi-line text blocks ({@code """}) are not recognised.
     */
    public class keywordIdentifier {

        /** Count per keyword; only words loaded by {@link #keyWord()} are keys. */
        private Map<String, Integer> keyWordCount = new TreeMap<String, Integer>();

        /** True while the scanner is inside an unterminated block comment. */
        private boolean inBlockComment = false;

        /**
         * Loads the whitespace-separated keyword list from {@code keyword.txt} in
         * the working directory and sets every keyword's count to zero.
         *
         * @throws IOException if the file cannot be opened or read
         */
        public void keyWord() throws IOException {
            RandomAccessFile file = new RandomAccessFile("keyword.txt", "r");
            try {
                String line;
                while ((line = file.readLine()) != null) {
                    for (String word : line.split("\\s+")) {
                        // Blank lines and leading whitespace produce an empty
                        // token; it must not be registered as a keyword.
                        if (word.length() > 0) {
                            keyWordCount.put(word, 0);
                        }
                    }
                }
            } finally {
                // Close the file even if reading throws.
                file.close();
            }
        }

        /**
         * Counts the keywords found in the code portion of one source line.
         *
         * @param s the next source line; {@code null} is ignored
         */
        public void keyWordCounter(String s) {
            if (s == null) {
                return;
            }
            String code = stripCommentsAndLiterals(s);
            // The backslash must be doubled inside a Java string literal.
            for (String word : code.split("\\W+")) {
                Integer freq = keyWordCount.get(word);
                // Only words registered as keywords are counted.
                if (freq != null) {
                    keyWordCount.put(word, freq + 1);
                }
            }
        }

        /** Prints the keyword table to standard output. */
        public void output() {
            System.out.println(keyWordCount);
        }

        /**
         * Returns {@code line} with comments and string/char literals replaced
         * by spaces, updating {@link #inBlockComment} for the next line.
         */
        private String stripCommentsAndLiterals(String line) {
            StringBuilder code = new StringBuilder(line.length());
            int n = line.length();
            int i = 0;
            while (i < n) {
                char c = line.charAt(i);
                boolean hasNext = i + 1 < n;
                if (inBlockComment) {
                    if (c == '*' && hasNext && line.charAt(i + 1) == '/') {
                        inBlockComment = false;
                        // Keep tokens on either side of the comment separate.
                        code.append(' ');
                        i += 2;
                    } else {
                        i++;
                    }
                } else if (c == '/' && hasNext && line.charAt(i + 1) == '/') {
                    // Line comment: the rest of the line is not code.
                    break;
                } else if (c == '/' && hasNext && line.charAt(i + 1) == '*') {
                    inBlockComment = true;
                    i += 2;
                } else if (c == '"' || c == '\'') {
                    // Skip the literal so that "//" or keywords inside it are
                    // not mistaken for a comment or for code.
                    i = skipLiteral(line, i);
                    code.append(' ');
                } else {
                    code.append(c);
                    i++;
                }
            }
            return code.toString();
        }

        /**
         * Returns the index just past the quoted literal that opens at
         * {@code start}, or the line length if the literal is not closed.
         */
        private static int skipLiteral(String line, int start) {
            char quote = line.charAt(start);
            int i = start + 1;
            while (i < line.length()) {
                char c = line.charAt(i);
                if (c == '\\') {
                    // Skip the escaped character, e.g. \" inside a string.
                    i += 2;
                } else if (c == quote) {
                    return i + 1;
                } else {
                    i++;
                }
            }
            return line.length();
        }
    }

    4.测试关键词统计

    import java.io.*;
    
    /**
     * Driver that loads the keyword list, scans {@code CountTest.java} line by
     * line with a {@link keywordIdentifier} and prints the keyword counts.
     */
    public class CountTest {

        /**
         * Counts the Java keywords in {@code CountTest.java} (read from the
         * working directory) and prints the totals.
         *
         * @param args unused
         * @throws IOException if the keyword list or the source file cannot be read
         */
        public static void main(String[] args) throws IOException {
            keywordIdentifier sc = new keywordIdentifier();
            sc.keyWord();

            RandomAccessFile file = new RandomAccessFile("CountTest.java", "r");
            try {
                String s;
                // readLine() returns null at end of file, so no separate
                // file-pointer bookkeeping is needed.
                while ((s = file.readLine()) != null) {
                    sc.keyWordCounter(s);
                }
            } finally {
                // Close the file even if reading or counting throws.
                file.close();
            }

            sc.output();
        }

    }

    输出:

    {abstract=0, assert=0, boolean=0, break=0, byte=0, case=0, catch=0, char=0, class=1, const=0, continue=0, default=0, do=0, double=0, else=0, enum=0, extends=0, final=0, finally=0, float=0, for=0, goto=0, if=0, implements=0, import=1, instanceof=0, int=0, interface=0, long=2, native=0, new=2, package=1, private=0, protected=0, public=2, return=0, short=0, static=1, strictfp=0, super=0, switch=0, synchronized=0, this=0, throw=0, throws=1, transient=0, try=0, void=1, volatile=0, while=1}

  • 相关阅读:
    mysql存储过程基本函数
    Java多线程程序设计详细解析
    手把手教你写Undo、Redo程序
    mysql存储过程学习总结-操作符
    深入解析ATL第二版(ATL8.0)笔记--(2.3节)
    mysql 5.0存储过程学习总结
    php判断浏览器和语言
    Windows7系统环境安装配置PHP开发环境
    Nginx环境下Php安装
    php学习
  • 原文地址:https://www.cnblogs.com/datamining-bio/p/9532925.html
Copyright © 2011-2022 走看看