zoukankan      html  css  js  c++  java
  • 文本内容统计

    1.一个用于统计文本文件中的英语单词出现频率的控制台程序

    package com.word;
    
    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    import java.text.NumberFormat;
    
    /**
     * Console program that counts how often each English letter (case-insensitive)
     * occurs in a text file and prints the letters from most to least frequent,
     * each with its share of all counted letters as a percentage.
     */
    public class test {

        // NOTE(review): empty inner class kept only so the class's public surface is
        // unchanged. Its simple name obscures the `java` package inside `test`, so
        // code in this class must use imported simple names, never `java.x.Y`.
        public class java {

        }

        /**
         * Counts letters in the input file and prints the frequency table.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception {
            String path = (args != null && args.length > 0)
                    ? args[0]
                    : "E:\\qq  receive files\\Harry Potter and the Sorcerer's Stone.txt";
            int[] count = new int[26];

            // try-with-resources closes the reader even when read() throws.
            try (BufferedReader bf = new BufferedReader(new InputStreamReader(
                    new FileInputStream(path)))) {
                int ch;
                while ((ch = bf.read()) != -1) {
                    if (ch >= 'A' && ch <= 'Z') {
                        count[ch - 'A']++;
                    } else if (ch >= 'a' && ch <= 'z') {
                        count[ch - 'a']++;
                    }
                }
            }

            // The counts are deliberately NOT sorted here: sorting the bare numbers
            // would detach every count from its letter. Print orders them itself.
            Print(count);
        }

        /**
         * Sorts the array in place into descending order and returns the same array.
         *
         * <p>Only the values are moved, so any meaning attached to the original
         * index positions (such as "index 0 is the letter a") is lost.
         *
         * @param count values to sort; modified in place
         * @return the same array instance, sorted from largest to smallest
         */
        public static int[] Sort(int[] count) {
            int size = count.length;
            for (int i = 0; i < size - 1; i++) {
                for (int j = i + 1; j < size; j++) {
                    if (count[i] < count[j]) {
                        int temp = count[j];
                        count[j] = count[i];
                        count[i] = temp;
                    }
                }
            }
            return count;
        }

        /**
         * Prints one line per letter that occurred at least once, most frequent
         * first, in the form {@code letter(percent%)}. Letters with equal counts
         * are printed in alphabetical order.
         *
         * @param count occurrence counts where index {@code i} belongs to the letter
         *              {@code 'a' + i}; the array is not modified
         */
        public static void Print(int[] count) {
            NumberFormat df = NumberFormat.getInstance();
            df.setMaximumFractionDigits(2);

            long sum = 0;
            for (int value : count) {
                sum += value;
            }
            if (sum == 0) {
                return; // no letters counted, nothing to report
            }

            // Order the letter indices by descending count. Insertion sort is stable,
            // so equal counts keep their alphabetical order.
            int[] order = new int[count.length];
            for (int i = 0; i < order.length; i++) {
                order[i] = i;
            }
            for (int i = 1; i < order.length; i++) {
                int current = order[i];
                int j = i - 1;
                while (j >= 0 && count[order[j]] < count[current]) {
                    order[j + 1] = order[j];
                    j--;
                }
                order[j + 1] = current;
            }

            for (int index : order) {
                if (count[index] > 0) {
                    char lowerCase = (char) ('a' + index);
                    String percent = df.format(100.0 * count[index] / sum);
                    System.out.println(lowerCase + "(" + percent + "%)");
                }
            }
        }

    }

    2.要求:输出单个文件中的前 N 个最常出现的英语单词。
    功能1:输出文件中所有不重复的单词,按照出现次数由多到少排列,出现次数同样多的,以字典序排列。
    功能2:指定文件目录,对目录下每一个文件执行  功能1的操作。
    功能3:指定文件目录, 但是会递归遍历目录下的所有子目录,每个文件执行功能1的操作。


    只完成了输出文件中的前N个单词的功能....

    package com.word;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.*;
    import java.util.StringTokenizer;
    /**
     * Console program that counts the words of a text file (case-insensitive) and
     * prints the N most frequent ones, where N is read from standard input.
     * Words with equal counts are listed in dictionary order.
     */
    public class Test2 {
        /** Input file used when no path is passed on the command line. */
        private static final String DEFAULT_PATH =
                "//E:\\\\qq  receive files\\\\Harry Potter and the Sorcerer's Stone.txt";

        /** Characters that separate words; everything else is part of a word. */
        private static final String DELIMITERS = " .,?”“;:''   !—‘";

        /**
         * Counts the words in the input file, asks how many to show, and prints
         * that many {@code word    count} lines, most frequent first.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws IOException if the file cannot be opened or read
         */
        public static void main(String[] args) throws IOException {
            // Not closed on purpose: closing the Scanner would close System.in.
            Scanner sc = new Scanner(System.in);
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
            File file = new File(path);

            Map<String, Integer> frequencies;
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
                frequencies = countWords(reader);
            }
            List<Map.Entry<String, Integer>> ranked = rank(frequencies);

            System.out.print("你要前几个最常出现的单词:");
            int choose = sc.nextInt();
            // Never print more entries than there are distinct words.
            int limit = Math.min(choose, ranked.size());
            for (int i = 0; i < limit; i++) {
                Map.Entry<String, Integer> entry = ranked.get(i);
                System.out.println(entry.getKey() + "    " + entry.getValue());
            }
        }

        /** Reads every line, splits it on DELIMITERS, and tallies the lower-cased tokens. */
        private static Map<String, Integer> countWords(BufferedReader reader) throws IOException {
            Map<String, Integer> frequencies = new HashMap<>();
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line, DELIMITERS);
                while (tokens.hasMoreTokens()) {
                    // Locale.ROOT keeps lower-casing independent of the default locale.
                    String word = tokens.nextToken().toLowerCase(Locale.ROOT);
                    Integer seen = frequencies.get(word);
                    frequencies.put(word, seen == null ? 1 : seen + 1);
                }
            }
            return frequencies;
        }

        /** Returns the entries ordered by descending count, then by ascending word. */
        private static List<Map.Entry<String, Integer>> rank(Map<String, Integer> frequencies) {
            List<Map.Entry<String, Integer>> ranked = new ArrayList<>(frequencies.entrySet());
            Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> left,
                                   Map.Entry<String, Integer> right) {
                    int byCount = right.getValue().compareTo(left.getValue());
                    return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
                }
            });
            return ranked;
        }
    }

  • 相关阅读:
    POJ 3009 Curling 2.0 简单DFS 好题
    POJ 3253 Fence Repair 贪心
    python_13 面向对象
    python_12 模块
    python练习题_04
    python_11 装饰器,闭包
    python练习题_03
    python_10 迭代器和生成器
    python_09 文件处理流程,文件操作方法
    python_08 函数式编程、高阶函数、map、filter、reduce函数、内置函数
  • 原文地址:https://www.cnblogs.com/ywqtro/p/11801817.html
Copyright © 2011-2022 走看看