zoukankan      html  css  js  c++  java
  • 单词 统计续

    第1步:输出单个文件中的前 N 个最常出现的英语单词。

    功能1:输出文件中所有不重复的单词,按照出现次数由多到少排列,出现次数同样多的,以字典序排列。

    功能2: 指定文件目录,对目录下每一个文件执行统计的操作。 

    功能3:指定文件目录,递归遍历该目录下所有子目录中的文件,并对每个文件统计单词。

    功能4:输出出现次数最多的前 n 个单词。

    package test;
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.text.DecimalFormat;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;
    import java.util.Scanner;
    import java.util.StringTokenizer;
    
    /**
     * Console utility that loads one text file and reports letter and word
     * frequencies through a small interactive menu.
     *
     * <p>Words are the maximal runs of regex word characters ({@code \w});
     * counting is case-sensitive, so {@code "The"} and {@code "the"} are
     * distinct words.
     */
    public class test2 {

        /** File analysed when no path is given on the command line. */
        private static final String DEFAULT_PATH = "D:\\飘c1.txt";

        /** Name of the charset used to decode the input file. */
        private static final String FILE_CHARSET = "gbk";

        /** Line printed before every word ranking. */
        private static final String SEPARATOR = "//////////////////////////////";

        /**
         * Reads the input file once, then serves menu requests until the user
         * enters {@code 0} or standard input is exhausted.
         *
         * @param args optional; {@code args[0]} overrides the default file path
         * @throws IOException if the file cannot be opened, decoded or read
         */
        public static void main(String[] args) throws IOException {
            String path = args.length > 0 ? args[0] : DEFAULT_PATH;
            String text = readAll(new File(path));

            Scanner scan = new Scanner(System.in);
            int option = 10;
            while (option != 0) {
                System.out.println("1、统计字母的个数  2、统计单词个数 3、统计出现最多次数的几个单词  4、统计删除无用表后的单词 0、退出");
                if (!skipToInt(scan)) {
                    break; // input ended without an explicit 0
                }
                option = scan.nextInt();
                if (option == 1) {
                    tongjizimu(text);
                } else if (option == 2) {
                    tongjidanci(text);
                } else if (option == 3) {
                    System.out.println("显示前n个出现最多的单词,请输入n");
                    if (!skipToInt(scan)) {
                        break;
                    }
                    tongjidanci1(text, scan.nextInt());
                } else if (option == 4) {
                    tongjidanci2(text);
                } else if (option == 0) {
                    System.out.println("已退出。");
                }
            }
        }

        /**
         * Reads a whole file into memory, decoding it with {@link #FILE_CHARSET}.
         *
         * @param file the file to read
         * @return the decoded file content
         * @throws IOException if the file cannot be opened, decoded or read
         */
        private static String readAll(File file) throws IOException {
            StringBuilder text = new StringBuilder();
            // try-with-resources closes the whole stream chain even when a read fails.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), FILE_CHARSET))) {
                char[] chunk = new char[8192];
                int read;
                // read() == -1 is the end-of-stream signal; ready() only reports
                // whether a read would block, so it is not a valid EOF test.
                while ((read = reader.read(chunk)) != -1) {
                    text.append(chunk, 0, read);
                }
            }
            return text.toString();
        }

        /**
         * Discards tokens from {@code scan} until the next token is an integer.
         *
         * @param scan the scanner to advance
         * @return {@code true} if an integer is ready to be read, {@code false}
         *         if the input ended first
         */
        private static boolean skipToInt(Scanner scan) {
            while (!scan.hasNextInt()) {
                if (!scan.hasNext()) {
                    return false;
                }
                scan.next(); // drop the non-numeric token instead of throwing
            }
            return true;
        }

        /**
         * Converts an ASCII upper-case letter to lower case.
         *
         * @param c any character
         * @return the lower-case form of {@code c} when it is {@code 'A'..'Z'},
         *         otherwise {@code c} unchanged
         */
        static char ch(char c) {
            if (c >= 'A' && c <= 'Z') {
                c += 32; // distance between 'A' and 'a' in ASCII
            }
            return c;
        }

        /**
         * Counts how often each word occurs in {@code str}.
         *
         * @param str the text to analyse
         * @return a map from each distinct word to its number of occurrences
         */
        private static Map<String, Integer> countWords(String str) {
            Map<String, Integer> counts = new HashMap<String, Integer>();
            for (String token : str.split("\\W+")) {
                if (token.isEmpty()) {
                    // split() yields a leading "" when the text starts with a
                    // non-word character (or is empty); that is not a word.
                    continue;
                }
                Integer seen = counts.get(token);
                counts.put(token, seen == null ? 1 : seen + 1);
            }
            return counts;
        }

        /**
         * Returns the distinct words of {@code str}.
         *
         * <p>The order is the iteration order of the internal hash map; element
         * {@code i} pairs with element {@code i} of {@link #StatList1(String)}
         * for the same input.
         *
         * @param str the text to analyse
         * @return the distinct words
         */
        static String[] StatList(String str) {
            Map<String, Integer> counts = countWords(str);
            return counts.keySet().toArray(new String[counts.size()]);
        }

        /**
         * Returns the occurrence count of every distinct word of {@code str},
         * in the same order as {@link #StatList(String)} for the same input.
         *
         * @param str the text to analyse
         * @return the occurrence counts
         */
        static int[] StatList1(String str) {
            Map<String, Integer> counts = countWords(str);
            int[] result = new int[counts.size()];
            int next = 0;
            // Iterating keySet() keeps this aligned with StatList, which is
            // built from an identically populated map.
            for (String word : counts.keySet()) {
                result[next++] = counts.get(word);
            }
            return result;
        }

        /**
         * Prints, for each of the 26 English letters, how often it occurs in
         * {@code a} (ignoring case) and its share of all letters as a percentage.
         * Letters are listed by descending count; equal counts stay in
         * alphabetical order.
         *
         * @param a the text to analyse
         */
        public static void tongjizimu(String a) {
            int[] perLetter = new int[26];
            int total = 0;
            for (int i = 0; i < a.length(); i++) {
                char c = ch(a.charAt(i));
                if (c >= 'a' && c <= 'z') {
                    perLetter[c - 'a']++;
                    total++;
                }
            }

            Tally[] ranked = new Tally[26];
            for (int k = 0; k < 26; k++) {
                ranked[k] = new Tally(String.valueOf((char) ('a' + k)), perLetter[k]);
            }
            Arrays.sort(ranked);

            System.out.println("这篇文章中英文字母个数分别为:");
            System.out.println("排序如下:");
            for (Tally t : ranked) {
                System.out.println(t.label + "字母个数为:" + t.count);
            }

            DecimalFormat df = new DecimalFormat("######0.00");
            for (Tally t : ranked) {
                // Without any letters the share is reported as 0 rather than NaN.
                double percent = total == 0 ? 0.0 : (double) t.count / total * 100;
                System.out.println(t.label + "字母频率为:" + df.format(percent) + "%");
            }
        }

        /**
         * Prints every distinct word of {@code a} with its occurrence count,
         * most frequent first; words with equal counts are listed in
         * dictionary order.
         *
         * @param a the text to analyse
         */
        public static void tongjidanci(String a) {
            printRanked(a, Integer.MAX_VALUE);
        }

        /**
         * Prints the {@code n} most frequent words of {@code a} with their
         * occurrence counts, using the same ordering as
         * {@link #tongjidanci(String)}.
         *
         * @param a the text to analyse
         * @param n how many words to print; values above the number of distinct
         *          words print all of them, values below 1 print none
         */
        public static void tongjidanci1(String a, int n) {
            printRanked(a, n);
        }

        /**
         * Prints every distinct word of {@code a} with its occurrence count,
         * exactly like {@link #tongjidanci(String)}.
         *
         * <p>NOTE(review): the menu describes this option as counting words
         * after removing a stop-word list, but no such list exists in this
         * class, so no filtering is applied — confirm the intended word list.
         *
         * @param a the text to analyse
         */
        public static void tongjidanci2(String a) {
            printRanked(a, Integer.MAX_VALUE);
        }

        /**
         * Prints the separator line followed by at most {@code limit} ranked words.
         *
         * @param text  the text to analyse
         * @param limit the maximum number of words to print
         */
        private static void printRanked(String text, int limit) {
            Map<String, Integer> counts = countWords(text);
            Tally[] ranked = new Tally[counts.size()];
            int next = 0;
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                ranked[next++] = new Tally(entry.getKey(), entry.getValue());
            }
            Arrays.sort(ranked);

            System.out.println(SEPARATOR);
            int shown = Math.max(0, Math.min(limit, ranked.length));
            for (int i = 0; i < shown; i++) {
                System.out.println("单词:" + ranked[i].label + "  且出现的次数:" + ranked[i].count);
            }
        }

        /** A label with its occurrence count; sorts by count descending, then by label. */
        private static final class Tally implements Comparable<Tally> {
            final String label;
            final int count;

            Tally(String label, int count) {
                this.label = label;
                this.count = count;
            }

            @Override
            public int compareTo(Tally other) {
                if (count != other.count) {
                    return Integer.compare(other.count, count);
                }
                return label.compareTo(other.label);
            }
        }
    }

  • 相关阅读:
    JavaSE--注解
    JavaSE--【JAVA】unicode为12288字符
    Spring--Spring 注入
    Spring--@configuration 和 @Bean
    JavaEE--分布式对象
    JavaSE--jdom解析之bom
    JavaEE--分布式与集群
    JavaEE--调用 WSDL -- httpclient 4.x.x
    JavaSE--RMI初识
    Redis--初识Redis
  • 原文地址:https://www.cnblogs.com/zlj843767688/p/11001201.html
Copyright © 2011-2022 走看看