zoukankan      html  css  js  c++  java
  • java单词统计

    要求1:输出某个英文文本文件中26个字母出现的频率,由高到低排序,并显示每个字母出现的百分比,精确到小数点后两位。

    思路:分别设存放字母和字母出现次数的数组,遍历文件内容,将字母及出现频率按由高到低的顺序输出

    源码:

    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.Scanner;
    /**
     * Letter-frequency report for a fixed English text file.
     *
     * <p>Each of the 52 symbols in {@link #str1} (a-z followed by A-Z; the two cases are
     * tallied separately) is printed together with its share of all counted letters,
     * most frequent first, formatted to two decimal places.
     */
    public class word
    {
        /** Text file analysed by {@link #read()}. */
        private static final String FILE_PATH =
                "D:\\h\\halibote\\Harry Potter and the Sorcerer's Stone.txt";

        /** Line most recently read from the input; {@code null} once the input is exhausted. */
        static String str="";
        /** The letters that are counted; their order here is also the order used for ties. */
        static String str1="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
        /** The letters, reordered by {@link #analyze} so that the most frequent comes first. */
        static char ch1 []=str1.toCharArray();
        /** After {@link #analyze}: {@code num[k]} is the percentage that belongs to {@code ch1[k]}. */
        public static double num[]=new double[100];
        /** Total number of counted letters in the analysed input. */
        public static int sum=0;

        /**
         * Analyses the file at the fixed path and prints one line per letter in the form
         * {@code <letter><percentage>%}, most frequent letter first.
         *
         * <p>I/O failures are reported on standard error via the stack trace; nothing is
         * rethrown.
         */
        public static void read()
        {
            File file = new File(FILE_PATH);
            // try-with-resources closes the reader even when reading fails half-way.
            try (BufferedReader br = new BufferedReader(new FileReader(file)))
            {
                analyze(br);
                for(int k=0;k<str1.length();k++)
                {
                    System.out.print(ch1[k]);
                    System.out.printf("%.2f",num[k]);
                    System.out.println("%");
                }
            }
            catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Counts the letters supplied by {@code br} and leaves the result in
         * {@link #ch1}, {@link #num} and {@link #sum}.
         *
         * <p>On return {@code ch1} is ordered by descending frequency (letters with equal
         * counts keep their {@link #str1} order) and {@code num} holds percentages of
         * {@code sum}. If the input contains no letters, every percentage is 0.
         *
         * @param br source of the text; it is read to the end but not closed
         * @throws IOException if reading from {@code br} fails
         */
        static void analyze(BufferedReader br) throws IOException
        {
            // Start from a clean slate so that a second call does not add to, or re-sort,
            // the percentages left behind by the previous one.
            ch1=str1.toCharArray();
            for(int k=0;k<num.length;k++)
            {
                num[k]=0;
            }
            sum=0;

            for(str=br.readLine();str!=null;str=br.readLine())
            {
                for(int j=0;j<str.length();j++)
                {
                    int k=str1.indexOf(str.charAt(j));
                    if(k>=0)
                    {
                        sum++;
                        num[k]++;
                    }
                }
            }

            sortByCountDescending();

            // With no letters at all the values stay 0 instead of turning into NaN (0/0).
            if(sum>0)
            {
                for(int k=0;k<str1.length();k++)
                {
                    num[k]=num[k]/sum*100;
                }
            }
        }

        /** Stable insertion sort of {@code ch1}/{@code num} in tandem, highest count first. */
        private static void sortByCountDescending()
        {
            for(int p=1;p<str1.length();p++)
            {
                double count=num[p];
                char letter=ch1[p];
                int q=p-1;
                // Strict comparison: equal counts are never moved past each other.
                while(q>=0&&num[q]<count)
                {
                    num[q+1]=num[q];
                    ch1[q+1]=ch1[q];
                    q--;
                }
                num[q+1]=count;
                ch1[q+1]=letter;
            }
        }

        public static void main(String[] args)
        {
            read();
        }
    }
     

    要求2:输出单个文件中的前n个最常出现的单词

    思路:

    遍历文件,读取所有单词并存入数组

    对读取的单词进行去重并存入新数组

    统计单词出现次数并将所统计每个单词的出现次数存入一数组

    按出现次数由高到低的顺序输出n个单词及出现次数

    源码:

    import java.io.File;
    import java.io.InputStreamReader;
    import java.io.Reader;
    import java.nio.file.FileVisitResult;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.SimpleFileVisitor;
    import java.nio.file.attribute.BasicFileAttributes;
    import java.util.Scanner;
    import java.io.BufferedReader; 
    import java.io.BufferedWriter; 
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException; 
    /**
     * Prints the {@code n} most frequent words of a fixed text file.
     *
     * <p>Text is lower-cased and only the letters a-z are kept. A word ends at a space,
     * a comma, a period or the end of a line; every other character is dropped without
     * ending the word (so {@code don't} is counted as {@code dont}).
     */
    public class word1
    {
        /** Text file analysed by {@link #Readfile()}. */
        private static final String FILE_PATH =
                "D:\\h\\halibote\\Harry Potter and the Sorcerer's Stone.txt";

        /** Line currently being processed; {@code null} once the input is exhausted. */
        private static String str="";
        private static Scanner sc=new Scanner(System.in);
        /** Reader that {@link #cun()} pulls further lines from. */
        private static BufferedReader cin=null;
        /** Every word of the input, in reading order. */
        private static final java.util.List<String> a=new java.util.ArrayList<String>();
        /** Distinct words; after {@link #Sorting()} the most frequent comes first. */
        private static String c[]=new String[0];
        /** {@code b[i]} is the number of occurrences of {@code c[i]}. */
        private static int b[]=new int[0];
        /** Total number of words. */
        private static int length=0;
        /** Number of distinct words. */
        private static int length1=0;
        /** How many words {@link #show()} prints. */
        private static int nn=0;
        /** Directory listing kept from the original program; not used by this class. */
        static File[] list = new File("D:\\h").listFiles();

        /**
         * Reads the file at the fixed path (as UTF-8) and collects its words.
         *
         * <p>On an I/O failure a message and the stack trace are printed; nothing is
         * rethrown.
         */
        public static void Readfile()
        {
            File file=new File(FILE_PATH);
            // try-with-resources closes the stream even when reading fails half-way.
            try(BufferedReader reader=new BufferedReader(
                    new InputStreamReader(new FileInputStream(file),"UTF-8")))
            {
                load(reader);
            }
            catch(IOException e) {
                System.out.println("读取失败!");
                e.printStackTrace();
            }
        }

        /**
         * Discards previously collected words and collects the words supplied by
         * {@code reader} instead.
         *
         * @param reader source of the text; it is read to the end but not closed
         * @throws IOException if reading fails
         */
        static void load(BufferedReader reader) throws IOException
        {
            a.clear();
            cin=reader;
            str=cin.readLine();
            cun();
        }

        /**
         * Splits the current line and all remaining lines of the reader into words and
         * appends them to the word list.
         *
         * @throws IOException if reading the next line fails
         */
        public  static void cun() throws IOException
        {
            StringBuilder current=new StringBuilder();
            while(str!=null)
            {
                // Locale.ROOT keeps the mapping independent of the machine's locale.
                str=str.toLowerCase(java.util.Locale.ROOT);
                for(int i=0;i<str.length();i++)
                {
                    char ch=str.charAt(i);
                    if(ch>='a'&&ch<='z')
                    {
                        current.append(ch);
                    }
                    else if(ch==' '||ch==','||ch=='.')
                    {
                        endWord(current);
                    }
                }
                // readLine() strips the line break, so the word has to be ended here;
                // otherwise it would be glued to the first word of the next line.
                endWord(current);
                str=(cin==null)?null:cin.readLine();
            }
            length=a.size();
        }

        /** Stores the word collected so far, if any, and empties the buffer. */
        private static void endWord(StringBuilder current)
        {
            if(current.length()>0)
            {
                a.add(current.toString());
                current.setLength(0);
            }
        }

        /**
         * Builds the list of distinct words and their occurrence counts from the
         * collected words. Distinct words are in order of first appearance.
         */
        public static void Statistics()
        {
            // LinkedHashMap: one pass over the words, first-appearance order preserved.
            java.util.Map<String,Integer> tally=new java.util.LinkedHashMap<String,Integer>();
            for(String w:a)
            {
                Integer seen=tally.get(w);
                tally.put(w,seen==null?1:seen+1);
            }
            length1=tally.size();
            c=new String[length1];
            b=new int[length1];
            int i=0;
            for(java.util.Map.Entry<String,Integer> e:tally.entrySet())
            {
                c[i]=e.getKey();
                b[i]=e.getValue();
                i++;
            }
        }

        /**
         * Orders the distinct words by descending count; words with equal counts are
         * ordered alphabetically so the result is deterministic.
         */
        public  static void Sorting()
        {
            Integer[] order=new Integer[length1];
            for(int i=0;i<length1;i++)
            {
                order[i]=i;
            }
            java.util.Arrays.sort(order,new java.util.Comparator<Integer>()
            {
                @Override
                public int compare(Integer x,Integer y)
                {
                    if(b[x]!=b[y])
                    {
                        return Integer.compare(b[y],b[x]);
                    }
                    return c[x].compareTo(c[y]);
                }
            });
            String[] sortedWords=new String[length1];
            int[] sortedCounts=new int[length1];
            for(int i=0;i<length1;i++)
            {
                sortedWords[i]=c[order[i]];
                sortedCounts[i]=b[order[i]];
            }
            c=sortedWords;
            b=sortedCounts;
        }

        /**
         * Formats entry {@code k} as {@code <word> <count>   <percentage>%}, where the
         * percentage is the word's share of all words, with two decimals.
         *
         * @param k index into the distinct-word list, {@code 0 <= k < } number of distinct words
         */
        static String line(int k)
        {
            return c[k]+" "+b[k]+"   "+String.format("%.2f",(double)b[k]/length*100)+"%";
        }

        /** Prints the first {@code nn} entries, or all of them if there are fewer. */
        public  static void show()
        {
            int shown=Math.min(nn,length1);
            for(int k=0;k<shown;k++)
            {
                System.out.println(line(k));
            }
        }

        public static void main(String[] args) throws IOException
        {
               System.out.println("请输入需要统计的个数:");
               nn=sc.nextInt();
               Readfile();
               Statistics();
               Sorting();
               show();
        }
    }
     
    功能1:输出文件中所有不重复的单词,按照出现次数由多到少排列,出现次数同样多的,以字典序排列
     
    思路:只需将输出结果改为单词加出现次数并写入文件,其他与要求2一致
     
    源码:
    import java.io.File;
    import java.io.InputStreamReader;
    import java.io.Reader;
    import java.nio.file.FileVisitResult;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.SimpleFileVisitor;
    import java.nio.file.attribute.BasicFileAttributes;
    import java.util.Scanner;
    import java.io.BufferedReader; 
    import java.io.BufferedWriter; 
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException; 
    /**
     * Lists every distinct word of a fixed text file with its number of occurrences,
     * most frequent first and alphabetically among equal counts, on standard output and
     * in a text file.
     *
     * <p>Text is lower-cased and only the letters a-z are kept. A word ends at a space,
     * a comma, a period or the end of a line; every other character is dropped without
     * ending the word (so {@code don't} is counted as {@code dont}).
     */
    public class word2
    {
        /** Text file analysed by {@link #Readfile()}. */
        private static final String FILE_PATH =
                "D:\\h\\halibote\\Harry Potter and the Sorcerer's Stone.txt";
        /** File that {@link #Writefile()} appends the result to. */
        private static final String OUTPUT_PATH = "D:\\h\\halibote\\t1.txt";

        /** Line currently being processed; {@code null} once the input is exhausted. */
        private static String str="";
        /** Reader that {@link #cun()} pulls further lines from. */
        private static BufferedReader cin=null;
        /** Every word of the input, in reading order. */
        private static final java.util.List<String> a=new java.util.ArrayList<String>();
        /** Distinct words; after {@link #Sorting()} the most frequent comes first. */
        private static String c[]=new String[0];
        /** {@code b[i]} is the number of occurrences of {@code c[i]}. */
        private static int b[]=new int[0];
        /** Total number of words. */
        private static int length=0;
        /** Number of distinct words. */
        private static int length1=0;
        /** Directory listing kept from the original program; not used by this class. */
        static File[] list = new File("D:\\h").listFiles();

        /**
         * Reads the file at the fixed path (as UTF-8) and collects its words.
         *
         * <p>On an I/O failure a message and the stack trace are printed; nothing is
         * rethrown.
         */
        public static void Readfile()
        {
            File file=new File(FILE_PATH);
            // try-with-resources closes the stream even when reading fails half-way.
            try(BufferedReader reader=new BufferedReader(
                    new InputStreamReader(new FileInputStream(file),"UTF-8")))
            {
                load(reader);
            }
            catch(IOException e) {
                System.out.println("读取失败!");
                e.printStackTrace();
            }
        }

        /**
         * Discards previously collected words and collects the words supplied by
         * {@code reader} instead.
         *
         * @param reader source of the text; it is read to the end but not closed
         * @throws IOException if reading fails
         */
        static void load(BufferedReader reader) throws IOException
        {
            a.clear();
            cin=reader;
            str=cin.readLine();
            cun();
        }

        /**
         * Splits the current line and all remaining lines of the reader into words and
         * appends them to the word list.
         *
         * @throws IOException if reading the next line fails
         */
        public  static void  cun() throws IOException
        {
            StringBuilder current=new StringBuilder();
            while(str!=null)
            {
                // Locale.ROOT keeps the mapping independent of the machine's locale.
                str=str.toLowerCase(java.util.Locale.ROOT);
                for(int i=0;i<str.length();i++)
                {
                    char ch=str.charAt(i);
                    if(ch>='a'&&ch<='z')
                    {
                        current.append(ch);
                    }
                    else if(ch==' '||ch==','||ch=='.')
                    {
                        endWord(current);
                    }
                }
                // readLine() strips the line break, so the word has to be ended here;
                // otherwise it would be glued to the first word of the next line.
                endWord(current);
                str=(cin==null)?null:cin.readLine();
            }
            length=a.size();
        }

        /** Stores the word collected so far, if any, and empties the buffer. */
        private static void endWord(StringBuilder current)
        {
            if(current.length()>0)
            {
                a.add(current.toString());
                current.setLength(0);
            }
        }

        /**
         * Builds the list of distinct words and their occurrence counts from the
         * collected words. Distinct words are in order of first appearance.
         */
        public static void Statistics()
        {
            // LinkedHashMap: one pass over the words, first-appearance order preserved.
            java.util.Map<String,Integer> tally=new java.util.LinkedHashMap<String,Integer>();
            for(String w:a)
            {
                Integer seen=tally.get(w);
                tally.put(w,seen==null?1:seen+1);
            }
            length1=tally.size();
            c=new String[length1];
            b=new int[length1];
            int i=0;
            for(java.util.Map.Entry<String,Integer> e:tally.entrySet())
            {
                c[i]=e.getKey();
                b[i]=e.getValue();
                i++;
            }
        }

        /**
         * Orders the distinct words by descending count; words with equal counts are
         * put in dictionary order.
         */
        public  static void  Sorting()
        {
            Integer[] order=new Integer[length1];
            for(int i=0;i<length1;i++)
            {
                order[i]=i;
            }
            java.util.Arrays.sort(order,new java.util.Comparator<Integer>()
            {
                @Override
                public int compare(Integer x,Integer y)
                {
                    if(b[x]!=b[y])
                    {
                        return Integer.compare(b[y],b[x]);
                    }
                    return c[x].compareTo(c[y]);
                }
            });
            String[] sortedWords=new String[length1];
            int[] sortedCounts=new int[length1];
            for(int i=0;i<length1;i++)
            {
                sortedWords[i]=c[order[i]];
                sortedCounts[i]=b[order[i]];
            }
            c=sortedWords;
            b=sortedCounts;
        }

        /**
         * Appends the result to the output file, creating the file if necessary.
         *
         * @throws IOException if the file cannot be opened or written
         */
        public static void Writefile() throws IOException
        {
            File file=new File(OUTPUT_PATH);
            // Append mode: output of earlier runs is kept. The writer is closed even
            // when writing fails.
            try(BufferedWriter out=new BufferedWriter(new FileWriter(file,true)))
            {
                writeTo(out);
            }
        }

        /**
         * Writes one numbered {@code <word> <count>} entry per distinct word, each
         * followed by a single space, in the current order.
         *
         * @param out destination; it is neither flushed nor closed
         * @throws IOException if writing fails
         */
        static void writeTo(java.io.Writer out) throws IOException
        {
            for(int i=0;i<length1;i++)
            {
                out.write("这是第"+(i+1)+"个: ");
                out.write(c[i]+" "+b[i]);
                out.write(" ");
            }
        }

        /** Prints every distinct word with its count on one line of standard output. */
        public static void show1()
        {
            for(int k=0;k<length1;k++)
            {
                System.out.print(c[k]+" "+b[k]+" ");
            }
        }

        public static void main(String[] args) throws IOException
        {
               Readfile();
               Statistics();
               Sorting();
               System.out.println("程序中所有不重复的单词!");
               show1();
               Writefile();
        }
    }
  • 相关阅读:
    阿里云OSS进行文件下载时,报NOSuchKeys: com.aliyun.oss.OSSException: The specified key does not exist.
    [JAVA异常]ERROR: JDWP Unable to get JNI 1.2 environment, jvm->GetEnv() return code = -2 JDWP exit erro
    mybatis 中的<![CDATA[ ]]>
    HttpClients.custom的创建
    RestTemplate可以自定义重试次数
    RegxUtils正则表达式工具类
    MYSQL中 != 和 is not的区别
    ccna ccnp ccie 区别
    【IDEA】IDEA SpringBoot访问不到webapp下的内容
    日志 | logback | logback-spring.xml
  • 原文地址:https://www.cnblogs.com/songxinai/p/11794918.html
Copyright © 2011-2022 走看看