zoukankan      html  css  js  c++  java
  • 指定文件目录遍历所有子目录统计文档的单词出现数量

    package javaClassHomework;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.text.DecimalFormat;
    import java.util.Comparator;
    import java.util.Scanner;
    import java.util.TreeMap;
    import java.util.TreeSet;
    
    /**
     * Console utility that lists every {@code .txt} file below a directory and prints
     * the most frequent words of one chosen document.
     *
     * <p>Flow of {@link #main}: read a directory path, print the paths of all
     * {@code .txt} files found in it (recursively), read the path of one document,
     * then print its top-N words with their counts and share of all words.
     */
    public class test3
    {
        // Example input directory: C:/Users/Administrator/Desktop/hhh

        /**
         * The one reader used for every prompt. Creating a separate Scanner over
         * System.in in each method lets the first Scanner buffer input that later
         * Scanners were supposed to read, which breaks piped/redirected input.
         */
        private static final Scanner STDIN = new Scanner(System.in);

        /**
         * Program entry point.
         *
         * @param args unused
         * @throws IOException if the chosen document cannot be opened or read
         */
        public static void main(String[] args) throws IOException
        {
            File root = getfile();
            bianli(root);
            System.out.println("请选择要查询的文档");
            String name = STDIN.next();
            display(name);
        }

        /**
         * Recursively walks {@code file} and prints the path of every regular file whose
         * name ends with {@code .txt}.
         *
         * @param file directory to walk; if it cannot be listed nothing is printed
         */
        public static void bianli(File file) {
            File[] children = file.listFiles();
            if (children == null) {
                // listFiles() returns null when the path is not a directory or an
                // I/O error occurs; treat that as "nothing to list" instead of an NPE.
                return;
            }
            for (File child : children) {
                if (child.isDirectory()) {
                    bianli(child);
                } else if (child.getName().endsWith(".txt")) {
                    System.out.println(child.getPath());
                }
            }
        }

        /**
         * Counts the words of the document at {@code path}, asks on standard input how
         * many entries to show, and prints that many words ordered by descending count
         * (ties in natural String order) as {@code word count percentage}, followed by
         * the total number of words.
         *
         * <p>A word is a maximal run of ASCII letters; matching is case-sensitive.
         *
         * @param path path of the document to analyse
         * @throws IOException if the document cannot be opened or read
         */
        public static void display(String path) throws IOException {
            TreeMap<String,Integer> counts = new TreeMap<>();
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    for (String word : line.split("[^a-zA-Z]")) {
                        // Adjacent separators produce empty tokens; skip them.
                        if (!word.isEmpty()) {
                            counts.merge(word, 1, Integer::sum);
                        }
                    }
                }
            }

            int sum = 0;
            for (int occurrences : counts.values()) {
                sum += occurrences;
            }

            // Highest count first; equal counts fall back to the words' natural order
            // so that distinct words never compare as equal and none is dropped.
            Comparator<String> byCountDescThenWord = (a, b) -> {
                int byCount = Integer.compare(counts.get(b), counts.get(a));
                return byCount != 0 ? byCount : a.compareTo(b);
            };
            TreeSet<String> ranked = new TreeSet<>(byCountDescThenWord);
            for (String word : counts.keySet()) {
                ranked.add(word);
            }

            DecimalFormat percent = new DecimalFormat("0.00%");
            System.out.println("请输入要查询的个数");
            int count = STDIN.nextInt();
            int printed = 0;
            for (String word : ranked) {
                // Equality check (not >=) is intentional: a negative count lists every word.
                if (printed == count) {
                    break;
                }
                printed++;
                float share = (float) counts.get(word) / sum;
                System.out.println(word + " " + counts.get(word) + " " + percent.format(share));
            }

            System.out.println(sum);
        }

        /**
         * Reads lines from standard input until one names an existing path that is not
         * a regular file, and returns it.
         *
         * @return the accepted path as a {@link File}
         */
        public static File getfile() {
            while (true) {
                File candidate = new File(STDIN.nextLine());
                if (!candidate.exists()) {
                    System.out.println("输入的不是文件夹,请重新输入");
                } else if (candidate.isFile()) {
                    System.out.println("输入的是文件路径,请重新输入");
                } else {
                    return candidate;
                }
            }
        }
    }

    代码思路:

    三个方法:1. getfile:读取用户输入的路径,并确认它是一个已存在的文件夹

                      2. bianli:递归遍历该文件夹及其所有子目录,输出后缀名为 .txt 的文档路径

                      3. display:通过输入流逐行读取所选文档,统计每个单词出现的次数并按次数从高到低输出

  • 相关阅读:
    Data Security---->Control Access to the Organization
    Data Modeling
    Salesforce Platform Development Basic
    Customize your Chatter Experience.
    wamp自定义网站根目录及多站点配置
    1053-1055
    1046-1052
    1044-1045
    HDOJ 1038-1043
    HDOJ 1031-1037
  • 原文地址:https://www.cnblogs.com/yanwenhui/p/11794805.html
Copyright © 2011-2022 走看看