zoukankan      html  css  js  c++  java
  • 文章中字母、单词、短语的检索(含无用词表)Java代码实现

    日期:2019.5.9

    博客期:073

    星期四

       今天软件工程课上又做了测试,老师说我们的速度太慢了,实际上我也觉得自己很慢。老师说,这是上一届学生在大二上学期时的速度,所以呢?意思就是说,我们和上一届的学长学姐们相比,差的是天与地的距离啊!emmmm......唉~我也承认了!以下是我提交的源码,请各位欣赏,有疑问的话,评论区里见!

     源码:

    1 package basic;
    2 
    /**
     * Mutable counter for a single letter: the letter itself plus the number of
     * times it has been seen.
     */
    public class Chara {
        /** The letter this counter belongs to. */
        public char str = '\0';

        /** Number of occurrences recorded so far. */
        public int times = 0;
    }
    char.Chara.java
     1 package basic;
     2 
     3 import java.io.File;
     4 import java.util.ArrayList;
     5 import java.util.List;
     6 import java.util.Scanner;
     7 
     8 public class Justice {
     9     public List <Chara> list = new ArrayList<Chara>();
    10     public int length = 0;
    11     public boolean isAccess(char c){
    12         return ((c>='a'&&c<='z')||(c>='A'&&c<='Z'));
    13     }
    14     public double p(char c){
    15         int seat = (changeToSmall(c)-'a');
    16         int time = list.get(seat).times;
    17         return (double)((double)time/(double)length);
    18     }
    19     public char changeToSmall(char c){
    20         if(c>='a'&&c<='z')
    21             return c;
    22         else
    23             return (char)(c-'A'+'a');
    24     }
    25     public void dealFile(String filename) throws Exception{
    26         File f = new File(filename);
    27         if(!f.exists())
    28         {
    29             f.createNewFile();
    30         }
    31         Scanner sc = new Scanner(f);
    32         while(sc.hasNext())
    33         {
    34             String s = sc.next();
    35             for(int t=0;t<s.length();++t)
    36             {
    37                 char c = s.charAt(t);
    38                 if(this.isAccess(c))
    39                 {
    40                     int seat = (changeToSmall(c)-'a');
    41                     Chara ch = list.get(seat);
    42                     ch.times++;
    43                     list.set(seat, ch);
    44                     ++length;
    45                 }
    46             }
    47         }
    48         
    49         sc.close();
    50     }
    51     public void changeChannel(){
    52         int size = list.size();
    53         for(int i=0;i<size;++i)
    54         {
    55             for(int j=0;j<size-1;++j)
    56             {
    57                 if(list.get(j).times<list.get(j+1).times)
    58                 {
    59                     Chara temp = list.get(j);
    60                     list.set(j,list.get(j+1));
    61                     list.set(j+1, temp);
    62                 }
    63             }
    64         }
    65     }
    66     public void display(){
    67         int leng = list.size();
    68         for(int i=0;i<leng;++i)
    69         {
    70             char c = (char)(i+'a');
    71             System.out.println((char)(i+'a')+"出现了"+(list.get(i).times)+"次,它的频率是"+p(c)+"	");
    72         }
    73     }
    74     public Justice(){
    75         for(char i='a';i<='z';++i)
    76         {
    77             Chara c = new Chara();
    78             c.str = i;
    79             c.times = 0;
    80             list.add(c);
    81         }
    82     }
    83 }
    Justice.java
     1 package basic;
     2 
     3 public class Main {
     4     public static void main(String[] args) throws Exception {
     5         Justice justice = new Justice();
     6         justice.dealFile("txt/piao.txt");
     7         justice.changeChannel();
     8         justice.display();
     9     }
    10 }
    Main.java
     1 package more;
     2 
     /**
      * Mutable counter for a word or phrase: the text plus the number of times it
      * has been seen.
      */
     public class Chara {
         /** The counted text. */
         public String str;

         /** Number of occurrences recorded so far. */
         public int times;

         /** Creates a counter for the empty string with zero occurrences. */
         public Chara() {
             this("", 0);
         }

         /**
          * Creates a counter with the given text and starting count.
          *
          * @param str   the counted text
          * @param times initial number of occurrences
          */
         public Chara(String str, int times) {
             this.str = str;
             this.times = times;
         }
     }
    word.Chara.java
      1 package more;
      2 
      3 import java.io.File;
      4 import java.io.FileNotFoundException;
      5 import java.util.ArrayList;
      6 import java.util.List;
      7 import java.util.Scanner;
      8 
      9 public class EditTable{
     10 
     11     public List<Chara> list = new ArrayList<Chara>();
     12     public List<String> listString = new ArrayList<String>();
     13     public int length = 0;
     14     public boolean isAddmitted = false;
     15     //单词处理
     16     public String changeWord(String str){
     17         String newStr = str.replace("“","");
     18         newStr = newStr.replace(",","");
     19         newStr = newStr.replace("”","");
     20         newStr = newStr.replace(".","");
     21         newStr = newStr.replace("?","");
     22         newStr = newStr.replace("!","");
     23         newStr = newStr.replace(":","");
     24         return newStr.toLowerCase();
     25     }
     26     public boolean isInUnUseTable(String str){
     27         int leng = listString.size();
     28         for(int i=0;i<leng;++i)
     29         {
     30             if(str.toLowerCase().compareTo(listString.get(i))==0)
     31                 return true;
     32         }
     33         return false;
     34     }
     35     //判断是否可以通过
     36     public boolean isAccess(String str){
     37         int leng = str.length();
     38         str = this.changeWord(str);
     39         
     40         if(isAddmitted)
     41         {
     42             if(isInUnUseTable(str))
     43                 return false;
     44         }
     45         
     46 
     47         for(int i=0;i<leng;++i)
     48         {
     49             char c = str.charAt(i);
     50             if(!((c>='a'&&c<='z')||(c>='A'&&c<='Z')))
     51                 return false;
     52         }
     53         return true;
     54     }
     55     public boolean isAddmitted() {
     56         return isAddmitted;
     57     }
     58     public void setAddmitted(boolean isAddmitted) {
     59         this.isAddmitted = isAddmitted;
     60     }
     61     //判断位置
     62     public int seatAt(String c){
     63         int leng = list.size();
     64         for(int i=0;i<leng;++i)
     65         {
     66             String trp = list.get(i).str;
     67             if(trp.compareTo(c)==0)
     68                 return i;
     69         }
     70         return -1;
     71     }
     72     //判断并添加
     73     public void AddString(String str){
     74         if(isAccess(changeWord(str)))
     75         {
     76             int seat = seatAt(changeWord(str));
     77             if(seat==-1)
     78             {
     79                 Chara chara = new Chara(changeWord(str),1);
     80                 list.add(chara);
     81             }
     82             else
     83             {
     84                 Chara chara = list.get(seat);
     85                 chara.times++;
     86                 list.set(seat, chara);
     87             }
     88             ++length;
     89         }
     90     }
     91     //排序
     92     public void changeChannel(){
     93         int size = list.size();
     94         for(int i=0;i<size;++i)
     95         {
     96             for(int j=0;j<size-1;++j)
     97             {
     98                 if(list.get(j).times<list.get(j+1).times)
     99                 {
    100                     Chara temp = list.get(j);
    101                     list.set(j,list.get(j+1));
    102                     list.set(j+1, temp);
    103                 }
    104             }
    105         }
    106     }
    107     //概率
    108     public double p(String str){
    109         int seat = seatAt(changeWord(str));
    110         if(seat==-1)
    111             return 0;
    112         int time = list.get(seat).times;
    113         return ((double)time/(double)length);
    114     }
    115     //展示
    116     public void display(int seat){
    117         Chara cha = list.get(seat);
    118         System.out.println("单词:"+cha.str+(cha.str.length()<=2?"		":"	")+"出现次数:"+cha.times+"	"+"概率为:"+((double) Math.round(p(cha.str) * 10000) / 100)+"%	");
    119     }
    120     public void Display(int N){
    121         for(int i=0;i<N&&i<list.size();++i)
    122             display(i);
    123     }
    124     public EditTable() throws FileNotFoundException{
    125 
    126         Scanner sc = new Scanner(new File("txt/stopword.txt"));
    127         while(sc.hasNext())
    128         {
    129             String str = sc.next();
    130             listString.add(str);
    131         }
    132     }
    133     public EditTable(boolean isAd) throws FileNotFoundException{
    134         this.isAddmitted = isAd;
    135         Scanner sc = new Scanner(new File("txt/stopword.txt"));
    136         while(sc.hasNext())
    137         {
    138             String str = sc.next();
    139             listString.add(str);
    140         }
    141     }
    142     //处理文件
    143     public void DealFile(String fileName) throws Exception{
    144         File f = new File(fileName);
    145         Scanner sc = new Scanner(f);
    146         while(sc.hasNext())
    147         {
    148             String str = sc.next();
    149             AddString(str);
    150         }
    151         sc.close();
    152     }
    153 
    154 }
    EditTable.java
      1 package more;
      2 
      3 import java.io.File;
      4 import java.io.FileNotFoundException;
      5 import java.util.ArrayList;
      6 import java.util.List;
      7 import java.util.Scanner;
      8 
      9 import more.Chara;
     10 
     11 public class Table {
     12     public List<Chara> list = new ArrayList<Chara>();
     13     public List<String> listString = new ArrayList<String>();
     14     public int length = 0;
     15     public boolean isAddmitted = false;
     16     //单词处理
     17     public String changeWord(String str){
     18         String newStr = str.replace("“","");
     19         newStr = newStr.replace(",","");
     20         newStr = newStr.replace("”","");
     21         newStr = newStr.replace(".","");
     22         newStr = newStr.replace("?","");
     23         newStr = newStr.replace("!","");
     24         newStr = newStr.replace(":","");
     25         return newStr.toLowerCase();
     26     }
     27     public static boolean isContainSpecifical(String str){
     28         if(str.contains("“"))
     29             return false;
     30         if(str.contains("”"))
     31             return false;
     32         if(str.contains(","))
     33             return false;
     34         if(str.contains("."))
     35             return false;
     36         if(str.contains("?"))
     37             return false;
     38         if(str.contains("!"))
     39             return false;
     40         if(str.contains(":"))
     41             return false;
     42         return true;
     43     }
     44     public boolean isInUnUseTable(String str){
     45         int leng = listString.size();
     46         for(int i=0;i<leng;++i)
     47         {
     48             if(str.toLowerCase().compareTo(listString.get(i))==0)
     49                 return true;
     50         }
     51         return false;
     52     }
     53     //判断是否可以通过
     54     public boolean isAccess(String str){
     55         int leng = str.length();
     56         str = this.changeWord(str);
     57         
     58         if(isAddmitted)
     59         {
     60             if(isInUnUseTable(str))
     61                 return false;
     62         }
     63         
     64 
     65         for(int i=0;i<leng;++i)
     66         {
     67             char c = str.charAt(i);
     68             if(!((c>='a'&&c<='z')||(c>='A'&&c<='Z')))
     69                 return false;
     70         }
     71         return true;
     72     }
     73     public boolean isAddmitted() {
     74         return isAddmitted;
     75     }
     76     public void setAddmitted(boolean isAddmitted) {
     77         this.isAddmitted = isAddmitted;
     78     }
     79     //判断位置
     80     public int seatAt(String c){
     81         int leng = list.size();
     82         for(int i=0;i<leng;++i)
     83         {
     84             String trp = list.get(i).str;
     85             if(trp.compareTo(c)==0)
     86                 return i;
     87         }
     88         return -1;
     89     }
     90     //判断并添加
     91     public void AddString(String str){
     92         if(isAccess(changeWord(str)))
     93         {
     94             int seat = seatAt(changeWord(str));
     95             if(seat==-1)
     96             {
     97                 Chara chara = new Chara(changeWord(str),1);
     98                 list.add(chara);
     99             }
    100             else
    101             {
    102                 Chara chara = list.get(seat);
    103                 chara.times++;
    104                 list.set(seat, chara);
    105             }
    106             ++length;
    107         }
    108     }
    109     //排序
    110     public void changeChannel(){
    111         int size = list.size();
    112         for(int i=0;i<size;++i)
    113         {
    114             for(int j=0;j<size-1;++j)
    115             {
    116                 if(list.get(j).times<list.get(j+1).times)
    117                 {
    118                     Chara temp = list.get(j);
    119                     list.set(j,list.get(j+1));
    120                     list.set(j+1, temp);
    121                 }
    122             }
    123         }
    124     }
    125     //概率
    126     public double p(String str){
    127         int seat = seatAt(changeWord(str));
    128         if(seat==-1)
    129             return 0;
    130         int time = list.get(seat).times;
    131         return ((double)time/(double)length);
    132     }
    133     //展示
    134     public void display(int seat){
    135         Chara cha = list.get(seat);
    136         System.out.println("单词:"+cha.str+(cha.str.length()<=2?"		":"	")+"出现次数:"+cha.times+"	"+"概率为:"+((double) Math.round(p(cha.str) * 10000) / 100)+"%	");
    137     }
    138     public void Display(int N){
    139         for(int i=0;i<N&&i<list.size();++i)
    140             display(i);
    141     }
    142     public Table() throws FileNotFoundException{
    143 
    144         Scanner sc = new Scanner(new File("txt/stopword.txt"));
    145         while(sc.hasNext())
    146         {
    147             String str = sc.next();
    148             listString.add(str);
    149         }
    150     }
    151     public Table(boolean isAd) throws FileNotFoundException{
    152         this.isAddmitted = isAd;
    153         Scanner sc = new Scanner(new File("txt/stopword.txt"));
    154         while(sc.hasNext())
    155         {
    156             String str = sc.next();
    157             listString.add(str);
    158         }
    159     }
    160     //读取
    161     public void AddMenu(List <String> sqlList){
    162         int length = sqlList.size();
    163         String ss = "";
    164         if(length!=0)
    165             ss = sqlList.get(length-1);
    166         for(int i=length-2;i>=0;--i)
    167         {
    168             ss = sqlList.get(i) + " " + ss;
    169             AddString(ss);
    170         }
    171     }
    172     //处理文件
    173     public void DealFile(String fileName) throws Exception{
    174         File f = new File(fileName);
    175         Scanner sc = new Scanner(f);
    176         boolean isT = true;
    177         while(sc.hasNext())
    178         {
    179             while(isT&&sc.hasNext())
    180             {
    181                 List <String> strSql = new ArrayList<String>();
    182                 String str = sc.next();
    183                 if(isInUnUseTable(str))
    184                     break;
    185                 strSql.add(str);
    186                 AddMenu(strSql);
    187                 if(Table.isContainSpecifical(str))
    188                     break;
    189             }
    190         }
    191         sc.close();
    192     }
    193 }
    Table.java
      1 package running;
      2 
      3 import java.io.File;
      4 import java.io.FileWriter;
      5 import java.io.PrintWriter;
      6 import java.util.Scanner;
      7 
      8 import more.EditTable;
      9 import more.Table;
     10 
     11 import basic.Justice;
     12 
     13 public class TestMain {
     14     //第0步
     15     public static void process0(String file) throws Exception{
     16         Justice justice = new Justice();
     17         justice.dealFile(file);
     18         justice.changeChannel();
     19         justice.display();
     20     }
     21     //第1步
     22     public static void process1(String file) throws Exception{
     23         EditTable table = new EditTable();
     24         table.DealFile(file);
     25         table.changeChannel();
     26         table.Display(99999999);
     27     }
     28     //第2步
     29     public static void process2(String file,int num) throws Exception{
     30         EditTable table = new EditTable();
     31         table.DealFile(file);
     32         table.changeChannel();
     33         table.Display(num);
     34     }
     35     //第3步
     36     public static void process3(String file,int num) throws Exception{
     37         EditTable table = new EditTable(true);
     38         table.DealFile(file);
     39         table.changeChannel();
     40         table.Display(num);
     41     }
     42     //第4步
     43     public static void process4(String file,int num) throws Exception{
     44         Table table = new Table();
     45         table.DealFile(file);
     46         table.changeChannel();
     47         table.Display(999999);
     48     }
     49     //停用词
     50     public static void stopWord(String file) throws Exception{
     51         File f = new File("txt/stopword.txt");
     52         PrintWriter pw = new PrintWriter(new FileWriter(f,true));
     53         pw.println();
     54         pw.println(file);
     55         pw.close();
     56     }
     57     //
     58     public static void main(String[] args) throws Exception {
     59         Scanner sc = new Scanner(System.in);
     60         boolean isAccess = true;
     61         while(isAccess)
     62         {
     63             System.out.println("===============================================");
     64             System.out.println(" p0 + 文件路径                    ----------->      分析字母");
     65             System.out.println(" p1 + 文件路径                    ----------->      分析单词");
     66             System.out.println(" p2 + 文件路径  + 前几项     -----------> 分析频率最高的单词");
     67             System.out.println(" p3 + 文件路径  + 前几项     -----------> 分析频率最高的单词(过滤)");
     68             System.out.println(" sw + 单词                           ----------->      添加停用词");
     69             System.out.println(" p4 + 文件路径  + 前几项    ----------->      分析短语");
     70             System.out.println(" q                     ----------->        退出");
     71             System.out.println();
     72             String order = sc.next();
     73             String file = sc.next();
     74             if(order.compareTo("p0")==0)
     75             {
     76                 TestMain.process0(file);
     77             }
     78             else if(order.compareTo("p1")==0)
     79             {
     80                 TestMain.process1(file);
     81             }
     82             else if(order.compareTo("p2")==0)
     83             {
     84                 int num = sc.nextInt();
     85                 TestMain.process2(file,num);
     86             }
     87             else if(order.compareTo("p3")==0)
     88             {
     89                 int num = sc.nextInt();
     90                 TestMain.process3(file,num);
     91             }
     92             else if(order.compareTo("sw")==0)
     93             {
     94                 TestMain.stopWord(file);
     95             }
     96             else if(order.compareTo("p4")==0)
     97             {
     98                 int num = sc.nextInt();
     99                 TestMain.process4(file,num);
    100             }
    101             else if(order.compareTo("q")==0)
    102             {
    103                 System.out.println("欢迎下次使用!");
    104                 break;
    105             }
    106             System.out.println("===============================================");
    107         }
    108         sc.close();
    109     }
    110 }
    TestMain.java
     1 package more;
     2 
     3 import java.io.File;
     4 
     5 public class Test {
     6     public static void TestForPath(String path) throws Exception{
     7         File file = new File(path);
     8         if(file.isDirectory())
     9         {
    10             File []fl = file.listFiles();
    11             int leng = fl.length;
    12             for(int i=0;i<leng;++i)
    13             {
    14                 String fileName = path+"/"+fl[i].getName();
    15                 EditTable table = new EditTable();
    16                 System.out.println("====================================================");
    17                 System.out.println("文件名称:"+fl[i].getName());
    18                 table.DealFile(fileName);
    19                 table.changeChannel();
    20                 table.Display(40);
    21             }
    22         }
    23     }
    24     public static void main(String[] args) throws Exception {
    25         TestForPath("txt");
    26     }
    27 }
    Test.java
      1 package running;
      2 
      3 import java.io.File;
      4 import java.io.FileWriter;
      5 import java.io.PrintWriter;
      6 import java.util.Scanner;
      7 
      8 import more.EditTable;
      9 import more.Table;
     10 
     11 import basic.Justice;
     12 
     13 public class TestMain {
     14     //第0步
     15     public static void process0(String file) throws Exception{
     16         Justice justice = new Justice();
     17         justice.dealFile(file);
     18         justice.changeChannel();
     19         justice.display();
     20     }
     21     //第1步
     22     public static void process1(String file) throws Exception{
     23         EditTable table = new EditTable();
     24         table.DealFile(file);
     25         table.changeChannel();
     26         table.Display(99999999);
     27     }
     28     //第2步
     29     public static void process2(String file,int num) throws Exception{
     30         EditTable table = new EditTable();
     31         table.DealFile(file);
     32         table.changeChannel();
     33         table.Display(num);
     34     }
     35     //第3步
     36     public static void process3(String file,int num) throws Exception{
     37         EditTable table = new EditTable(true);
     38         table.DealFile(file);
     39         table.changeChannel();
     40         table.Display(num);
     41     }
     42     //第4步
     43     public static void process4(String file,int num) throws Exception{
     44         Table table = new Table();
     45         table.DealFile(file);
     46         table.changeChannel();
     47         table.Display(999999);
     48     }
     49     //停用词
     50     public static void stopWord(String file) throws Exception{
     51         File f = new File("txt/stopword.txt");
     52         PrintWriter pw = new PrintWriter(new FileWriter(f,true));
     53         pw.println();
     54         pw.println(file);
     55         pw.close();
     56     }
     57     //
     58     public static void main(String[] args) throws Exception {
     59         Scanner sc = new Scanner(System.in);
     60         boolean isAccess = true;
     61         while(isAccess)
     62         {
     63             System.out.println("===============================================");
     64             System.out.println(" p0 + 文件路径                    ----------->      分析字母");
     65             System.out.println(" p1 + 文件路径                    ----------->      分析单词");
     66             System.out.println(" p2 + 文件路径  + 前几项     -----------> 分析频率最高的单词");
     67             System.out.println(" p3 + 文件路径  + 前几项     -----------> 分析频率最高的单词(过滤)");
     68             System.out.println(" sw + 单词                           ----------->      添加停用词");
     69             System.out.println(" p4 + 文件路径  + 前几项    ----------->      分析短语");
     70             System.out.println(" q                     ----------->        退出");
     71             System.out.println();
     72             String order = sc.next();
     73             String file = sc.next();
     74             if(order.compareTo("p0")==0)
     75             {
     76                 TestMain.process0(file);
     77             }
     78             else if(order.compareTo("p1")==0)
     79             {
     80                 TestMain.process1(file);
     81             }
     82             else if(order.compareTo("p2")==0)
     83             {
     84                 int num = sc.nextInt();
     85                 TestMain.process2(file,num);
     86             }
     87             else if(order.compareTo("p3")==0)
     88             {
     89                 int num = sc.nextInt();
     90                 TestMain.process3(file,num);
     91             }
     92             else if(order.compareTo("sw")==0)
     93             {
     94                 TestMain.stopWord(file);
     95             }
     96             else if(order.compareTo("p4")==0)
     97             {
     98                 int num = sc.nextInt();
     99                 TestMain.process4(file,num);
    100             }
    101             else if(order.compareTo("q")==0)
    102             {
    103                 System.out.println("欢迎下次使用!");
    104                 break;
    105             }
    106             System.out.println("===============================================");
    107         }
    108         sc.close();
    109     }
    110 }
    TestMain.java

     附上截图:

  • 相关阅读:
    Kubernetes实战:高可用集群的搭建和部署
    华为云MVP程云:知识化转型,最终要赋能一线
    支持60+数据传输链路,华为云DRS链路商用大盘点
    关于单元测试的那些事儿,Mockito 都能帮你解决
    深入原生冰山安全体系,详解华为云安全服务如何构筑全栈安全
    云小课|ModelArts Pro 视觉套件:零代码构建视觉AI应用
    FLINK重点原理与机制:内存(1)task之间的数据传输
    FLINK重点原理与机制:内存(2)网络流控及反压机制剖析(一)
    FLINK重点原理与机制:状态(3)两阶段提交
    FLINK重点原理与机制:状态(2)Flink的检查点算法CHECKPOINT
  • 原文地址:https://www.cnblogs.com/onepersonwholive/p/10872391.html
Copyright © 2011-2022 走看看