zoukankan      html  css  js  c++  java
  • 大二下学期学习进度(十)

    编码行数:850

    编码时长:13h

    发表博客数量:8

    所学知识点:

    统计文章中各单词出现的频率,并去掉常用单词。问题解决基本思路:先打开所要读取的文件:定义一个FileReader对象,用来初始化BufferedReader;new一个BufferedReader对象,将文件内容读取到缓存;再定义一个字符串缓存(StringBuilder),用来存放读取到的字符串。然后按行读取,每一行读取的内容用append方法追加到字符串缓存末尾,再用toString()将其转化为字符串;把其中的".",",","?","!",":","‘","’","“","”","—",";","-"等标点符号替换成空格,再根据空格分割成一个个单词放进字符串数组中。统计时用到Map:Map<String, Integer> map = new TreeMap<String, Integer>();两个类型参数分别代表出现的单词和其出现的次数。用循环遍历单词数组,先统一转化成小写,再调用自定义方法judgeNouse()去掉常见单词,然后用map.get方法查看其已出现的次数:如果返回null,说明该单词此前未出现过,其count记为1;否则为重复出现,其count++;并

    /**
     * Entity that pairs a word with the number of times it occurs, so that
     * word/count results can be held in a list.
     */
    class entity11 {

    	/** The word itself. */
    	String danci;

    	/** How many times the word occurs. */
    	int cishu;

    	/**
    	 * Creates a word/count pair.
    	 *
    	 * @param zimu  the word
    	 * @param cishu the occurrence count of the word
    	 */
    	public entity11(String zimu, int cishu) {
    		this.cishu = cishu;
    		this.danci = zimu;
    	}

    	/**
    	 * @return the occurrence count passed to the constructor
    	 */
    	public int getCishu() {
    		return this.cishu;
    	}

    	/**
    	 * @return the word passed to the constructor
    	 */
    	public String getDanci() {
    		return this.danci;
    	}
    }
    /**
     * Counts how often each word occurs in an article, ignoring the stop words
     * listed in a separate file, and prints the words ordered by descending
     * occurrence count together with their relative frequency.
     */
    public class DanciCollect {

    	/** File listing the stop words to ignore, one word per line. */
    	private static final String STOP_WORD_FILE = "judge.txt";

    	/** File holding the article to analyse. */
    	private static final String ARTICLE_FILE = "piao.txt";

    	/** Punctuation marks that are replaced by a space before the text is split into words. */
    	private static final String[] PUNCTUATION = {".",",","?","!",":","‘","’","“","”","—",";","-"};

    	/**
    	 * Tells whether a word should be kept, i.e. whether it is absent from the
    	 * stop-word file.
    	 *
    	 * @param str the word to check; compared verbatim against each line of the file
    	 * @return {@code false} when {@code str} equals a line of the stop-word file,
    	 *         {@code true} otherwise
    	 * @throws IOException if the stop-word file cannot be opened or read
    	 */
    	public static boolean judgeNouse(String str) throws IOException {
    		return !loadStopWords().contains(str);
    	}

    	/**
    	 * Reads every line of the stop-word file into a set.
    	 *
    	 * @return the lines of the stop-word file, unmodified
    	 * @throws IOException if the file cannot be opened or read
    	 */
    	private static java.util.Set<String> loadStopWords() throws IOException {
    		// Fully qualified on purpose: no import block is visible to extend.
    		java.util.Set<String> stopWords = new java.util.HashSet<>();
    		try (BufferedReader reader = new BufferedReader(new FileReader(STOP_WORD_FILE))) {
    			String line;
    			while ((line = reader.readLine()) != null) {
    				stopWords.add(line);
    			}
    		}
    		return stopWords;
    	}

    	/**
    	 * Converts a string to lower case one {@code char} at a time.
    	 *
    	 * @param str the string to convert
    	 * @return a string of the same length with every char mapped through
    	 *         {@link Character#toLowerCase(char)}
    	 */
    	public static String toLowerCase(String str) {
    		char[] chars = str.toCharArray();
    		for (int i = 0; i < chars.length; i++) {
    			chars[i] = Character.toLowerCase(chars[i]);
    		}
    		return new String(chars);
    	}

    	/**
    	 * Reads a whole text file into one string.
    	 *
    	 * @param file the file to read
    	 * @return the file content with each line followed by a single space
    	 * @throws IOException if the file cannot be opened or read
    	 */
    	private static String readArticle(File file) throws IOException {
    		StringBuilder text = new StringBuilder();
    		try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
    			String line;
    			while ((line = reader.readLine()) != null) {
    				// readLine() strips the line terminator; without a separator the last
    				// word of one line would fuse with the first word of the next.
    				text.append(line).append(' ');
    			}
    		}
    		return text.toString();
    	}

    	/**
    	 * Counts the words of the article file, skipping stop words, and prints the
    	 * total followed by one line per distinct word, most frequent first. Words
    	 * with equal counts are printed in the natural (sorted) order of the keys.
    	 * A missing input file is reported via a stack trace instead of being rethrown.
    	 *
    	 * @throws IOException if reading one of the input files fails
    	 */
    	public static void collect1() throws IOException {
    		try {
    			String text = readArticle(new File(ARTICLE_FILE));
    			for (String mark : PUNCTUATION) {
    				text = text.replace(mark, " ");
    			}

    			// Load the stop words once instead of re-reading the file for every word.
    			java.util.Set<String> stopWords = loadStopWords();

    			Map<String, Integer> counts = new TreeMap<String, Integer>();
    			int total = 0; // number of counted (non stop-word) words
    			for (String token : text.split("\\s+")) {
    				if (token.isEmpty()) {
    					continue; // leading whitespace yields one empty token
    				}
    				String word = toLowerCase(token);
    				if (stopWords.contains(word)) {
    					continue; // stop words are excluded from the statistics
    				}
    				total++;
    				counts.merge(word, 1, Integer::sum);
    			}

    			// List.sort is stable, so ties keep the TreeMap's key order.
    			List<Map.Entry<String, Integer>> result = new ArrayList<>(counts.entrySet());
    			result.sort((e1, e2) -> Integer.compare(e2.getValue(), e1.getValue()));

    			System.out.println("文章共计"+total+"个单词");
    			for (Map.Entry<String, Integer> entry : result) {
    				System.out.println(entry.getKey()+"在文章中出现"+entry.getValue()+"次,其频率为"+String.format("%.2f",entry.getValue()*1.0/total));
    			}
    		} catch (FileNotFoundException e) {
    			// Best effort: a missing input file is reported, not propagated.
    			e.printStackTrace();
    		}
    	}

    	/**
    	 * Program entry point; runs the word-frequency report.
    	 *
    	 * @param args unused
    	 * @throws IOException if reading one of the input files fails
    	 */
    	public static void main(String args[]) throws IOException {
    		collect1();
    	}
    }
    

      

    且通过map.put()将单词本身的字符串及其出现次数count存入map对象;最后将map转化成List集合,根据出现频率排序输出。源代码如下:

  • 相关阅读:
    以此来励志吧!!!(选自:知乎)
    【P1303】苹果二叉树
    【P1813】8的倍数
    2016.9.4 の 測試
    后缀数组
    个中模板
    基数排序
    【NOIP2014D2T3】解方程
    【HAOI2006】【BZOJ1051】【p1233】最受欢迎的牛
    java安全性-引用-分层-解耦
  • 原文地址:https://www.cnblogs.com/zjl-0217/p/10969760.html
Copyright © 2011-2022 走看看