从类似如下的文本文件中读取出所有的姓名,并打印出重复的姓名和重复的次数,并按重复次数排序
1,张三,28
2,李四,35
3,张三,28
4,王五,35
5,张三,28
6,李四,35
7,赵六,28
8,田七,35
package com.swift; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.TreeMap; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; public class IO_Sort_Select { public static void main(String[] args) throws IOException { /* * 从类似如下的文本文件中读取出所有的姓名,并打印出重复的姓名和重复的次数,并按重复次数排序 */ File file_s = new File("e:\neck\data.docx"); File dir = new File("e:\neck"); if (!dir.exists() && dir.isDirectory()) { System.out.println("目录不存在,即将创建..."); dir.mkdirs(); } File file_t = new File(dir, "data.txt"); List<String> list = new ArrayList<String>(); String text = readFromDocx(file_s); String[] hang = text.split("\n"); for (int i = 0; i < hang.length; i++) { String[] lie = hang[i].split("\,"); for (int j = 0; j < lie.length; j++) { if (j == 1) { list.add(lie[j]); } } } printList(list); Map<String, Integer> map = new TreeMap<String, Integer>(); for (String str : list) { map.put(str, 0); } for(String str:list) { if(map.containsKey(str)) { int num=map.get(str); num++; map.remove(str); map.put(str, num); } } List<Entry<String,Integer>> listMap=new ArrayList<Entry<String,Integer>>(map.entrySet()); Collections.sort(listMap, new Comparator<Entry<String,Integer>>(){ @Override public int compare(Entry<String, Integer> arg0, Entry<String, Integer> arg1) { int num=arg1.getValue()-arg0.getValue(); return num==0?arg0.getKey().compareTo(arg1.getKey()):num; } }); for(Entry<String, Integer> entry:listMap) { System.out.println("重复的姓名是 :"+entry.getKey()+" 重复的次数是:"+entry.getValue()); } } private static void printList(List<String> list) { for (String str : list) { System.out.println(str); } } public static String readFromDocx(File file) throws IOException { FileInputStream fis = new FileInputStream(file); XWPFDocument docx = new XWPFDocument(fis); XWPFWordExtractor extractor = new XWPFWordExtractor(docx); String text = extractor.getText(); return text; } }
上面代码读取的是docx的word文件,直接读取会出现乱码,因word中不仅有文本还有压缩的其他属性等内容,所以要使用poi的jar包进行解析
解析的jar包如下图
这个应该比较全面了,也可以解析excel操作或写入表格等
下载地址:
链接: https://pan.baidu.com/s/1htYPKLA 密码: e36e
下面是读取doc的方法
package com.swift; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.TreeMap; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; public class IO_Sort_Select { public static void main(String[] args) throws IOException { /* * 从类似如下的文本文件中读取出所有的姓名,并打印出重复的姓名和重复的次数,并按重复次数排序 */ File file_s = new File("e:\neck\data.doc"); File dir = new File("e:\neck"); if (!dir.exists() && dir.isDirectory()) { System.out.println("目录不存在,即将创建..."); dir.mkdirs(); } List<String> list = new ArrayList<String>(); String text = readFromDoc(file_s); String[] hang = text.split("\r"); for (int i = 0; i < hang.length; i++) { System.out.println(hang.length); System.out.println(hang[i]); String[] lie = hang[i].split("\,"); for (int j = 0; j < lie.length; j++) { if (j == 1) { list.add(lie[j]); } } } printList(list); Map<String, Integer> map = new TreeMap<String, Integer>(); for (String str : list) { map.put(str, 0); } for(String str:list) { if(map.containsKey(str)) { int num=map.get(str); num++; map.remove(str); map.put(str, num); } } List<Entry<String,Integer>> listMap=new ArrayList<Entry<String,Integer>>(map.entrySet()); Collections.sort(listMap, new Comparator<Entry<String,Integer>>(){ @Override public int compare(Entry<String, Integer> arg0, Entry<String, Integer> arg1) { int num=arg1.getValue()-arg0.getValue(); return num==0?arg0.getKey().compareTo(arg1.getKey()):num; } }); for(Entry<String, Integer> entry:listMap) { System.out.println("重复的姓名是 :"+entry.getKey()+" 重复的次数是:"+entry.getValue()); } } private static void printList(List<String> list) { for (String str : list) { System.out.println(str); } } public static String readFromDocx(File file) throws IOException { FileInputStream fis = new FileInputStream(file); XWPFDocument docx = new XWPFDocument(fis); XWPFWordExtractor extractor = new XWPFWordExtractor(docx); String text = extractor.getText(); return text; } public static String readFromDoc(File file) throws IOException { FileInputStream fis = new FileInputStream(file); HWPFDocument doc = new HWPFDocument(fis); String text = doc.getDocumentText(); return text; } }
doc读取出来的文本还不能用"\n"进行行分割,用的是"\r"才行,要不然只能分割出1行,这点注意,要不会觉得程序莫名其妙不安自己的思路走