package com.page.util;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/**
 * Removes duplicate lines from a text file and appends the remaining lines to another file.
 *
 * <p>Lines are compared by their full content ({@link String#equals(Object)}), not merely by
 * {@code hashCode()}, so two different lines whose hash codes collide are both kept. The first
 * occurrence of every line is retained and the original line order is preserved.
 *
 * <p>Created: 2012.07.14 18:35:00
 *
 * @author BrinPage
 */
public class UDB {

    /** Terminator written after every output line. */
    private static final String LINE_TERMINATOR = "\r\n";

    /**
     * Reads every line of {@code filePath1}, drops repeated lines and appends the unique lines
     * to {@code filePath2}.
     *
     * <p>The number of lines read and the number of lines left after de-duplication are printed
     * to standard output, in that order. The output file is opened in append mode, so content it
     * already holds is kept. Both files are read and written with the platform default charset.
     *
     * <p>I/O failures are not propagated: the stack trace is printed and the method returns.
     * Both streams are closed on every path.
     *
     * @param filePath1 path of the input file to read lines from
     * @param filePath2 path of the output file the unique lines are appended to
     */
    public void retrievalRepeat(String filePath1, String filePath2) {
        BufferedReader reader = null;
        FileWriter writer = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath1)));
            // Second argument 'true' selects append mode.
            writer = new FileWriter(filePath2, true);

            List<String> lines = readAllLines(reader);
            // Amount of raw input data.
            System.out.println(lines.size());

            List<String> uniqueLines = removeDuplicates(lines);
            // Amount of data left once duplicates are gone.
            System.out.println(uniqueLines.size());

            for (String line : uniqueLines) {
                writer.write(line);
                writer.write(LINE_TERMINATOR);
            }
            // Flush inside the try so that a failed write is reported rather than lost in close().
            writer.flush();
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is a subclass of IOException.
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
            closeQuietly(reader);
        }
    }

    /**
     * Reads the remaining content of {@code reader} line by line.
     *
     * @param reader source to read from; it is not closed by this method
     * @return the lines in the order they were read, without line terminators
     * @throws IOException if reading fails
     */
    private static List<String> readAllLines(BufferedReader reader) throws IOException {
        List<String> lines = new ArrayList<String>();
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            lines.add(line);
        }
        return lines;
    }

    /**
     * Returns the distinct elements of {@code lines}, keeping the first occurrence of each.
     *
     * <p>A {@link LinkedHashSet} uses the hash code only to narrow the search and confirms every
     * match with {@code equals()}, so colliding hash codes never cause a distinct line to be
     * dropped, and runs of three or more identical lines collapse to a single one.
     *
     * @param lines the lines to de-duplicate; not modified
     * @return a new list holding each distinct line once, in first-seen order
     */
    private static List<String> removeDuplicates(List<String> lines) {
        Set<String> distinct = new LinkedHashSet<String>(lines);
        return new ArrayList<String>(distinct);
    }

    /**
     * Closes {@code resource} if it is non-null, printing (not propagating) any failure.
     *
     * @param resource the resource to close; may be {@code null}
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the de-duplication on two hard-coded file paths and prints the value returned by
     * {@code CurrentTime.getCurrentTime()} before and after the run.
     *
     * <p>NOTE(review): {@code CurrentTime} is declared elsewhere in the project; the format of the
     * value it returns is not visible here.
     *
     * @param args command-line arguments; ignored
     */
    public static void main(String[] args) {
        CurrentTime time = new CurrentTime();
        System.out.println("写入文件数据开始时间:" + time.getCurrentTime());
        UDB udb = new UDB();
        udb.retrievalRepeat("E:\\novel author list.txt", "E:\\novel author.txt");
        System.out.println("写入文件数据结束时间:" + time.getCurrentTime());
    }
}
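// Note: equal hashCode() values do not guarantee equal strings. When two hash codes match, the values should additionally be compared with equals() so that only true duplicates are removed.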