zoukankan      html  css  js  c++  java
  • 将一个无法一次读入内存的大文件排序

    弄了一整天才弄出来，直接上代码：

      1 package com.test;
      2 
      3 import java.io.BufferedReader;
      4 import java.io.BufferedWriter;
      5 import java.io.File;
      6 import java.io.FileReader;
      7 import java.io.FileWriter;
      8 import java.io.IOException;
      9 import java.util.Collections;
     10 import java.util.Comparator;
     11 import java.util.Iterator;
     12 import java.util.LinkedList;
     13 import java.util.List;
     14 import java.util.PriorityQueue;
     15 import java.util.Random;
     16 
     /**
      * Sorts a file of integers that is too large to be read into memory at once
      * (an external merge sort).
      *
      * <p>The pipeline run by {@link #main(String[])} is:
      * <ol>
      *   <li>{@link #createData()} writes random integers, one per line;</li>
      *   <li>{@link #separateFile()} deals the lines round-robin into chunk files;</li>
      *   <li>{@link #everySingleFileSort()} sorts every chunk in memory;</li>
      *   <li>{@link #mergeFile()} k-way merges the sorted chunks into the result.</li>
      * </ol>
      *
      * <p>Files are written with the platform line separator and read back with
      * {@link BufferedReader#readLine()}, which accepts any line terminator.
      *
      * @author wangyuyuan
      */
     public class LargeDataSortTest {
         /** Number of chunk files the input is split into. */
         private static final int CHUNK_COUNT = 20;
         /** Number of random integers written by {@link #createData()}. */
         private static final int RECORD_COUNT = 1000000;
         /** Directory holding the input, the chunk files and the sorted output. */
         private static final String DATA_DIR = "E:" + File.separator + "dataTest";

         /** Unsorted input file. */
         static File file = new File(DATA_DIR + File.separator + "data.txt");
         /** Sorted output file. */
         static File file1 = new File(DATA_DIR + File.separator + "dataSorted.txt");

         /**
          * Runs the whole pipeline on the default files and reports progress on stdout.
          *
          * @param args ignored
          * @throws Exception if any step fails to read, write or parse a file
          */
         public static void main(String[] args) throws Exception {
             createData();
             System.out.println("大文件写入成功");
             separateFile();
             System.out.println("文件拆分成功");

             everySingleFileSort();
             System.out.println("小文件排序完成");
             mergeFile();
             System.out.println("所有排序都已完成");
         }

         /**
          * Fills the default input file with random non-negative integers.
          *
          * @throws IOException if the file cannot be written
          */
         public static void createData() throws IOException {
             createData(file, RECORD_COUNT);
         }

         /**
          * Writes {@code count} random integers in {@code [0, Integer.MAX_VALUE)} to
          * {@code target}, one per line, creating the parent directory if needed.
          *
          * @param target file to create or overwrite
          * @param count  number of lines to write
          * @throws IOException if the directory or the file cannot be written
          */
         static void createData(File target, int count) throws IOException {
             File parent = target.getParentFile();
             if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
                 throw new IOException("Cannot create directory " + parent);
             }
             Random random = new Random();
             try (BufferedWriter out = new BufferedWriter(new FileWriter(target))) {
                 for (int i = 0; i < count; i++) {
                     out.write(Integer.toString(random.nextInt(Integer.MAX_VALUE)));
                     out.newLine();
                 }
             }
         }

         /**
          * Splits the default input file into the default chunk files.
          *
          * @throws IOException if a file cannot be read or written
          */
         public static void separateFile() throws IOException {
             separateFile(file, chunkFiles());
         }

         /**
          * Deals the lines of {@code source} round-robin into {@code chunks}.
          * Blank lines are dropped. Works for any number of lines, not only
          * multiples of the chunk count.
          *
          * @param source file to split
          * @param chunks files to create or overwrite; must not be empty
          * @throws IllegalArgumentException if {@code chunks} is empty
          * @throws IOException if a file cannot be read or written
          */
         static void separateFile(File source, File[] chunks) throws IOException {
             if (chunks.length == 0) {
                 throw new IllegalArgumentException("at least one chunk file is required");
             }
             BufferedWriter[] outs = new BufferedWriter[chunks.length];
             try (BufferedReader in = new BufferedReader(new FileReader(source))) {
                 for (int i = 0; i < chunks.length; i++) {
                     outs[i] = new BufferedWriter(new FileWriter(chunks[i]));
                 }
                 int next = 0;
                 String line;
                 // readLine() returning null is the only reliable end-of-file signal.
                 while ((line = in.readLine()) != null) {
                     if (line.trim().isEmpty()) {
                         continue;
                     }
                     outs[next].write(line);
                     outs[next].newLine();
                     next = (next + 1) % outs.length;
                 }
             } finally {
                 closeAll(outs);
             }
         }

         /**
          * Sorts every default chunk file in place.
          *
          * @throws Exception if a chunk cannot be read, parsed or written
          */
         public static void everySingleFileSort() throws Exception {
             for (File chunk : chunkFiles()) {
                 sortFile(chunk);
             }
         }

         /**
          * Loads one chunk into memory, sorts it ascending and writes it back.
          * Every line of the chunk must be a decimal {@code int}.
          *
          * @param chunk file to sort in place
          * @throws IOException if the file cannot be read or written
          * @throws NumberFormatException if a line is not an integer
          */
         static void sortFile(File chunk) throws IOException {
             LinkedList<Integer> numbers = new LinkedList<Integer>();
             try (BufferedReader in = new BufferedReader(new FileReader(chunk))) {
                 String line;
                 while ((line = in.readLine()) != null) {
                     numbers.add(Integer.parseInt(line.trim()));
                 }
             }
             // The reader is closed before the same path is reopened for writing.
             Collections.sort(numbers);
             numbersWrite(numbers, chunk.getPath());
         }

         /**
          * Writes the given numbers to {@code path}, one per line, replacing any
          * previous content.
          *
          * @param numbers values to write, in iteration order
          * @param path    file to create or overwrite
          * @throws IOException if the file cannot be written
          */
         public static void numbersWrite(LinkedList<Integer> numbers, String path) throws IOException {
             try (BufferedWriter out = new BufferedWriter(new FileWriter(path))) {
                 for (Integer number : numbers) {
                     out.write(String.valueOf(number));
                     out.newLine();
                 }
             }
         }

         /**
          * Merges the sorted default chunk files into the default output file.
          *
          * @throws Exception if a file cannot be read, parsed or written
          */
         public static void mergeFile() throws Exception {
             mergeFiles(chunkFiles(), file1);
         }

         /**
          * K-way merges already-sorted chunk files into {@code target}.
          *
          * <p>The heap holds at most one pending value per chunk, so memory use
          * is proportional to the number of chunks rather than to the amount of data.
          *
          * @param sortedChunks files whose lines are integers in ascending order
          * @param target       file to create or overwrite with the merged result
          * @throws IOException if a file cannot be read or written
          * @throws NumberFormatException if a line is not an integer
          */
         static void mergeFiles(File[] sortedChunks, File target) throws IOException {
             BufferedReader[] ins = new BufferedReader[sortedChunks.length];
             try (BufferedWriter out = new BufferedWriter(new FileWriter(target))) {
                 // PriorityQueue rejects an initial capacity below 1.
                 PriorityQueue<Head> heads = new PriorityQueue<Head>(Math.max(1, sortedChunks.length));
                 for (int i = 0; i < sortedChunks.length; i++) {
                     ins[i] = new BufferedReader(new FileReader(sortedChunks[i]));
                     offerNext(heads, ins[i], i);
                 }
                 while (!heads.isEmpty()) {
                     Head smallest = heads.poll();
                     out.write(Integer.toString(smallest.value));
                     out.newLine();
                     // Refill from the chunk that just lost its head.
                     offerNext(heads, ins[smallest.source], smallest.source);
                 }
             } finally {
                 closeAll(ins);
             }
         }

         /** Reads the next value of one chunk, if any, and adds it to the heap. */
         private static void offerNext(PriorityQueue<Head> heads, BufferedReader in, int source)
                 throws IOException {
             String line = in.readLine();
             if (line != null) {
                 heads.add(new Head(Integer.parseInt(line.trim()), source));
             }
         }

         /** Builds the default chunk file paths {@code data0.txt .. data19.txt}. */
         private static File[] chunkFiles() {
             File[] chunks = new File[CHUNK_COUNT];
             for (int i = 0; i < CHUNK_COUNT; i++) {
                 chunks[i] = new File(DATA_DIR + File.separator + "data" + i + ".txt");
             }
             return chunks;
         }

         /**
          * Closes every non-null resource, attempting all of them even if one fails;
          * the first failure is rethrown with later ones attached as suppressed.
          */
         private static void closeAll(java.io.Closeable[] resources) throws IOException {
             IOException failure = null;
             for (java.io.Closeable resource : resources) {
                 if (resource == null) {
                     continue;
                 }
                 try {
                     resource.close();
                 } catch (IOException e) {
                     if (failure == null) {
                         failure = e;
                     } else {
                         failure.addSuppressed(e);
                     }
                 }
             }
             if (failure != null) {
                 throw failure;
             }
         }

         /** A pending value in the merge heap together with the chunk it came from. */
         private static final class Head implements Comparable<Head> {
             final int value;
             final int source;

             Head(int value, int source) {
                 this.value = value;
                 this.source = source;
             }

             @Override
             public int compareTo(Head other) {
                 // Integer.compare avoids the overflow of a subtraction-based comparison.
                 return Integer.compare(value, other.value);
             }
         }
     }
    165 
    /**
     * A value paired with an index, usable as its own {@link Comparator}.
     *
     * <p>Instances are ordered by {@link #a} only; {@link #b} is carried along
     * and never takes part in the comparison.
     */
    class Obj implements Comparator<Obj> {
        /** The value that determines ordering. */
        int a;
        /** Auxiliary index attached to the value. */
        int b;

        /** Creates an instance with both fields zero, e.g. for use purely as a comparator. */
        Obj() {}

        /**
         * @param a value that determines ordering
         * @param b auxiliary index attached to the value
         */
        Obj(int a, int b) {
            this.a = a;
            this.b = b;
        }

        /**
         * Orders two instances by ascending {@code a}.
         *
         * @return a negative number, zero or a positive number as {@code o1.a} is
         *         less than, equal to or greater than {@code o2.a}
         */
        @Override
        public int compare(Obj o1, Obj o2) {
            // Subtraction would overflow for operands of opposite sign with large magnitude.
            return Integer.compare(o1.a, o2.a);
        }
    }
  • 相关阅读:
    HDU 4665 Unshuffle DFS找一个可行解
    Servlet生命周期引起的问题
    获取真实Ip地址
    java中return与finally的执行顺序
    理解正则表达式
    抽象类与接口
    java 4种方式读取配置文件 + 修改配置文件
    Java基础语法
    接口多继承
    Java类成员(成员变量和方法)的覆盖与隐藏归纳
  • 原文地址:https://www.cnblogs.com/yaoboyyao/p/3663121.html
Copyright © 2011-2022 走看看