zoukankan      html  css  js  c++  java
  • 磁盘文件排序 编程珠玑

      开始看编程珠玑了,第一个就是进行磁盘排序的问题,想到了也只是归并排序,但题目要求1M内存,这个算法不可行。编程珠玑写到使用位图(分两次操作读写可以成功实现,小于内存1M),详情看编程珠玑第一章。

    题目:给定10^7数据,对大数据进行排序。要求内存只有1M,时间可以接受,较短。

    解决方法:1.多路归并。2.位图操作。

    在这里看了july的文章,写的真心不错。推荐:http://blog.csdn.net/v_JULY_v/article/details/6451990

    其中有个生成不同随机数的程序附下:

     1 #include <iostream>  
     2 #include <time.h>  
     3 #include <assert.h>  
     4 using namespace std;  
     5   
     6 const int size = 10000000;  
     7 int num[size];  
     8   
     9 int main()  
    10 {  
    11     int n;  
    12     FILE *fp = fopen("data.txt", "w");  
    13     assert(fp);  
    14   
    15     for (n = 1; n <= size; n++)    
    16         //之前此处写成了n=0;n<size。导致下面有一段小程序的测试数据出现了0,特此订正。  
    17         num[n] = n;  
    18     srand((unsigned)time(NULL));  
    19     int i, j;  
    20   
    21     for (n = 0; n < size; n++)  
    22     {  
    23         i = (rand() * RAND_MAX + rand()) % 10000000;  
    24         j = (rand() * RAND_MAX + rand()) % 10000000;  
    25         swap(num[i], num[j]);  
    26     }  
    27   
    28     for (n = 0; n < size; n++)  
    29         fprintf(fp, "%d ", num[n]);  
    30     fclose(fp);  
    31     return 0;  
    32 }  

    位图程序实现排序,首先排前5000000(每个数字对应一个bit)个后排剩下的5000000数据(每次都小于1M),代码如下(模拟两次):

     1 #include <iostream>  
     2 #include <bitset>  
     3 #include <assert.h>  
     4 #include <time.h>  
     5 using namespace std;  
     6   
     7 const int max_each_scan = 5000000;  
     8   
     9 int main()  
    10 {  
    11     clock_t begin = clock();  
    12     bitset<max_each_scan> bit_map;  
    13     bit_map.reset();  
    14       
    15     // open the file with the unsorted data  
    16     FILE *fp_unsort_file = fopen("data.txt", "r");  
    17     assert(fp_unsort_file);  
    18     int num;  
    19   
    20     // the first time scan to sort the data between 0 - 4999999  
    21     while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  
    22     {  
    23         if (num < max_each_scan)  
    24             bit_map.set(num, 1);  
    25     }  
    26       
    27     FILE *fp_sort_file = fopen("sort.txt", "w");  
    28     assert(fp_sort_file);  
    29     int i;  
    30       
    31     // write the sorted data into file  
    32     for (i = 0; i < max_each_scan; i++)  
    33     {  
    34         if (bit_map[i] == 1)  
    35             fprintf(fp_sort_file, "%d ", i);  
    36     }  
    37       
    38     // the second time scan to sort the data between 5000000 - 9999999  
    39     int result = fseek(fp_unsort_file, 0, SEEK_SET);  
    40     if (result)  
    41         cout << "fseek failed!" << endl;  
    42     else  
    43     {  
    44         bit_map.reset();  
    45         while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  
    46         {  
    47             if (num >= max_each_scan && num < 10000000)  
    48             {  
    49                 num -= max_each_scan;  
    50                 bit_map.set(num, 1);  
    51             }  
    52         }  
    53         for (i = 0; i < max_each_scan; i++)  
    54         {  
    55             if (bit_map[i] == 1)  
    56                 fprintf(fp_sort_file, "%d ", i + max_each_scan);  
    57         }  
    58     }  
    59       
    60     clock_t end = clock();  
    61     cout<<"用位图的方法,耗时:"<<endl;  
    62     cout << (end - begin) / CLK_TCK << "s" << endl;  
    63     fclose(fp_sort_file);  
    64     fclose(fp_unsort_file);  
    65     return 0;  
    66 }  
  • 相关阅读:
    堆优化Dijkstra模版
    poj_1364King
    快速排序库函数qsort的使用
    CMD type命令
    开放地址法
    poj_3159Candies
    poj_1511Invitation Cards
    何谓数据结构
    div ul li添加文本自动自动
    java虚拟机使用内存
  • 原文地址:https://www.cnblogs.com/zCoderJoy/p/3863916.html
Copyright © 2011-2022 走看看