开始看编程珠玑了,第一个就是进行磁盘排序的问题,想到了也只是归并排序,但题目要求1M内存,这个算法不可行。编程珠玑写到使用位图(分两次操作读写可以成功实现,小于内存1M),详情看编程珠玑第一章。
题目:给定10^7数据,对大数据进行排序。要求内存只有1M,时间可以接受,较短。
解决方法:1.多路归并。2.位图操作。
在这里看了july的文章,写的真心不错。推荐:http://blog.csdn.net/v_JULY_v/article/details/6451990
其中有个生成不同随机数的程序附下:
1 #include <iostream> 2 #include <time.h> 3 #include <assert.h> 4 using namespace std; 5 6 const int size = 10000000; 7 int num[size]; 8 9 int main() 10 { 11 int n; 12 FILE *fp = fopen("data.txt", "w"); 13 assert(fp); 14 15 for (n = 1; n <= size; n++) 16 //之前此处写成了n=0;n<size。导致下面有一段小程序的测试数据出现了0,特此订正。 17 num[n] = n; 18 srand((unsigned)time(NULL)); 19 int i, j; 20 21 for (n = 0; n < size; n++) 22 { 23 i = (rand() * RAND_MAX + rand()) % 10000000; 24 j = (rand() * RAND_MAX + rand()) % 10000000; 25 swap(num[i], num[j]); 26 } 27 28 for (n = 0; n < size; n++) 29 fprintf(fp, "%d ", num[n]); 30 fclose(fp); 31 return 0; 32 }
位图程序实现排序,首先排前5000000(每个数字对应一个bit)个后排剩下的5000000数据(每次都小于1M),代码如下(模拟两次):
1 #include <iostream> 2 #include <bitset> 3 #include <assert.h> 4 #include <time.h> 5 using namespace std; 6 7 const int max_each_scan = 5000000; 8 9 int main() 10 { 11 clock_t begin = clock(); 12 bitset<max_each_scan> bit_map; 13 bit_map.reset(); 14 15 // open the file with the unsorted data 16 FILE *fp_unsort_file = fopen("data.txt", "r"); 17 assert(fp_unsort_file); 18 int num; 19 20 // the first time scan to sort the data between 0 - 4999999 21 while (fscanf(fp_unsort_file, "%d ", &num) != EOF) 22 { 23 if (num < max_each_scan) 24 bit_map.set(num, 1); 25 } 26 27 FILE *fp_sort_file = fopen("sort.txt", "w"); 28 assert(fp_sort_file); 29 int i; 30 31 // write the sorted data into file 32 for (i = 0; i < max_each_scan; i++) 33 { 34 if (bit_map[i] == 1) 35 fprintf(fp_sort_file, "%d ", i); 36 } 37 38 // the second time scan to sort the data between 5000000 - 9999999 39 int result = fseek(fp_unsort_file, 0, SEEK_SET); 40 if (result) 41 cout << "fseek failed!" << endl; 42 else 43 { 44 bit_map.reset(); 45 while (fscanf(fp_unsort_file, "%d ", &num) != EOF) 46 { 47 if (num >= max_each_scan && num < 10000000) 48 { 49 num -= max_each_scan; 50 bit_map.set(num, 1); 51 } 52 } 53 for (i = 0; i < max_each_scan; i++) 54 { 55 if (bit_map[i] == 1) 56 fprintf(fp_sort_file, "%d ", i + max_each_scan); 57 } 58 } 59 60 clock_t end = clock(); 61 cout<<"用位图的方法,耗时:"<<endl; 62 cout << (end - begin) / CLK_TCK << "s" << endl; 63 fclose(fp_sort_file); 64 fclose(fp_unsort_file); 65 return 0; 66 }