zoukankan      html  css  js  c++  java
  • 59.排序好的大数据创建索引文件,并实现大文件的二分查找,根据索引百万数据秒读数据

    • 创建索引
      1 //创建索引
      2 struct index
      3 {
      4     //保存每行偏移的位置
      5     int *pindex;
      6     //文件的总长度
      7     int length;
      8 }allindex;//索引
    • 初始化索引数组并把索引写入到文件
       1 //初始化索引数组,并把索引写入到文件
       2 void init(char *path)
       3 {
       4     printf("
      索引数组开始分配");
       5     allindex.length = N;
       6     //分配内存
       7     allindex.pindex = calloc(N, sizeof(int));
       8     printf("
      索引数组完成分配");
       9 
      10     printf("
      开始读取");
      11     //二进制读取文件 避免/r/n读取成/n
      12     FILE *pf = fopen("filesort.txt", "rb");
      13     if (pf == NULL)
      14     {
      15         return -1;
      16     }
      17     else
      18     {
      19         int alllength = 0;
      20         for (int i = 0; i < N; i++)
      21         {
      22             char str[50] = { 0 };
      23             fgets(str, 50, pf);
      24             //每一个首地址的偏移
      25             allindex.pindex[i] = alllength;
      26 
      27             int length = strlen(str);
      28             alllength += length;
      29         }
      30         fclose(pf);
      31     }
      32     printf("
      结束读取");
      33 
      34     printf("
      开始写入");
      35     //二进制方式打开文件,并写入索引
      36     FILE *pfw = fopen("index.txt", "wb");
      37     //写入
      38     fwrite(allindex.pindex, sizeof(int), allindex.length, pfw);
      39     //关闭文件
      40     fclose(pfw);
      41     printf("
      结束写入");
      42 
      43     //释放内存
      44     free(allindex.pindex);
      45 }
    • 从文件中读取索引到索引数组中
       1 //从文件中读取索引
       2 void qucik()
       3 {
       4     printf("
      索引数组开始分配");
       5     allindex.length = N;
       6     allindex.pindex = calloc(N, sizeof(int));//分配内存
       7     printf("
      索引数组完成分配");
       8 
       9 
      10     printf("
      开始读取");
      11     //以二进制读的方式读取索引
      12     FILE *pfw = fopen("index.txt", "rb");
      13     //读取
      14     fread(allindex.pindex, sizeof(int), allindex.length, pfw);
      15     //关闭文件
      16     fclose(pfw);
      17     printf("
      结束读取");
      18 }
    • 测试函数
       1   FILE *pf1 = fopen("index.txt", "rb");
       2     FILE *pf2 = fopen("filesort.txt", "rb");
       3     while (1)
       4     {
       5         printf("
      请输入要读取的行数");
       6         int num = 0;
       7         scanf("%d", &num);
       8 
       9         int indexnum = 0;
      10         fseek(pf1, num*sizeof(int), SEEK_SET);
      11         fread(&indexnum, sizeof(int), 1, pf1);//读索引到indexnum
      12 
      13         fseek(pf2, indexnum, SEEK_SET);
      14         char str[128] = { 0 };
      15         fgets(str, 128, pf2);//读取
      16         printf("
      %s", str);
      17 
      18     }
      19     fclose(pf1);
      20     fclose(pf2);
    • 根据索引文件对已经排序好的文件进行二分查找
       1 void binsearch(char *searchstr)
       2 {
       3     //头部
       4     int tou = 0;
       5     //尾部
       6     int wei = N - 1;
       7     //是否找到的标识
       8     int flag = 0;
       9     //如果头小于尾
      10     while (tou <= wei)
      11     {
      12         //获取中部
      13         int zhong = (tou + wei) / 2;
      14         //读取中部索引的内容
      15         char zhongstr[256] = { 0 };
      16         {
      17             //打开索引文件
      18             FILE *pf1 = fopen("index.txt", "rb");
      19             //打开排序好的文件
      20             FILE *pf2 = fopen("filesort.txt", "rb");
      21 
      22             //读zhong对应的地址存到indexnum中
      23             int indexnum = 0;
      24             fseek(pf1, zhong * sizeof(int), SEEK_SET);
      25             fread(&indexnum, sizeof(int), 1, pf1);
      26 
      27             //根据读取的位置读取文件到zhongstr中
      28             fseek(pf2, indexnum, SEEK_SET);
      29             fgets(zhongstr, 128, pf2);
      30 
      31             fclose(pf1);
      32             fclose(pf2);
      33         }
      34         //消除'
      或者
      '
      35         eatN(zhongstr);
      36         char pnewzhongstr[256] = { 0 };
      37         sprintf(pnewzhongstr, zhongstr);
      38         //进行处理,遇到-终止
      39         eatg(pnewzhongstr);
      40         //比较是否找到
      41         int res = strcmp(pnewzhongstr, searchstr);//1 0  -1
      42         if (res == 0)
      43         {
      44             flag = 1;
      45             printf("%s", zhongstr);
      46             break;
      47         }
      48         //如果中比searchstr要大
      49         else if (res == 1)
      50         {
      51             wei = zhong - 1;
      52         }
      53         //如果中比searchstr小
      54         else
      55         {
      56             tou = zhong + 1;
      57         }
      58 
      59 
      60     }
      61     //判断是否找到
      62     if (flag)
      63     {
      64         printf("
      find");
      65     } 
      66     else
      67     {
      68         printf("
       not find");
      69     }
      70 }
    • 遇到'-'结束
       1 //遇到'-'结束
       2 void eatg(char *str)
       3 {
       4     while (*str!='')
       5     {
       6 
       7         if (*str=='-')
       8         {
       9             *str = '';
      10         }
      11         str++;
      12     }
      13 
      14 }
    • 测试函数
       1 void main()
       2 {
       3 
       4 
       5     char str[256] = { 0 };
       6     scanf("%s", str);
       7     binsearch(str);
       8 
       9 
      10 
      11     system("pause");
      12 }
  • 相关阅读:
    洛谷
    洛谷
    洛谷
    模板
    洛谷
    洛谷
    Codeforces Round #561 (Div. 2) E. The LCMs Must be Large(数学)
    Codeforces Round #561 (Div. 2)
    Mail.Ru Cup 2018 Round 2 C. Lucky Days(拓展欧几里得)
    The 10th Shandong Provincial Collegiate Programming Contest H.Tokens on the Segments(贪心+优先级队列 or 贪心+暴力)
  • 原文地址:https://www.cnblogs.com/xiaochi/p/8437140.html
Copyright © 2011-2022 走看看