zoukankan      html  css  js  c++  java
  • 记一个男默女泪的 BUG

    姗姗来迟的词频统计代码 BUG 的发现

    1. 此前提交的第一次代码作业总结博客

    http://www.cnblogs.com/ustczwq/p/8680704.html

    2. BUG 本天成,妙手偶得之

    虽然代码已经提交,但总是感觉哪个地方不太对,bug 存在得过于莫名其妙。然后,随手打开代码,稍微调试了一下,当我发现 bug 的时候,不知道该说些什么好,只想讲脏话。

     

    出现 bug 的地方:

     

     

    改过之后:

     

     

     

    看出来了吧:三目运算符的两个分支只计算了结果,却没有把结果赋值回变量(少写了等于号)。改完之后,输出结果立马正确。怪不得用 unordered_map 的时候哈希表的查询出问题了——是我自己定义的哈希函数有问题。虽然迟了,但那种优化是对的,简单补一篇,算是对原博客的完善。

     

    3. 加了几个等于号之后的源代码

      1 #include "io.h"
      2 #include "math.h"
      3 #include "stdio.h"
      4 #include "string.h"
      5 #include "stdlib.h"
      6 #include "unordered_map"
      7 
      8 using namespace std;
      9    
     10 #define small 2
     11 
     12 int wordnum = 0;   // incremented by getwords() for every accepted word
     13 int charnum = 0;   // incremented by getwords() for every character with code 32..126
     14 int linenum = 0;   // incremented by getwords() once per opened file and once per line break read
     15 
     /*
      * One node of a singly linked word list.
      *
      * getwords() keeps one list per bucket of the fourletter table; the first
      * node of every list is a placeholder head whose only meaningful member
      * is `next`.  The default member initializers guarantee that such a head
      * (and any other freshly allocated node) never carries indeterminate
      * values.
      */
     struct wordsdata
     {
         char words[1024] = {};              // NUL-terminated spelling of the word
         int number = 0;                     // how many times the word occurred
         struct wordsdata *next = nullptr;   // next node in the same list, or null
     };
     /*
      * A two-word phrase.
      *
      * `one` and `two` are borrowed pointers to the `words` buffers of word
      * list nodes; the phrase does not own them.  `num` is used by gettop()
      * as the occurrence count of a ranked phrase.  The default member
      * initializers make a default-constructed phrase well defined, so copying
      * it (for example into the phrase map) never reads an indeterminate value.
      */
     struct phrases
     {
         char *one = nullptr;   // first word of the phrase
         char *two = nullptr;   // second word of the phrase
         int num = 0;           // occurrence count (filled in by gettop())
     };
     28 
     29 int wordcmp(char *str1, char *str2);                                   // returns the strcmp() result for equivalent words, otherwise 2, 3 or 4
     30 int gettop(struct wordsdata **word);                                   // writes the counters and the ranked words/phrases to result.txt
     31 int getwords(char *path, struct wordsdata **word);                     // reads one file, updating the counters, the word lists and phrasemap
     32 int getfiles(char *path, struct _finddata_t *fileinfo, long handle);   // walks path with _findfirst/_findnext, calling getwords() on non-directories
     33 
     34 struct phrase_cmp
     35 {
     36     bool operator()(const phrases &p1, const phrases &p2) const
     37     {
     38         return ((wordcmp(p1.one, p2.one) < 2) && (wordcmp(p1.two, p2.two) < 2));
     39     }
     40 
     41 };
     42 struct phrase_hash
     43 {
     44     size_t operator()(const phrases &ph) const
     45     {
     46         unsigned long __h = 0;
     47         int temp;
     48         size_t i;
     49         for (i = 0; ph.one[i]; i++)
     50         {
     51             temp = ph.one[i];
     52             if (temp > 64)
     53             {
     54                 (temp > 96) ? (temp -= 96) : (temp -= 64);
     55                 __h += (29 * __h + temp);
     56                 __h %= 2147483647;
     57             }
     58 
     59         }
     60         for (i = 0; ph.two[i]; i++)
     61         {
     62             temp = ph.two[i];
     63             if (temp > 64)
     64             {
     65                 (temp > 96) ? (temp -= 96) : (temp -= 64);
     66                 __h += (29 * __h + temp);
     67                 __h %= 2147483647;
     68             }
     69         }
     70 
     71         return size_t(__h);
     72     }
     73 
     74 };
     75 
     76 typedef unordered_map<phrases, int, phrase_hash, phrase_cmp> Char_Phrase;   // maps a phrase to its occurrence count
     77 Char_Phrase phrasemap;   // filled by getwords(), read by gettop()
     78 struct wordsdata *fourletter[26 * 26 * 26 * 26] = {}; // word lists, indexed by the first four letters of a word
     79 
     80 int main()
     81 {
     82     int j = 0;                            
     83     long handle = 0;                           // 用于查找的句柄 
     84     struct _finddata_t fileinfo;               // 文件信息的结构体 
     85     char *path = __argv[1];
     86     
     87     getfiles(path, &fileinfo, handle);
     88 
     89     gettop(fourletter);
     90 
     91     system("pause");
     92     return 1;
     93 }
     94 
     95 int getfiles(char *path, struct _finddata_t *fileinfo, long handle)
     96 {                                    
     97     handle = _findfirst(path, fileinfo);            //第一次打开父目录
     98     if (handle == -1)
     99         return -1;
    100 
    101 
    102     do
    103     {
    104         //printf("> %s
    ", path);           //显示目录名
    105 
    106         if (fileinfo->attrib & _A_SUBDIR)           //如果读取到子目录
    107         {
    108             if (strcmp(fileinfo->name, ".") != 0 && strcmp(fileinfo->name, "..") != 0)
    109             {
    110                 char temppath[1024] = "";              //记录子目录路径
    111                 long temphandle = 0;
    112                 struct _finddata_t tempfileinfo;
    113                 strcpy(temppath, path);
    114                 strcat(temppath, "/*");
    115 
    116                 temphandle = _findfirst(temppath, &tempfileinfo);  //第一次打开子目录
    117                 if (temphandle == -1)
    118                     return -1;
    119 
    120                 do                              //对子目录所有文件递归
    121                 {
    122                     if (strcmp(tempfileinfo.name, ".") != 0 && strcmp(tempfileinfo.name, "..") != 0)
    123                     {
    124                         strcpy(temppath, path);
    125                         strcat(temppath, "/");
    126                         strcat(temppath, tempfileinfo.name);
    127                         getfiles(temppath, &tempfileinfo, temphandle);
    128                     }
    129                 } while (_findnext(temphandle, &tempfileinfo) != -1);
    130 
    131                 _findclose(temphandle);
    132             }//递归完毕
    133 
    134         } //子目录读取完毕
    135         else
    136             getwords(path, fourletter);
    137 
    138 
    139     } while (_findnext(handle, fileinfo) != -1);
    140 
    141     _findclose(handle);       //关闭句柄
    142 
    143     return 1;
    144 
    145 }
    146 
    147 int getwords(char *path, struct wordsdata **word)
    148 {
    149     FILE *fp;
    150     int j = 0;
    151     int cmp = 0;
    152     int num = 0;               //计算首四位地址
    153     char temp = 0;             //读取一个字符 ACSII 码值
    154     int length = 0;
    155 
    156     char present[1024] = "";  //存储当前单词
    157 
    158     char address[4] = "";
    159     struct wordsdata *q = NULL;
    160     struct wordsdata *pre = NULL;
    161     struct wordsdata *neword = NULL;
    162     struct wordsdata *now = NULL;
    163     struct wordsdata *previous = NULL;
    164     struct phrases *newphrase = NULL;
    165 
    166     if ((fp = fopen(path, "r")) == NULL)
    167     {
    168         //printf("error!!! 
    ", path);
    169         return 0;
    170     }
    171     linenum++;
    172     while (temp != -1)
    173     {
    174         //读取字符串
    175         temp = fgetc(fp);
    176         if (temp > 31 && temp < 127)
    177             charnum++;
    178         if (temp == '
    ' || temp == '
    ')
    179             linenum++;
    180 
    181         while ((temp >= '0' && temp <= '9') || (temp >= 'a' && temp <= 'z') || (temp >= 'A' && temp <= 'Z'))
    182         {
    183             if (length != -1 && length < 4)
    184             {
    185                 if (temp >= 'A')  //是字母
    186                 {
    187                     present[length] = temp;
    188                     address[length] = (temp >= 'a' ? (temp - 'a') : (temp - 'A'));
    189                     length++;
    190                 }
    191                 else            //不是字母
    192                     length = -1;
    193             }
    194             else if (length >= 4)
    195             {
    196                 present[length] = temp;
    197                 length++;
    198             }
    199             temp = fgetc(fp);
    200             if (temp > 31 && temp < 127)
    201                 charnum++;
    202             if (temp == '
    ' || temp == '
    ')
    203                 linenum++;
    204         } // end while
    205 
    206           //判断是否为单词
    207         if (length >= 4)
    208         {
    209             wordnum++;
    210 
    211             //计算首四位代表地址
    212             num = address[0] * 17576 + address[1] * 676 + address[2] * 26 + address[3];
    213 
    214             //插入当前单词
    215             if (word[num] == NULL)
    216             {
    217                 word[num] = new wordsdata;
    218                 neword = new wordsdata;
    219                 neword->number = 1;
    220                 neword->next = NULL;
    221                 strcpy(neword->words, present);
    222                 word[num]->next = neword;
    223                 now = neword;
    224             }
    225             else
    226             {
    227                 pre = word[num];
    228                 q = pre->next;
    229                 cmp = wordcmp(q->words, present);
    230 
    231                 while (cmp == small)
    232                 {
    233                     pre = q;
    234                     q = q->next;
    235                     if (q != NULL)
    236                         cmp = wordcmp(q->words, present);
    237                     else
    238                         break;
    239                 }
    240                 if (q != NULL && cmp <= 1)
    241                 {
    242                     now = q;
    243                     q->number++;
    244                     if (cmp == 1)
    245                         strcpy(q->words, present);                
    246                 }
    247 
    248                 else
    249                 {
    250                     neword = new wordsdata;
    251                     neword->number = 1;
    252                     strcpy(neword->words, present);
    253                     pre->next = neword;
    254                     neword->next = q;
    255                     now = neword;
    256                 }
    257             }
    258 
    259             if (previous != NULL)
    260             {
    261                 newphrase = new phrases;
    262 
    263                 newphrase->one = previous->words;
    264                 newphrase->two = now->words;
    265                 
    266                 unordered_map<phrases, int>::const_iterator got = phrasemap.find( *newphrase);
    267                 if (got != phrasemap.end())
    268                 {
    269                     phrasemap[*newphrase]++;
    270                 }
    271                 else
    272                 {
    273                     phrasemap.insert(pair<phrases, int>(*newphrase, 1));
    274                 }
    275             }
    276             previous = now;
    277 
    278             //当前单词置空
    279             for (int j = 0; present[j] && j < 1024; j++)
    280                 present[j] = 0;
    281         }
    282         length = 0;
    283     }
    284 
    285     fclose(fp);
    286     return 1;
    287 }
    288 
    /* Returns c with 'a'..'z' mapped to 'A'..'Z'; other characters unchanged. */
    static char fold_upper(char c)
    {
        if (c >= 'a' && c <= 'z')
            c -= 32;
        return c;
    }

    /*
     * Compares two words, ignoring letter case and trailing digits.
     *
     * The words are compared case-insensitively up to the first difference;
     * any digits that follow that point are then skipped in both words.  If
     * both words are exhausted afterwards they are equivalent.
     *
     * Returns
     *   -1, 0 or 1  the words are equivalent; the value is the sign of
     *               strcmp(str1, str2), i.e. the byte-wise order of the exact
     *               spellings (0 means identical);
     *   2           str1 orders before str2;
     *   3           str1 orders after str2;
     *   4           fallback when neither of the above applies.
     *
     * strcmp() only guarantees the sign of its result, so it is normalised
     * here: a raw value such as 2, 3 or 32 would otherwise be mistaken for one
     * of the codes above or fail the callers' `== 1`, `<= 1` and `< 2` tests.
     */
    int wordcmp(char *str1, char *str2)
    {
        const char *p1 = str1;
        const char *p2 = str2;
        char q1 = fold_upper(*p1);
        char q2 = fold_upper(*p2);

        while (q1 && q2 && q1 == q2)
        {
            p1++;
            p2++;
            q1 = fold_upper(*p1);
            q2 = fold_upper(*p2);
        }

        // Digits from the first difference onwards do not distinguish words.
        while (*p1 >= '0' && *p1 <= '9')
            p1++;
        while (*p2 >= '0' && *p2 <= '9')
            p2++;

        if (*p1 == 0 && *p2 == 0)               // equivalent words
        {
            int order = strcmp(str1, str2);
            return (order > 0) - (order < 0);
        }

        if (q1 < q2)                            // str1 is smaller
            return 2;

        if (q1 > q2)                            // str2 is smaller
            return 3;

        return 4;
    }
    333 
    334 int gettop(struct wordsdata **word)
    335 {
    336     int i = 0, j = 0;
    337     struct wordsdata *topw[12] = {};
    338     struct phrases *toph[12] = {};
    339     struct wordsdata *w = NULL;
    340     FILE *fp;
    341     fp = fopen("result.txt", "w");
    342     fprintf(fp,"characters:%d 
    words:%d 
    lines:%d
    ",  charnum,wordnum, linenum);
    343 
    344     for (j = 0; j < 12; j++)
    345     {        
    346         toph[j] = new struct phrases;
    347         toph[j]->num = 0;
    348         topw[j] = new struct wordsdata;
    349         topw[j]->number = 0;
    350     }
    351     for (i = 0; i < 456976; i++)
    352     {
    353         if (word[i] != NULL)
    354         {
    355             w = word[i]->next;
    356             while (w != NULL)
    357             {
    358                 topw[11]->number = w->number;
    359                 topw[11]->next = w;
    360                 j = 11;
    361                 while (j > 1 && topw[j]->number > topw[j - 1]->number)
    362                 {
    363                     topw[0] = topw[j];
    364                     topw[j] = topw[j - 1];
    365                     topw[j - 1] = topw[0];
    366                     j--;
    367                 }
    368                 w = w->next;
    369             }
    370         }
    371     }
    372     for (j = 1; j < 11; j++)
    373     {
    374         if (topw[j]->number)
    375             fprintf(fp,"
    %s :%d", topw[j]->next->words, topw[j]->number);
    376     }
    377     for (Char_Phrase::iterator it = phrasemap.begin(); it != phrasemap.end(); it++)
    378     {
    379         toph[11]->one = it->first.one;
    380         toph[11]->two = it->first.two;
    381         toph[11]->num = it->second;
    382         j = 11;
    383         while (j > 1 && toph[j]->num > toph[j - 1]->num)
    384         {
    385             toph[0] = toph[j];
    386             toph[j] = toph[j - 1];
    387             toph[j - 1] = toph[0];
    388             j--;
    389         }
    390     }
    391     fprintf(fp, "
    ");
    392     for (j = 1; j < 11; j++)
    393     {
    394         if (toph[j]->num)
    395             fprintf(fp,"
    %s %s :%d", toph[j]->one, toph[j]->two, toph[j]->num);
    396     }
    397     fclose(fp);
    398     return 1;
    399 }
    View Code

     

  • 相关阅读:
    iOS exit(0); 直接退出程序
    友盟推送简单调用
    KxMenu下拉菜单
    打开相册另类写法
    简洁调用字号
    十六进制颜色宏
    Swift定义单例
    不要在初始化方法和dealloc方法中使用Accessor Methods
    copyin函数
    c语言中的赋值
  • 原文地址:https://www.cnblogs.com/ustczwq/p/8689040.html
Copyright © 2011-2022 走看看