5-46 新浪微博热门话题 (30分)——unfinished HASH

新浪微博可以在发言中嵌入“话题”，即将发言中的话题文字写在一对“#”之间，就可以生成话题链接，点击链接可以看到有多少人在跟自己讨论相同或者相似的话题。新浪微博还会随时更新热门话题列表，并将最热门的话题放在醒目的位置推荐大家关注。

本题目要求实现一个简化的热门话题推荐功能，从大量英文（因为中文分词处理比较麻烦）微博中解析出话题，找出被最多条微博提到的话题。

输入格式:

输入说明：输入首先给出一个正整数$N$（$\le 10^5$），随后$N$行，每行给出一条英文微博，其长度不超过140个字符。任何包含在一对最近的#中的内容均被认为是一个话题，如果长度超过40个字符，则只保留前40个字符。输入保证#成对出现。

输出格式:

第一行输出被最多条微博提到的话题，第二行输出其被提到的微博条数。如果这样的话题不唯一，则输出按字母序最小的话题，并在第三行输出And k more ...，其中k是另外几条热门话题的条数。输入保证至少存在一条话题。

注意：两条话题被认为是相同的，如果在去掉所有非英文字母和数字的符号、并忽略大小写区别后，它们是相同的字符串；同时它们有完全相同的分词。输出时除首字母大写外，只保留小写英文字母和数字，并用一个空格分隔原文中的单词。

输入样例:

4
This is a #test of topic#.
Another #Test of topic.#
This is a #Hot# #Hot# topic
Another #hot!# #Hot# topic

输出样例:

Hot
2
And 1 more ...


这题对字符串处理要求比较多，在字符串比较的时候要遵守一定规则（字母和数字相同即相同），但是在输出时却要原样输出，而且同一行中一个话题不可以加入两次，这样没办法使用cstring里面的函数
比较尴尬，通过这题学了一个分离字符串的函数strtok，这个函数和python里面的split函数差不多，都是把一个字符串分隔成以规定字符间隔的多个字符串，下面附上第一次做的源码：这次没有考虑
到比较规则。。这题我搜了一下网上也没有答案，对于题意我还有一点疑问，就是当两个按规则比较相等的字符串为出现次数最多的热门话题时，也输出字典序小的那个吗？？

#include<cctype>
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<string>
using namespace std;
#define MAXN 10005
typedef long long LL;
/*

*/

/*
 * One node of a bucket chain in the hash table. `List` is a pointer to it.
 * Init() allocates one such node per bucket as a list head (cnt = 0,
 * line = -1, id left unset); Insert() links the real entries behind it.
 */
typedef struct node
{
    /* Topic key as copied in by Insert(), NUL-terminated. */
    char id[41];
    /* Occurrence counter: 1 when the entry is created, bumped by Insert(). */
    int cnt;
    /* Line number that Insert() stored when it created the entry. */
    int line;
    /* Next node in the same bucket; NULL terminates the chain. */
    struct node* next;
}*List;
/*
 * Separate-chaining hash table. `Hashlist` is a pointer to it.
 */
typedef struct tb
{
    /* Number of buckets; Init() sets it to NextPrime(size). */
    int Tablesize;
    /* Array of Tablesize bucket heads, each a List allocated by Init(). */
    List *list;
}*Hashlist;
/*
 * Map a NUL-terminated key to a bucket index in [0, size).
 *
 * The whole string is folded with a base-31 polynomial, reducing modulo
 * `size` at every step so the accumulator never overflows. Only bytes up
 * to the terminator are read, so two equal strings always hash to the
 * same bucket regardless of what follows them in memory.
 *
 * key  - NUL-terminated string to hash (not modified).
 * size - number of buckets; a non-positive size yields 0.
 */
long long Hash(char key[], long long size)
{
    if (size <= 0)
        return 0;

    long long acc = 0;
    for (const char *p = key; *p != '\0'; p++)
    {
        /* Go through unsigned char so bytes >= 0x80 cannot make acc negative. */
        acc = (acc * 31 + (unsigned char)*p) % size;
    }
    return acc;
}
/*
 * Return the smallest prime that is >= x.
 *
 * Inputs below 2 are clamped to 2, so the result is always a real prime
 * and never 0 (which would give a zero-sized table and a modulo by zero).
 * Trial division uses `d <= candidate / d` instead of `d * d <= candidate`
 * to stay clear of signed overflow for large candidates.
 */
int NextPrime(int x)
{
    int candidate = (x < 2) ? 2 : x;
    for (;; candidate++)
    {
        int is_prime = 1;
        for (int d = 2; d <= candidate / d; d++)
        {
            if (candidate % d == 0)
            {
                is_prime = 0;
                break;
            }
        }
        if (is_prime)
            return candidate;
    }
}
/*
 * Allocate a hash table with NextPrime(size) buckets.
 *
 * Every bucket gets its own head node (cnt = 0, line = -1, next = NULL)
 * so that chains can be extended without special-casing an empty bucket.
 * Returns the newly allocated table.
 */
Hashlist Init(int size)
{
    Hashlist table = (Hashlist)malloc(sizeof *table);
    table->Tablesize = NextPrime(size);
    table->list = (List*)malloc(sizeof(List) * table->Tablesize);

    int bucket = 0;
    while (bucket < table->Tablesize)
    {
        List head = (List)malloc(sizeof *head);
        head->next = NULL;
        head->cnt = 0;
        head->line = -1;
        table->list[bucket] = head;
        bucket++;
    }
    return table;
}
/*
 * Look up `key` in table H.
 *
 * Walks the chain of the bucket selected by Hash() and returns the first
 * node whose id compares equal to `key` with strcmp, or NULL if none does.
 */
List Find(char key[],Hashlist H)
{
    List head = H->list[Hash(key, H->Tablesize)];
    for (List cur = head->next; cur != NULL; cur = cur->next)
    {
        if (strcmp(key, cur->id) == 0)
            return cur;
    }
    return NULL;
}
void Insert(char key[],Hashlist H,int line)
{
    int len = strlen(key);
    for(int i=0;i<len;i++)
        key[i] = tolower(key[i]);
    //cout<<key<<endl;
    List t = H->list[Hash(key,H->Tablesize)];
    List f = Find(key,H);
    if(f==NULL)
    {
        List tmp = (List)malloc(sizeof(node));
        tmp->cnt = 1;
        tmp->line = line;
        strcpy(tmp->id,key);
        tmp->next = t->next;
        t->next = tmp;
    }
    else
    {
        if((f->line)!=line)
            (f->cnt)++;
    }
}
/*
 * Print the most-mentioned topic stored in H.
 *
 * Output: the topic (first letter upper-cased if it is a lowercase
 * letter), then its count, then "And k more ..." when k other topics
 * share the same maximum count. Among tied topics the one that sorts
 * first under strcmp is printed. Prints nothing if the table is empty.
 */
void Findmax(Hashlist H)
{
    int max = -1, same = 1;
    char ans[41] = "";

    for (int i = 0; i < H->Tablesize; i++)
    {
        /* Skip the bucket's head node; real entries start at ->next. */
        for (List p = H->list[i]->next; p != NULL; p = p->next)
        {
            if (p->cnt > max)
            {
                /* New maximum: restart the tie counter. */
                max = p->cnt;
                same = 1;
                strcpy(ans, p->id);
            }
            else if (p->cnt == max)
            {
                if (strcmp(ans, p->id) > 0)
                    strcpy(ans, p->id);
                same++;
            }
        }
    }

    /* No entry was visited, so there is nothing meaningful to report. */
    if (max < 0)
        return;

    if (ans[0] <= 'z' && ans[0] >= 'a')
        ans[0] = (char)toupper((unsigned char)ans[0]);
    printf("%s\n%d\n", ans, max);
    if (same > 1)
        printf("And %d more ...\n", same - 1);
}
/*
 * Read the line count, then that many lines; feed every non-empty topic
 * found between a pair of '#' to Insert(), and finally print the result
 * with Findmax().
 *
 * Returns 1 if the line count cannot be read or is not positive.
 */
int main()
{
    int n;
    if (scanf("%d", &n) != 1 || n < 1)
        return 1;
    Hashlist H = Init(n);

    /* Drop whatever is left on the count line, including the newline. */
    int ch;
    while ((ch = getchar()) != '\n' && ch != EOF)
    {
    }

    /* Lines are at most 140 characters; leave room for "\r\n" and NUL. */
    char str[256];
    for (int l = 1; l <= n; l++)
    {
        if (fgets(str, sizeof str, stdin) == NULL)
            break;
        str[strcspn(str, "\r\n")] = '\0';

        /*
         * Pair up '#' characters explicitly. strtok would skip leading
         * and consecutive delimiters, which shifts the odd/even position
         * of the tokens and picks the text outside the '#' pair.
         */
        char *open = strchr(str, '#');
        while (open != NULL)
        {
            char *close = strchr(open + 1, '#');
            if (close == NULL)
                break;
            *close = '\0';
            if (close > open + 1)
                Insert(open + 1, H, l);
            open = strchr(close + 1, '#');
        }
    }
    Findmax(H);
    return 0;
}