zoukankan      html  css  js  c++  java
  • 词频统计(未完成,错误)

    #include<iostream>
    #include<cstdio>
    #include<cmath>
    #include<cstdlib>
    #include<cstring>
    #include<algorithm>
    #include<vector>
    using namespace std;
    // Longest key (word) kept in the table; ET reserves one extra byte for the terminator.
    #define KEYLENGTH 15
    // Longest raw word the scanner buffers before it is cut down to KEYLENGTH.
    #define MAXWORDLEN 80
    // Upper limit used by NextPrime while searching for a prime table size.
    #define MAXTABLESIZE 100000

    // Key storage: a fixed-size character array of KEYLENGTH+1 bytes.
    typedef char ET[KEYLENGTH+1];
    // Bucket index into the table's head array.
    typedef int Index;

    // Pointer aliases for chain nodes; declared up front so the node can refer to itself.
    struct LNode;
    typedef struct LNode* PtrToLNode;
    typedef PtrToLNode Position;
    typedef PtrToLNode List;

    // Pointer alias for the table descriptor.
    struct TblNode;
    typedef struct TblNode *HashTable;

    // One node of a separate-chaining bucket.
    // In a chain node, Count is the number of occurrences of Data.
    // In a bucket head (an element of TblNode::Heads), Count is used as the
    // number of nodes hanging off that bucket and Data is left empty.
    struct LNode{
        ET Data;
        PtrToLNode Next;
        int Count;
    };

    // Hash table descriptor: Heads points to an array of TableSize bucket heads.
    struct TblNode{
        int TableSize;
        List Heads;
    };
    // flag: set to 1 by main before reading; GetAWord sets it to 0 once it has
    //       seen the '#' terminator, which ends main's read loop.
    // cnt:  number of distinct words, stored by Show and read by main to decide
    //       how many entries to print.
    int flag,cnt;
    // Entries selected by Show (only Data and Count are filled in); main sorts
    // this vector with cmp before printing.
    vector<LNode> v;
    // Ordering predicate for sorting word entries: higher Count comes first;
    // entries with equal Count are ordered by ascending strcmp of Data.
    // Returns true when a must be placed before b.
    bool cmp(LNode a,LNode b){
        if(a.Count != b.Count)
            return a.Count > b.Count;
        return strcmp(a.Data, b.Data) < 0;
    }
    // Searches upward from the first odd number greater than N for a value that
    // passes the trial-division test below, and returns it.
    // The search only runs while the candidate is <= MAXTABLESIZE; if it leaves
    // that range the current (untested) candidate is returned as is.
    // Candidates whose integer square root is below 2 are never accepted, so the
    // smallest value this can return through the accepting branch is 5.
    int NextPrime( int N ){
        int candidate = (N % 2 != 0) ? N + 2 : N + 1;   // always odd

        for(; candidate <= MAXTABLESIZE; candidate += 2){
            // Walk possible divisors downward from floor(sqrt(candidate)) to 3.
            int divisor = (int)sqrt((double)candidate);
            while(divisor > 2 && candidate % divisor != 0)
                --divisor;
            // Reaching exactly 2 means no divisor in [3, sqrt] was found.
            if(divisor == 2)
                break;
        }
        return candidate;
    }
    // Allocates a hash table whose bucket count is NextPrime(TableSize) and
    // initialises every bucket head to an empty chain.
    //
    // TableSize: requested minimum size; the actual size is stored in H->TableSize.
    // Returns the new table, or NULL if either allocation fails.
    // The caller releases the table with DestoryTable.
    HashTable CreateTable( int TableSize){
        HashTable H;
        int i;

        H = (HashTable)malloc(sizeof(struct TblNode));
        if(H == NULL)
            return NULL;
        H->TableSize = NextPrime(TableSize);

        H->Heads = (List)malloc(H->TableSize * sizeof(struct LNode));
        if(H->Heads == NULL){
            free(H);
            return NULL;
        }

        for(i = 0; i < H->TableSize; i++){
            H->Heads[i].Data[0] = '\0';
            H->Heads[i].Next = NULL;
            // The head's Count is incremented on every insertion into the bucket
            // and summed by the reporting code, so it must start at zero
            // (malloc does not clear memory).
            H->Heads[i].Count = 0;
        }
        return H;
    }
    // Shift-and-add string hash: for each character, H = (H << 5) + character,
    // computed in unsigned arithmetic, then reduced modulo TableSize.
    //
    // Key:       NUL-terminated string to hash.
    // TableSize: number of buckets; must be non-zero because it is the divisor.
    // Returns a bucket index in [0, TableSize).
    int Hash(const char* Key, int TableSize){
        unsigned int H = 0;
        while(*Key != '\0')
            H = (H << 5) + *Key++;
        return H % TableSize;
    }
    // Looks Key up in H.
    // Hashes Key to a bucket and walks that bucket's chain, comparing with strcmp.
    // Returns the node whose Data equals Key, or NULL when the chain has no match.
    Position Find(HashTable H, ET Key){
        Position Cur = H->Heads[Hash(Key, H->TableSize)].Next;
        for(; Cur != NULL; Cur = Cur->Next){
            if(strcmp(Cur->Data, Key) == 0)
                break;
        }
        return Cur;
    }
    // Records one occurrence of Key in H.
    // An empty Key is ignored. If Key is already stored its Count is incremented;
    // otherwise a new node with Count 1 is pushed onto the front of its bucket's
    // chain and the bucket head's Count (chain length) is incremented.
    // If the node allocation fails the word is dropped and the table is unchanged.
    void InsertAndCount(HashTable H, ET Key){
        if(Key[0] == '\0') return;
        Position P, NewCell;
        Index Pos;
        P = Find(H, Key);
        if(!P){
            NewCell = (Position)malloc(sizeof(LNode));
            if(NewCell == NULL)
                return;
            // Bounded copy: Data holds at most KEYLENGTH characters plus the terminator.
            strncpy(NewCell->Data, Key, KEYLENGTH);
            NewCell->Data[KEYLENGTH] = '\0';
            NewCell->Count = 1;
            Pos = Hash(Key, H->TableSize);
            NewCell->Next = H->Heads[Pos].Next;
            H->Heads[Pos].Next = NewCell;
            H->Heads[Pos].Count++;
        }
        else
            P->Count++;
    }
    // Returns true when c may appear inside a word: a lowercase ASCII letter,
    // a decimal digit, or an underscore. Uppercase letters are NOT accepted here.
    bool IsWordChar(char c){
        const bool lower = (c >= 'a' && c <= 'z');
        const bool digit = (c >= '0' && c <= '9');
        return lower || digit || c == '_';
    }
    // Reads the next word from standard input into `word`.
    //
    // Characters are read one at a time; uppercase ASCII letters are folded to
    // lowercase, and a word is a maximal run of characters accepted by IsWordChar.
    // Leading non-word characters are skipped. The character that ends the word
    // is consumed. At most MAXWORDLEN characters are buffered and the result is
    // cut to KEYLENGTH characters.
    //
    // `word` is always written: it is the empty string when no word was read
    // before the terminator. The global `flag` is set to 0 when '#' or end of
    // input is reached, which tells the caller to stop reading.
    void GetAWord(ET word){
        char tempword[MAXWORDLEN+1], c;
        int len = 0;

        word[0] = '\0';
        for(;;){
            // A failed read (EOF or error) is treated like the '#' terminator so
            // the loop cannot spin forever on exhausted input.
            if(scanf("%c", &c) != 1 || c == '#'){
                flag = 0;
                break;
            }
            // Fold case BEFORE classifying, so an uppercase letter continues the
            // current word instead of being taken for a delimiter and dropped.
            if(c >= 'A' && c <= 'Z')
                c += 'a' - 'A';
            if(IsWordChar(c)){
                // Excess characters of an over-long word are read but not stored.
                if(len < MAXWORDLEN)
                    tempword[len++] = c;
            }
            else if(len > 0)
                break;      // delimiter after at least one word character
        }
        if(len > KEYLENGTH)
            len = KEYLENGTH;
        tempword[len] = '\0';
        strcpy(word, tempword);
    }
    // Prints the number of distinct words in H followed by a newline, stores that
    // number in the global `cnt`, and appends the most frequent words to the
    // global vector `v`.
    //
    // percent: fraction of the distinct words to select (e.g. 0.1 for 10%).
    //
    // Selection: frequencies are accumulated from the highest downwards until at
    // least (int)(distinct * percent) words are covered; every word whose Count
    // is >= the frequency level at which accumulation stopped is appended to `v`.
    // Because that bound is inclusive, `v` may hold more words than requested;
    // the caller is expected to sort and truncate. The order of `v` is unspecified.
    void Show(HashTable H, double percent){
        int diffwordcount = 0;
        int maxf = 0;
        Position L;
        int i, lowerbound, count = 0;

        // Count the distinct words by walking the chains rather than trusting the
        // per-bucket head counters, and find the highest frequency on the way.
        for(i = 0; i < H->TableSize; i++){
            for(L = H->Heads[i].Next; L; L = L->Next){
                diffwordcount++;
                if(maxf < L->Count) maxf = L->Count;
            }
        }
        printf("%d\n", diffwordcount);
        cnt = diffwordcount;

        // diffwords[f] = number of distinct words that occur exactly f times.
        std::vector<int> diffwords(maxf + 1, 0);
        for(i = 0; i < H->TableSize; i++){
            for(L = H->Heads[i].Next; L; L = L->Next)
                diffwords[L->Count]++;
        }

        lowerbound = (int)(diffwordcount * percent);
        for(i = maxf; i >= 1 && count < lowerbound; i--)
            count += diffwords[i];

        // `i` is now the inclusive frequency threshold. Collect every qualifying
        // word in a single pass over the table.
        for(int k = 0; k < H->TableSize; k++){
            for(L = H->Heads[k].Next; L; L = L->Next){
                if(L->Count >= i){
                    struct LNode temp;
                    temp.Count = L->Count;
                    strcpy(temp.Data, L->Data);
                    temp.Next = NULL;
                    v.push_back(temp);
                }
            }
        }
    }
    // Releases everything owned by H: every chain node of every bucket, then the
    // array of bucket heads, then the table descriptor itself.
    // H must not be used after this call.
    void DestoryTable(HashTable H){
        for(int Slot = 0; Slot < H->TableSize; ++Slot){
            Position Node = H->Heads[Slot].Next;
            while(Node != NULL){
                // Remember the successor before the current node is freed.
                Position Following = Node->Next;
                free(Node);
                Node = Following;
            }
        }
        free(H->Heads);
        free(H);
    }
    // Reads words from standard input until the '#' terminator, counts them in a
    // hash table, prints the number of distinct words, and then prints the top
    // tenth of the words (cnt/10 entries) ordered by cmp, one "count:word" line each.
    // Returns 0 on success, 1 if the table could not be allocated.
    int main(){
        HashTable H;
        ET word;
        int TableSize = 100;

        H = CreateTable(TableSize);
        if(H == NULL)
            return 1;

        flag = 1;
        do{
            // Start from an empty key so that a call which reads no word cannot
            // cause stale or uninitialised data to be counted.
            word[0] = '\0';
            GetAWord(word);
            InsertAndCount(H, word);
        }while(flag);

        Show(H, 10.0/100);
        std::sort(v.begin(), v.end(), cmp);

        // Never index past the entries that were actually collected.
        int limit = cnt / 10;
        if(limit > (int)v.size())
            limit = (int)v.size();
        // NOTE(review): "%15s" right-justifies the word in a 15-column field;
        // confirm this matches the required output format.
        for(int i = 0; i < limit; i++)
            printf("%d:%15s\n", v[i].Count, v[i].Data);

        DestoryTable(H);
        return 0;
    }
    View Code
  • 相关阅读:
    OpenJudge 3765(最大权闭合图,最小割
    多校8-1010 HDU5389 (dp
    570D Codeforces Round #316 (Div. 2) D(dfs序,时间戳,二分
    CodeForces
    hiho一下!
    HDU 4123(树上任意点到其他点的最远距离,rmq
    Oracle创建索引;查询索引
    HBase启动和停止命令
    flink dom4j冲突异常
    flink checkpoint状态储存三种方式选择
  • 原文地址:https://www.cnblogs.com/astonc/p/10150586.html
Copyright © 2011-2022 走看看