zoukankan      html  css  js  c++  java
  • 2nd 词频统计更新

    词频统计更新

     

    实现功能:从控制台输入文件路径,统计单词总数及不重复的单词数,输出所有单词的词频,并按词频从高到低排序后输出词频最高的10个词。

     

    头文件

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    定义宏

    // Size in bytes of every word buffer in this program, terminating NUL included.
    #define WORD_LENGTH 250

    定义结构体及全局变量

    // One entry of the singly linked word list: a word plus its occurrence count.
    typedef struct Node
    {
        char word[WORD_LENGTH];   // the word, stored as a NUL-terminated string
        int time;                 // how many times the word has been counted
        struct Node *next;        // following entry, NULL at the end of the list
    }wordNode;
    
    // Header of the word list: overall counters and the pointer to the first entry.
    typedef struct TopNode
    {
        int sum;        // total number of words in the text
        int num;        // number of distinct words in the text
        wordNode * next;   // first entry of the word list, NULL while the list is empty
    }TopNode;
    
    // Storage for the list header; main() makes L point at it.
    TopNode t;
    
    // Global handle to the list header that the list functions read and update.
    // It stays NULL until main() assigns it.
    TopNode * L = NULL;

    声明文件中使用的函数

    // Word handling: look up / insert a word, normalise a token, bump a word's count.
    wordNode *wordSearch(char *word);
    void wordJob(char word[]);
    void wordCount(char *word);
    
    // Reporting: print every word with its count, and print the first ten list entries.
    void printCountList();
    void PrintFirstTenTimes();
    
    // Merge sort over the linked list and its two helpers (split in half, merge two lists).
    void mergeSort(wordNode **head);
    void FrontBackSplit(wordNode *head,wordNode **pre,wordNode **next);
    wordNode *SortedMerge(wordNode *pre,wordNode *next);
    
    // Frees every node of the word list.
    void release();

    主函数

    /*
     * Entry point: asks for a file path on the console, counts every word in
     * that file, prints all word frequencies, then sorts the list by frequency
     * and prints the ten most frequent words.
     *
     * Returns 0 on success. Exits with status 1 when the path cannot be read
     * from the console or the file cannot be opened.
     */
    int main(int argc,char *argv[])
    {
        char temp[WORD_LENGTH];   // scratch buffer for the token being read
        char file_path[100];
        wordNode * h;
        FILE *file;

        (void)argc;
        (void)argv;

        printf("请输入文件路径:");
        // fgets limits the read to the buffer size; gets() cannot and was removed in C11.
        if(fgets(file_path, sizeof file_path, stdin) == NULL)
        {
            printf("文件读取失败!");
            exit(1);
        }
        // fgets keeps the line break, which must not become part of the path.
        file_path[strcspn(file_path, "\r\n")] = '\0';

        if((file = fopen(file_path, "r")) == NULL)
        {
            printf("文件读取失败!");
            exit(1);
        }
        L = &t;
        L->num = 0;
        L->sum = 0;
        L->next = NULL;
        // The field width (WORD_LENGTH - 1) stops fscanf from writing past temp;
        // keep the two in sync if WORD_LENGTH ever changes.
        while(fscanf(file,"%249s",temp) == 1)
        {
            wordJob(temp);
            // A token without any letter is empty after cleaning and is not a word.
            if(temp[0] == '\0')
                continue;
            L->sum++;
            wordCount(temp);
        }
        fclose(file);
        printCountList();
        printf("\n\n输出词频最高的10个词\n");
        h = L->next;
        mergeSort(&h);              // sort by descending frequency
        // Sorting changes which node comes first; store the new head so that
        // printing and release() walk the whole list instead of a tail of it.
        L->next = h;
        PrintFirstTenTimes();
        release();
        return 0;
    }

    查找单词所在节点并返回

    /*
     * Returns the list node that holds `word`, creating it when the word is
     * not in the list yet.
     *
     * A newly created node starts with a count of 0, is inserted at the head
     * of the list, and increments the distinct-word counter L->num.
     *
     * word: NUL-terminated word to look up. Words of WORD_LENGTH characters or
     *       more are truncated when stored, so they will not match on later
     *       lookups.
     *
     * Never returns NULL: the program terminates with status 1 when no memory
     * is available for a new node.
     */
    wordNode *wordSearch(char *word)
    {
        wordNode *node;

        // Look for an existing entry first.
        for(node = L->next; node != NULL; node = node->next)
        {
            if(strcmp(node->word, word) == 0)
                return node;
        }

        // Not found (this also covers the empty list): insert a new head node.
        node = malloc(sizeof *node);
        if(node == NULL)
        {
            fprintf(stderr, "内存分配失败!\n");
            exit(1);
        }
        snprintf(node->word, sizeof node->word, "%s", word);   // bounded copy, always terminated
        node->time = 0;
        node->next = L->next;
        L->next = node;
        L->num++;
        return node;
    }

    词频统计

    void wordCount(char *word)
    {
        wordNode *tmpNode;
        tmpNode = wordSearch(word);      //word所在的节点
        tmpNode->time++;
    }

    输出所有词频

    void printCountList()
    {
        int i = 0;
        wordNode *node = L->next;
        if(L->next == NULL)
        {
            printf("该文件无内容!");
    
        }
        else
        {
            printf("
    这篇文章总计%d词
    
    不重复单词共%d个
    ",L->sum,L->num);
            printf("
    输出所有单词的频数
    ");
            while(node != NULL)
            {
                printf(" %s:%d次	",node->word,node->time);
                i++;
                node = node->next;
                if(i%4 == 0)
                    printf("
    ");
            }
        }
    }

    输出词频最高的10个词

    void PrintFirstTenTimes()
    {
        wordNode *node = L->next;
        int i = 1;
        if(L->next == NULL)
        {
            printf("该文件无内容!");
    
        }
        else
        {
            while (node != NULL && i<=10)
            {
                printf("	%s:%d次
    ",node->word,node->time);
                node = node->next;
                i++;
            }
        }
    }

    对词频统计结果进行归并排序

    /*
     * Sorts the list that *headnode points to with a recursive merge sort and
     * stores the head of the sorted list back into *headnode.
     *
     * The list is split in two with FrontBackSplit(), each half is sorted
     * recursively, and the halves are joined again with SortedMerge().
     */
    void mergeSort(wordNode **headnode)
    {
        wordNode *list = *headnode;
        wordNode *front = NULL;
        wordNode *back = NULL;

        // A list with zero or one node is already sorted.
        if(list != NULL && list->next != NULL)
        {
            FrontBackSplit(list,&front,&back);
            mergeSort(&front);
            mergeSort(&back);
            *headnode = SortedMerge(front,back);   // merge the two sorted halves
        }
    }

    将链表从中间拆分为前后两个子链表

    /*
     * Splits the list starting at `source` into two halves.
     *
     * source: head of the list to split (may be NULL).
     * pre:    receives the head of the front half (always `source`).
     * next:   receives the head of the back half, or NULL when the list has
     *         fewer than two nodes.
     *
     * For an odd number of nodes the front half gets the extra node. The list
     * is cut in place by clearing the `next` link of the last front node.
     */
    void FrontBackSplit(wordNode *source,wordNode **pre,wordNode **next)
    {
        wordNode *mid;
        wordNode *runner;

        if(source == NULL || source->next == NULL)
        {
            *pre = source;
            *next = NULL;
            return;
        }

        // `runner` moves two nodes for every one node `mid` moves, so `mid`
        // stops on the last node of the front half.
        mid = source;
        runner = source->next;
        while(runner != NULL && runner->next != NULL)
        {
            mid = mid->next;
            runner = runner->next->next;
        }

        *pre = source;
        *next = mid->next;
        mid->next = NULL;
    }

    按词频从高到低合并两个有序链表

    /*
     * Merges two lists that are each sorted by descending `time` into one
     * list sorted the same way and returns its head.
     *
     * pre, next: heads of the two lists; either may be NULL, in which case the
     *            other list is returned unchanged.
     *
     * On equal counts the node from `pre` comes first. The nodes are relinked
     * in place. The merge is iterative so that the stack depth does not grow
     * with the length of the lists.
     */
    wordNode *SortedMerge(wordNode *pre,wordNode *next)
    {
        wordNode *result = NULL;
        wordNode **tail = &result;   // link where the next chosen node is attached

        while(pre != NULL && next != NULL)
        {
            if(pre->time >= next->time)
            {
                *tail = pre;
                pre = pre->next;
            }
            else
            {
                *tail = next;
                next = next->next;
            }
            tail = &(*tail)->next;
        }

        // At most one list still has nodes; append it as it is.
        *tail = (pre != NULL) ? pre : next;
        return result;
    }

    处理单词

    /*
     * Normalises a token in place: ASCII upper-case letters become lower case
     * and every character that is not an ASCII letter is removed.
     *
     * word: NUL-terminated, writable string. It is empty afterwards when the
     *       token contains no letter at all.
     */
    void wordJob(char word[])
    {
        size_t src;
        size_t dst = 0;

        // One pass: copy each kept character down to the write position, so
        // no character is skipped after a removal.
        for(src = 0; word[src] != '\0'; src++)
        {
            char c = word[src];

            if(c >= 'A' && c <= 'Z')
                c = (char)(c - 'A' + 'a');
            if(c >= 'a' && c <= 'z')
                word[dst++] = c;
        }
        word[dst] = '\0';
    }

    释放所有结点内存

    /*
     * Frees every node of the word list reachable from L->next and leaves
     * L->next set to NULL.
     */
    void release()
    {
        wordNode *current = L->next;

        while(current != NULL)
        {
            wordNode *following = current->next;

            L->next = following;   // unlink before freeing
            free(current);
            current = following;
        }
    }

    ssh://git@git.coding.net:amberpass/cptjgx.git

     

    https://git.coding.net/amberpass/cptjgx.git

  • 相关阅读:
    Exp5 MSF基础应用
    20155239 《网络对抗》Exp4 恶意代码分析
    20155239吕宇轩《网络对抗》Exp3 免杀原理与实践
    20155239 吕宇轩 后门原理与实践
    20155239吕宇轩 Exp1 PC平台逆向破解(5)M
    学号—20155239—吕宇轩《信息安全系统设计基础》课程总结
    20155238 2016-2017-2《Java程序设计》课程总结
    20155238 第十五周课堂实践
    20155238 实验四 Android程序设计
    20155238 《JAVA程序设计》实验三(敏捷开发与XP实践)实验报告
  • 原文地址:https://www.cnblogs.com/landscape/p/5874201.html
Copyright © 2011-2022 走看看