zoukankan      html  css  js  c++  java
  • 词频统计

    作业要求:

    1.读取文件;

    2.记录出现的词汇及出现频率;

    3.输出运行结果。

    编码实现:

    // pin.cpp : Defines the entry point for the console application.
    //

    #include "stdafx.h"
    #include <iostream>
    #include <fstream>
    #include <string>
    #define SOURCE_H

    // One recorded occurrence of a searched word. TLink::calcute allocates one
    // record per match and appends it to the matching Node's chain.
    struct node
    {
      int col;  // copy of calcute's `col` counter at the match (starts at 1; per calcute's comment it is bumped at each line break)
      int row;  // copy of calcute's `row` counter at the match (reset to 0 whenever `col` is bumped)
      node* next;  // following record in the chain; calcute stores NULL here for the newest record
    };
    // One entry of TLink's singly linked list of words to search for.
    struct Node
    {
      char words[20];  // characters of the word, copied in by TLink::Insert
      node* ptr;  // first occurrence record; Insert sets it to NULL, calcute appends to it
      Node* next;  // following word entry, NULL for the last one
      int num;  // number of characters Insert copied into `words`

    };

    class TLink
    {
      public:
      TLink() { head = NULL; }
      ~TLink()
      {
        while( head != NULL )
        {
          Node* temp;
          temp = head;
          head = head -> next;
          delete temp;
        }
      }
      void Insert( char* Item );
      void calcute(char *szFile,int size);
      Node* gethead();
      private:
      Node* head;
    };

    // Returns alp converted to lowercase when it is an ASCII uppercase letter,
    // otherwise returns alp unchanged.
    char A_to_a( char alp );

    // Prints every word of the global list followed by its recorded positions
    // and its occurrence count.
    void showwindow();

    // Reads the input file into the global buffer and echoes it to stdout.
    void show_text();

    // Interactive loop: reads the words to look up, runs the search and prints
    // the results until the user answers 'n' or 'N'.
    void input();
    #include<iostream>
    #include<fstream>
    #include<cstdlib>
    //#include "source.h"
    using namespace std;

    TLink link;  // word list shared by Insert, calcute, showwindow and input
    int i=0;  // number of characters show_text has stored in szFile
    char szFile[2000];  // contents of the input file as loaded by show_text

    // Program entry point: echo the input file, emit a separating blank line,
    // then hand control to the interactive query loop.
    int main()
    {
      show_text();
      std::cout << std::endl;

      input();

      return 0;
    }


    /****************************************************************/

    void TLink::Insert(char *Item)
    {
      int flag = 0;
      Node* temp;
      temp = new Node;
      int i = 0;
      while( Item[i] != '' )
      {
        temp -> words[i] = Item[i];
        ++ i;
      }
      temp -> num = i;
      temp -> words[i] = '';


      Node* ptrr = NULL;
      ptrr = link.gethead();
      while( ptrr != NULL )
      {
        if( ptrr -> num == temp -> num )
        {
          int n;
          for( n = 0; n < i; ++ n )
          if( A_to_a( ptrr -> words[n] ) != A_to_a( Item[n] ) )
            break;
          if( n == i )
          {
            flag = 1;
            break;
          }
        }
        ptrr = ptrr -> next;
      }  

      if( flag != 1 )
      {

        temp -> ptr = NULL;
        temp -> next = NULL;
        Node* Temp = head;
        if( head == NULL )
        {
           head = temp;
        }
        else
        {
          while( Temp -> next != NULL )
          Temp = Temp -> next;
          Temp -> next = temp;
        }
      }
      else
      delete temp;

    }

    /*****************************************************************/

    // Folds an ASCII uppercase letter to lowercase by adding the fixed offset
    // of 32; every other character is handed back untouched.
    char A_to_a( char alp )
    {
      const bool isUpper = !( alp < 'A' || alp > 'Z' );
      return isUpper ? static_cast<char>( alp + 32 ) : alp;
    }

    /*****************************************************************/

    void TLink::calcute(char *szFile, int size)
    {
      //cout << "calcute is called!" << endl;
      int i = 0; //记录已搜索过的字符数-1
      int col = 1;//列标
      int row = 0;//行标
      int count;//记录空格数-1
      Node* ptrr = NULL;
      while( i < size )
      {
        ptrr = link.gethead();
        int j = 0;//对每个单词从开始计数
        while( ( szFile[i] >= 'a' && szFile[i] <= 'z' ) || ( szFile[i] >= 'A' && szFile[i] <= 'Z' ) )
        {
          ++ i;
          ++ j;
        }
        while( ptrr != NULL )
        {
          if( ptrr -> num == j )
          {
            int n;
            for( n = 0; n <= j; ++ n )
            if( A_to_a( ptrr -> words[n] ) != A_to_a( szFile[i - j + n] ) )
              break;
            if( n == j )
            {
              node* temp;
              temp = new node;
              temp -> col = col;
              temp -> row = row;
              temp -> next = NULL;
              node* Temp = ptrr -> ptr;
              if( ptrr -> ptr == NULL )
              {
                ptrr -> ptr = temp;
              }
              else
              {
                while( Temp -> next != NULL )
                Temp = Temp -> next;
                Temp -> next = temp;
              }
            }//插入行数
          }

        ptrr = ptrr -> next;
      }
      if( szFile[i] == ' ' || szFile[i] == ' ' )
      {
        count = -1;
        while( szFile[i] == ' ' )
        {
          ++ i; //设置列数
          ++ row;//行的单词个数加
          ++ count;//单词之间空格-1
        }
        row = row - count;
        if( szFile[i] == ' ' )
        {
          ++ col; //列遇到换行累加
          ++ i;
          row = 0;//单词的行个数清零
        }
      }
      else
        ++ i;
      }
      cout << endl;

    }

    /****************************************************************/


    // Accessor for the first word entry of this list.
    Node* TLink::gethead()
    {
      return this -> head;
    }

    /********************************************************/

    void showwindow()
    {
      Node* curptr = link.gethead();
      while( curptr != NULL )
      {
        int word_num = 0;
        for( int k = 0; curptr -> words[k] != ''; ++ k )
        cout << curptr -> words[k];
        cout << endl;
        if( curptr -> ptr == NULL )
        cout << "没有该词,或输入不正确!" << endl;
        else
        while( curptr -> ptr != NULL )
        {
          cout << "(";
          cout << curptr -> ptr -> col ;
          cout << ",";
          cout << curptr -> ptr -> row ;
          cout << ")";
          cout << ' ';
          curptr -> ptr = curptr -> ptr -> next;
          word_num ++;
        }
        cout << endl;
        cout << "该单词共出现" << word_num << "次!" << endl;
        curptr = curptr -> next;

      }
    }


    /*************************************************************/
    // Loads the input file into the global buffer szFile, stores the number of
    // characters read in the global i, and echoes the text followed by a
    // summary line and a separator to stdout.
    //
    // Terminates the program with exit status 1 when the file cannot be
    // opened. At most sizeof(szFile) - 1 characters are kept; a longer file is
    // truncated and a warning is written to stderr.
    void show_text()
    {
      // Backslashes must be doubled: "\p", "\D" and "\1" are not path
      // separators but (invalid or octal) escape sequences.
      std::ifstream fin( "F:\\pin\\Debug\\1.txt" );
      if( fin.fail() )
      {
        std::cout << "Input file opening failed.\n";
        std::exit(1);
      }

      // Keep one slot free for the terminating '\0'.
      const int capacity = static_cast<int>( sizeof( szFile ) ) - 1;
      char next;

      i = 0;
      // Testing the result of get() stops exactly at end of file or on a read
      // error, unlike a loop that polls eof().
      while( i < capacity && fin.get( next ) )
      {
        szFile[i] = next;
        ++ i;
      }
      szFile[i] = '\0';

      if( i == capacity && fin.get( next ) )
        std::cerr << "Warning: input file truncated to " << capacity << " characters.\n";

      std::cout.write( szFile, i );
      std::cout << "*****Total number :" << i << std::endl;
      std::cout << "***************************************************************************" << std::endl;
    }


    /**********************************************************************/
    // Interactive query loop.
    //
    // Each round reads a comma-separated list of words terminated by '@' from
    // stdin (whitespace is skipped by operator>>), inserts them into the global
    // list `link`, searches the loaded text for them and prints the report.
    // The rounds repeat until the user answers 'n' or 'N'; end of input is
    // treated the same as '@' followed by 'n'.
    //
    // Empty words are skipped. A word that does not fit into the local buffer
    // is reported and skipped rather than written past the end of the buffer.
    void input()
    {
      char Item[40];  // holds the word currently being read
      const int capacity = static_cast<int>( sizeof( Item ) ) - 1;
      char in;   // most recently read character
      char ans;  // answer to the "continue?" prompt
      do{
        // NOTE(review): the explicit destructor call is used as a "clear list"
        // operation and relies on ~TLink leaving the list empty and reusable.
        // A dedicated clear method on TLink would be the clean replacement.
        if( link.gethead() != NULL )
          link.~TLink();
        std::cout << "请输入要统计的单词,单词之间用逗号隔开(输入@键结束,本程序忽略空格):" << std::endl;

        bool finished = false;
        while( !finished )
        {
          int m = 0;
          bool tooLong = false;

          // Collect one word: stop at ',' (more words follow) or at '@' / end
          // of input (this was the last word).
          for( ;; )
          {
            if( !( std::cin >> in ) || in == '@' )
            {
              finished = true;
              break;
            }
            if( in == ',' )
              break;
            if( m < capacity )
              Item[m ++] = in;
            else
              tooLong = true;  // keep consuming, but never write past Item
          }
          Item[m] = '\0';

          if( tooLong )
            std::cout << "单词过长,已忽略!" << std::endl;
          else if( m > 0 )
            link.Insert( Item );
        }

        if( link.gethead() == NULL )
          std::cout << "没有插入任何单词!" << std::endl;
        else
        {
          link.calcute( szFile, i );
          showwindow();
        }
        std::cout << "是否继续?(Y/y or N/n):";
        // Without this check a failed read would leave ans indeterminate and
        // could spin the loop forever.
        if( !( std::cin >> ans ) )
          ans = 'n';
      }while( ( ans != 'n' ) && ( ans != 'N' ) );
    }

    运行结果:

  • 相关阅读:
    [kuangbin带你飞]专题十六 KMP & 扩展KMP & Manacher K
    [kuangbin带你飞]专题十六 KMP & 扩展KMP & Manacher J
    [kuangbin带你飞]专题十六 KMP & 扩展KMP & Manacher I
    pat 1065 A+B and C (64bit)(20 分)(大数, Java)
    pat 1069 The Black Hole of Numbers(20 分)
    pat 1077 Kuchiguse(20 分) (字典树)
    pat 1084 Broken Keyboard(20 分)
    pat 1092 To Buy or Not to Buy(20 分)
    pat 1046 Shortest Distance(20 分) (线段树)
    pat 1042 Shuffling Machine(20 分)
  • 原文地址:https://www.cnblogs.com/lffang/p/5847654.html
Copyright © 2011-2022 走看看