zoukankan      html  css  js  c++  java
  • std::map初体验

    /*

    This exercise is in the field of bibliometric (words and text) analysis. You will be provided with a text

    file, which consists of several paragraphs of English text. Your task is to write a program which will

    analyse the text, and output a range of statistics about the text.

    Your program should do the following:

    Print a list of all the words that occur, in alphabetical order. You must not print the same word twice.

    Print out the 20 most common words in the text, together with the number of instances of each word,

    with the most common word at the top.

    */

    //////////////////////////////////////////////////////////////////////////

    //     ReadTxt.cpp : .

    //     by Mythma

    //     Lists the words in the text in alphabetical order, without duplicates,

    //     and prints the 20 most frequent ones.

    //   compiler: g++

    //     VC6 cannot compile this program.

    //////////////////////////////////////////////////////////////////////////

    #include <algorithm>
    #include <cctype>
    #include <fstream>   // standard header; replaces the pre-standard <fstream.h>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    using namespace std;

     

    // Horizontal rule printed between report sections. It starts with a real
    // newline escape so the rule always begins on a fresh line.
    const std::string STR_INTERVAL = "\n-----------------------------------------------------------";

    // Word -> number of occurrences. std::map keeps its keys ordered, so
    // iterating it yields the words alphabetically and without duplicates.
    std::map<std::string, int>     gMap;

    // Not referenced by any code visible in this file; retained in case code
    // elsewhere relies on it.
    std::vector<std::string>              gMMStr;

     

    bool AddWordToList(string strWord)

    {

           if(strWord.empty())

                  return false;

     

           string strTemp = strWord;

          

     

           //upper to lower

           for(int i = 0; i < strTemp.size(); i++)

           {

                  strTemp[i] = tolower(strTemp[i]);

                 

           }    

          

           //remove head punctuation and number

           while(strTemp[0] < 'a' || strTemp[0] > 'z')

           {

                  if(strTemp.size() > 1)

                         strTemp = strTemp.substr(1, strTemp.size() - 1);

                  else

                         return false;

           }

     

           //remove tail punctuation and number

           while(strTemp[strTemp.size() - 1] < 'a' || strTemp[strTemp.size() - 1] > 'z')

           {

                  if(strTemp.size() > 1)

                         strTemp = strTemp.substr(0, strTemp.size() - 2);

                  else

                         return false;

           }

          

          

           map<string ,int>::iterator it = gMap.find(strTemp);

           //add to map if exist

           if(it == gMap.end())

                  gMap.insert(map<string, int>::value_type(strTemp, 1));

           //increase if not exist

           else

                  ++ (*it).second;

     

           return true;

    }

     

    void OutPutWordsList()

    {

          

          

           cout << STR_INTERVAL

                   << "/n--文件中单词的总数为: "

                   << gMap.size()

                   << " 按字母排列如下"

                   << STR_INTERVAL << endl;

     

           int n = 0;

           for(map<string, int>::iterator it = gMap.begin(); it != gMap.end(); ++it)

           {

                  ++n;

                  cout.width(15);

                  cout.flags(ios::left);

                  cout << it->first.c_str();

                  if( 5 == n)

                  {

                         cout << endl;

                         n = 0;

                  }

           }

     

           cout << STR_INTERVAL << endl;

    }

     

    // Ordering predicate for (word, count) pairs used when ranking words.
    //
    // Returns true when p1 must come before p2: a higher count comes first,
    // and words with equal counts are ordered alphabetically. The tie-break
    // makes the ranking deterministic; ordering by count alone leaves the
    // relative order of equally frequent words up to the sort algorithm.
    bool Cmp(const std::pair<std::string, int> &p1, const std::pair<std::string, int> &p2)
    {
        if (p1.second != p2.second)
            return p1.second > p2.second;

        return p1.first < p2.first;
    }

     

    void OutPutCount()

    {    

          

           vector< pair<string,int> > wd(gMap.begin(), gMap.end());

           sort(wd.begin(), wd.end(), Cmp);

          

           cout << STR_INTERVAL

                << "/n--出现频率最多的几个单词是:"

                   << STR_INTERVAL;

     

           int i = 0;

          

           for(vector< pair<string, int> >::iterator it=wd.begin();

                  it != wd.end() && i < 20; ++it, ++i)

           {

                  cout.width(15);

                  cout.flags(ios::left);

                  cout << endl

                          << it->first.c_str()

                          << " ----       "

                          << it->second;

           }

           cout << STR_INTERVAL;

    }

     

     

    int main(int argc, char* argv[])

    {

           char*      strPath;

           if(argc == 2)

                  strPath = argv[1];

           else

                  strPath = "c://words.txt";

           //read file

           ifstream  inFile(strPath);

           while( !inFile.eof())

           {

                  string strWord;

                  inFile >> strWord;

                  AddWordToList(strWord);

           }

     

           OutPutWordsList();

           OutPutCount();

     

           return 0;

    }

     

     

  • 相关阅读:
    团队作业7——第二次项目冲刺(Beta版本)
    团队作业7——第二次项目冲刺(Beta版本12.09——12.10)
    团队作业7——第二次项目冲刺(Beta版本12.07——12.08)
    团队作业7——Beta版本冲刺计划及安排
    团队作业7——第二次项目冲刺(Beta版本12.04——12.07)
    团队作业6——展示博客(Alpha版本)
    团队作业5——测试与发布(Alpha版本)
    20192317邓子彦 2020-2021-1《数据结构与面向对象程序设计》课程总结
    20192317邓子彦 实验九 《数据结构与面向对象程序设计》实验报告
    20192317邓子彦 实验八 《数据结构与面向对象程序设计》实验报告
  • 原文地址:https://www.cnblogs.com/aiwz/p/6333285.html
Copyright © 2011-2022 走看看