问题描述:
统计输入文本中每个单词的出现次数,并选出出现频率最高的 10 个单词。
首先用 C++ 实现,着重注意 STL 中 map 与 vector 的排序用法。这里与《编程珠玑》的代码不同:《编程珠玑》上的代码在输入之后得到的结果是按照单词排序的,而不是按照次数排序(原因是 std::map 按键即单词有序,直接遍历 map 输出自然是按单词排序)。因此这里做了修改:先把统计结果拷贝到 vector 中,再按出现次数从大到小排序。
C++实现代码:
#include <iostream>
#include <stdlib.h>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
using namespace std;

// A word together with the number of times it occurred.
typedef pair<string, int> PAIR;

// How many of the most frequent words main() reports.
const vector<PAIR>::size_type TOP_N = 10;

// Ordering predicate for (word, count) pairs: higher counts come first.
// Words with equal counts are ordered alphabetically so that the output is
// deterministic (std::sort / std::partial_sort are not stable).
bool cmp(const PAIR& x, const PAIR& y)
{
    if (x.second != y.second) {
        return x.second > y.second;
    }
    return x.first < y.first;
}

// Reads whitespace-separated words from standard input, counts them, and
// prints the TOP_N most frequent ones as "<word> <count>", one per line,
// most frequent first.
int main()
{
    map<string, int> wordcount;
    string t;

    // Reading stops at end of input, or early when the token "0" is seen
    // (interactive sentinel).
    while ((cin >> t) && t != "0") {
        wordcount[t]++;
    }

    // The map is ordered by word; copy the entries into a vector so they
    // can be reordered by count.
    vector<PAIR> vec(wordcount.begin(), wordcount.end());

    // Only the leading TOP_N entries need to be in order, so a partial sort
    // is enough; `shown` also covers inputs with fewer than TOP_N words.
    const vector<PAIR>::size_type shown = min(TOP_N, vec.size());
    partial_sort(vec.begin(), vec.begin() + shown, vec.end(), cmp);

    for (vector<PAIR>::size_type i = 0; i < shown; ++i) {
        cout << vec[i].first << " " << vec[i].second << '\n';
    }

    // No system("pause"): it only exists on Windows and spawns a shell.
    return 0;
}
如果改用 C 语言实现,则较为麻烦一些,需要自己建立节点和散列表:
下面按照《编程珠玑》的做法,用 C 语言实现。不过《编程珠玑》似乎仍然没有给出排序的方式,因此下面的程序只是按散列表中的存储顺序输出每个单词及其出现次数,并没有按次数排序。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NHASH 29989 /* number of buckets in the hash table */
#define MULT 31     /* multiplier used by the string hash */

/* One entry in a bucket's singly linked chain. */
typedef struct node {
    char *word;         /* heap-allocated copy of the word */
    int times;          /* number of occurrences seen so far */
    struct node *next;  /* next entry in the same bucket, or NULL */
} node;
typedef struct node *nodeptr;

/* Bucket heads. File-scope storage, so every entry starts out as NULL. */
nodeptr bin[NHASH];

/*
 * Maps the NUL-terminated string p to a bucket index in [0, NHASH).
 *
 * The running value is reduced modulo NHASH on every step, so the result
 * can never equal NHASH (which would index one past the end of bin) and
 * the multiplication cannot overflow. Characters are read as unsigned char
 * so bytes above 0x7F hash the same on every platform.
 */
int hash(const char *p)
{
    unsigned int h = 0;
    for ( ; *p != 0; p++) {
        h = (h * MULT + (unsigned char)*p) % NHASH;
    }
    return (int)h;
}

/*
 * Records one occurrence of the word s: increments its counter if the word
 * is already in the table, otherwise inserts a private copy of it with a
 * count of 1 at the head of its bucket. Terminates the program with an
 * error message if memory cannot be allocated.
 */
void incWord(const char *s)
{
    int h = hash(s);
    node *p;
    node *ptr;

    for (p = bin[h]; p != NULL; p = p->next) {
        if (strcmp(s, p->word) == 0) {
            p->times++;
            return;
        }
    }

    ptr = (node *)malloc(sizeof(node));
    if (ptr == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    ptr->word = (char *)malloc(strlen(s) + 1);
    if (ptr->word == NULL) {
        free(ptr);
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy(ptr->word, s);
    ptr->times = 1;
    ptr->next = bin[h];
    bin[h] = ptr;
}

/* Releases every node and every word copy, leaving all buckets empty. */
static void freeTable(void)
{
    int i;
    for (i = 0; i < NHASH; i++) {
        node *p = bin[i];
        while (p != NULL) {
            node *next = p->next; /* read the link before the node is freed */
            free(p->word);
            free(p);
            p = next;
        }
        bin[i] = NULL;
    }
}

/*
 * Reads whitespace-separated words from standard input until end of input,
 * then prints "<word> <count>" for every distinct word. The output follows
 * the table's bucket order; it is not sorted by count.
 */
int main(void)
{
    char buf[50];
    int i;
    node *p;

    /* The width 49 leaves room for the terminating NUL in buf; a longer
       run of characters is read in pieces of at most 49 characters. */
    while (scanf("%49s", buf) == 1) {
        incWord(buf);
    }

    for (i = 0; i < NHASH; i++) {
        for (p = bin[i]; p != NULL; p = p->next) {
            printf("%s %d\n", p->word, p->times);
        }
    }

    freeTable();
    return 0;
}