wcount.cc
#include <ctype.h>

#include <algorithm>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <utility>

// So we don't have to type "std::" everywhere...
using namespace std;


string processWord(string word);
void processText(map<string, int>& wordCounts);
void outputWordsByCount(map<string, int>& wordCounts);

// Number of non-empty words read from console-input (updated by processText).
unsigned total = 0;


/*
 * Returns true when c is a letter or a digit.
 *
 * The <ctype.h> classifiers have undefined behavior when handed a negative
 * value other than EOF, which is what a plain char holding a non-ASCII byte
 * becomes on platforms where char is signed.  Converting to unsigned char
 * first keeps the call well-defined.
 */
static bool isWordChar(char c)
{
    return isalnum(static_cast<unsigned char>(c)) != 0;
}


int main()
{
    map<string, int> wordCounts;

    // Process the text on console-input.
    processText(wordCounts);

    cout << "Total words are " << total << endl;
    cout << "unique words are " << wordCounts.size() << endl;

    // Finally, output the word-list and the associated counts.
    outputWordsByCount(wordCounts);

    return 0;
}


/*
 * This helper-function converts a word to all lower-case, and then removes
 * any leading and/or trailing punctuation (every character that is neither
 * a letter nor a digit).  Punctuation inside the word is kept.
 *
 * Parameters:
 *   word   The word to process.  It is passed by-value so that it can be
 *          manipulated within the function without affecting the caller.
 *
 * Return value:
 *   The word after all leading and trailing punctuation have been removed.
 *   Of course, if the word is entirely punctuation (e.g. "--") then the result
 *   is an empty string object (containing "").
 */
string processWord(string word)
{
    const string::size_type length = word.length();

    // Lower-case in place; tolower leaves non-letters unchanged.
    for (string::size_type i = 0; i < length; ++i)
        word[i] = static_cast<char>(tolower(static_cast<unsigned char>(word[i])));

    // Index of the first letter/digit, or length if there is none.
    string::size_type first = 0;
    while (first < length && !isWordChar(word[first]))
        ++first;

    if (first == length)
        return "";

    // Scan back from the end.  word[first] is a letter/digit, so this loop
    // always stops at or after `first` and the index never underflows.
    string::size_type last = length - 1;
    while (!isWordChar(word[last]))
        --last;

    return word.substr(first, last - first + 1);
}


/*
 * Reads whitespace-separated tokens from console-input until end of input,
 * cleans each one with processWord, and tallies the result.
 *
 * Tokens that are empty after cleaning (pure punctuation) are not words:
 * they are neither added to wordCounts nor counted in `total`.
 *
 * Parameters:
 *   wordCounts   Map from cleaned word to the number of times it was seen;
 *                updated in place.
 */
void processText(map<string, int>& wordCounts)
{
    string token;
    while (cin >> token)
    {
        const string word = processWord(token);
        if (word.empty())
            continue;

        ++total;
        ++wordCounts[word];
    }
}


/*
 * This helper-function outputs the generated word-list in descending order
 * of count, one "word count" pair per line.  The function uses an STL
 * associative container to sort the words by how many times they appear.
 * Because multiple words can have the same counts, a multimap is used.
 * Since C++11 a multimap keeps equal keys in insertion order, so words with
 * the same count come out in the (alphabetical) order of wordCounts.
 */
void outputWordsByCount(map<string, int>& wordCounts)
{
    multimap<int, string, greater<int> > sortByCount;

    for (const auto& entry : wordCounts)
        sortByCount.insert(make_pair(entry.second, entry.first));

    // Range-for avoids having to spell the iterator type, which depends on
    // the multimap's comparator.
    for (const auto& entry : sortByCount)
        cout << entry.second << " " << entry.first << '\n';
}
swcount.cc
#include <cctype>
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>


// So we don't have to type "std::" everywhere...
using namespace std;


void initSkipList(set<string>& skipList);
string processWord(string word);
void processText(set<string>& skipList, map<string, int>& wordCounts);
void outputWordsByCount(map<string, int>& wordCounts);

// Number of non-empty words read from console-input, skipped ones included.
int total = 0;
// Number of words that were found in the skip-list and therefore not counted.
int skipped = 0;


/*
 * Returns true when c is a letter or a digit.
 *
 * The <cctype> classifiers have undefined behavior when handed a negative
 * value other than EOF, which is what a plain char holding a non-ASCII byte
 * becomes on platforms where char is signed.  Converting to unsigned char
 * first keeps the call well-defined.
 */
static bool isWordChar(char c)
{
    return isalnum(static_cast<unsigned char>(c)) != 0;
}


int main()
{
    set<string> skipList;
    map<string, int> wordCounts;

    // Initialize the skip-list.
    initSkipList(skipList);

    // Process the text on console-input, using the skip-list.
    processText(skipList, wordCounts);

    cout << "Total words are------------ " << total << endl;
    cout << "unique words are------------ " << wordCounts.size() << endl;
    cout << "skipped words are------------ " << skipped << endl;

    // Finally, output the word-list and the associated counts.
    outputWordsByCount(wordCounts);

    return 0;
}


/*
 * This function initializes the skip-list of words.
 *
 * skipList = the set of words to skip; the pre-specified words are inserted
 *            into it (anything already in the set is kept).
 */
void initSkipList(set<string>& skipList)
{
    // Use a pre-specified skip-list.  All entries are lower-case, matching
    // the lower-cased words that processWord produces.
    static const char* const swords[] = {
        "a", "all", "am", "an", "and", "are", "as", "at",
        "be", "been", "but", "by",
        "did", "do",
        "for", "from",
        "had", "has", "have", "he", "her", "hers", "him", "his",
        "i", "if", "in", "into", "is", "it", "its",
        "me", "my",
        "not",
        "of", "on", "or",
        "so",
        "that", "the", "their", "them", "they", "this", "to",
        "up", "us",
        "was", "we", "what", "who", "why", "will", "with",
        "you", "your"
    };

    for (const char* sword : swords)
        skipList.insert(sword);
}


/*
 * This helper-function converts a word to all lower-case, and then removes
 * any leading and/or trailing punctuation (every character that is neither
 * a letter nor a digit).  Punctuation inside the word is kept.
 *
 * Parameters:
 *   word   The word to process.  It is passed by-value so that it can be
 *          manipulated within the function without affecting the caller.
 *
 * Return value:
 *   The word after all leading and trailing punctuation have been removed.
 *   Of course, if the word is entirely punctuation (e.g. "--") then the result
 *   is an empty string object (containing "").
 */
string processWord(string word)
{
    const string::size_type length = word.length();

    // Lower-case in place; tolower leaves non-letters unchanged.
    for (string::size_type i = 0; i < length; ++i)
        word[i] = static_cast<char>(tolower(static_cast<unsigned char>(word[i])));

    // Index of the first letter/digit, or length if there is none.
    string::size_type first = 0;
    while (first < length && !isWordChar(word[first]))
        ++first;

    if (first == length)
        return "";

    // Scan back from the end.  word[first] is a letter/digit, so this loop
    // always stops at or after `first` and the index never underflows.
    string::size_type last = length - 1;
    while (!isWordChar(word[last]))
        --last;

    return word.substr(first, last - first + 1);
}


/*
 * Reads whitespace-separated tokens from console-input until end of input,
 * cleans each one with processWord, and tallies the result.
 *
 * Tokens that are empty after cleaning (pure punctuation) are ignored
 * entirely.  Every other word adds one to `total`; it then either adds one
 * to `skipped` (when it is in the skip-list) or is counted in wordCounts.
 *
 * Parameters:
 *   skipList     Words that must not be entered into wordCounts.
 *   wordCounts   Map from cleaned word to the number of times it was seen;
 *                updated in place.
 */
void processText(set<string>& skipList, map<string, int>& wordCounts)
{
    string token;
    while (cin >> token)
    {
        const string word = processWord(token);
        if (word.empty())
            continue;

        ++total;

        if (skipList.count(word) != 0)
            ++skipped;
        else
            ++wordCounts[word];
    }
}


/*
 * This helper-function outputs the generated word-list in descending order
 * of count, one "word count" pair per line.  The function uses an STL
 * associative container to sort the words by how many times they appear.
 * Because multiple words can have the same counts, a multimap is used.
 * Since C++11 a multimap keeps equal keys in insertion order, so words with
 * the same count come out in the (alphabetical) order of wordCounts.
 */
void outputWordsByCount(map<string, int>& wordCounts)
{
    multimap<int, string, greater<int> > sortByCount;

    for (const auto& entry : wordCounts)
        sortByCount.insert(make_pair(entry.second, entry.first));

    // Range-for avoids having to spell the iterator type, which depends on
    // the multimap's comparator.
    for (const auto& entry : sortByCount)
        cout << entry.second << " " << entry.first << '\n';
}