zoukankan      html  css  js  c++  java
  • 统计一个文件英文单词的个数

    /*
    	统计一个文件内英文单词的个数,并按 word = count 的格式顺序输出
    	统计输出总的英文单词个数
    */
    
    #include <algorithm>
    #include <cctype>
    #include <fstream>
    #include <iostream>
    #include <string>
    #include <vector>
    using namespace std;
    // Returns `token` with every leading and trailing punctuation character
    // removed. Punctuation inside the token (e.g. "self-taught") is kept.
    // The result is empty when the token consists solely of punctuation.
    static string stripPunctuation(const string& token){
    	// The <cctype> classifiers require a value representable as unsigned char,
    	// so every char is cast before being passed in.
    	size_t first=0;
    	while(first<token.size() && ispunct(static_cast<unsigned char>(token[first])))
    		++first;
    	size_t last=token.size();
    	while(last>first && ispunct(static_cast<unsigned char>(token[last-1])))
    		--last;
    	return token.substr(first,last-first);
    }

    // Counts the English words in "transform.txt" and writes the report to
    // "result.txt": first the total number of words, then one "word = count"
    // line per distinct word, in order of first appearance.
    //
    // A word is a whitespace-delimited token with its leading and trailing
    // punctuation stripped; tokens that are pure punctuation are ignored.
    // Matching is case-sensitive ("He" and "he" are counted separately).
    //
    // Returns 0 on success, 1 if either file cannot be opened.
    int main(){
    	// Open the input file.
    	ifstream input("transform.txt");
    	if(!input){
    		cerr<<"cannot open input file transform.txt"<<endl;
    		return 1;
    	}
    	// Open the output file.
    	ofstream output("result.txt");
    	if(!output){
    		cerr<<"cannot open output file result.txt"<<endl;
    		return 1;
    	}
    	// Two parallel vectors play the role of a map while preserving the order
    	// in which words first appear (a std::map would order by key instead).
    	// countVec[i] is the number of occurrences of wordVec[i]; both grow
    	// together, so there is no fixed limit on the number of distinct words.
    	vector<string> wordVec;
    	vector<int> countVec;
    	// Total number of words seen.
    	int wordCount=0;
    	// Process the file one line at a time.
    	string line;
    	while(getline(input,line))
    	{
    		size_t pos=0;
    		while(pos<line.size()){
    			// Skip the whitespace separating tokens (spaces, tabs, and a
    			// trailing '\r' left over from CRLF line endings).
    			if(isspace(static_cast<unsigned char>(line[pos]))){
    				++pos;
    				continue;
    			}
    			// Collect one whitespace-delimited token.
    			const size_t start=pos;
    			while(pos<line.size() && !isspace(static_cast<unsigned char>(line[pos])))
    				++pos;
    			const string pureWord=stripPunctuation(line.substr(start,pos-start));
    			// Tokens made only of punctuation are not words.
    			if(pureWord.empty())
    				continue;
    			++wordCount;
    			// Look up the stripped word (not the raw token), so that e.g.
    			// "band" and "band." are counted as the same word.
    			vector<string>::iterator iter=find(wordVec.begin(),wordVec.end(),pureWord);
    			if(iter==wordVec.end()){
    				wordVec.push_back(pureWord);
    				countVec.push_back(1);
    			}else
    				++countVec[iter-wordVec.begin()];
    		}
    	}
    	// Close the input file.
    	input.close();
    	// Write the total followed by the per-word statistics.
    	output<<"单词总数 :"<<wordCount<<endl;
    	for(size_t i=0;i<wordVec.size();++i)
    		output<<wordVec[i]<<" = "<<countVec[i]<<endl;
    	// Close the output file.
    	output.close();
    	return 0;
    }
    

    输入文件transform.txt内容:

    My father was a self-taught mandolin player. He was one of the best string instrument players in our town. 
    He could't read music, but if he heard a tune a few times, he could play it. When he was younger, he was a member of a small country music band. 
    They would play at local dances and on a few occasions would play for the local radio station. 
    He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer.

    输出文件result.txt内容:

    单词总数 :91
    My = 1
    father = 1
    was = 4
    a = 8
    self-taught = 1
    mandolin = 1
    player = 1
    He = 3
    one = 1
    of = 2
    the = 2
    best = 1
    string = 1
    instrument = 1
    players = 1
    in = 2
    our = 1
    town = 1
    could't = 1
    read = 1
    music = 2
    but = 1
    if = 1
    he = 5
    heard = 1
    tune = 1
    few = 2
    times = 1
    could = 1
    play = 3
    it = 1
    When = 1
    younger = 1
    member = 1
    small = 1
    country = 1
    band = 2
    They = 1
    would = 2
    at = 1
    local = 2
    dances = 1
    and = 2
    on = 1
    occasions = 1
    for = 1
    radio = 1
    station = 1
    often = 1
    told = 1
    us = 1
    how = 1
    had = 1
    auditioned = 1
    earned = 1
    position = 1
    that = 1
    featured = 1
    Patsy = 1
    Cline = 1
    as = 1
    their = 1
    lead = 1
    singer = 1
    


  • 相关阅读:
    HDU 3401 Trade
    POJ 1151 Atlantis
    HDU 3415 Max Sum of MaxKsubsequence
    HDU 4234 Moving Points
    HDU 4258 Covered Walkway
    HDU 4391 Paint The Wall
    HDU 1199 Color the Ball
    HDU 4374 One hundred layer
    HDU 3507 Print Article
    GCC特性之__init修饰解析 kasalyn的专栏 博客频道 CSDN.NET
  • 原文地址:https://www.cnblogs.com/yjbjingcha/p/7039922.html
Copyright © 2011-2022 走看看