zoukankan      html  css  js  c++  java
  • 统计一个文件英文单词的个数

    /*
    	统计一个文件内英文单词的个数,并按 word = count的格式顺序输出
    	统计输出总的英文单词个数
    */
    
    #include <algorithm>
    #include <cctype>
    #include <fstream>
    #include <iostream>
    #include <string>
    #include <vector>
    using namespace std;
    // Returns `token` with every leading and trailing punctuation character
    // removed. The result is empty when the token is empty or consists of
    // punctuation only. Punctuation inside the word (e.g. "self-taught") is kept.
    static string stripPunctuation(const string& token){
    	size_t first=0;
    	// The cast matters: passing a negative char value to ispunct is undefined behavior.
    	while(first!=token.size() && ispunct(static_cast<unsigned char>(token[first]))){
    		++first;
    	}
    	// `last` is one past the final kept character, so no signed index is needed.
    	size_t last=token.size();
    	while(last>first && ispunct(static_cast<unsigned char>(token[last-1]))){
    		--last;
    	}
    	return token.substr(first,last-first);
    }

    // Reads "transform.txt", splits every line on the space character, strips
    // surrounding punctuation from each token and counts how often each
    // resulting word occurs. Writes the total word count followed by one
    // "word = count" line per distinct word, in order of first appearance,
    // to "result.txt".
    // Returns 0 on success and 1 when either file cannot be opened.
    int main(){
    	// Open the input file and fail loudly instead of silently reporting zero words.
    	ifstream input("transform.txt");
    	if(!input){
    		cerr<<"cannot open transform.txt"<<endl;
    		return 1;
    	}
    	// Open the output file.
    	ofstream output("result.txt");
    	if(!output){
    		cerr<<"cannot open result.txt"<<endl;
    		return 1;
    	}
    	// Two parallel vectors act as an insertion-ordered map:
    	// countVec[i] is the number of occurrences of wordVec[i].
    	// Both grow together, so there is no fixed limit on distinct words.
    	vector<string> wordVec;
    	vector<int> countVec;
    	// Total number of valid words seen.
    	int wordCount=0;
    	string line;
    	// Process the file one line at a time.
    	while(getline(input,line))
    	{
    		size_t pos=0;
    		while(pos!=line.size()){
    			// Skip separators between tokens.
    			if(line[pos]==' '){
    				++pos;
    				continue;
    			}
    			// Collect the run of non-space characters starting at pos.
    			const size_t start=pos;
    			while(pos!=line.size() && line[pos]!=' '){
    				++pos;
    			}
    			const string pureWord=stripPunctuation(line.substr(start,pos-start));
    			// Tokens made only of punctuation are not words.
    			if(pureWord.empty())
    				continue;
    			++wordCount;
    			// Look up the cleaned word (not the raw token), so "music," and
    			// "music" are counted as the same entry.
    			vector<string>::iterator iter=find(wordVec.begin(),wordVec.end(),pureWord);
    			if(iter==wordVec.end()){
    				wordVec.push_back(pureWord);
    				countVec.push_back(1);
    			}else{
    				++countVec[iter-wordVec.begin()];
    			}
    		}
    	}
    	// Close the input file.
    	input.close();
    	// Write the total followed by the per-word counts.
    	output<<"单词总数 :"<<wordCount<<'\n';
    	for(size_t i=0;i!=wordVec.size();++i){
    		output<<wordVec[i]<<" = "<<countVec[i]<<'\n';
    	}
    	// Close the output file (also flushes the buffered text).
    	output.close();
    	return 0;
    }
    

    输入文件transform.txt内容:

    My father was a self-taught mandolin player. He was one of the best string instrument players in our town. 
    He could't read music, but if he heard a tune a few times, he could play it. When he was younger, he was a member of a small country music band. 
    They would play at local dances and on a few occasions would play for the local radio station. 
    He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer.

    输出文件result.txt内容:

    单词总数 :91
    My = 1
    father = 1
    was = 4
    a = 8
    self-taught = 1
    mandolin = 1
    player = 1
    He = 3
    one = 1
    of = 2
    the = 2
    best = 1
    string = 1
    instrument = 1
    players = 1
    in = 2
    our = 1
    town = 1
    could't = 1
    read = 1
    music = 2
    but = 1
    if = 1
    he = 5
    heard = 1
    tune = 1
    few = 2
    times = 1
    could = 1
    play = 3
    it = 1
    When = 1
    younger = 1
    member = 1
    small = 1
    country = 1
    band = 2
    They = 1
    would = 2
    at = 1
    local = 2
    dances = 1
    and = 2
    on = 1
    occasions = 1
    for = 1
    radio = 1
    station = 1
    often = 1
    told = 1
    us = 1
    how = 1
    had = 1
    auditioned = 1
    earned = 1
    position = 1
    that = 1
    featured = 1
    Patsy = 1
    Cline = 1
    as = 1
    their = 1
    lead = 1
    singer = 1
    


  • 相关阅读:
    css中span元素的width属性无效果原因及多种解决方案
    CentOS、Ubuntu、Debian三个linux比较异同
    linux系统中/etc/syslog.conf文件解读
    /proc/interrupts 和 /proc/stat 查看中断的情况
    网卡优化RPS/RFS
    关于Linux网卡调优之:RPS (Receive Packet Steering)
    rsync详解之exclude排除文件
    Linux rsync 同步实践
    CentOS 6.3下rsync服务器的安装与配置
    解决linux下/etc/rc.local开机器不执行的原因
  • 原文地址:https://www.cnblogs.com/yjbjingcha/p/7039922.html
Copyright © 2011-2022 走看看