一个数字由以下几种字符组成:
正负号(+、-)、小数点(.)以及数字(0-9)
比如
3
-3
3.13
-34.2234
但是正负号和小数点都不会出现多次(正负号只能出现在开头,小数点最多出现一次)
那么识别流程用状态图来表示,则如下(分别对应整数和浮点数):
整数
浮点数
下面是一段读取C++源文件、并将其内容解析成一个个独立TOKEN的代码
代码1
#include <cctype>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;

#define SOURCE_FILE_NAME "sourcefile.cpp"
#define DEST_FILE_NAME "destfile.cpp"

// The input and output file streams.
ifstream fin;
ofstream fout;

// Returns `ch` to `fin` unless it is the EOF marker (there is nothing to put
// back once the stream is exhausted).
static void UngetChar(int ch) {
    if (ch != EOF) {
        fin.putback(static_cast<char>(ch));
    }
}

// Thin wrappers so the classifiers can be passed as plain function pointers.
static int IsSpaceChar(int ch) { return isspace(ch); }
static int IsAlphaChar(int ch) { return isalpha(ch); }

// Appends `ch` and every following character accepted by `accept` to `token`,
// then puts the first rejected character back on the stream.
static void ReadRun(string& token, int ch, int (*accept)(int)) {
    while (accept(ch)) {
        token += static_cast<char>(ch);
        ch = fin.get();
    }
    UngetChar(ch);
}

// Appends the digits starting at `ch` to `token`, allowing at most one
// decimal point, then puts the first rejected character back on the stream.
static void ReadNumber(string& token, int ch) {
    bool seenDot = false;
    for (;;) {
        if (ch == '.') {
            if (seenDot) {
                break;  // a second '.' does not belong to this number
            }
            seenDot = true;
        } else if (!isdigit(ch)) {
            break;
        }
        token += static_cast<char>(ch);
        ch = fin.get();
    }
    UngetChar(ch);
}

// Reads the next token from `fin` and APPENDS it to `token` (the caller is
// expected to clear `token` between calls).
//
// Token kinds, checked in this order:
//   - a run of whitespace characters
//   - a run of letters
//   - a number: digits with at most one '.', optionally starting with '.'
//   - '+' or '-' followed by an optional number
//   - '<' or '>', doubled into "<<" / ">>" when the same character repeats
//   - any other single character
//
// Returns false when the stream has no more characters, true otherwise.
bool GetToken(string& token) {
    // Keep the result as int: a char cannot distinguish EOF from a real byte,
    // and the <cctype> classifiers need a value in the unsigned-char range.
    const int ch = fin.get();
    if (ch == EOF) {
        return false;
    }

    // Accept a run of consecutive whitespace characters.
    if (isspace(ch)) {
        ReadRun(token, ch, IsSpaceChar);
        return true;
    }

    if (isalpha(ch)) {
        ReadRun(token, ch, IsAlphaChar);
        return true;
    }

    // A leading '.' starts a number only when a digit follows it (".5").
    if (isdigit(ch) || (ch == '.' && isdigit(fin.peek()))) {
        ReadNumber(token, ch);
        return true;
    }

    if (ch == '-' || ch == '+') {
        token += static_cast<char>(ch);
        ReadNumber(token, fin.get());
        return true;
    }

    if (ch == '<' || ch == '>') {
        token += static_cast<char>(ch);
        const int next = fin.get();
        if (next == ch) {
            token += static_cast<char>(next);
        } else {
            UngetChar(next);
        }
        return true;
    }

    token += static_cast<char>(ch);
    return true;
}

// Opens the source and destination files, then echoes every token of the
// source file to standard output. Returns -1 if either file cannot be opened.
int main(int argc, char *argv[])
{
    fin.open(SOURCE_FILE_NAME);
    if (!fin) {
        cout << "Open source file error.Exit!!" << endl;
        return -1;
    }

    fout.open(DEST_FILE_NAME);
    if (!fout) {
        cout << "Open destinaton file error.Exit!!" << endl;
        return -1;
    }

    try {
        string token;
        while (GetToken(token)) {
            cout << token;  // tokens are written back to back, no separator
            token.clear();
        }
    } catch (const exception& e) {
        cerr << e.what() << endl;
    }

    fin.close();
    fout.close();
    cout << "Hello World!" << endl;
    return 0;
}
测试文件
293048 24 895523 3.14159 235 253 52435 345 459245 22 .5 .35 2.0 1 0.0 1.0 0 02345 63246 0.2346 34.0
代码2
#include <cctype>
#include <exception>
#include <fstream>
#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>
using namespace std;

#define IN_FILE_NAME "SourceFile.cpp"
#define OUT_FILE_NAME "DestinationFile.cpp"

// States of the number recognizer.
enum STATE {
    state_init = 0,  // between tokens
    state_int,       // inside the integer part
    state_float,     // a '.' has been consumed
    state_error      // an unexpected character was met
};

// Reads a file line by line and prints every integer / floating-point token
// it recognizes to standard output, one token per line.
class FileParse {
public:
    // Opens both files; whether that succeeded is checked later in run().
    FileParse(const string& infileName, const string& outfileName) {
        fin_.open(infileName);
        fout_.open(outfileName);
    }

    ~FileParse() {
        if (fin_.is_open()) fin_.close();
        if (fout_.is_open()) fout_.close();
    }

    // Scans linestr_ and prints each whitespace-terminated number.
    // A token is only emitted when whitespace follows it, so run() appends a
    // trailing blank to every line. Once an unexpected character is met the
    // recognizer stays in state_error and the rest of the line is ignored.
    // Returns false when linestr_ is empty, true otherwise.
    bool ParseToTokens() {
        if (linestr_.empty()) return false;

        STATE state = state_init;
        string token;
        for (size_t i = 0; i < linestr_.size(); ++i) {
            const char currentChar = linestr_[i];
            // <cctype> classifiers require a value representable as unsigned char.
            const unsigned char uc = static_cast<unsigned char>(currentChar);
            const bool space = isspace(uc) != 0;
            const bool digit = isdigit(uc) != 0;
            bool isFinish = false;

            switch (state) {
            case state_init:
                if (space) {
                    // skip separators between tokens
                } else if (digit) {
                    state = state_int;
                    token += currentChar;
                } else if (currentChar == '.') {
                    state = state_float;
                    token += currentChar;
                } else {
                    state = state_error;
                }
                break;

            case state_int:
                if (digit) {
                    token += currentChar;
                } else if (currentChar == '.') {
                    state = state_float;
                    token += currentChar;
                } else if (space) {
                    isFinish = true;
                } else {
                    state = state_error;
                }
                break;

            case state_float:
                if (digit) {
                    // Stay in state_float so a second '.' is rejected.
                    token += currentChar;
                } else if (space) {
                    isFinish = true;
                } else {
                    state = state_error;
                }
                break;

            case state_error:
                break;
            }

            if (isFinish) {
                cout << token << endl;
                token.clear();
                state = state_init;
            }
        }
        return true;
    }

    // Parses the whole input file. Returns true on success; prints the error
    // and returns false if a file is not open or an exception is thrown.
    bool run() {
        try {
            if (!fin_.is_open() || !fout_.is_open()) {
                throw runtime_error("open file is null");
            }
            // Loop on the read itself so a failed read ends the loop.
            while (getline(fin_, linestr_)) {
                linestr_ += ' ';  // sentinel that flushes the last token
                ParseToTokens();
            }
        } catch (const exception& e) {
            cerr << e.what() << endl;
            return false;
        }
        return true;
    }

private:
    string linestr_;           // line currently being parsed
    queue<string> vecToken_;   // not used by the code in this class
    ifstream fin_;
    ofstream fout_;            // opened but never written to here
};

int main(int argc, char *argv[])
{
    FileParse a(IN_FILE_NAME, OUT_FILE_NAME);
    a.run();
    return 0;
}
显示结果
代码3 新增字符串的识别解析
#include <cctype>
#include <exception>
#include <fstream>
#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>
using namespace std;

#define IN_FILE_NAME "SourceFile.cpp"
#define OUT_FILE_NAME "DestinationFile.cpp"

// States of the token recognizer.
enum STATE {
    state_init = 0,  // between tokens
    state_int,       // inside the integer part of a number
    state_float,     // a '.' has been consumed
    state_word,      // inside an identifier
    state_error      // an unexpected character was met
};

// Reads a file line by line and prints every number or identifier token it
// recognizes to standard output, one token per line.
class FileParse {
public:
    // Opens both files; whether that succeeded is checked later in run().
    FileParse(const string& infileName, const string& outfileName) {
        fin_.open(infileName);
        fout_.open(outfileName);
    }

    ~FileParse() {
        if (fin_.is_open()) fin_.close();
        if (fout_.is_open()) fout_.close();
    }

    // Scans linestr_ and prints each whitespace-terminated token.
    // A token is only emitted when whitespace follows it, so run() appends a
    // trailing blank to every line. Once an unexpected character is met the
    // recognizer stays in state_error and the rest of the line is ignored.
    // Returns false when linestr_ is empty, true otherwise.
    bool ParseToTokens() {
        if (linestr_.empty()) return false;

        STATE state = state_init;
        string token;
        for (size_t i = 0; i < linestr_.size(); ++i) {
            const char currentChar = linestr_[i];
            // <cctype> classifiers require a value representable as unsigned char.
            const unsigned char uc = static_cast<unsigned char>(currentChar);
            const bool space = isspace(uc) != 0;
            const bool digit = isdigit(uc) != 0;
            const bool wordStart = isalpha(uc) != 0 || currentChar == '_';
            bool isFinish = false;

            switch (state) {
            case state_init:
                if (space) {
                    // skip separators between tokens
                } else if (digit) {
                    state = state_int;
                    token += currentChar;
                } else if (currentChar == '.') {
                    state = state_float;
                    token += currentChar;
                } else if (wordStart) {
                    state = state_word;
                    token += currentChar;
                } else {
                    state = state_error;
                }
                break;

            case state_word:
                if (wordStart || digit) {
                    token += currentChar;
                } else if (space) {
                    isFinish = true;
                } else {
                    state = state_error;
                }
                break;

            case state_int:
                if (digit) {
                    token += currentChar;
                } else if (currentChar == '.') {
                    state = state_float;
                    token += currentChar;
                } else if (space) {
                    isFinish = true;
                } else {
                    state = state_error;
                }
                break;

            case state_float:
                if (digit) {
                    // Stay in state_float so a second '.' is rejected.
                    token += currentChar;
                } else if (space) {
                    isFinish = true;
                } else {
                    state = state_error;
                }
                break;

            case state_error:
                break;
            }

            if (isFinish) {
                cout << token << endl;
                token.clear();
                state = state_init;
            }
        }
        return true;
    }

    // Parses the whole input file. Returns true on success; prints the error
    // and returns false if a file is not open or an exception is thrown.
    bool run() {
        try {
            if (!fin_.is_open() || !fout_.is_open()) {
                throw runtime_error("open file is null");
            }
            // Loop on the read itself so a failed read ends the loop.
            while (getline(fin_, linestr_)) {
                linestr_ += ' ';  // sentinel that flushes the last token
                ParseToTokens();
            }
        } catch (const exception& e) {
            cerr << e.what() << endl;
            return false;
        }
        return true;
    }

private:
    string linestr_;           // line currently being parsed
    queue<string> vecToken_;   // not used by the code in this class
    ifstream fin_;
    ofstream fout_;            // opened but never written to here
};

int main(int argc, char *argv[])
{
    FileParse a(IN_FILE_NAME, OUT_FILE_NAME);
    a.run();
    return 0;
}
测试文本
293048 24 895523 3.14159 235 253 52435 345 MyVar0 MyVar1 MyVar2 459245 rEtUrN TRUE false 22 .5 .35 2.0 while 1 0.0 var 1.0 var 0 This_is_an_identifier 02345 _so_is_this___ 63246 0.2346 34.0
显示结果