词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:字符流(什么输入方式,什么数据结构保存)
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
| 单词符号 | 种别码 | 单词符号 | 种别码 |
| --- | --- | --- | --- |
| begin | 1 | : | 17 |
| if | 2 | := | 18 |
| then | 3 | < | 20 |
| while | 4 | <= | 21 |
| do | 5 | <> | 22 |
| end | 6 | > | 23 |
| l(l\|d)* | 10 | >= | 24 |
| dd* | 11 | = | 25 |
| + | 13 | ; | 26 |
| - | 14 | ( | 27 |
| * | 15 | ) | 28 |
| / | 16 | # | 0 |
首先说下我的思路:先建立 map<string, string> m,用来存储单词符号及其种别码。
// Lookup table: fixed lexeme (reserved word, operator or delimiter) -> category code.
map<string, string> m;

// Fills `m` with every fixed lexeme and its category code.
void init() {
    static const char* const kCodes[][2] = {
        {"begin", "1"}, {"if", "2"},  {"then", "3"}, {"while", "4"},
        {"do", "5"},    {"end", "6"},
        {"+", "13"},    {"-", "14"},  {"*", "15"},   {"/", "16"},
        {":", "17"},    {":=", "18"},
        {"<", "20"},    {"<=", "21"}, {"<>", "22"},
        {">", "23"},    {">=", "24"}, {"=", "25"},
        {";", "26"},    {"(", "27"},  {")", "28"},   {"#", "0"},
    };
    const int total = sizeof(kCodes) / sizeof(kCodes[0]);
    for (int row = 0; row < total; ++row) {
        m[kCodes[row][0]] = kCodes[row][1];
    }
}
而后进行字符串录入以#作为结束录入的标志。在c++中,cin用于string类型录入时会将空格作为结束符从而导致空格后的内容无法录入,故采用getline()函数来进行字符串的录入。
string str;  // whole program text, input lines joined by single blanks
string s;    // line currently being read
cout<<"请输入字符串:"<<endl;
// Read lines until one ends with the terminator '#'.
// The loop also stops when getline fails (EOF or stream error): a failed
// getline does not update `s`, so retrying would append stale text forever.
while (getline(cin, s)) {
    str += s;
    // Guard against an empty buffer before looking at its last character.
    if (!str.empty() && str[str.size() - 1] == '#') {
        break;
    }
    str += " ";
}
而后对字符串进行处理:在运算符、界符等特殊符号的两侧插入空格,使它们与标识符、保留字之间以空格隔开。
//插入空格 for(int i=0;i<str.size();i++){ switch(str[i]){ case '+': str.insert(i," "); i++; str.insert(i+1," "); break; case '-': str.insert(i," "); i++; str.insert(i+1," "); break; case '*': str.insert(i," "); i++; str.insert(i+1," "); break; /**/ case '/': if(str[i+1]=='*'){ for(int k=i;k<str.size();k++){ if(str[k]=='/' && str[k-1]=='*'){ cout<<"成功进入"<<endl; str.erase(i,k-i+1); str.insert(i," "); chock=1; break; } } if(chock==0){ chock=0; cout<<"注释方式错误不完整,错误位置第"<<row<<"行"<<endl ; exit(0); } } else{ str.insert(i," "); i++; str.insert(i+1," "); } break; case ':': str.insert(i," "); if(str[i+2]=='=')i=i+2;else i++; str.insert(i+1," "); break; case '<': str.insert(i," "); if(str[i+2]=='=' || str[i+2]=='>')i=i+2;else i++; str.insert(i+1," "); break; case '>': str.insert(i," "); if(str[i+2]=='=')i=i+2;else i++; str.insert(i+1," "); break; case '=': str.insert(i," "); i++; str.insert(i+1," "); break; case ';': str.insert(i," "); i++; str.insert(i+1," "); break; case '(': str.insert(i," "); i++; str.insert(i+1," "); break; case ')': str.insert(i," "); i++; str.insert(i+1," "); break; case '#': str.insert(i," "); i++; str.insert(i+1," "); break; case ' ': str.insert(i," "); i++; row++; str.insert(i+1," "); break; } }
之后再将处理过的字符串按空格切割。与C#不同,C++标准库没有现成的字符串切割函数,只能自行编写。
// Splits `str` into the non-empty pieces separated by any character of `delim`.
//
// `delim` is a set of delimiter characters, not a multi-character separator.
// Runs of consecutive delimiters yield no empty tokens, and an empty `str`
// (or one made only of delimiters) yields an empty vector.
//
// Implemented with std::string searches instead of strtok on heap copies:
// the earlier version never released its two new[] buffers and relied on
// strtok's hidden global state.
vector<string> split(const string& str, const string& delim) {
    vector<string> res;
    string::size_type begin = str.find_first_not_of(delim);
    while (begin != string::npos) {
        // `end` is npos for the last token; substr then takes the remainder.
        const string::size_type end = str.find_first_of(delim, begin);
        res.push_back(str.substr(begin, end - begin));
        begin = str.find_first_not_of(delim, end);
    }
    return res;
}
而后遍历得到的vector<string>,从map中查出每个单词对应的种别码即可。
//输出对应序列 for(int i = 0; i < str_list.size(); i++) { if (m.find(str_list[i]) != m.end()) cout<<" < "<<str_list[i] <<","<<m[str_list[i]]<<" >"<<endl; else if(AllisNum(str_list[i]))cout<<" < "<<str_list[i] <<","<<11<<" >"<<endl; else cout<<" < "<<str_list[i] <<","<<10<<" >"<<endl; }
最后附上完整代码
// Lexical analyzer: reads source text from stdin up to a line ending in '#',
// strips /* ... */ comments, and prints one "< lexeme,code >" pair per token.
//
// Only standard headers are used. The former <ext/hash_map> include and
// `using namespace __gnu_cxx` were non-standard and unused by the program.
#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

// Lookup table: fixed lexeme (reserved word, operator or delimiter) -> category code.
map<string, string> m;

// Fills `m`; must run before any token is classified.
void init() {
    static const char* const kCodes[][2] = {
        {"begin", "1"}, {"if", "2"},  {"then", "3"}, {"while", "4"},
        {"do", "5"},    {"end", "6"},
        {"+", "13"},    {"-", "14"},  {"*", "15"},   {"/", "16"},
        {":", "17"},    {":=", "18"},
        {"<", "20"},    {"<=", "21"}, {"<>", "22"},
        {">", "23"},    {">=", "24"}, {"=", "25"},
        {";", "26"},    {"(", "27"},  {")", "28"},   {"#", "0"},
    };
    const unsigned total = sizeof(kCodes) / sizeof(kCodes[0]);
    for (unsigned k = 0; k < total; ++k) {
        m[kCodes[k][0]] = kCodes[k][1];
    }
}

// Splits `str` into the non-empty pieces separated by any character of `delim`
// (a set of delimiter characters). Consecutive delimiters yield no empty
// tokens; an empty or all-delimiter `str` yields an empty vector.
// Uses std::string searches, so nothing is heap-allocated by hand (the old
// strtok-based version leaked both of its new[] buffers).
vector<string> split(const string& str, const string& delim) {
    vector<string> res;
    string::size_type begin = str.find_first_not_of(delim);
    while (begin != string::npos) {
        // `end` is npos for the last token; substr then takes the remainder.
        const string::size_type end = str.find_first_of(delim, begin);
        res.push_back(str.substr(begin, end - begin));
        begin = str.find_first_not_of(delim, end);
    }
    return res;
}

// Returns true when `str` is a non-empty run of decimal digits (rule dd*).
// An empty string is rejected; it used to be reported as a number.
bool AllisNum(string str) {
    if (str.empty()) {
        return false;
    }
    for (string::size_type i = 0; i < str.size(); ++i) {
        if (str[i] < '0' || str[i] > '9') {
            return false;
        }
    }
    return true;
}

// Returns true when `word` matches l(l|d)*: a letter followed by letters/digits.
static bool isIdentifier(const string& word) {
    if (word.empty() || !isalpha(static_cast<unsigned char>(word[0]))) {
        return false;
    }
    for (string::size_type i = 1; i < word.size(); ++i) {
        if (!isalnum(static_cast<unsigned char>(word[i]))) {
            return false;
        }
    }
    return true;
}

// Returns the length (1 or 2) of the operator/delimiter starting at src[pos],
// or 0 when src[pos] does not start one. Two-character forms: := <= <> >=.
static string::size_type operatorLength(const string& src, string::size_type pos) {
    const char cur = src[pos];
    const char next = (pos + 1 < src.size()) ? src[pos + 1] : '\0';
    switch (cur) {
        case ':':
        case '>':
            return (next == '=') ? 2 : 1;
        case '<':
            return (next == '=' || next == '>') ? 2 : 1;
        case '+':
        case '-':
        case '*':
        case '/':
        case '=':
        case ';':
        case '(':
        case ')':
        case '#':
            return 1;
        default:
            return 0;
    }
}

// Copies `src` into `out` with a blank on both sides of every operator, each
// /* ... */ comment replaced by one blank, and each '\n' replaced by a blank.
// `row` is advanced at every line break. Returns false on an unterminated
// comment, leaving `row` at the line where that comment starts.
//
// Building a new string avoids the in-place insert loop, which revisited the
// blanks it had just inserted and therefore never terminated.
static bool separateLexemes(const string& src, string& out, int& row) {
    string::size_type i = 0;
    while (i < src.size()) {
        const char cur = src[i];

        if (cur == '\n') {
            out += ' ';
            ++row;
            ++i;
            continue;
        }

        if (cur == '/' && i + 1 < src.size() && src[i + 1] == '*') {
            // The closer must come after the opener, so "/*/" is not a comment.
            const string::size_type close = src.find("*/", i + 2);
            if (close == string::npos) {
                return false;
            }
            cout << "成功进入" << endl;
            // Keep the line counter correct across multi-line comments.
            row += static_cast<int>(count(src.begin() + i, src.begin() + close, '\n'));
            out += ' ';
            i = close + 2;
            continue;
        }

        const string::size_type width = operatorLength(src, i);
        if (width == 0) {
            out += cur;
            ++i;
            continue;
        }
        out += ' ';
        out.append(src, i, width);
        out += ' ';
        i += width;
    }
    return true;
}

int main() {
    init();

    string str;  // whole program text, input lines joined by '\n'
    string s;    // line currently being read
    cout << "请输入字符串:" << endl;
    // Stop at the first line ending with '#', or on EOF / read failure
    // (the old do/while(1) looped forever once getline started failing).
    while (getline(cin, s)) {
        str += s;
        if (!s.empty() && s[s.size() - 1] == '#') {
            break;
        }
        str += '\n';  // kept so that error messages can name a real line
    }

    string padded;
    int row = 1;
    if (!separateLexemes(str, padded, row)) {
        cout << "注释方式错误不完整,错误位置第" << row << "行" << endl;
        return 1;  // report the failure to the caller instead of exiting with 0
    }

    // Echo the padded text, then the token list.
    cout << padded << endl;

    const vector<string> str_list = split(padded, " \t\r");
    for (vector<string>::size_type idx = 0; idx != str_list.size(); ++idx) {
        const string& word = str_list[idx];
        const map<string, string>::const_iterator hit = m.find(word);
        if (hit != m.end()) {
            cout << " < " << word << "," << hit->second << " >" << endl;
        } else if (AllisNum(word)) {
            cout << " < " << word << "," << 11 << " >" << endl;
        } else if (isIdentifier(word)) {
            cout << " < " << word << "," << 10 << " >" << endl;
        } else {
            // Neither a table entry, a number, nor l(l|d)*: a lexical error.
            cout << "词法错误:无法识别的单词 " << word << endl;
        }
    }
    return 0;
}
附上实验截图