词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:字符流(什么输入方式,什么数据结构保存)
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
| 单词符号 | 种别码 | 单词符号 | 种别码 |
| --- | --- | --- | --- |
| begin | 1 | : | 17 |
| if | 2 | := | 18 |
| then | 3 | < | 20 |
| while | 4 | <= | 21 |
| do | 5 | <> | 22 |
| end | 6 | > | 23 |
| l(l\|d)* | 10 | >= | 24 |
| dd* | 11 | = | 25 |
| + | 13 | ; | 26 |
| - | 14 | ( | 27 |
| * | 15 | ) | 28 |
| / | 16 | # | 0 |
#include <iostream>
#include <string>
#include <stdlib.h>
#include <stdio.h>
using namespace std;

// ---------------------------------------------------------------------------
// Token tables.
//
// For keywords, separators and operators the token code that gets printed is
// the 1-based position of the lexeme inside its own table (see value()).
// NOTE(review): these tables and the codes below do not match the token/code
// table given in the assignment text at the top of this file (which uses
// 10 for identifiers, 11 for unsigned numbers, begin/end keywords, ...).
// The scheme implemented here is kept as-is.
// ---------------------------------------------------------------------------
string KEYWORD[15]={"if","else","void","return","while","then","for","do",
                    "int","char","double","float","case","cin","cout"};
char SEPARATER[10]={';',',','{','}','[',']','(',')','#','"'};
char OPERATOR[9]={'+','-','*','/','>','<','=','!','%'};
// Characters that are skipped silently.
// NOTE(review): the pasted source listed four identical blanks here, which
// looks like escape sequences lost in transit; restored as space, tab,
// carriage return and line feed -- confirm against the original.
char FILTER[4]={' ','\t','\r','\n'};

const int IDENTIFIER=100;    // token code printed for identifiers
const int CONSTANT=101;      // token code printed for unsigned numbers
const int FILTER_VALUE=102;  // not referenced anywhere in this part of the file

// Table sizes derived from the arrays so the lookups cannot drift out of sync
// with the tables (the operator lookup previously used 8 for a 9-entry table).
const int KEYWORD_COUNT=static_cast<int>(sizeof(KEYWORD)/sizeof(KEYWORD[0]));
const int SEPARATER_COUNT=static_cast<int>(sizeof(SEPARATER)/sizeof(SEPARATER[0]));
const int OPERATOR_COUNT=static_cast<int>(sizeof(OPERATOR)/sizeof(OPERATOR[0]));
const int FILTER_COUNT=static_cast<int>(sizeof(FILTER)/sizeof(FILTER[0]));

// Returns the 1-based index of `str` within the first `n` elements of `a`,
// or -1 when it is not present.
template <class T>
int value(T *a,int n,T str){
    for(int i=0;i<n;i++){
        if(a[i]==str)
            return i+1;
    }
    return -1;
}

// True when `word` is one of the reserved words in KEYWORD.
bool IsKeyword(string word){
    return value(KEYWORD,KEYWORD_COUNT,word)!=-1;
}

// True when `ch` is one of the delimiter characters in SEPARATER.
bool IsSeparater(char ch){
    return value(SEPARATER,SEPARATER_COUNT,ch)!=-1;
}

// True when `ch` is one of the single-character operators in OPERATOR.
bool IsOperator(char ch){
    return value(OPERATOR,OPERATOR_COUNT,ch)!=-1;
}

// True when `ch` is a character the scanner skips (see FILTER).
bool IsFilter(char ch){
    return value(FILTER,FILTER_COUNT,ch)!=-1;
}

// True for 'A'..'Z'.
bool IsUpLetter(char ch){
    return ch>='A' && ch<='Z';
}

// True for 'a'..'z'.
bool IsLowLetter(char ch){
    return ch>='a' && ch<='z';
}

// True for '0'..'9'.
bool IsDigit(char ch){
    return ch>='0' && ch<='9';
}

// True when `ch` may appear inside an identifier: letter, digit or '_'.
static bool IsIdentChar(char ch){
    return IsLowLetter(ch) || IsUpLetter(ch) || IsDigit(ch) || ch=='_';
}

// Prints one token as "<code> <lexeme><label>" followed by a newline.
// The code goes through printf and the text through cout, as before.
static void PrintToken(int code,const string &lexeme,const char *label){
    printf("%3d ",code);
    cout<<lexeme<<label<<endl;
}

// Appends the run of digits that starts at `ch` to `out`, reading further
// characters from `fp`. Returns the first character that is not a digit
// (possibly EOF); that character has been read but not appended.
static int ReadDigits(FILE *fp,int ch,string &out){
    while(ch!=EOF && IsDigit(static_cast<char>(ch))){
        out+=static_cast<char>(ch);
        ch=fgetc(fp);
    }
    return ch;
}

// Scans the character stream `fpin` from start to EOF and prints one line per
// token: keywords, identifiers, unsigned numbers (with an optional fractional
// part), single-character operators and separators. Characters in FILTER are
// skipped; anything else is reported as an unrecognised character.
//
// The scanner keeps exactly one character of lookahead in `ch`. Every branch
// leaves `ch` holding the first character it did NOT consume, so nothing has
// to be pushed back with fseek (which re-read the last character forever when
// a token ended at EOF, and dropped a character after lowercase words).
void analyse(FILE * fpin){
    if(fpin==NULL)
        return;

    string arr;
    // int, not char: fgetc returns EOF as a value outside the char range.
    int ch=fgetc(fpin);
    while(ch!=EOF){
        const char c=static_cast<char>(ch);
        arr.clear();

        if(IsFilter(c)){
            ch=fgetc(fpin);
        }
        else if(IsLowLetter(c) || IsUpLetter(c) || c=='_'){
            // Word: letter or '_' followed by letters, digits or '_'.
            // Handled in one branch so that e.g. "count1" or "myVar" is a
            // single identifier, and identifiers always get IDENTIFIER.
            do{
                arr+=static_cast<char>(ch);
                ch=fgetc(fpin);
            }while(ch!=EOF && IsIdentChar(static_cast<char>(ch)));

            if(IsKeyword(arr))
                PrintToken(value(KEYWORD,KEYWORD_COUNT,arr),arr," 保留字");
            else
                PrintToken(IDENTIFIER,arr," 标识符");
        }
        else if(IsDigit(c)){
            // Unsigned number: digits, optionally followed by '.' and digits.
            ch=ReadDigits(fpin,ch,arr);
            if(ch=='.'){
                const int next=fgetc(fpin);
                if(next!=EOF && IsDigit(static_cast<char>(next))){
                    arr+='.';
                    ch=ReadDigits(fpin,next,arr);
                }
                else if(next!=EOF){
                    // Not a fraction: return the character after the dot to
                    // the stream. `ch` still holds '.', which the next loop
                    // iteration reports as an unrecognised character.
                    ungetc(next,fpin);
                }
            }
            PrintToken(CONSTANT,arr," 无符号数");
        }
        else if(IsOperator(c)){
            arr+=c;
            PrintToken(value(OPERATOR,OPERATOR_COUNT,c),arr," 运算符");
            ch=fgetc(fpin);
        }
        else if(IsSeparater(c)){
            arr+=c;
            PrintToken(value(SEPARATER,SEPARATER_COUNT,c),arr," 界符");
            ch=fgetc(fpin);
        }
        else{
            // NOTE(review): the quotes around the character were unbalanced in
            // the pasted source (escapes apparently lost); restored as \".
            cout<<"\""<<c<<"\":无法识别的字符!"<<endl;
            ch=fgetc(fpin);
        }
    }
}

// Prompts for a source file name until one can be opened, runs the lexical
// analysis on it and prints the token listing to standard output.
int main()
{
    // std::string instead of a fixed char[40]: a long path can no longer
    // overflow the buffer.
    string inFile;
    FILE *fpin=NULL;
    cout<<"请输入源文件名(包括路径和后缀):";
    while(true){
        if(!(cin>>inFile)){
            // Standard input is closed or failed; stop instead of retrying
            // forever with no name to open.
            return EXIT_FAILURE;
        }
        fpin=fopen(inFile.c_str(),"r");
        if(fpin!=NULL)
            break;
        cout<<"文件名错误!"<<endl;
        cout<<"请输入源文件名(包括路径和后缀):";
    }
    cout<<"------词法分析如下------"<<endl;
    analyse(fpin);
    fclose(fpin);
    return 0;
}