【实验目的】
通过上机实验,加深对语法制导翻译的理解,掌握将语法分析所识别的语法成分变换为中间代码的语义翻译方法。
【实验内容】
对给定的程序通过语义分析器能够判断语句串是否正确。正确则输出三地址指令形式的四元式代码,错误则抛出错误信息。
【设计思想】
(1)输入待分析的字符串。
语法如下:
a.关键字:begin,if,then,while,do,end.
b.运算符和界符::= + - * / < <= > >= <> = ; ( ) #
c.其他单词是标识符(ID)和整形常数(NUM):ID=letter(letter|digit)*,NUM=digitdigit*
d.空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、界符和关键字,词法分析阶段通常被忽略。
(2)扫描字符串,采用递归向下进行分析。
主要函数如下:
a.scaner()//词法分析函数,char token[8]用来存放构成单词符号的字符串;
b.parser()//语法分析,在语法分析的基础上插入相应的语义动作:将输入串翻译成四元式序列。只对表达式、赋值语句进行翻译。
c.emit(char *result,char *arg1,char *op,char *ag2)//该函数功能是生成一个三地址语句返回四元式表中。
d.char *newtemp()//该函数返回一个新的临时变量名,临时变量名产生的顺序为T1,T2,…。
【实验要求】
四元式表的结构如下:
struct {char result[8];
char ag1[8];
char op[8];
char ag2[8];
}quad[20];
(3)输出为三地址指令形式的四元式序列。
例如:语句串begin a:=2+3*4;x:=(a+b)/c;end#,
输出的三地址指令如下:
t1=3*4
t2=2+t1
a=t2
t3=a+b
t4=t3/c
x=t4
代码:
#include "stdio.h" #include "string.h" char prog[100], token[8], ch; char *rwtab[6] = { "begin", "if", "then", "while", "do", "end" }; int syn, p, m, n, sum, q; int kk; struct { char result1[8]; char ag11[8]; char op1[8]; char ag21[8]; } quad[20]; char *factor(); char *expression(); int yucu(); char *term(); int statement(); int lrparser(); char *newtemp(); scaner(); emit(char *result, char *ag1, char*op, char *ag2); main() { int j; q = p = kk = 0; printf(" **************************************"); printf(" please input a string (end with'#'): "); do { scanf_s("%c", &ch); prog[p++] = ch; } while (ch != '#'); p = 0; scaner(); lrparser(); if (q>19)printf(" too longsentense! "); else for (j = 0; j<q; j++)printf(" %s = %s%s %s ", quad[j].result1, quad[j].ag11, quad[j].op1, quad[j].ag21); getchar(); } int lrparser() { int schain = 0; kk = 0; if (syn == 1) { scaner(); schain = yucu(); if (syn == 6) { scaner(); if ((syn == 0) && (kk == 0)) printf("Success! "); } else { if (kk != 1)printf("short of 'end'! "); kk = 1; getchar(); exit(0); } } else { printf("short of 'begin' ! "); kk = 1; getchar(); exit(0); } return (schain); } int yucu() { int schain = 0; schain = statement(); while (syn == 26) { scaner(); schain = statement(); } return (schain); } int statement() { char tt[8], eplace[8]; int schain = 0; if (syn == 10) { strcpy_s(tt, 255, token); scaner(); if (syn == 18) { scaner(); strcpy_s(eplace, 255, expression()); emit(tt, eplace, "", ""); schain = 0; } else { printf("short of sign ':='! "); kk = 1; getchar(); exit(0); } return (schain); } } char *expression() { char *tp, *ep2, *eplace, *tt; tp = (char *)malloc(12); ep2 = (char *)malloc(12); eplace = (char *)malloc(12); tt = (char *)malloc(12); strcpy_s(eplace, 255, term()); while ((syn == 13) || (syn == 14)) { if (syn == 13)strcpy_s(tt, 255, "+"); else strcpy_s(tt, 255, "-"); scaner(); strcpy_s(ep2, 255, term()); strcpy_s(tp, 255, newtemp()); emit(tp, eplace, tt, ep2); strcpy_s(eplace, 255, tp); } return (eplace); } char *term() { char *tp, *ep2, *eplace, *tt; tp = (char *)malloc(12); ep2 = (char *)malloc(12); eplace = (char *)malloc(12); tt = (char *)malloc(12); strcpy_s(eplace, 255, factor()); while ((syn == 15) || (syn == 16)) { if (syn == 15)strcpy_s(tt, 255, "*"); else strcpy_s(tt, 255, "/"); scaner(); strcpy_s(ep2, 255, factor()); strcpy_s(tp, 255, newtemp()); emit(tp, eplace, tt, ep2); strcpy_s(eplace, 255, tp); } return (eplace); } char *factor() { char *fplace; fplace = (char *)malloc(12); strcpy_s(fplace, 255, ""); if (syn == 10) { strcpy_s(fplace, 255, token); scaner(); } else if (syn == 11) { itoa(sum, fplace, 10); scaner(); } else if (syn == 27) { scaner(); fplace = expression(); if (syn == 28) scaner(); else { printf("error on ')'! "); kk = 1; getchar(); exit(0); } } else { printf("error on '(' ! "); kk = 1; getchar(); exit(0); } return (fplace); } char *newtemp() { char *p; char m[8]; p = (char *)malloc(8); kk++; itoa(kk, m, 10); strcpy_s(p + 1, 255, m); p[0] = 't'; return(p); } scaner() { sum = 0; for (m = 0; m<8; m++)token[m++] = NULL; m = 0; ch = prog[p++]; while (ch == ' ')ch = prog[p++]; if (((ch <= 'z') && (ch >= 'a')) || ((ch <= 'Z') && (ch >= 'A'))) { while (((ch <= 'z') && (ch >= 'a')) || ((ch <= 'Z') && (ch >= 'A')) || ((ch >= '0') && (ch <= '9'))) { token[m++] = ch; ch = prog[p++]; } p--; syn = 10; token[m++] = ' '; for (n = 0; n<6; n++) if (strcmp(token, rwtab[n]) == 0) { syn = n + 1; break; } } else if ((ch >= '0') && (ch <= '9')) { while ((ch >= '0') && (ch <= '9')) { sum = sum * 10 + ch - '0'; ch = prog[p++]; } p--; syn = 11; } else switch (ch) { case '<':m = 0; ch = prog[p++]; if (ch == '>') { syn = 21; } else if (ch == '=') { syn = 22; } else { syn = 20; p--; } break; case '>':m = 0; ch = prog[p++]; if (ch == '=') { syn = 24; } else { syn = 23; p--; } break; case ':':m = 0; ch = prog[p++]; if (ch == '=') { syn = 18; } else { syn = 17; p--; } break; case '+': syn = 13; break; case '-': syn = 14; break; case '*': syn = 15; break; case '/': syn = 16; break; case '(': syn = 27; break; case ')': syn = 28; break; case '=': syn = 25; break; case ';': syn = 26; break; case '#': syn = 0; break; default: syn = -1; break; } } emit(char *result, char *ag1, char*op, char *ag2) { strcpy_s(quad[q].result1, 255, result); strcpy_s(quad[q].ag11, 255, ag1); strcpy_s(quad[q].op1, 255, op); strcpy_s(quad[q].ag21, 255, ag2); q++; }
运行截图: