zoukankan      html  css  js  c++  java
  • 简单的词法分析器的实现

    实验一 《实现C语言小子集程序的词法分析》实验要求

    以表1的小语言为例实现词法分析

     

    设计单词属性值,各类表格(表示标识符表、常量表),单词符号及机内表示
    编码实现词法分析程序

    采用文本输入和输出的方式。程序从名为“test.txt”的文件中读入代码,将词法分析的结果保存到“output.txt”中。

    要求实现:

    (1)对正确源程序的识别;

    (2)对包含有注释//和/* */的源程序的识别;

    (3)对包含错误标识符的源程序的识别。

    测试样例:test.txt

    int i = 3;

    int 5j = 10;

    int m = max(i, j);

    while(i<m) do

    {

    i = i+ 1;

    }

    void max(int x, int y)

    {

    int 3temp = 0;

    if(x > y)

    temp = x;

    else

    temp = y;

    return temp;

    }


    输出:output.txt


    <26,->, <1,i>, <16,->, <2,3>, <23,->,

    <26,->, LexicalError, <16,->, <2,10>, <23,->,

    <26,->, <1,m>, <16,->, <1,max>, <17,->, <1,i>, <24,->, <1,j>, <18,->, <23,->,

    <31,->, <17,->, <1,i>, <8,->, <1,m>, <18,->, <32,->,

    <21,->,

    <1,i>, <16,->, <1,i>, <3,->, <2,1>,<23,->,

    <22,->,

    <25,->, <1,max>, <17,->, <26,->, <1,x>, <24,->, <26,->, <1,y>, <18,->,

    <21,->,

    <26,->, LexicalError ,<16,->, <2,0>, <23,->,

    <29,->, <17,->, <1,x>, <10,->, <1,y>, <18,->,

    <1,temp>, <16,->, <1,x>, <23,->,

    <30,->

    <1,temp>, <16,->, <1,y>,

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* Capacity of lexbuf, the buffer that holds one identifier/keyword lexeme. */
    #define BASIZE 20
    /* End-of-string marker (the NUL character). */
    #define EOS '\0'
    /* Integer truth values returned by the character-class predicates. */
    #define True 1
    #define False 0

    #define key_num 9 /* number of reserved words in keyWord */
    #define buff_size 1024 /* capacity of buff, the in-memory copy of the source file */

    char buff[buff_size]; /* source text read from the input file */
    int lineno = 1; /* current source line number, starting at 1 */
    int tokenval = 1; /* value of the integer literal most recently scanned */
    char lexbuf[BASIZE]; /* text of the identifier or keyword being scanned */
    int error[10]; /* source line numbers on which lexical errors were found */

    /* Reserved words of the language subset. */
    char keyWord[key_num][10]={
    "void","int","float","char","if","else",
    "while","do","return"
    };
    
     
    
    /* Digit test: returns True when ch is one of '0'..'9', otherwise False. */
    int isDigit(char ch)
    {
        return ('0' <= ch && ch <= '9') ? True : False;
    }
    
    
    /* Letter test: returns True for 'a'..'z' and 'A'..'Z', otherwise False. */
    int isAlpha(char ch)
    {
        int is_lower = ('a' <= ch && ch <= 'z');
        int is_upper = ('A' <= ch && ch <= 'Z');

        if (!is_lower && !is_upper) {
            return False;
        }
        return True;
    }
    
    
    /*
     * Operator test: returns True when ch is one of + - * / > < ! =
     * (the characters that can start an operator), otherwise False.
     */
    int isOperator(char ch)
    {
        switch (ch) {
        case '+':
        case '-':
        case '*':
        case '/':
        case '>':
        case '<':
        case '!':
        case '=':
            return True;
        default:
            return False;
        }
    }
    
    
    /*
     * Delimiter test: returns True when ch is one of ( ) [ ] { } ; ,
     * and False for every other character.
     */
    int isDelimater(char ch)
    {
        switch (ch) {
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case ';':
        case ',':
            return True;
        default:
            return False;
        }
    }
    
     
    
    
    /*
    词法分析器
    @auth: finch
    @time: 2017/4/1
    */
    
    int lexan()
    {
    int error_num = 0;
    FILE *fp_in=NULL;
    FILE *fp_out=NULL;
    
    if((fp_in=fopen("test.txt","r"))!=NULL) //读取源码进入缓存buff中
    {
    char ch=NULL;
    int x =0;
    while(ch!=EOF) //文件结束符
    {
    printf("%d
    ",x);
    ch=fgetc(fp_in);
    buff[x]= ch;
    x++;
    if(x>=buff_size)
    {
    
    printf("Error");
    exit(-1);
    }
    }
    buff[x]=EOF;
    fclose(fp_in);
    }
    else
    {
    printf("Error:can't find the file");
    exit(-1);
    }
    
    
    if((fp_out=fopen("output.txt","w+"))==NULL) //词法分析的输出
    {
    printf("Error:can't create a file");
    exit(-1);
    }
    
    
    int t=0;
    while(buff[t]!=EOF) //读读入内存的源码进行分析
    {
    if (buff[t] ==' '||buff[t]=='	')
    ; //删除空格
    else if(buff[t]== '
    ') //换行
    {
    
    fprintf(fp_out,"
    ");
    printf("
    ");
    lineno = lineno +1; //行数
    
    }
    else if(isDigit(buff[t])) //数字
    {
    tokenval = buff[t] - '0';
    t++;
    while(isDigit(buff[t])) //整数
    {
    tokenval = tokenval *10 + buff[t] -'0';
    t++;
    }
    if(isAlpha(buff[t])) //数字+字母 :报错
    {
    fprintf(fp_out," LexicalError,");
    error[error_num++]=lineno; //出错行记录
    printf("<error:%d,%c>",tokenval,buff[t]);
    }
    else
    {
    t--;
    fprintf(fp_out,"<%d,->,",2);
    printf("<%d:%d>,",2,tokenval);
    }
    
    }
    else if(True==isAlpha(buff[t])||buff[t]=='_') //标识符或者关键字
    {
    
    int b = 0;
    while(isAlpha(buff[t])||isDigit(buff[t])||buff[t]=='_') //字母或数字
    {
    lexbuf[b] = buff[t];
    t++;
    b = b +1;
    if(b >=BASIZE)
    printf("compiler error");
    }
    t--;
    lexbuf[b] ='';
    if(True==isKeyWord(lexbuf)) //关键字判断
    {
    int code = 2;
    if(0==strcmp(lexbuf,"void")) //查找对应代码
    code =25;
    else if(0==strcmp(lexbuf,"int"))
    code =26;
    else if(0==strcmp(lexbuf,"float"))
    code =27;
    else if(!strcmp(lexbuf,"char"))
    code =28;
    else if(!strcmp(lexbuf,"if"))
    code =29;
    else if(!strcmp(lexbuf,"else"))
    code =30;
    else if(!strcmp(lexbuf,"while"))
    code =31;
    else if(!strcmp(lexbuf,"do"))
    code =32;
    else if(!strcmp(lexbuf,"return"))
    code =33;
    else if(!strcmp(lexbuf,"main"))
    code =34;
    else if(!strcmp(lexbuf,"printf"))
    code =35;
    fprintf(fp_out,"<%d,->,",code);
    printf("<%d:%s>,",code,lexbuf);
    }
    else
    { //标识符输出
    
    fprintf(fp_out,"<%d,%s>,",1,lexbuf);
    printf("<%d:%s>,",1,lexbuf);
    }
    
    }
    else if(isDelimater(buff[t])) //分隔符
    {
    int code = 0;
    if(buff[t]==',')
    code=24;
    else if(buff[t]==';')
    code=23;
    else if(buff[t]=='{')
    code=21;
    else if(buff[t]=='}')
    code=22;
    else if(buff[t]=='(')
    code=17;
    else if(buff[t]==')')
    code=18;
    else if(buff[t]=='[')
    code=19;
    else if(buff[t]==']')
    code=20;
    fprintf(fp_out,"<%d,->,",code);
    printf("<%d,%c>,",code,buff[t]);
    
    }
    else if(isOperator(buff[t])) //运算符
    {
    int code = 0;
    if(buff[t]=='+')
    code=3;
    else if(buff[t]=='-')
    code=4;
    else if(buff[t]=='*')
    code=5;
    else if(buff[t]=='/')
    {
    t++;
    if(buff[t]=='*') //多行注释判断
    {
    while(buff[t]!='/')
    {
    t++; //忽略注释
    }
    }
    else if(buff[t]=='/') //单行注释
    {
    while(buff[t]!='
    ')
    {
    
    t++;
    }
    }else{
    code = 4;
    t--;
    }
    
    }
    
    else if(buff[t]=='<')
    {
    t++;
    if(buff[t]=='=') //==
    {
    
    }
    else
    {
    t--;
    code = 8; // <
    }
    }
    
    else if(buff[t]=='>')
    {
    t++;
    if(buff[t]=='=') // >=
    {
    code =11;
    }
    else{
    code=10;
    t--;
    }
    }
    
    else if(buff[t]=='=')
    {
    t++;
    if(buff[t]=='=') // ==
    {
    code =12;
    }
    else{
    code=16; // =
    t--;
    }
    }
    
    fprintf(fp_out,"<%d,->,",code);
    printf("<%d,%c>",code,buff[t]);
    }
    t++;
    }
    
    if(error_num!=0) //错误输出
    {
    fprintf(fp_out,"LexicalError(s) on line(s) ");
    for(int i=0;i<error_num;i++)
    if(i!=error_num)
    fprintf(fp_out,"%d,",error[i]);
    else
    fprintf(fp_out,"%d",error[i]);
    
    }
    fclose(fp_out);
    return 0;
    }
    
    
    /*
     * Program entry point: runs the lexical analyser once.
     * The result of lexan() is discarded and the exit status is always 0.
     */
    int main()
    {
        (void)lexan();

        return 0;
    }
  • 相关阅读:
    PHP面向对象编程入门
    PHP错误处理机制
    PHP数组深入
    PHP 表单
    多重背包之单调队列优化理论性总结
    二分查找理论性总结
    大连海事大学第十届程序设计竞赛 题解
    Codeforces Round #603 (Div. 2) E. Editor (线段树维护前缀和最值)
    Educational Codeforces Round 77 (Rated for Div. 2) E. Tournament (DP)
    Leetcode1256 加密数字(手动找规律)
  • 原文地址:https://www.cnblogs.com/fench/p/6699967.html
Copyright © 2011-2022 走看看