zoukankan      html  css  js  c++  java
  • 词法分析器的手工实现

    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    #include<ctype.h>
    #include<iostream>
    #include<fstream>
    using namespace std;
    //One entry of a token table: the lexeme text and the category code assigned to it.
    struct symbol
    {
        const char * str;   //lexeme text; points at a string literal, so it must be const
        int coding;         //category code assigned in initialization()
    };
    //The 34 reserved words recognised by the analyzer (scanf/printf are treated as keywords).
    const char *keyword_list[34] = { "void", "char", "int", "float", "double", "short", "long", "signed", "unsigned", "struct", "union", "enum", "typedef", "sizeof", "auto", "static", "register", "extern", "const", "volatile", "return", "continue", "break", "goto", "if", "else", "switch", "case","default","for","do","while","scanf","printf"};
    //The 44 operators and delimiters recognised by the analyzer.
    const char *operator_list[44] = { "{","}","[","]","(",")",".","->","~","++","--",
    "!","&","*","/","%","+","-","<<",">>",">", ">=","<","<=","==","!=","^","|","&&",
    "||","?","=","/=","*=","%=","+=","-=","&=","^=","|=",",","#",";",":"};
    char ch; //most recently read character
    char strToken[20] = ""; //text of the token being assembled (at most 19 characters + NUL)
    int eof_flag = 0; //set to 1 once the end of the input has been reached
    int num = 1;//next category code to hand out (incremented after each assignment)
    int row = 1; //current input line, used in error messages
    struct symbol keywords[34]; //keyword table, filled by initialization()
    struct symbol identifiers[44]; //despite the name, this holds the operators/delimiters from operator_list
    FILE *fp = NULL; //input source file
    FILE *fw = NULL; //not used by the code shown here
    std::ofstream out; //result file stream
    
    //Assign a category code to every keyword and operator/delimiter.
    //Codes are taken from the global counter num in table order, keywords
    //first, then operators; num is advanced past every code handed out.
    void initialization() {
        //keywords
        int k = 0;
        while (k < 34)
        {
            keywords[k].str = keyword_list[k];
            keywords[k].coding = num++;
            ++k;
        }
        //operators and delimiters (stored in the table named `identifiers`)
        int op = 0;
        while (op < 44)
        {
            identifiers[op].str = operator_list[op];
            identifiers[op].coding = num++;
            ++op;
        }
        //The analyzer itself emits 79 for identifiers and 80 for numbers.
    }
    
    //把下一个字符读入ch中
    void getNextChar(FILE *ffp)
    {
        if ((ch = getc(ffp)) == EOF)
        {
            eof_flag = 1;
        }
        if (ch == '
    ')
            row++;
    }
    //Skip blanks: while ch is a space, newline, carriage return or tab, keep
    //calling getNextChar() until ch holds some other character.
    void getbc(FILE * ffp)
    {
        //'\r' is skipped as well so that CRLF files read as raw bytes are not
        //rejected as containing an invalid symbol.
        while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t')
        {
            getNextChar(ffp);
        }
    }
    
    //Return true when ch is an alphabetic character.
    bool isLetter(char ch)
    {
        //isalpha requires a value representable as unsigned char (or EOF);
        //a plain char may be negative, so convert first.
        return isalpha(static_cast<unsigned char>(ch)) != 0;
    }
    
    //Return true when ch is a decimal digit.
    bool isDigit(char ch)
    {
        //isdigit requires a value representable as unsigned char (or EOF);
        //a plain char may be negative, so convert first.
        return isdigit(static_cast<unsigned char>(ch)) != 0;
    }
    
    //Return true when ch is the underscore character.
    bool isUnderline(char ch)
    {
        return ch == '_';
    }
    
    //将输入的字符ch连接到strToken
    void concat()
    {
        char * tmp = &ch;
        strcat(strToken, tmp);
    }
    
    //把搜索指针回调一个字符位置
    void retract(FILE * ffp)
    {
        fseek(ffp, -1, SEEK_CUR);
        ch = ' ';
    }
    
    //Look str up in the keyword table.
    //Returns the keyword's category code, or 0 when str is not a keyword.
    int reserve_string(char * str) {
        int idx = 0;
        while (idx < 34) {
            if (!strcmp(str, keywords[idx].str))
                return keywords[idx].coding;
            ++idx;
        }
        return 0;
    }
    
    //Look the operator/delimiter text ch up in the operator table
    //(the global array named `identifiers`).
    //Returns its category code, or 0 when the text is not in the table.
    int reserve_operator(char* ch)
    {
        int idx = 0;
        while (idx < 44) {
            if (!strcmp(ch, identifiers[idx].str))
                return identifiers[idx].coding;
            ++idx;
        }
        return 0;
    }
    
    //Error handling: report the current line number and terminate the program.
    void error()
    {
        printf("\n ********Error*********************\n");
        printf(" row %d  Invaild symbol ! ! ! ",  row);
        printf("\n ********Error*********************\n");
        //A lexical error is a failure, so report a non-zero exit status.
        exit(EXIT_FAILURE);
    }
    //Append one token record to the result stream `out`.
    //  x   - category code of the token
    //  str - token text
    //The record has the form "(<x>,<str> )" followed by a newline.
    void write_result( int x,char *str )
    {
        //Let the stream format the number: this prints "0" for code 0, copes
        //with any int value and needs no fixed-size scratch buffer.
        out << '(' << x << ',' << str << " )\n";
    }
    
    //词法分析
    void LexiscalAnalyzer()
    {
        int num = 0, val = 0, code = 0;
        strcpy(strToken, "");
        getNextChar(fp);
        getbc(fp);
        switch (ch)
        {
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'h':
        case 'i':
        case 'j':
        case 'k':
        case 'l':
        case 'm':
        case 'n':
        case 'o':
        case 'p':
        case 'q':
        case 'r':
        case 's':
        case 't':
        case 'u':
        case 'v':
        case 'w':
        case 'x':
        case 'y':
        case 'z':
        case 'A':
        case 'B':
        case 'C':
        case 'D':
        case 'E':
        case 'F':
        case 'G':
        case 'H':
        case 'I':
        case 'J':
        case 'K':
        case 'L':
        case 'M':
        case 'N':
        case 'O':
        case 'P':
        case 'Q':
        case 'R':
        case 'S':
        case 'T':
        case 'U':
        case 'V':
        case 'W':
        case 'X':
        case 'Y':
        case 'Z':
        case '_':
            while (isLetter(ch) || isDigit(ch) || isUnderline(ch))
            {
                concat();
                getNextChar(fp);
            }
            retract(fp);
            code = reserve_string(strToken);
            if (code == 0)
            {
                printf("(%d , %s)
    ", 79, strToken);
                write_result(79,strToken);
            }
            else
            {
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case'0':
        case'1':
        case'2':
        case'3':
        case'4':
        case'5':
        case'6':
        case'7':
        case'8':
        case'9':
            while (isdigit(ch))
            {
                concat();
                getNextChar(fp);
            }
            retract(fp);
            printf("(%d , %s)
    ",80, strToken);
            write_result(80,strToken);
            break;
        case '{':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '}':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '[':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case ']':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '(':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;    
        case ')':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '.':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '-':
            concat();
            getNextChar(fp);
            if (ch == '>')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '-')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '~':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '+':
            concat();
            getNextChar(fp);
            if (ch == '+')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
    
            }
            break;
        case '*':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
    
            }
            break;
        case '&':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '&')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '!':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '%':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '<':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '<')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '>':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '>')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '=':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '^':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case '|':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '|')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
    
        case '?':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '/':
            concat();
            getNextChar(fp);
            if (ch == '=')
            {
                concat();
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            else if (ch == '/') //跳过注释
            {
                getNextChar(fp);
                while (ch != '
    ') {
                    getNextChar(fp);
                }
                    
                break;    
            }
            else if (ch == '*')//跳过注释
            {
                getNextChar(fp);
                while (ch != '*') {
                    getNextChar(fp);
                }
                getNextChar(fp);
                if (ch == '/');
                break;
            }
            else
            {
                retract(fp);
                code = reserve_operator(strToken);
                printf("(%d , %s)
    ", code, strToken);
                write_result(code,strToken);
            }
            break;
        case ',':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case '#':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;    
        case ';':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            break;
        case ':':
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
    ", code, strToken);
            write_result(code,strToken);
            //out<<strToken;
            break;
        default:
            if (ch == EOF)
            {
                eof_flag = 1;
                break;
            }
            error();
        }
    }
    
    //Program entry point: ask for a source file path, run the lexical analyzer
    //over the whole file and write the token list to result.txt.
    //Returns 0 on success and 1 when the input or output file cannot be used.
    int main()
    {
        initialization();
        char name[1024];
        std::cout<<"please input your file path:";
        //Limit the extraction to the buffer size so a long path cannot overflow it.
        std::cin.width(sizeof(name));
        if (!(std::cin>>name))
        {
            std::cerr<<"no file path given"<<'\n';
            return 1;
        }
        fp=fopen(name,"r");
        if (fp == NULL)
        {
            std::cerr<<"cannot open input file: "<<name<<'\n';
            return 1;
        }
        out.open("result.txt");
        if (!out.is_open())
        {
            std::cerr<<"cannot open result.txt for writing"<<'\n';
            fclose(fp);
            return 1;
        }
        //Stop on either end-of-input signal so that a normal run always falls
        //through to the clean-up below and returns 0.
        while (eof_flag == 0 && !feof(fp))
        {
            LexiscalAnalyzer();
        }
        fclose(fp);
        out.close();
        return 0;
    }
  • 相关阅读:
    centos7 修复引导
    Django 过滤器
    Django 面向对象orm
    Django models字段查询谓词表
    linux常用的监控命令
    常用SQL语句
    python实现FTP服务器
    用python做一个图片验证码
    rsync
    jsonp的理解
  • 原文地址:https://www.cnblogs.com/duke77--null/p/9645926.html
Copyright © 2011-2022 走看看