zoukankan      html  css  js  c++  java
  • 词法分析器--DFA(c++实现)

    语言名为TINY

    实例程序:

    begin
         var x,y:interger;
         x:=10;
         read(x);
         if y<0 then x:=x-y;
         x:=x+y;
         write(x);
    end

    TINY语言扫描程序的DFA:

    代码

    //ExplLexicalAnalyzer.h
    #ifndef EXPLLEXICALANALYZER_H
    #define EXPLLEXICALANALYZER_H
    
    
    #define MAXTOKENLEN 40
    #define MAXRESERVED 13
    
    
    // Kinds of token the scanner can return. The enumerators keep their
    // original order, so their numeric values are unchanged.
    typedef enum {
        // bookkeeping tokens
        ENDFILE,
        ERROR,
        // reserved words
        IF,
        THEN,
        ELSE,
        END,
        REPEAT,
        UNTIL,
        READ,
        WRITE,
        VAR,
        BEGIN,
        INTEGER,
        DOUBLE,
        STRING,
        // multi-character tokens
        ID,
        NUM,
        // special symbols
        ASSIGN,  // :=
        EQ,      // =
        LT,      // <
        PLUS,    // +
        MINUS,   // -
        TIMES,   // *
        OVER,    // /
        LPAREN,  // (
        RPAREN,  // )
        SEMI,    // ;
        COMMA,   // ,
        DEFINE   // :
    } TokenType;
    
    //typedef struct {
    //    TokenType kind;
    //    int row = -1;
    //    int column = -1;
    //    double value;
    //    std::string ID;
    //} Token;
    
    
    TokenType getToken(void);
    
    #endif //EXPLLEXICALANALYZER_H
      1 //ExplLexicalAnalyzer.cpp
      2 #include <cstdio>
      3 #include <iostream>
      4 #include <fstream>
      5 #include <cstring>
      6 #include  "ExplLexicalAnalyzer.h"
      7 
      8 using namespace std;
      9 
     // States of the scanner DFA that getToken() walks through.
     typedef enum {
         START,      // between tokens; the next character decides the path
         INASSIGN,   // a ':' has been read; '=' would complete ":="
         INCOMMENT,  // inside a '{' ... '}' comment
         INNUM,      // collecting the digits of a number
         INID,       // collecting the letters of an identifier
         DONE        // the current token is complete
     } StateType;
     13 
     14 char tokenString[MAXTOKENLEN + 1];
     15 
     16 #define BUFLEN 256
     17 
     18 static char lineBuf[BUFLEN];
     19 static int linepos = 0;
     20 static int bufsize = 0;
     21 static int EOF_flag = false;
     22 static string filename;
     23 static fstream get;
     24 static int lineno = 0;
     25 static int columnpos = 0;
     26 bool TraceScan = true;
     27 StateType state;
     28 
     29 static struct {
     30     const char *str;
     31     TokenType tok;
     32 } reservedWords[MAXRESERVED]
     33         = {{"if",       IF},
     34            {"then",     THEN},
     35            {"else",     ELSE},
     36            {"end",      END},
     37            {"repeat",   REPEAT},
     38            {"until",    UNTIL},
     39            {"read",     READ},
     40            {"write",    WRITE},
     41            {"begin",    BEGIN},
     42            {"var",      VAR},
     43            {"interger", INTEGER},
     44            {"double",   DOUBLE},
     45            {"string",   STRING}};
     46 
     47 
     48 static char
     49 getNextChar() {
     50     if (linepos >= bufsize) {
     51         lineno = 0;
     52         if (state != START)
     53             return ' ';
     54         if (get.getline(lineBuf, BUFLEN - 1)) {
     55             printf("%d: %s
    ", columnpos++, lineBuf);
     56             bufsize = (int) strlen(lineBuf);
     57             linepos = 0;
     58             return lineBuf[linepos++];
     59         } else {
     60             return EOF;
     61         }
     62     } else return lineBuf[linepos++];
     63 }
     64 
     65 
     66 static TokenType reservedLookup(char *s) {
     67     int i;
     68     for (i = 0; i < MAXRESERVED; i++)
     69         if (!strcmp(s, reservedWords[i].str))
     70             return reservedWords[i].tok;
     71     return ID;
     72 }
     73 
     74 //退回一个字符
     75 static void ungetNextChar(void) { if (!EOF_flag) linepos--; }
     76 
     77 //打印分析结果
     78 void printToken(TokenType token, const char *tokenString) {
     79     switch (token) {
     80         case IF:
     81         case THEN:
     82         case ELSE:
     83         case END:
     84         case REPEAT:
     85         case UNTIL:
     86         case READ:
     87         case WRITE:
     88         case BEGIN:
     89         case VAR:
     90         case INTEGER:
     91         case DOUBLE:
     92         case STRING:
     93             printf("reserved word: %s
    ", tokenString);
     94             break;
     95         case DEFINE:
     96             printf(":
    ");
     97             break;
     98         case COMMA:
     99             printf(",
    ");
    100             break;
    101         case ASSIGN:
    102             printf(":=
    ");
    103             break;
    104         case LT:
    105             printf("<
    ");
    106             break;
    107         case EQ:
    108             printf("=
    ");
    109             break;
    110         case LPAREN:
    111             printf("(
    ");
    112             break;
    113         case RPAREN:
    114             printf(")
    ");
    115             break;
    116         case SEMI:
    117             printf(";
    ");
    118             break;
    119         case PLUS:
    120             printf("+
    ");
    121             break;
    122         case MINUS:
    123             printf("-
    ");
    124             break;
    125         case TIMES:
    126             printf("*
    ");
    127             break;
    128         case OVER:
    129             printf("/
    ");
    130             break;
    131         case ENDFILE:
    132             break;
    133         case NUM:
    134             printf("NUM, val= %s
    ", tokenString);
    135             break;
    136         case ID:
    137             printf("ID, name= %s
    ", tokenString);
    138             break;
    139         case ERROR:
    140             printf("ERROR: %s
    ", tokenString);
    141             break;
    142         default:
    143             printf("Unknown token: %d
    ", token);
    144     }
    145 }
    146 
    147 
    148 //词法分析
    149 TokenType getToken(void) {
    150     int tokenStringIndex = 0;
    151     TokenType currentToken;
    152     state = START;
    153     bool save;
    154     while (state != DONE) {
    155         char c = getNextChar();
    156         save = true;
    157         switch (state) {
    158             case START:
    159                 if (isdigit(c))
    160                     state = INNUM;
    161                 else if (isalpha(c))
    162                     state = INID;
    163                 else if (c == ':')
    164                     state = INASSIGN;
    165                 else if ((c == ' ') || (c == '	') || (c == '
    '))
    166                     save = false;
    167                 else if (c == '{') {
    168                     save = false;
    169                     state = INCOMMENT;
    170                 } else {
    171                     state = DONE;
    172                     switch (c) {
    173                         case EOF:
    174                             return ENDFILE;
    175                         case ',':
    176                             currentToken = COMMA;
    177                             break;
    178                         case '=':
    179                             currentToken = EQ;
    180                             break;
    181                         case '<':
    182                             currentToken = LT;
    183                             break;
    184                         case '+':
    185                             currentToken = PLUS;
    186                             break;
    187                         case '-':
    188                             currentToken = MINUS;
    189                             break;
    190                         case '*':
    191                             currentToken = TIMES;
    192                             break;
    193                         case '/':
    194                             currentToken = OVER;
    195                             break;
    196                         case '(':
    197                             currentToken = LPAREN;
    198                             break;
    199                         case ')':
    200                             currentToken = RPAREN;
    201                             break;
    202                         case ';':
    203                             currentToken = SEMI;
    204                             break;
    205                         default:
    206                             currentToken = ERROR;
    207                             break;
    208                     }
    209                 }
    210                 break;
    211             case INCOMMENT:
    212                 save = false;
    213                 if (c == EOF) {
    214                     state = DONE;
    215                     currentToken = ENDFILE;
    216                 } else if (c == '}') state = START;
    217                 break;
    218             case INASSIGN:
    219                 state = DONE;
    220                 if (c == '=')
    221                     currentToken = ASSIGN;
    222                 else {
    223                     currentToken = DEFINE;
    224                     ungetNextChar();
    225                 }
    226                 break;
    227             case INNUM:
    228                 if (!isdigit(c)) {
    229                     ungetNextChar();
    230                     save = false;
    231                     state = DONE;
    232                     currentToken = NUM;
    233                 }
    234                 break;
    235             case INID:
    236                 if (!isalpha(c)) {
    237                     tokenString[tokenStringIndex] = '';
    238                     if (!strcmp(tokenString, "begin") || !strcmp(tokenString, "end")) {
    239                         save = false;
    240                         state = DONE;
    241                         currentToken = ID;
    242                         break;
    243                     }
    244                     ungetNextChar();
    245                     save = false;
    246                     state = DONE;
    247                     currentToken = ID;
    248                 }
    249                 break;
    250             case DONE:
    251                 break;
    252         }
    253         if ((save) && (tokenStringIndex <= MAXTOKENLEN) && (state != START && !isspace(c)))
    254             tokenString[tokenStringIndex++] = c;
    255         if (state == DONE) {
    256             tokenString[tokenStringIndex] = '';
    257             if (currentToken == ID)
    258                 currentToken = reservedLookup(tokenString);
    259         }
    260     }
    261     if (TraceScan) {
    262         printf("	%d: ", lineno++);
    263         printToken(currentToken, tokenString);
    264     }
    265     return currentToken;
    266 }
    267 
    268 
    269 int
    270 main() {
    271     if (cin >> filename && filename == "q") {
    272         filename = "......";
    273     }
    274     get.open(filename, ios::in);
    275     while (getToken() != ENDFILE);
    276 }

     运行结果:

  • 相关阅读:
    Unique Encryption Keys 暴力学习map,vector 函数
    hdu 1250 Hat's Fibonacci
    匈牙利算法模板 图的二分匹配 hdu 2063 过山车
    hdu 4260 汉诺塔问题 The End of The World
    各种常见文件的hex文件头
    Ubuntu & node.js
    Linux周期性任务的执行指令配置
    MySQL各版本的性能特性(从4.0版本开始)
    Tsung 1.5.0 增加对 WebSocket 和 BOSH 的支持
    ShowSlow+Yslow环境搭建
  • 原文地址:https://www.cnblogs.com/INnoVationv2/p/5967015.html
Copyright © 2011-2022 走看看