zoukankan      html  css  js  c++  java
  • 词法分析器--DFA(c++实现)

    语言名为TINY

    实例程序:

    begin
         var x,y:interger;
         x:=10;
         read(x);
         if y<0 then x:=x-y;
         x:=x+y;
         write(x);
    end

    TINY语言扫描程序的DFA:

    代码

    //ExplLexicalAnalyzer.h
    #ifndef EXPLLEXICALANALYZER_H
    #define EXPLLEXICALANALYZER_H
    
    
    // Maximum number of characters kept for one token's text
    // (the scanner's buffer is declared with MAXTOKENLEN + 1 elements).
    #define MAXTOKENLEN 40
    // Number of entries in the scanner's reserved-word table.
    #define MAXRESERVED 13
    
    
    // Every kind of token the scanner can hand back to its caller.
    typedef enum {
        // bookkeeping: end of input, unrecognised character
        ENDFILE, ERROR,
        // reserved words
        IF, THEN, ELSE, END, REPEAT, UNTIL, READ, WRITE, VAR, BEGIN, INTEGER, DOUBLE, STRING,
        // multi-character tokens: identifiers and numbers
        ID, NUM,
        // operators and punctuation; ASSIGN is ":=" and DEFINE is a lone ":"
        ASSIGN, EQ, LT, PLUS, MINUS, TIMES, OVER, LPAREN, RPAREN, SEMI, COMMA, DEFINE
    } TokenType;
    
    // Disabled draft of a richer token record (kind, position, value, name).
    //typedef struct {
    //    TokenType kind;
    //    int row = -1;
    //    int column = -1;
    //    double value;
    //    std::string ID;
    //} Token;
    
    
    // Scans the input and returns the kind of the next token.
    TokenType getToken(void);
    
    #endif //EXPLLEXICALANALYZER_H
      1 //ExplLexicalAnalyzer.cpp
      2 #include <cstdio>
      3 #include <iostream>
      4 #include <fstream>
      5 #include <cstring>
      6 #include  "ExplLexicalAnalyzer.h"
      7 
      8 using namespace std;
      9 
     // States of the scanner's finite automaton: getToken begins each token in
     // START and keeps consuming characters until it reaches DONE.
     10 typedef enum {
     11     START, INASSIGN, INCOMMENT, INNUM, INID, DONE
     12 } StateType;
     13 
     // Text of the token most recently scanned by getToken.
     14 char tokenString[MAXTOKENLEN + 1];
     15 
     16 #define BUFLEN 256 // capacity of lineBuf
     17 
     18 static char lineBuf[BUFLEN]; // the source line currently being scanned
     19 static int linepos = 0; // index in lineBuf of the next character to hand out
     20 static int bufsize = 0; // length of the text currently held in lineBuf
     21 static int EOF_flag = false; // consulted by ungetNextChar; NOTE(review): nothing visible in this file sets it
     22 static string filename; // path of the source file, read from stdin in main
     23 static fstream get; // stream the scanner reads source lines from
     24 static int lineno = 0; // NOTE(review): despite the name, printed as the token's index within the current line
     25 static int columnpos = 0; // NOTE(review): despite the name, printed as the running number of each echoed source line
     26 bool TraceScan = true; // when true, getToken prints every token it returns
     27 StateType state; // current automaton state; also read by getNextChar
     28 
     // Lookup table pairing each reserved word's spelling with its token kind;
     // searched linearly by reservedLookup.
     29 static struct {
     30     const char *str;
     31     TokenType tok;
     32 } reservedWords[MAXRESERVED]
     33         = {{"if",       IF},
     34            {"then",     THEN},
     35            {"else",     ELSE},
     36            {"end",      END},
     37            {"repeat",   REPEAT},
     38            {"until",    UNTIL},
     39            {"read",     READ},
     40            {"write",    WRITE},
     41            {"begin",    BEGIN},
     42            {"var",      VAR},
     43            {"interger", INTEGER}, // NOTE(review): keyword is spelled "interger", not "integer" - confirm this is intended
     44            {"double",   DOUBLE},
     45            {"string",   STRING}};
     46 
     47 
     48 static char
     49 getNextChar() {
     50     if (linepos >= bufsize) {
     51         lineno = 0;
     52         if (state != START)
     53             return ' ';
     54         if (get.getline(lineBuf, BUFLEN - 1)) {
     55             printf("%d: %s
    ", columnpos++, lineBuf);
     56             bufsize = (int) strlen(lineBuf);
     57             linepos = 0;
     58             return lineBuf[linepos++];
     59         } else {
     60             return EOF;
     61         }
     62     } else return lineBuf[linepos++];
     63 }
     64 
     65 
     66 static TokenType reservedLookup(char *s) {
     67     int i;
     68     for (i = 0; i < MAXRESERVED; i++)
     69         if (!strcmp(s, reservedWords[i].str))
     70             return reservedWords[i].tok;
     71     return ID;
     72 }
     73 
     74 //退回一个字符
     75 static void ungetNextChar(void) { if (!EOF_flag) linepos--; }
     76 
     77 //打印分析结果
     78 void printToken(TokenType token, const char *tokenString) {
     79     switch (token) {
     80         case IF:
     81         case THEN:
     82         case ELSE:
     83         case END:
     84         case REPEAT:
     85         case UNTIL:
     86         case READ:
     87         case WRITE:
     88         case BEGIN:
     89         case VAR:
     90         case INTEGER:
     91         case DOUBLE:
     92         case STRING:
     93             printf("reserved word: %s
    ", tokenString);
     94             break;
     95         case DEFINE:
     96             printf(":
    ");
     97             break;
     98         case COMMA:
     99             printf(",
    ");
    100             break;
    101         case ASSIGN:
    102             printf(":=
    ");
    103             break;
    104         case LT:
    105             printf("<
    ");
    106             break;
    107         case EQ:
    108             printf("=
    ");
    109             break;
    110         case LPAREN:
    111             printf("(
    ");
    112             break;
    113         case RPAREN:
    114             printf(")
    ");
    115             break;
    116         case SEMI:
    117             printf(";
    ");
    118             break;
    119         case PLUS:
    120             printf("+
    ");
    121             break;
    122         case MINUS:
    123             printf("-
    ");
    124             break;
    125         case TIMES:
    126             printf("*
    ");
    127             break;
    128         case OVER:
    129             printf("/
    ");
    130             break;
    131         case ENDFILE:
    132             break;
    133         case NUM:
    134             printf("NUM, val= %s
    ", tokenString);
    135             break;
    136         case ID:
    137             printf("ID, name= %s
    ", tokenString);
    138             break;
    139         case ERROR:
    140             printf("ERROR: %s
    ", tokenString);
    141             break;
    142         default:
    143             printf("Unknown token: %d
    ", token);
    144     }
    145 }
    146 
    147 
    148 //词法分析
    149 TokenType getToken(void) {
    150     int tokenStringIndex = 0;
    151     TokenType currentToken;
    152     state = START;
    153     bool save;
    154     while (state != DONE) {
    155         char c = getNextChar();
    156         save = true;
    157         switch (state) {
    158             case START:
    159                 if (isdigit(c))
    160                     state = INNUM;
    161                 else if (isalpha(c))
    162                     state = INID;
    163                 else if (c == ':')
    164                     state = INASSIGN;
    165                 else if ((c == ' ') || (c == '	') || (c == '
    '))
    166                     save = false;
    167                 else if (c == '{') {
    168                     save = false;
    169                     state = INCOMMENT;
    170                 } else {
    171                     state = DONE;
    172                     switch (c) {
    173                         case EOF:
    174                             return ENDFILE;
    175                         case ',':
    176                             currentToken = COMMA;
    177                             break;
    178                         case '=':
    179                             currentToken = EQ;
    180                             break;
    181                         case '<':
    182                             currentToken = LT;
    183                             break;
    184                         case '+':
    185                             currentToken = PLUS;
    186                             break;
    187                         case '-':
    188                             currentToken = MINUS;
    189                             break;
    190                         case '*':
    191                             currentToken = TIMES;
    192                             break;
    193                         case '/':
    194                             currentToken = OVER;
    195                             break;
    196                         case '(':
    197                             currentToken = LPAREN;
    198                             break;
    199                         case ')':
    200                             currentToken = RPAREN;
    201                             break;
    202                         case ';':
    203                             currentToken = SEMI;
    204                             break;
    205                         default:
    206                             currentToken = ERROR;
    207                             break;
    208                     }
    209                 }
    210                 break;
    211             case INCOMMENT:
    212                 save = false;
    213                 if (c == EOF) {
    214                     state = DONE;
    215                     currentToken = ENDFILE;
    216                 } else if (c == '}') state = START;
    217                 break;
    218             case INASSIGN:
    219                 state = DONE;
    220                 if (c == '=')
    221                     currentToken = ASSIGN;
    222                 else {
    223                     currentToken = DEFINE;
    224                     ungetNextChar();
    225                 }
    226                 break;
    227             case INNUM:
    228                 if (!isdigit(c)) {
    229                     ungetNextChar();
    230                     save = false;
    231                     state = DONE;
    232                     currentToken = NUM;
    233                 }
    234                 break;
    235             case INID:
    236                 if (!isalpha(c)) {
    237                     tokenString[tokenStringIndex] = '';
    238                     if (!strcmp(tokenString, "begin") || !strcmp(tokenString, "end")) {
    239                         save = false;
    240                         state = DONE;
    241                         currentToken = ID;
    242                         break;
    243                     }
    244                     ungetNextChar();
    245                     save = false;
    246                     state = DONE;
    247                     currentToken = ID;
    248                 }
    249                 break;
    250             case DONE:
    251                 break;
    252         }
    253         if ((save) && (tokenStringIndex <= MAXTOKENLEN) && (state != START && !isspace(c)))
    254             tokenString[tokenStringIndex++] = c;
    255         if (state == DONE) {
    256             tokenString[tokenStringIndex] = '';
    257             if (currentToken == ID)
    258                 currentToken = reservedLookup(tokenString);
    259         }
    260     }
    261     if (TraceScan) {
    262         printf("	%d: ", lineno++);
    263         printToken(currentToken, tokenString);
    264     }
    265     return currentToken;
    266 }
    267 
    268 
    269 int
    270 main() {
    271     if (cin >> filename && filename == "q") {
    272         filename = "......";
    273     }
    274     get.open(filename, ios::in);
    275     while (getToken() != ENDFILE);
    276 }

     运行结果:

  • 相关阅读:
    21.Merge Two Sorted Lists 、23. Merge k Sorted Lists
    34. Find First and Last Position of Element in Sorted Array
    leetcode 20. Valid Parentheses 、32. Longest Valid Parentheses 、301. Remove Invalid Parentheses
    31. Next Permutation
    17. Letter Combinations of a Phone Number
    android 常见分辨率(mdpi、hdpi 、xhdpi、xxhdpi )及屏幕适配注意事项
    oc 异常处理
    oc 类型判断
    oc Delegate
    oc 协议
  • 原文地址:https://www.cnblogs.com/INnoVationv2/p/5967015.html
Copyright © 2011-2022 走看看