zoukankan      html  css  js  c++  java
  • Ogre源代码浅析——脚本及其解析(二)




    // Compiles script text in three steps: tokenize `str` with a ScriptLexer, parse the tokens
    // with a ScriptParser into a ConcreteNodeListPtr, then hand the nodes and `group` to the
    // compile(nodes, group) overload, whose result is returned unchanged.
    // `source` is only forwarded to the lexer, which stores it on each token it creates.
    1     bool ScriptCompiler::compile(const String &str, const String &source, const String &group)
    2     {
    3         ScriptLexer lexer;
    4         ScriptParser parser;
                // The lexer output is fed straight into the parser; no intermediate token list is kept here.
    5         ConcreteNodeListPtr nodes = parser.parse(lexer.tokenize(str, source));
    6         return compile(nodes, group);
    7     }


      // Splits the script text `str` into a token list using a character-driven state machine.
      // `source` is not examined here; it is passed to setToken() together with every lexeme.
      // Returns the shared token list. If the input ends while still inside a quoted string,
      // the error is reported through OGRE_EXCEPT with the line of the last quote character seen.
      // NOTE: this listing is an excerpt - the gutter numbers skip lines, so the state enum
      // (READY, COMMENT, ...) and the #if that pairs with the #else below are not shown.
      1     ScriptTokenListPtr ScriptLexer::tokenize(const String &str, const String &source)
      2     {
      3         // State enums
      6         // Set up some constant characters of interest
      8         const wchar_t varopener = L'$', quote = L'\"', slash = L'/', backslash = L'\\', openbrace = L'{', closebrace = L'}', colon = L':', star = L'*', cr = L'\r', lf = L'\n';
      9         wchar_t c = 0, lastc = 0;
     10 #else
     11         const wchar_t varopener = '$', quote = '\"', slash = '/', backslash = '\\', openbrace = '{', closebrace = '}', colon = ':', star = '*', cr = '\r', lf = '\n';
     12         char c = 0, lastc = 0;
     13 #endif
                // lexeme: text of the token being accumulated.
                // line: current 1-based line number; lastQuote: line of the most recent quote character.
     15         String lexeme;
     16         uint32 line = 1, state = READY, lastQuote = 0;
     17         ScriptTokenListPtr tokens(OGRE_NEW_T(ScriptTokenList, MEMCATEGORY_GENERAL)(), SPFM_DELETE_T);
     19         // Iterate over the input
     20         String::const_iterator i = str.begin(), end = str.end();
     21         while(i != end)
     22         {
                    // c is the character being processed, lastc the one processed on the previous iteration.
     23             lastc = c;
     24             c = *i;
                    // Remember where the latest quote was seen so an unterminated quote can be reported below.
     26             if(c == quote)
     27                 lastQuote = line;
     29             switch(state)
     30             {
                    // READY: not inside any token; decide what the current character starts.
     31             case READY:
     32                 if(c == slash && lastc == slash)
     33                 {
     34                     // Comment start, clear out the lexeme
     35                     lexeme = "";
     36                     state = COMMENT;
     37                 }
     38                 else if(c == star && lastc == slash)
     39                 {
     40                     lexeme = "";
     41                     state = MULTICOMMENT;
     42                 }
     43                 else if(c == quote)
     44                 {
     45                     // Clear out the lexeme ready to be filled with quotes!
     46                     lexeme = c;
     47                     state = QUOTE;
     48                 }
     49                 else if(c == varopener)
     50                 {
     51                     // Set up to read in a variable
     52                     lexeme = c;
     53                     state = VAR;
     54                 }
     55                 else if(isNewline(c))
     56                 {
                            // A newline is emitted as a token of its own; the state stays READY.
     57                     lexeme = c;
     58                     setToken(lexeme, line, source, tokens.get());
     59                 }
     60                 else if(!isWhitespace(c))
     61                 {
                            // Any other non-whitespace character starts a word; a slash might instead start a comment.
     62                     lexeme = c;
     63                     if(c == slash)
     64                         state = POSSIBLECOMMENT;
     65                     else
     66                         state = WORD;
     67                 }
     68                 break;
                    // COMMENT: single-line comment; characters are discarded until a newline.
     69             case COMMENT:
     70                 // This newline happens to be ignored automatically
     71                 if(isNewline(c))
     72                     state = READY;
     73                 break;
                    // MULTICOMMENT: block comment; characters are discarded until "*/" is seen.
     74             case MULTICOMMENT:
     75                 if(c == slash && lastc == star)
     76                     state = READY;
     77                 break;
                    // POSSIBLECOMMENT: the previous character was a slash read in READY.
     78             case POSSIBLECOMMENT:
     79                 if(c == slash && lastc == slash)
     80                 {
     81                     lexeme = "";
     82                     state = COMMENT;
     83                     break;    
     84                 }
     85                 else if(c == star && lastc == slash)
     86                 {
     87                     lexeme = "";
     88                     state = MULTICOMMENT;
     89                     break;
     90                 }
     91                 else
     92                 {
                            // No break after this branch: control falls through into case WORD,
                            // so the current character is handled as part of a word that began with the slash.
     93                     state = WORD;
     94                 }
                    // WORD: accumulating an ordinary word until a delimiter is reached.
     95             case WORD:
     96                 if(isNewline(c))
     97                 {
                            // Emit the finished word, then the newline as a separate token.
     98                     setToken(lexeme, line, source, tokens.get());
     99                     lexeme = c;
    100                     setToken(lexeme, line, source, tokens.get());
    101                     state = READY;
    102                 }
    103                 else if(isWhitespace(c))
    104                 {
    105                     setToken(lexeme, line, source, tokens.get());
    106                     state = READY;
    107                 }
    108                 else if(c == openbrace || c == closebrace || c == colon)
    109                 {
                            // Braces and colons end the word and are emitted as one-character tokens.
    110                     setToken(lexeme, line, source, tokens.get());
    111                     lexeme = c;
    112                     setToken(lexeme, line, source, tokens.get());
    113                     state = READY;
    114                 }
    115                 else
    116                 {
    117                     lexeme += c;
    118                 }
    119                 break;
                    // QUOTE: inside a quoted string; the opening quote is already in lexeme.
                    // A backslash is not appended when it is read; it is dealt with on the next
                    // character through lastc.
    120             case QUOTE:
    121                 if(c != backslash)
    122                 {
    123                     // Allow embedded quotes with escaping
    124                     if(c == quote && lastc == backslash)
    125                     {
                                // Escaped quote: only the quote is appended, the backslash is dropped.
    126                         lexeme += c;
    127                     }
    128                     else if(c == quote)
    129                     {
                                // Unescaped quote closes the string; the token keeps both quote characters.
    130                         lexeme += c;
    131                         setToken(lexeme, line, source, tokens.get());
    132                         state = READY;
    133                     }
    134                     else
    135                     {
    136                         // Backtrack here and allow a backslash normally within the quote
    137                         if(lastc == backslash)
    138                             lexeme = lexeme + "\\" + c;
    139                         else
    140                             lexeme += c;
    141                     }
    142                 }
    143                 break;
                    // VAR: accumulating a variable name that began with '$'; same delimiters as WORD.
    144             case VAR:
    145                 if(isNewline(c))
    146                 {
    147                     setToken(lexeme, line, source, tokens.get());
    148                     lexeme = c;
    149                     setToken(lexeme, line, source, tokens.get());
    150                     state = READY;
    151                 }
    152                 else if(isWhitespace(c))
    153                 {
    154                     setToken(lexeme, line, source, tokens.get());
    155                     state = READY;
    156                 }
    157                 else if(c == openbrace || c == closebrace || c == colon)
    158                 {
    159                     setToken(lexeme, line, source, tokens.get());
    160                     lexeme = c;
    161                     setToken(lexeme, line, source, tokens.get());
    162                     state = READY;
    163                 }
    164                 else
    165                 {
    166                     lexeme += c;
    167                 }
    168                 break;
    169             }
    171             // Separate check for newlines just to track line numbers
                    // A CR always counts; an LF counts only when it does not follow a CR,
                    // so a CR LF pair advances the line number once.
    172             if(c == cr || (c == lf && lastc != cr))
    173                 line++;
    175             i++;
    176         }
    178         // Check for valid exit states
                // A word or variable still being accumulated at end of input is emitted here.
                // NOTE(review): ending in POSSIBLECOMMENT (input stops right after a lone '/')
                // is not covered by these checks, so that pending lexeme is not emitted.
    179         if(state == WORD || state == VAR)
    180         {
    181             if(!lexeme.empty())
    182                 setToken(lexeme, line, source, tokens.get());
    183         }
    184         else
    185         {
    186             if(state == QUOTE)
    187             {
    188                 OGRE_EXCEPT(Exception::ERR_INVALID_STATE, 
    189                     Ogre::String("no matching \" found for \" at line ") + 
    190                         Ogre::StringConverter::toString(lastQuote),
    191                     "ScriptLexer::tokenize");
    192             }
    193         }
    195         return tokens;
    196     }


     // Token record produced by the lexer, followed by the shared-pointer and list typedefs
     // used to pass tokens around.
     1     /** This struct represents a token, which is an ID'd lexeme from the
     2         parsing input stream.
     3     */
     4     struct ScriptToken
     5     {
     6         /// This is the lexeme for this token
               // `file` receives the `source` string given to ScriptLexer::setToken.
     7         String lexeme, file;
     8         /// This is the id associated with the lexeme, which comes from a lexeme-token id mapping
               // ScriptLexer::setToken stores one of the TID_* constants here.
     9         uint32 type;
    10         /// This holds the line number of the input stream where the token was found.
    11         uint32 line;
    12     };
           // Shared-ownership handle to a single token.
    13     typedef SharedPtr<ScriptToken> ScriptTokenPtr;
           // Sequence of token handles, in the order they were appended.
    14     typedef vector<ScriptTokenPtr>::type ScriptTokenList;
           // Shared-ownership handle to a whole token list (the return type of ScriptLexer::tokenize).
    15     typedef SharedPtr<ScriptTokenList> ScriptTokenListPtr;


          变量“i”和“end”分别标识了读入的待解析的脚本文件的开头和结尾(20行),随着解析的进行,变量“i”将逐字符后移(175行)。整个解析过程由状态机的几种状态来表达,它们分别是:准备状态(READY 31-68行)、对注释信息的解析状态(COMMENT, MULTICOMMENT  69-77行)、对单词的解析状态(WORD  95-119行)、对双引号中引用信息的解析状态(QUOTE  120-143行)、对变量信息的解析状态(VAR  144-168行)、对可能是注释信息的数据进行解析的状态(POSSIBLECOMMENT 78-94行)。词法分析的主要目的,是将脚本文件中的各个词素(lexeme 比如,一个单词、脚本中大括号的左半边、脚本中大括号的右半边等都被看作一个词素)解读出来,并针对每个词素生成一个token对象,将此词素的相关信息保存在token对象中。在每一次循环开始时都要更新两个变量:“c”和“lastc” (23,24行)。c表示当前正要被处理的字符,lastc表示当前字符的前一个字符。之所以要使用这两个变量是因为,Ogre脚本中的“词素(lexeme)”是以空格为分隔符的,用这两个变量就可以方便地识别出:当前读取的字符是一个新词素的第一个字符,还是正在解析的词素的最后一个字符,又或者是当前正在解析的词素的多个字符(如果存在的话)中间位置的某个字符。


     // Creates a ScriptToken for `lexeme`, records `line` and `source` on it, assigns a TID_* type
     // based on the lexeme's characters, and appends it to `tokens`.
     // A newline lexeme is not appended when the last token already in `tokens` is TID_NEWLINE.
     // NOTE: this listing is an excerpt - the #if that pairs with the #else below is not shown.
     1     void ScriptLexer::setToken(const Ogre::String &lexeme, Ogre::uint32 line, const String &source, Ogre::ScriptTokenList *tokens)
     2     {
     4         const wchar_t openBracket = L'{', closeBracket = L'}', colon = L':', 
     5             quote = L'\"', var = L'$';
     6 #else
     7         const char openBracket = '{', closeBracket = '}', colon = ':', 
     8             quote = '\"', var = '$';
     9 #endif
    11         ScriptTokenPtr token(OGRE_NEW_T(ScriptToken, MEMCATEGORY_GENERAL)(), SPFM_DELETE_T);
    12         token->lexeme = lexeme;
    13         token->line = line;
    14         token->file = source;
                // Set only for a newline that directly follows another newline token.
    15         bool ignore = false;
    17         // Check the user token map first
                // NOTE(review): no token map is consulted in this function; the lexeme is
                // classified directly by the character tests below.
    18         if(lexeme.size() == 1 && isNewline(lexeme[0]))
    19         {
    20             token->type = TID_NEWLINE;
                    // Collapse runs of newlines: skip this one if the previous token is also a newline.
    21             if(!tokens->empty() && tokens->back()->type == TID_NEWLINE)
    22                 ignore = true;
    23         }
    24         else if(lexeme.size() == 1 && lexeme[0] == openBracket)
    25             token->type = TID_LBRACKET;
    26         else if(lexeme.size() == 1 && lexeme[0] == closeBracket)
    27             token->type = TID_RBRACKET;
    28         else if(lexeme.size() == 1 && lexeme[0] == colon)
    29             token->type = TID_COLON;
                // Unlike the tests above there is no length check here: any lexeme whose first
                // character is '$' is classified as a variable.
    30         else if(lexeme[0] == var)
    31             token->type = TID_VARIABLE;
    32         else
    33         {
    34             // This is either a non-zero length phrase or quoted phrase
                    // Quoted phrase: at least two characters, beginning and ending with a quote.
    35             if(lexeme.size() >= 2 && lexeme[0] == quote && lexeme[lexeme.size() - 1] == quote)
    36             {
    37                 token->type = TID_QUOTE;
    38             }
    39             else
    40             {
    41                 token->type = TID_WORD;
    42             }
    43         }
    45         if(!ignore)
    46             tokens->push_back(token);
    47     }


            TID_LBRACKET = 0, // {
            TID_RBRACKET, // }
            TID_COLON, // :
            TID_VARIABLE, // $...
            TID_WORD, // *
            TID_QUOTE, // "*"
            TID_NEWLINE, // \n


  • 相关阅读:
    vue+element 动态表单验证
    ‘Maximum call stack size exceeded’错误的解决方法
  • 原文地址:https://www.cnblogs.com/yzwalkman/p/2841607.html
Copyright © 2011-2022 走看看