zoukankan      html  css  js  c++  java
  • C99 词法部分 Antlr Grammar

    /*
     ============================================================================
     Name        : CTokens.g
     Author      : luqi
     Version     : 0.1
     Copyright   : Your copyright notice
     Description : C99 - Lexer - have tested <C99.pdf 6.4>
     ============================================================================
     */
    grammar CTokens;

    // Code-generation options for the recognizers ANTLR builds from this grammar.
    options {
      // Generate Java source.
      language = Java;
      // Base class of the generated recognizer; DebugParser is imported in @header.
      // NOTE(review): in a combined grammar this presumably affects only the parser — confirm.
      superClass = DebugParser;
        // Disabled alternative naming DebugLexer as the base class.
        //@ superClass = DebugLexer;
    }

    // Text inserted at the top of the generated parser source: the package
    // declaration and the import of the DebugParser superclass named in options.
    @header
    {
    package c99.ctokens;
    import util.DebugParser;
    }

    // Text inserted at the top of the generated lexer source: the same package
    // as the parser, plus an import of util.DebugLexer.
    // NOTE(review): no active option makes the lexer extend DebugLexer — confirm the import is still needed.
    @lexer::header
    {
    package c99.ctokens;
    import util.DebugLexer;
    }

    // Entry rule. `token+` lets a whole input be checked in one run, and the
    // explicit EOF turns trailing unrecognised input into a reported error
    // instead of leaving it silently unread.
    prog
        : token+ EOF
        ;
     
     
    // Matches one token of any supported category and prints its category
    // together with the matched text.
    token
        : kw=KEYWORD
            { System.out.println("Meet KEYWORD: "        + $kw.text); }
        | id=IDENTIFIER
            { System.out.println("Meet IDENTIFIER: "     + $id.text); }
        | c=CONSTANT
            { System.out.println("Meet CONSTANT: "       + $c.text); }
        | s=STRING_LITERAL
            { System.out.println("Meet STRING_LITERAL: " + $s.text); }
        //| PUNCTUATOR
        ;
            
    // Parser-level wrapper around a single KEYWORD token.
    keyword
        : KEYWORD
        ;
            
    // Parser-level wrapper around a single IDENTIFIER token.
    identifier
        : IDENTIFIER
        ;
                
    // Parser-level wrapper around a single CONSTANT token.
    constant
        : CONSTANT
        ;
              
    // Parser-level wrapper around a single STRING_LITERAL token.
    string_literal
        : STRING_LITERAL
        ;
                    
    /*
    ==========================================================================================================
    */
            
    // Reserved words, including _Bool, _Complex and _Imaginary.
    KEYWORD
        : 'auto'     | 'break'    | 'case'     | 'char'
        | 'const'    | 'continue' | 'default'  | 'do'
        | 'double'   | 'else'     | 'enum'     | 'extern'
        | 'float'    | 'for'      | 'goto'     | 'if'
        | 'inline'   | 'int'      | 'long'     | 'register'
        | 'restrict' | 'return'   | 'short'    | 'signed'
        | 'sizeof'   | 'static'   | 'struct'   | 'switch'
        | 'typedef'  | 'union'    | 'unsigned' | 'void'
        | 'volatile' | 'while'    | '_Bool'    | '_Complex'
        | '_Imaginary'
        ;
                
    // An identifier: one non-digit start character followed by any mix of
    // non-digit characters and decimal digits.
    IDENTIFIER
        : IDENTIFIER_NONDIGIT (IDENTIFIER_NONDIGIT | DIGIT)*
        ;
                
    // A character allowed anywhere in an identifier other than a digit:
    // a letter/underscore or a universal character name.
    fragment
    IDENTIFIER_NONDIGIT
        : NONDIGIT
        | UNIVERSAL_CHARACTER_NAME
        ;
                        
    // ASCII letter or underscore.
    fragment
    NONDIGIT
        : ( 'a'..'z' | 'A'..'Z' | '_' )
        ;
    // Single decimal digit.
    fragment
    DIGIT
        : '0'..'9'
        ;

    // Universal character name: backslash-u with four hex digits, or
    // backslash-U with eight hex digits.
    fragment
    UNIVERSAL_CHARACTER_NAME
        : '\\' ( 'u' HEX_QUAD
               | 'U' HEX_QUAD HEX_QUAD
               )
        ;
         
    // Exactly four hexadecimal digits, used only inside UNIVERSAL_CHARACTER_NAME.
    // Declared `fragment` so the lexer never emits it as a token of its own;
    // otherwise four-character input such as 1234 or abcd would also match this
    // rule and compete with CONSTANT / IDENTIFIER.
    fragment
    HEX_QUAD
        : HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT
        ;
                
    // Any literal constant: integer, floating or character.
    // Enumeration constants are left out (alternative kept disabled below).
    CONSTANT
        : INTEGER_CONSTANT
        | FLOATING_CONSTANT
      //| ENUMERATION_CONSTANT
        | CHARACTER_CONSTANT
        ;
                
    // Decimal, octal or hexadecimal integer with an optional type suffix.
    fragment
    INTEGER_CONSTANT
        : ( DECIMAL_CONSTANT
          | OCTAL_CONSTANT
          | HEXADECIMAL_CONSTANT
          )
          INTEGER_SUFFIX?
        ;
    // Integer type suffix: unsigned and long / long-long markers in either order.
    fragment
    INTEGER_SUFFIX
        : UNSIGNED_SUFFIX LONG_SUFFIX?          // u, ul
        | UNSIGNED_SUFFIX LONG_LONG_SUFFIX      // ull
        | LONG_SUFFIX UNSIGNED_SUFFIX?          // l, lu
        | LONG_LONG_SUFFIX UNSIGNED_SUFFIX?     // ll, llu
        ;
    // Unsigned marker, either case.
    fragment
    UNSIGNED_SUFFIX
        : ( 'u' | 'U' )
        ;
    // Long marker, either case.
    fragment
    LONG_SUFFIX
        : ( 'l' | 'L' )
        ;
    // Long-long marker; both letters must have the same case.
    fragment
    LONG_LONG_SUFFIX
        : 'll'
        | 'LL'
        ;
    // Decimal integer: a non-zero leading digit followed by any digits.
    fragment
    DECIMAL_CONSTANT
        : NONZERO_DIGIT DIGIT*
        ;
    // Octal integer: a leading 0 followed by any octal digits (a lone 0 included).
    fragment
    OCTAL_CONSTANT
        : '0' OCTAL_DIGIT*
        ;
    // Hexadecimal integer: 0x / 0X prefix followed by at least one hex digit.
    fragment
    HEXADECIMAL_CONSTANT
        : HEXADECIMAL_PREFIX HEXADECIMAL_DIGIT+
        ;
     

    // The 0x / 0X prefix of hexadecimal integer and floating constants.
    // Declared `fragment` because it is only a building block: a bare prefix
    // must not be emitted as a token of its own.
    fragment
    HEXADECIMAL_PREFIX
        : '0x'
        | '0X'
        ;
    // Decimal digit other than zero.
    fragment
    NONZERO_DIGIT
        : '1'..'9'
        ;
    // Single octal digit.
    fragment
    OCTAL_DIGIT
        : '0'..'7'
        ;
                
    // Single hexadecimal digit, either letter case.
    fragment
    HEXADECIMAL_DIGIT
        : ( '0'..'9' | 'a'..'f' | 'A'..'F' )
        ;

    // Floating constant in decimal or hexadecimal notation.
    fragment
    FLOATING_CONSTANT
        : DECIMAL_FLOATING_CONSTANT
        | HEXADECIMAL_FLOATING_CONSTANT
        ;
                        
    // Decimal floating constant. With a decimal point the exponent is optional;
    // without one (digits only) the exponent is required.
    fragment
    DECIMAL_FLOATING_CONSTANT
        : FRACTIONAL_CONSTANT EXPONENT_PART? FLOATING_SUFFIX?
        | DIGIT_SEQUENCE EXPONENT_PART FLOATING_SUFFIX?
        ;
    // Digits around a decimal point; either side may be empty, but not both
    // (forms: "1.5", ".5", "1.").
    fragment
    FRACTIONAL_CONSTANT
        : DIGIT_SEQUENCE? '.' DIGIT_SEQUENCE
        | DIGIT_SEQUENCE '.'
        ;
                        
    // Decimal exponent: e / E, an optional sign, then at least one digit.
    // Declared `fragment` because it is only a component of
    // DECIMAL_FLOATING_CONSTANT; as a standalone token it would overlap with
    // IDENTIFIER on input such as e10.
    fragment
    EXPONENT_PART
        : 'e' SIGN? DIGIT_SEQUENCE
        | 'E' SIGN? DIGIT_SEQUENCE
        ;
    // Exponent sign.
    fragment
    SIGN
        : ( '+' | '-' )
        ;
            
    // One or more decimal digits.
    fragment
    DIGIT_SEQUENCE
        : DIGIT+
        ;
    // Hexadecimal floating constant: prefix, hex mantissa (with or without a
    // point), mandatory binary exponent, optional suffix.
    fragment
    HEXADECIMAL_FLOATING_CONSTANT
        : HEXADECIMAL_PREFIX
          ( HEXADECIMAL_FRACTIONAL_CONSTANT
          | HEXADECIMAL_DIGIT_SEQUENCE
          )
          BINARY_EXPONENT_PART
          FLOATING_SUFFIX?
        ;
                                    
    // Hex digits around a point; either side may be empty, but not both.
    // Declared `fragment` because it is only the mantissa of
    // HEXADECIMAL_FLOATING_CONSTANT; as a standalone token it would overlap
    // with decimal floating constants on input such as 1.5.
    fragment
    HEXADECIMAL_FRACTIONAL_CONSTANT
        : HEXADECIMAL_DIGIT_SEQUENCE? '.' HEXADECIMAL_DIGIT_SEQUENCE
        | HEXADECIMAL_DIGIT_SEQUENCE '.'
        ;
            
    // Binary exponent of a hexadecimal floating constant: p / P, an optional
    // sign, then at least one decimal digit.
    // Declared `fragment` because it is only a component of
    // HEXADECIMAL_FLOATING_CONSTANT; as a standalone token it would overlap
    // with IDENTIFIER on input such as p10.
    fragment
    BINARY_EXPONENT_PART
        : 'p' SIGN? DIGIT_SEQUENCE
        | 'P' SIGN? DIGIT_SEQUENCE
        ;
                            
    // One or more hexadecimal digits.
    // Declared `fragment` because it is only a building block of hexadecimal
    // floating constants; as a standalone token it would overlap with
    // CONSTANT and IDENTIFIER on plain digit or a-f input.
    fragment
    HEXADECIMAL_DIGIT_SEQUENCE
        : HEXADECIMAL_DIGIT+
        ;
                                
    // Floating type suffix: f / F or l / L.
    fragment
    FLOATING_SUFFIX
        : ( 'f' | 'l' | 'F' | 'L' )
        ;

    //fragment       
    //ENUMERATION_CONSTANT    :   IDENTIFIER
    //                        ;
     
    // Character constant in single quotes, optionally prefixed with L.
    fragment
    CHARACTER_CONSTANT
        : 'L'? '\'' C_CHAR_SEQUENCE '\''
        ;
    // One or more characters inside a character constant.
    fragment
    C_CHAR_SEQUENCE
        : C_CHAR+
        ;
    // One character of a character constant: an escape sequence, or any
    // character except single quote, backslash and line terminators.
    // C99 6.4.4.4 excludes new-line from c-char; rejecting '\n' and '\r' keeps
    // an unterminated constant from running on into the following lines.
    fragment
    C_CHAR
        : ~( '\'' | '\\' | '\n' | '\r' )
        | ESCAPE_SEQUENCE
        ;
            
    // Any escape sequence allowed inside character constants and string
    // literals: simple, octal, hexadecimal or a universal character name.
    // Declared `fragment` because escapes are meaningful only inside those
    // literals; a bare escape in the input must not become a token of its own.
    fragment
    ESCAPE_SEQUENCE
        : SIMPLE_ESCAPE_SEQUENCE
        | OCTAL_ESCAPE_SEQUENCE
        | HEXADECIMAL_ESCAPE_SEQUENCE
        | UNIVERSAL_CHARACTER_NAME
        ;
                    
    // Backslash followed by one of the single-character escape codes.
    fragment
    SIMPLE_ESCAPE_SEQUENCE
        : '\\'
          ( '\'' | '"' | '?' | '\\'
          | 'a'  | 'b' | 'f' | 'n' | 'r' | 't' | 'v'
          )
        ;
    // Octal escape: a backslash followed by one, two or three octal digits,
    // written as three explicit alternatives. The three-digit alternative is
    // guarded by a syntactic predicate that checks for a backslash plus three
    // octal digits before it is chosen. The trailing comment on the first
    // alternative records a more compact formulation that is not in use.
    fragment
    OCTAL_ESCAPE_SEQUENCE   :  '\\'  OCTAL_DIGIT                  //  OCTAL_DIGIT OCTAL_DIGIT ?  OCTAL_DIGIT ?
                            |  '\\'  OCTAL_DIGIT OCTAL_DIGIT
                            |  ('\\' OCTAL_DIGIT OCTAL_DIGIT  OCTAL_DIGIT )=> '\\'  OCTAL_DIGIT OCTAL_DIGIT  OCTAL_DIGIT
                            ;
       
    // Hexadecimal escape: backslash-x followed by one or more hex digits.
    fragment
    HEXADECIMAL_ESCAPE_SEQUENCE
        : '\\x' ( HEXADECIMAL_DIGIT )+
        ;
     
    // String literal in double quotes, optionally prefixed with L; the body
    // may be empty.
    STRING_LITERAL
        : 'L'? '"' S_CHAR_SEQUENCE? '"'
        ;
     
    // One or more characters inside a string literal.
    fragment
    S_CHAR_SEQUENCE
        : S_CHAR+
        ;

    // One character of a string literal: an escape sequence, or any character
    // except double quote, backslash and line terminators.
    // C99 6.4.5 excludes new-line from s-char; rejecting '\n' and '\r' keeps an
    // unterminated string from running on into the following lines.
    fragment
    S_CHAR
        : ~( '"' | '\\' | '\n' | '\r' )
        | ESCAPE_SEQUENCE
        ;


    // Line comment: "//" up to and including an optional line terminator
    // (\n, \r or \r\n). Sent to the hidden channel so the parser never sees it.
    SINGLELINECOMMENT
        : '//'
          ( ~('\n' | '\r') )*
          ( '\n' | '\r' ('\n')? )?
          { $channel=HIDDEN; }
        ;
                        
                        
    // Block comment: "/*" through the first following "*/" (non-greedy loop).
    // Sent to the hidden channel so the parser never sees it.
    MULTILINECOMMENT
        : '/*'
          ( options { greedy=false; } : . )*
          '*/'
          { $channel=HIDDEN; }
        ;
          
                        
    // A single whitespace character (space, tab, CR, LF or form feed),
    // sent to the hidden channel.
    WS
        : ( ' ' | '\t' | '\r' | '\n' | '\u000C' )
          { $channel=HIDDEN; }
        ;
        
        
        
        
            
                
                

            

  • 相关阅读:
    SELECT IDENT_CURRENT(tableName)和自增长列的纠结
    [置顶]c# 设计模式(1)一 创建型
    我们互联网生活因家庭服务器改变
    互联网创业不妨先放下平台梦
    影响未来的应用ifttt,互联网自主神经系统的又一个有力证据
    什么是ifttt,ifttt怎么玩? ifttt操作体验具体步骤
    杰出企业家的20个好习惯
    折叠分组表格中重用Cell导致的问题
    使用AChartEngine画折线图
    MSSQL获取当前插入的ID号及在高并发的时候处理方式
  • 原文地址:https://www.cnblogs.com/quixotic/p/2269495.html
Copyright © 2011-2022 走看看