zoukankan      html  css  js  c++  java
  • python实现词法分析

    #请先安装Ply
    # -*- coding: utf-8 -*-
    #--------------------------------------------------------------------------
    #Author:Jmdebugger
    #email: pengkailb@gmail.com
    #date: 2013-9-17
    #--------------------------------------------------------------------------
    import ply.lex as lex
    
    # Token types produced directly by the lexer rules, followed by the
    # single-character delimiter types.
    tokens = [
        "TOKEN_IDENT",
        "TOKEN_INT",
        "TOKEN_FLOAT",
        "TOKEN_STRING",
        "TOKEN_OP",
    ] + [
        "TOKEN_DELIM_COMMA",          # ,
        "TOKEN_DELIM_OPEN_PAREN",     # (
        "TOKEN_DELIM_CLOSE_PAREN",    # )
        "TOKEN_DELIM_OPEN_BRACKET",   # [
        "TOKEN_DELIM_CLOSE_BRACKET",  # ]
        "TOKEN_DELIM_OPEN_BRACE",     # {
        "TOKEN_DELIM_CLOSE_BRACE",    # }
        "TOKEN_DELIM_SEMICOLON",      # ;
    ]

    # Keyword text -> token type. Every type is 'TOKEN_RSRVD_' plus the
    # upper-cased keyword, so the table is derived from the keyword list.
    reserved = dict(
        (keyword, 'TOKEN_RSRVD_' + keyword.upper())
        for keyword in (
            'if', 'else', 'true', 'false', 'while', 'break',
            'continue', 'goto', 'func', 'var', 'for', 'return',
        )
    )

    # The keyword token types are part of the declared token list as well.
    tokens.extend(reserved.values())
    
    
    # Characters silently skipped between tokens: space, tab, carriage return.
    # PLY treats t_ignore as a set of literal characters rather than a regex,
    # so this must be a normal (non-raw) string whose escapes Python expands;
    # a raw string would make the lexer skip backslash, 't' and 'r' instead.
    # Line feed is left out on purpose so the newline rule can count lines.
    t_ignore = ' \t\r'
    # Comment rule: matches a /* ... */ block comment (non-greedy, may span
    # lines) or a // comment running to the end of the line. The raw string
    # right under the def is the rule's regex, which PLY reads from __doc__.
    # Nothing is returned, so the comment text is discarded.
    def t_COMMENT(t):
        r'(/\*(.|\n)*?\*/)|(//.*)'
        # A block comment may contain line feeds; count them so line numbers
        # reported after the comment stay correct.
        t.lexer.lineno += t.value.count('\n')
    
    # Newline rule: consumes a run of line feeds and advances the lexer's
    # line counter by the number of characters matched. The pattern must keep
    # the escaped form \n; PLY reads the regex from __doc__. Nothing is
    # returned, so no token is emitted.
    def t_newline(t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        
    def t_error(t):
        """Report an illegal character and resume lexing after it.

        For the error token, t.value holds the remaining input starting at
        the offending character, so only its first character is reported
        together with the current line number. Skipping one character lets
        the lexer continue; without it the lexer cannot advance past the
        bad input.
        """
        print("LaunchScript error: " + repr(t.value[0])
              + " at line " + str(t.lexer.lineno))
        t.lexer.skip(1)
        
    # Identifier rule: a letter or underscore followed by letters, digits or
    # underscores. Words listed in the reserved table are re-tagged with their
    # keyword token type; everything else stays a plain identifier.
    def t_TOKEN_IDENT(t):
        r'[a-zA-Z_][a-zA-Z_0-9]*'
        if t.value in reserved:
            t.type = reserved[t.value]
        else:
            t.type = 'TOKEN_IDENT'
        return t
    
    # Numeric literal rules. PLY tries function rules in definition order and
    # only afterwards the string rules, so the float rule is a function placed
    # BEFORE the integer rule; otherwise "3.14" would lex as INT "3" followed
    # by FLOAT ".14". The decimal point is escaped so it matches only a
    # literal '.', which also keeps "0x1F" out of the float rule. Token values
    # are left as the matched text.
    def t_TOKEN_FLOAT(t):
        r'[0-9]*\.[0-9]+'
        return t

    # Integer literal: hexadecimal with a 0x prefix, or plain decimal digits.
    def t_TOKEN_INT(t):
        r'(0x[a-fA-F0-9]+)|([0-9]+)'
        return t
    # String literal: a double quote, then a lazy run of either any character
    # other than backslash or line feed, or a backslash followed by one
    # character, up to the closing double quote. Strings cannot span lines.
    # Variant for Windows line endings, which also excludes carriage return:
    #   r'("([^\\\r\n]|(\\.))*?")'
    t_TOKEN_STRING = r'("([^\\\n]|(\\.))*?")'
    # Single-character delimiters. Each value is a regex, so characters that
    # are regex metacharacters (parentheses, brackets, braces) are escaped to
    # match literally.
    t_TOKEN_DELIM_COMMA = r','
    t_TOKEN_DELIM_OPEN_PAREN = r'\('
    t_TOKEN_DELIM_CLOSE_PAREN = r'\)'
    t_TOKEN_DELIM_OPEN_BRACKET = r'\['
    t_TOKEN_DELIM_CLOSE_BRACKET = r'\]'
    t_TOKEN_DELIM_OPEN_BRACE = r'\{'
    t_TOKEN_DELIM_CLOSE_BRACE = r'\}'
    t_TOKEN_DELIM_SEMICOLON = r';'
    
    # Operator rule. Alternatives are ordered longest first: the three-character
    # shift-assignments, then "<op>=" forms, then ||, &&, ++, --, and finally
    # the single-character operators. Inside the character classes '-' is
    # escaped so it is a literal minus rather than a range, and | and + are
    # escaped where they appear outside a class.
    # NOTE(review): plain "<<" and ">>" are not listed, so they lex as two
    # separate single-character tokens - confirm whether that is intended.
    def t_TOKEN_OP(t):
        r'(<<=)|(>>=)|([+\-*/%&|^=!><]=)|(\|\|)|(&&)|(\+\+)|(--)|[+\-*/%^=&|><!~]'
        return t
    
    
    
        
    if __name__ == "__main__":
        # Demo driver: tokenize ./test.txt and print one
        # "value<TAB>----><TAB>type" line per token.
        lexer = lex.lex()

        # Read in text mode so the lexer receives str rather than bytes on
        # Python 3; the with-block closes the file even if reading fails.
        with open("./test.txt") as source_file:
            lexer.input(source_file.read())

        # lexer.token() returns None once the input is exhausted.
        for tok in iter(lexer.token, None):
            print(tok.value + "\t---->\t" + tok.type)
    
        
    


  • 相关阅读:
    meta标签总结
    基本类型String的原生方法详解
    对JSON的增删查改
    百分比宽度div如何水平居中
    【转】Chrome 控制台console的用法(提高js调试能力)
    css textarea固定大小滚动条自动
    【转】前端必读:浏览器内部工作原理
    git clone 远程分支
    http-server 使用介绍
    js 全选/取消
  • 原文地址:https://www.cnblogs.com/james1207/p/3328938.html
Copyright © 2011-2022 走看看