# Please install PLY (Python Lex-Yacc) before using this module.
# -*- coding: utf-8 -*-
#--------------------------------------------------------------------------
#Author:Jmdebugger
#email: pengkailb@gmail.com
#date: 2013-9-17
#--------------------------------------------------------------------------
import ply.lex as lex
# Names of every non-keyword token type this lexer can emit.
tokens = [
    'TOKEN_IDENT',
    'TOKEN_INT',
    'TOKEN_FLOAT',
    'TOKEN_STRING',
    'TOKEN_OP',
    'TOKEN_DELIM_COMMA',          # ,
    'TOKEN_DELIM_OPEN_PAREN',     # (
    'TOKEN_DELIM_CLOSE_PAREN',    # )
    'TOKEN_DELIM_OPEN_BRACKET',   # [
    'TOKEN_DELIM_CLOSE_BRACKET',  # ]
    'TOKEN_DELIM_OPEN_BRACE',     # {
    'TOKEN_DELIM_CLOSE_BRACE',    # }
    'TOKEN_DELIM_SEMICOLON',      # ;
]

# Keyword spelling -> token type. t_TOKEN_IDENT looks matched identifiers up
# in this table to turn them into keyword tokens.
reserved = {
    "if": "TOKEN_RSRVD_IF",
    "else": "TOKEN_RSRVD_ELSE",
    "true": "TOKEN_RSRVD_TRUE",
    "false": "TOKEN_RSRVD_FALSE",
    "while": "TOKEN_RSRVD_WHILE",
    "break": "TOKEN_RSRVD_BREAK",
    "continue": "TOKEN_RSRVD_CONTINUE",
    "goto": "TOKEN_RSRVD_GOTO",
    "func": "TOKEN_RSRVD_FUNC",
    "var": "TOKEN_RSRVD_VAR",
    "for": "TOKEN_RSRVD_FOR",
    "return": "TOKEN_RSRVD_RETURN",
}

# The keyword token types are appended so that `tokens` lists every type.
tokens.extend(reserved.values())
# Characters skipped between tokens. PLY treats t_ignore as a plain set of
# characters rather than a regex, so this must be a normal (non-raw) string
# for "\t" and "\r" to mean tab and carriage return. "\n" is deliberately
# left out so that t_newline still sees line breaks and can count lines.
# NOTE(review): the original literal was unreadable (split across two lines);
# space/tab/CR is the assumed intent - confirm.
t_ignore = ' \t\r'
# Comments: C-style block comments (/* ... */, may span lines) and
# line comments (// up to the end of the line). Nothing is returned, so the
# matched text is discarded and no token is produced.
def t_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # A block comment can contain line breaks that t_newline never sees;
    # count them here so line numbers stay correct after the comment.
    t.lexer.lineno += t.value.count('\n')
# Line breaks: consume a run of newlines and advance the lexer's line counter
# by the number of newlines matched. No token is produced.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
# Error hook, called by PLY when no rule matches at the current position.
# t.value holds all remaining input, so only its first character (the one
# that could not be matched) is reported. The lexer is then moved past that
# character; without the skip PLY raises LexError as soon as this returns.
def t_error(t):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("LaunchScript error: " + repr(t.value[0]))
    t.lexer.skip(1)
# Identifiers and keywords share this rule: the text is matched as an
# identifier first, then re-typed as a keyword token when it appears in the
# `reserved` table.
def t_TOKEN_IDENT(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # identifier
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'TOKEN_IDENT'
    return t
# Floating-point literals such as "3.14" or ".5" (digits are required after
# the dot; the dot is escaped so it no longer matches arbitrary characters).
# This is a function rule defined ahead of t_TOKEN_INT on purpose: PLY tries
# function rules in definition order before any string rule, so as a string
# rule FLOAT always lost to INT and "3.14" was split into "3" and ".14".
def t_TOKEN_FLOAT(t):
    r'[0-9]*\.[0-9]+'
    return t


# Integer literals: hexadecimal with a lowercase "0x" prefix, or decimal.
# The token value is left as the matched text (no numeric conversion).
def t_TOKEN_INT(t):
    r'(0x[a-fA-F0-9]+)|([0-9]+)'
    return t
# Double-quoted string literal. Inside the quotes any character except a
# backslash or a newline is allowed, and a backslash escapes the character
# that follows it (so \" does not end the string). The match is lazy, so it
# stops at the first unescaped closing quote.
# NOTE(review): the original carried a commented-out alternative marked
# "only for windows" whose pattern was unreadable; carriage returns are not
# excluded here - confirm whether CRLF input needs special handling.
t_TOKEN_STRING = r'("([^\\\n]|(\\.))*?")'
# Single-character delimiters. Each value is a regular expression, so every
# character that is a regex metacharacter is backslash-escaped; unescaped
# "(", ")" and "[" are not valid patterns on their own.
t_TOKEN_DELIM_COMMA = r','
t_TOKEN_DELIM_OPEN_PAREN = r'\('
t_TOKEN_DELIM_CLOSE_PAREN = r'\)'
t_TOKEN_DELIM_OPEN_BRACKET = r'\['
t_TOKEN_DELIM_CLOSE_BRACKET = r'\]'
t_TOKEN_DELIM_OPEN_BRACE = r'\{'
t_TOKEN_DELIM_CLOSE_BRACE = r'\}'
t_TOKEN_DELIM_SEMICOLON = r';'
# Operators. Alternatives are ordered longest first so that a multi-character
# operator is never split: three-character shift-assignments, then any
# "<op>=" pair (+= -= *= /= %= &= |= ^= == != >= <=), then || && ++ --, and
# finally the single-character operators. "-" is escaped inside the character
# classes so it is a literal minus, not a range; "|" and "+" are escaped
# outside classes where they would otherwise be regex operators.
# NOTE(review): "<<=" and ">>=" are recognised but plain "<<" and ">>" are
# not (they lex as two separate tokens) - confirm this is intended.
def t_TOKEN_OP(t):
    r'(<<=)|(>>=)|([+\-*/%&|^=!><]=)|(\|\|)|(&&)|(\+\+)|(--)|[+\-*/%^=&|><!~]'
    return t
# Manual smoke test: tokenize ./test.txt and print one
# "<value> ----> <type>" line per token.
if __name__ == "__main__":
    lexer = lex.lex()
    # The context manager closes the file even if reading fails. The file is
    # read in text mode so the lexer receives str rather than bytes, which
    # the str regex rules above require on Python 3.
    with open("./test.txt") as f:
        data = f.read()
    lexer.input(data)
    # A PLY lexer is iterable; iteration ends when the input is exhausted.
    for tok in lexer:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print(tok.value + " ----> " + tok.type)