8.1 表达式的抽象语法树
Literal
Type表示类型的定义,如struct pr{int first; int second;};TypeRef则是类型的名称,如struct pr。
有了TypeRef,在类型定义出现之前就能编写用到该类型的代码。
// #@@range/primary{
// Parses a primary expression and returns the AST node for it.
// Alternatives are tried in this order: integer literal, character literal,
// string literal, identifier, parenthesized expression.
ExprNode primary():
{
    Token tok;
    ExprNode inner;
}
{
      tok=<INTEGER>
        {
            // Node construction is delegated to the integerNode helper,
            // which receives the raw token text.
            return integerNode(location(tok), tok.image);
        }
    | tok=<CHARACTER>
        {
            // A character literal becomes an integer literal typed as char.
            return new IntegerLiteralNode(
                    location(tok),
                    IntegerTypeRef.charRef(),
                    characterCode(tok.image));
        }
    | tok=<STRING>
        {
            // A string literal is typed as pointer-to-char.
            return new StringLiteralNode(
                    location(tok),
                    new PointerTypeRef(IntegerTypeRef.charRef()),
                    stringValue(tok.image));
        }
    | tok=<IDENTIFIER>
        {
            return new VariableNode(location(tok), tok.image);
        }
    | "(" inner=expr() ")"
        {
            // Parentheses add no node of their own; the inner expression is returned as is.
            return inner;
        }
}
// #@@}
Unary
// #@@range/unary{
// Parses a unary expression.
// Prefix ++/-- recurse into unary(); the operators + - ! ~ * & take a term()
// as operand; sizeof accepts either a parenthesized type or a unary expression;
// anything else falls through to postfix().
ExprNode unary():
{
    ExprNode operand;
    TypeNode sizeofType;
}
{
      "++" operand=unary()
        { return new PrefixOpNode("++", operand); }
    | "--" operand=unary()
        { return new PrefixOpNode("--", operand); }
    | "+" operand=term()
        { return new UnaryOpNode("+", operand); }
    | "-" operand=term()
        { return new UnaryOpNode("-", operand); }
    | "!" operand=term()
        { return new UnaryOpNode("!", operand); }
    | "~" operand=term()
        { return new UnaryOpNode("~", operand); }
    | "*" operand=term()
        { return new DereferenceNode(operand); }
    | "&" operand=term()
        { return new AddressNode(operand); }
    // Both sizeof forms start with <SIZEOF>; the 3-token lookahead picks
    // the "sizeof(type)" form before the expression form is considered.
    | LOOKAHEAD(3) <SIZEOF> "(" sizeofType=type() ")"
        {
            return new SizeofTypeNode(sizeofType, size_t());
        }
    | <SIZEOF> operand=unary()
        {
            return new SizeofExprNode(operand, size_t());
        }
    | operand=postfix()
        { return operand; }
}
// #@@}
注意sizeof也是unary的。
// #@@range/term{
// Parses a term: either a cast "(type) term" or a plain unary expression.
ExprNode term():
{
    TypeNode castType;
    ExprNode operand;
}
{
      // Syntactic lookahead: take the cast branch only when "(" is followed by a type.
      LOOKAHEAD("(" type())
      "(" castType=type() ")" operand=term()
        {
            return new CastNode(castType, operand);
        }
    | operand=unary()
        {
            return operand;
        }
}
// #@@}
类型转换也是一元运算。term要么是类型转换,要么是unary运算。
// #@@range/postfix{
// Parses a primary expression followed by zero or more postfix operators.
// Each operator wraps the node built so far, so the operators chain from
// left to right.
ExprNode postfix():
{
    ExprNode node;
    ExprNode index;
    String member;
    List<ExprNode> callArgs;
}
{
    node=primary()
    (
        "++"
          { node = new SuffixOpNode("++", node); }
      | "--"
          { node = new SuffixOpNode("--", node); }
      | "[" index=expr() "]"
          { node = new ArefNode(node, index); }
      | "." member=name()
          { node = new MemberNode(node, member); }
      | "->" member=name()
          { node = new PtrMemberNode(node, member); }
      | "(" callArgs=args() ")"
          { node = new FuncallNode(node, callArgs); }
    )*
    {
        return node;
    }
}
// #@@}
后缀运算符可以叠加
二元运算符
// #@@range/expr2{
// Parses a chain of "+" / "-" operations over expr1() operands.
// The loop folds each new operand into the left side, producing a
// left-leaning tree.
ExprNode expr2():
{
    ExprNode lhs;
    ExprNode rhs;
}
{
    lhs=expr1()
    (
        "+" rhs=expr1()
          { lhs = new BinaryOpNode(lhs, "+", rhs); }
      | "-" rhs=expr1()
          { lhs = new BinaryOpNode(lhs, "-", rhs); }
    )*
    {
        return lhs;
    }
}
// #@@}
// #@@range/expr1{
// Parses a chain of "*" / "/" / "%" operations over term() operands.
// The loop folds each new operand into the left side, producing a
// left-leaning tree.
ExprNode expr1():
{
    ExprNode lhs;
    ExprNode rhs;
}
{
    lhs=term()
    (
        "*" rhs=term()
          { lhs = new BinaryOpNode(lhs, "*", rhs); }
      | "/" rhs=term()
          { lhs = new BinaryOpNode(lhs, "/", rhs); }
      | "%" rhs=term()
          { lhs = new BinaryOpNode(lhs, "%", rhs); }
    )*
    {
        return lhs;
    }
}
// #@@}
赋值表达式
二元运算符有无结合、左结合和右结合之分,cbc中只有赋值运算符=是右结合的。无结合运算符不允许出现x OP y OP z这样的写法,例如x==y==z。
一般来说,以下两种方法都能用于写可结合的二元运算符。
expr1 ("+" expr1())*
或者
expr(): {}
{
term() "=" expr()
}
左结合的运算符中,已经解析出的左侧部分会成为新表达式的左子树,循环写法每次迭代正好把当前结果包成左子树,所以用expr1 ("+" expr1())*这种方式比较好。同理,右结合的运算符需要先把右侧整体解析成一个表达式,所以用第二种(右递归)方式比较好。
8.2 语句的AST
// #@@range/if_stmt{
// Parses "if (cond) stmt [else stmt]".
// When no else branch is present, the else body handed to IfNode is null.
IfNode if_stmt():
{
    Token ifTok;
    ExprNode condition;
    StmtNode thenPart;
    StmtNode elsePart = null;
}
{
    ifTok=<IF> "(" condition=expr() ")" thenPart=stmt()
    // The optional else is consumed as soon as it is seen, so it attaches to the
    // innermost if still being parsed.
    // NOTE(review): the explicit LOOKAHEAD(1) presumably just silences
    // JavaCC's choice-conflict warning for the dangling else; confirm.
    [ LOOKAHEAD(1) <ELSE> elsePart=stmt() ]
    {
        return new IfNode(location(ifTok), condition, thenPart, elsePart);
    }
}
// #@@}
// #@@range/while_stmt{
// Parses "while (cond) stmt"; the node is located at the <WHILE> token.
WhileNode while_stmt():
{
    Token whileTok;
    ExprNode condition;
    StmtNode loopBody;
}
{
    whileTok=<WHILE>
    "(" condition=expr() ")"
    loopBody=stmt()
    {
        return new WhileNode(location(whileTok), condition, loopBody);
    }
}
// #@@}
// Parses "do stmt while (cond);"; the node is located at the <DO> token.
// Note that DoWhileNode receives the body before the condition.
DoWhileNode dowhile_stmt():
{
    Token doTok;
    ExprNode condition;
    StmtNode loopBody;
}
{
    doTok=<DO>
    loopBody=stmt()
    <WHILE> "(" condition=expr() ")" ";"
    {
        return new DoWhileNode(location(doTok), loopBody, condition);
    }
}
// Parses "for (init; cond; incr) stmt".
// Each of the three header expressions is optional; an omitted one is
// passed to ForNode as null.
ForNode for_stmt():
{
    Token forTok;
    ExprNode initExpr = null;
    ExprNode condExpr = null;
    ExprNode stepExpr = null;
    StmtNode loopBody;
}
{
    forTok=<FOR>
    "("
        [ initExpr=expr() ] ";"
        [ condExpr=expr() ] ";"
        [ stepExpr=expr() ]
    ")"
    loopBody=stmt()
    {
        return new ForNode(location(forTok), initExpr, condExpr, stepExpr, loopBody);
    }
}
AST示例
root@cf43f429204e:/# cat if_test.cb
import stdio;
int main(int argc, char **argv)
{
if(1) if (2) { puts("OK"); }
else if(3) {puts("NO");}
else{puts("ERR");}
return 0;}
root@cf43f429204e:/# cbc --dump-ast if_test.cb
<<AST>> (if_test.cb:1)
variables:
functions:
<<DefinedFunction>> (if_test.cb:2)
name: "main"
isPrivate: false
params:
parameters:
<<CBCParameter>> (if_test.cb:2)
name: "argc"
typeNode: int
<<CBCParameter>> (if_test.cb:2)
name: "argv"
typeNode: char**
body:
<<BlockNode>> (if_test.cb:3)
variables:
stmts:
<<IfNode>> (if_test.cb:4)
cond:
<<IntegerLiteralNode>> (if_test.cb:4)
typeNode: int
value: 1
thenBody:
<<IfNode>> (if_test.cb:4)
cond:
<<IntegerLiteralNode>> (if_test.cb:4)
typeNode: int
value: 2
thenBody:
<<BlockNode>> (if_test.cb:4)
variables:
stmts:
<<ExprStmtNode>> (if_test.cb:4)
expr:
<<FuncallNode>> (if_test.cb:4)
expr:
<<VariableNode>> (if_test.cb:4)
name: "puts"
args:
<<StringLiteralNode>> (if_test.cb:4)
value: "OK"
elseBody:
<<IfNode>> (if_test.cb:5)
cond:
<<IntegerLiteralNode>> (if_test.cb:5)
typeNode: int
value: 3
thenBody:
<<BlockNode>> (if_test.cb:5)
variables:
stmts:
<<ExprStmtNode>> (if_test.cb:5)
expr:
<<FuncallNode>> (if_test.cb:5)
expr:
<<VariableNode>> (if_test.cb:5)
name: "puts"
args:
<<StringLiteralNode>> (if_test.cb:5)
value: "NO"
elseBody:
<<BlockNode>> (if_test.cb:6)
variables:
stmts:
<<ExprStmtNode>> (if_test.cb:6)
expr:
<<FuncallNode>> (if_test.cb:6)
expr:
<<VariableNode>> (if_test.cb:6)
name: "puts"
args:
<<StringLiteralNode>> (if_test.cb:6)
value: "ERR"
elseBody: null
<<ReturnNode>> (if_test.cb:7)
expr:
<<IntegerLiteralNode>> (if_test.cb:7)
typeNode: int
value: 0
stmts, block
// #@@range/stmts{
// Parses zero or more statements and collects them into a list.
// A null result from stmt() is skipped rather than added.
List<StmtNode> stmts():
{
    List<StmtNode> collected = new ArrayList<StmtNode>();
    StmtNode one;
}
{
    (
        one=stmt()
        {
            if (one != null) {
                collected.add(one);
            }
        }
    )*
    {
        return collected;
    }
}
// #@@}
// #@@range/stmt{
// Parses a single statement and returns its node.
// For the empty statement ";" no node is built and null is returned.
// The order of the alternatives is significant and must be kept.
StmtNode stmt():
{
    StmtNode result = null;
    ExprNode exprPart = null;
}
{
    (
        ";"
      // NOTE(review): the 2-token lookahead presumably separates a label
      // from an expression that starts with an identifier; confirm against
      // labeled_stmt().
      | LOOKAHEAD(2) result=labeled_stmt()
      | exprPart=expr() ";"
          {
              // An expression statement takes its location from the expression.
              result = new ExprStmtNode(exprPart.location(), exprPart);
          }
      | result=block()
      | result=if_stmt()
      | result=while_stmt()
      | result=dowhile_stmt()
      | result=for_stmt()
      | result=switch_stmt()
      | result=break_stmt()
      | result=continue_stmt()
      | result=goto_stmt()
      | result=return_stmt()
    )
    {
        return result;
    }
}
// #@@}
// #@@range/block{
// Parses a braced block: variable definitions first, then statements.
// The node is located at the opening brace.
BlockNode block():
{
    Token openBrace;
    List<DefinedVariable> localVars;
    List<StmtNode> bodyStmts;
}
{
    openBrace="{"
        localVars=defvar_list()
        bodyStmts=stmts()
    "}"
    {
        return new BlockNode(location(openBrace), localVars, bodyStmts);
    }
}
// #@@}
8.3 声明的AST
声明变量
// #@@range/defvar_list{
// Parses zero or more variable definition statements and flattens the
// variables they define into a single list.
List<DefinedVariable> defvar_list():
{
    List<DefinedVariable> all = new ArrayList<DefinedVariable>();
    List<DefinedVariable> chunk;
}
{
    (
        chunk=defvars()
        {
            all.addAll(chunk);
        }
    )*
    {
        return all;
    }
}
// #@@}
// #@@range/defvars{
// Parses one variable definition statement such as
//     storage type name [= init] {, name [= init]} ;
// and returns one DefinedVariable per declarator. All declarators share
// the storage flag and the type parsed at the start of the statement.
List<DefinedVariable> defvars():
{
    List<DefinedVariable> defined = new ArrayList<DefinedVariable>();
    boolean isPrivate;
    TypeNode varType;
    String varName;
    ExprNode initializer = null;
}
{
    isPrivate=storage() varType=type()
    varName=name() [ "=" initializer=expr() ]
    {
        defined.add(new DefinedVariable(isPrivate, varType, varName, initializer));
        // Reset so a declarator without "=" does not inherit this initializer.
        initializer = null;
    }
    (
        "," varName=name() [ "=" initializer=expr() ]
        {
            defined.add(new DefinedVariable(isPrivate, varType, varName, initializer));
            // Reset so a declarator without "=" does not inherit this initializer.
            initializer = null;
        }
    )*
    ";"
    {
        return defined;
    }
}
// #@@}
声明函数
// #@@range/defun{
// Parses a function definition:
//     storage return-type name ( params ) block
// The function's type is a FunctionTypeRef built from the return type
// reference and the parameter type references.
DefinedFunction defun():
{
    boolean isPrivate;
    TypeRef returnType;
    String funcName;
    Params paramList;
    BlockNode funcBody;
}
{
    isPrivate=storage()
    returnType=typeref()
    funcName=name()
    "(" paramList=params() ")"
    funcBody=block()
    {
        TypeRef funcType = new FunctionTypeRef(returnType, paramList.parametersTypeRef());
        return new DefinedFunction(isPrivate, new TypeNode(funcType), funcName, paramList, funcBody);
    }
}
// #@@}
// #@@range/storage{
// Parses the optional storage specifier.
// Returns true when a <STATIC> token was consumed, false otherwise.
boolean storage():
{
    Token staticTok = null;
}
{
    [ staticTok=<STATIC> ]
    {
        return staticTok != null;
    }
}
// #@@}
// #@@range/params{
// Parses a parameter list (the part between the parentheses).
// "void" directly followed by ")" yields an empty parameter list;
// otherwise fixed parameters are parsed, optionally followed by ", ..."
// which marks the list as accepting variable arguments.
Params params():
{
    Token voidTok;
    Params result;
}
{
      // Lookahead requires ")" right after <VOID>; otherwise the input goes
      // to the fixedparams() branch.
      LOOKAHEAD(<VOID> ")")
      voidTok=<VOID>
        {
            return new Params(location(voidTok), new ArrayList<CBCParameter>());
        }
    | result=fixedparams()
      [ "," "..." { result.acceptVarargs(); } ]
        {
            return result;
        }
}
// #@@}
// #@@range/fixedparams{
// Parses one or more comma-separated parameters.
// The resulting Params is located at the first parameter.
Params fixedparams():
{
    List<CBCParameter> collected = new ArrayList<CBCParameter>();
    CBCParameter first;
    CBCParameter next;
}
{
    first=param()
    {
        collected.add(first);
    }
    (
        // NOTE(review): the 2-token lookahead presumably keeps a trailing
        // ", ..." from being taken as the start of another parameter; confirm.
        LOOKAHEAD(2) "," next=param()
        {
            collected.add(next);
        }
    )*
    {
        return new Params(first.location(), collected);
    }
}
// #@@}
// #@@range/param{
// Parses a single parameter: a type followed by a name.
CBCParameter param():
{
    TypeNode paramType;
    String paramName;
}
{
    paramType=type() paramName=name()
    {
        return new CBCParameter(paramType, paramName);
    }
}
// #@@}
// #@@range/block{
// Parses a braced block: variable definitions first, then statements.
// The node is located at the opening brace.
BlockNode block():
{
    Token openBrace;
    List<DefinedVariable> localVars;
    List<StmtNode> bodyStmts;
}
{
    openBrace="{"
        localVars=defvar_list()
        bodyStmts=stmts()
    "}"
    {
        return new BlockNode(location(openBrace), localVars, bodyStmts);
    }
}
// #@@}
声明列表本身
// #@@range/top_defs{
// Parses zero or more top-level definitions (functions, variables,
// constants, structs, unions, typedefs) and gathers them into a
// Declarations object. The order of the alternatives is significant.
Declarations top_defs():
{
    Declarations result = new Declarations();
    DefinedFunction func;
    List<DefinedVariable> vars;
    Constant constant;
    StructNode structDef;
    UnionNode unionDef;
    TypedefNode typedefDef;
}
{
    (
        // A function and a variable definition start the same way; scan ahead
        // to the "(" after the name to recognize a function.
        LOOKAHEAD(storage() typeref() <IDENTIFIER> "(")
        func=defun()
          { result.addDefun(func); }
      | LOOKAHEAD(3)
        vars=defvars()
          { result.addDefvars(vars); }
      | constant=defconst()
          { result.addConstant(constant); }
      | structDef=defstruct()
          { result.addDefstruct(structDef); }
      | unionDef=defunion()
          { result.addDefunion(unionDef); }
      | typedefDef=typedef()
          { result.addTypedef(typedefDef); }
    )*
    {
        return result;
    }
}
// #@@}
import
// #@@range/import_stmts{
// Parses zero or more import statements and loads each named library.
// The declarations of every loaded library are merged into the returned
// Declarations object, and the library's typedefs are passed to
// addKnownTypedefs().
// A CompileException raised while loading is rethrown as a ParseException
// with the same message and the original exception attached as its cause.
Declarations import_stmts():
{
    String libid;
    Declarations impdecls = new Declarations();
}
{
    (
        libid=import_stmt()
        {
            try {
                Declarations decls = loader.loadLibrary(libid, errorHandler);
                // A null result from the loader is skipped silently.
                if (decls != null) {
                    impdecls.add(decls);
                    // NOTE(review): presumably makes imported typedef names
                    // recognizable as types for the rest of the parse; confirm.
                    addKnownTypedefs(decls.typedefs());
                }
            }
            catch (CompileException ex) {
                // Keep the original exception as the cause so its stack trace
                // is not lost when the error is reported as a parse error.
                ParseException wrapped = new ParseException(ex.getMessage());
                wrapped.initCause(ex);
                throw wrapped;
            }
        }
    )*
    {
        return impdecls;
    }
}
// #@@}
// #@@range/import_stmt{
// Parses one import statement: "import name {. name} ;".
// Returns the library id, i.e. the names joined with "." in source order.
String import_stmt():
{
    // The builder never leaves this method, so the unsynchronized
    // StringBuilder is sufficient.
    StringBuilder buf = new StringBuilder();
    String n;
}
{
    <IMPORT> n=name()
      { buf.append(n); }
    (
        "." n=name()
          { buf.append(".").append(n); }
    )*
    ";"
    {
        return buf.toString();
    }
}
// #@@}
8.4 cbc解析器启动
- --debug-parser或者enable_tracing能够输出log
- 注意JavaCC可能会抛出LookaheadSuccess异常