zoukankan      html  css  js  c++  java
  • 自制编译器 青木峰郎 笔记 Ch8 AST生成

    8.1 表达式的抽象语法树

    Literal

    Type表示类型的定义,如struct pr{int first; int second;};TypeRef则是类型的名称,如struct pr。
    定义TypeRef后,在类型定义之前就能编写用到了该类型的代码。

    // #@@range/primary{
    /**
     * Parses a primary expression and returns the AST node built for it.
     *
     * Alternatives, tried in this order:
     *   - an INTEGER token, handed to integerNode() together with its location;
     *   - a CHARACTER token, built as an IntegerLiteralNode typed with
     *     IntegerTypeRef.charRef();
     *   - a STRING token, built as a StringLiteralNode typed as a pointer to
     *     IntegerTypeRef.charRef();
     *   - an IDENTIFIER token, built as a VariableNode;
     *   - a parenthesized expression, whose inner node is returned as-is
     *     (no extra node is created for the parentheses).
     */
    ExprNode primary():
    {
        Token tok;
        ExprNode inner;
    }
    {
          tok=<INTEGER>
            { return integerNode(location(tok), tok.image); }
        | tok=<CHARACTER>
            {
                return new IntegerLiteralNode(
                    location(tok),
                    IntegerTypeRef.charRef(),
                    characterCode(tok.image));
            }
        | tok=<STRING>
            {
                return new StringLiteralNode(
                    location(tok),
                    new PointerTypeRef(IntegerTypeRef.charRef()),
                    stringValue(tok.image));
            }
        | tok=<IDENTIFIER>
            { return new VariableNode(location(tok), tok.image); }
        | "(" inner=expr() ")"
            { return inner; }
    }
    // #@@}
    

    Unary

    
    // #@@range/unary{
    /**
     * Parses a unary expression and returns its AST node.
     *
     * Prefix ++ and -- recurse into unary(); the other prefix operators
     * (+, -, !, ~, *, &) take a term() as operand, so their operand may
     * itself be a cast. sizeof comes in two forms, and anything without a
     * prefix operator falls through to postfix().
     */
    ExprNode unary():
    {
        ExprNode n;
        TypeNode t;
    }
    {
          "++" n=unary()    { return new PrefixOpNode("++", n); }
        | "--" n=unary()    { return new PrefixOpNode("--", n); }
        | "+" n=term()      { return new UnaryOpNode("+", n); }
        | "-" n=term()      { return new UnaryOpNode("-", n); }
        | "!" n=term()      { return new UnaryOpNode("!", n); }
        | "~" n=term()      { return new UnaryOpNode("~", n); }
        // Pointer dereference and address-of get dedicated node classes.
        | "*" n=term()      { return new DereferenceNode(n); }
        | "&" n=term()      { return new AddressNode(n); }
        // Both sizeof forms start with <SIZEOF>; three tokens of lookahead
        // are used to pick the "sizeof(type)" form before the expression form.
        | LOOKAHEAD(3) <SIZEOF> "(" t=type() ")"
            {
                return new SizeofTypeNode(t, size_t());
            }
        | <SIZEOF> n=unary()
            {
                return new SizeofExprNode(n, size_t());
            }
        // No prefix operator: a postfix expression.
        | n=postfix()       { return n; }
    }
    // #@@}
    
    

    注意sizeof也是unary的。

    // #@@range/term{
    /**
     * Parses a term: either a cast expression "(type) term" or a unary
     * expression.
     *
     * The syntactic lookahead on "(" type() separates a cast from a
     * parenthesized expression, which also starts with "(".
     * Casts nest to the right because the operand is again a term().
     */
    ExprNode term():
    {
        TypeNode castType;
        ExprNode operand;
    }
    {
          LOOKAHEAD("(" type())
          "(" castType=type() ")" operand=term()
            { return new CastNode(castType, operand); }
        | operand=unary()
            { return operand; }
    }
    // #@@}
    

    类型转换是一元的。term是unary运算或者是类型转换。

    
    // #@@range/postfix{
    /**
     * Parses a primary expression followed by zero or more postfix operators.
     *
     * Each postfix operator (++, --, [index], .member, ->member, call)
     * wraps the node built so far, so chained operators nest left to right:
     * the first operator applied ends up innermost.
     */
    ExprNode postfix():
    {
        ExprNode node;
        ExprNode index;
        String member;
        List<ExprNode> callArgs;
    }
    {
        node=primary()
        (
            "++"
                { node = new SuffixOpNode("++", node); }
          | "--"
                { node = new SuffixOpNode("--", node); }
          | "[" index=expr() "]"
                { node = new ArefNode(node, index); }
          | "." member=name()
                { node = new MemberNode(node, member); }
          | "->" member=name()
                { node = new PtrMemberNode(node, member); }
          | "(" callArgs=args() ")"
                { node = new FuncallNode(node, callArgs); }
        )*
        { return node; }
    }
    // #@@}
    

    后缀运算符可以叠加

    二元运算符

    // #@@range/expr2{
    /**
     * Parses an additive expression: expr1 operands joined by "+" or "-".
     *
     * The running result is folded into the left operand on every iteration,
     * which yields a left-associative tree.
     */
    ExprNode expr2():
    {
        ExprNode lhs;
        ExprNode rhs;
    }
    {
        lhs=expr1()
        (
            "+" rhs=expr1()
                { lhs = new BinaryOpNode(lhs, "+", rhs); }
          | "-" rhs=expr1()
                { lhs = new BinaryOpNode(lhs, "-", rhs); }
        )*
        { return lhs; }
    }
    // #@@}
    
    // #@@range/expr1{
    /**
     * Parses a multiplicative expression: term operands joined by "*", "/"
     * or "%".
     *
     * The running result becomes the left operand of the next operator,
     * which yields a left-associative tree.
     */
    ExprNode expr1():
    {
        ExprNode lhs;
        ExprNode rhs;
    }
    {
        lhs=term()
        (
            "*" rhs=term()
                { lhs = new BinaryOpNode(lhs, "*", rhs); }
          | "/" rhs=term()
                { lhs = new BinaryOpNode(lhs, "/", rhs); }
          | "%" rhs=term()
                { lhs = new BinaryOpNode(lhs, "%", rhs); }
        )*
        { return lhs; }
    }
    // #@@}
    

    赋值表达式

    二元运算符有无结合、左结合和右结合之分,cbc中只有赋值运算符=是右结合的。无结合运算符不允许出现x OP y OP z这样的写法,例如x==y==z。
    一般来说,以下两种方法都能用于写可结合的二元运算符。

    expr1 ("+" expr1())*
    

    或者

    expr1 :{}
    {
          term() "=" expr()
    }
    

    左结合的操作符左子树可以形成新的表达式,所以用expr1 ("+" expr1())*这种方式比较好。同理,右结合用第二种方式比较好。

    8.2 语句的AST

    
    // #@@range/if_stmt{
    /**
     * Parses an if statement with an optional else branch.
     *
     * The else body stays null when no else clause is present.
     * The explicit LOOKAHEAD(1) on the optional part makes the parser take
     * an <ELSE> whenever one follows, so a dangling else attaches to the
     * nearest enclosing if.
     */
    IfNode if_stmt():
    {
        Token ifTok;
        ExprNode test;
        StmtNode onTrue;
        StmtNode onFalse = null;
    }
    {
        ifTok=<IF> "(" test=expr() ")"
        onTrue=stmt()
        [ LOOKAHEAD(1) <ELSE> onFalse=stmt() ]
            { return new IfNode(location(ifTok), test, onTrue, onFalse); }
    }
    // #@@}
    
    // #@@range/while_stmt{
    /**
     * Parses "while (cond) body" into a WhileNode located at the
     * while keyword.
     */
    WhileNode while_stmt():
    {
        Token whileTok;
        ExprNode test;
        StmtNode loopBody;
    }
    {
        whileTok=<WHILE> "(" test=expr() ")"
        loopBody=stmt()
            { return new WhileNode(location(whileTok), test, loopBody); }
    }
    // #@@}
    
    /**
     * Parses "do body while (cond);" into a DoWhileNode located at the
     * do keyword. Note the constructor takes the body before the condition.
     */
    DoWhileNode dowhile_stmt():
    {
        Token doTok;
        ExprNode test;
        StmtNode loopBody;
    }
    {
        doTok=<DO> loopBody=stmt()
        <WHILE> "(" test=expr() ")" ";"
            { return new DoWhileNode(location(doTok), loopBody, test); }
    }
    
    /**
     * Parses "for (init; cond; incr) body" into a ForNode located at the
     * for keyword.
     *
     * Each of the three header expressions is optional; an omitted one is
     * passed to the ForNode constructor as null.
     */
    ForNode for_stmt():
    {
        Token forTok;
        ExprNode initExpr = null;
        ExprNode testExpr = null;
        ExprNode stepExpr = null;
        StmtNode loopBody;
    }
    {
        forTok=<FOR> "("
            [ initExpr=expr() ] ";"
            [ testExpr=expr() ] ";"
            [ stepExpr=expr() ]
        ")"
        loopBody=stmt()
            { return new ForNode(location(forTok), initExpr, testExpr, stepExpr, loopBody); }
    }
    
    
    

    AST示例

    root@cf43f429204e:/# cat if_test.cb
    import stdio;
    int main(int argc, char **argv)
    {
       if(1) if (2) { puts("OK"); }
    else if(3) {puts("NO");}
    else{puts("ERR");}
        return 0;}
    
    root@cf43f429204e:/# cbc --dump-ast if_test.cb
    <<AST>> (if_test.cb:1)
    variables:
    functions:
        <<DefinedFunction>> (if_test.cb:2)
        name: "main"
        isPrivate: false
        params:
            parameters:
                <<CBCParameter>> (if_test.cb:2)
                name: "argc"
                typeNode: int
                <<CBCParameter>> (if_test.cb:2)
                name: "argv"
                typeNode: char**
        body:
            <<BlockNode>> (if_test.cb:3)
            variables:
            stmts:
                <<IfNode>> (if_test.cb:4)
                cond:
                    <<IntegerLiteralNode>> (if_test.cb:4)
                    typeNode: int
                    value: 1
                thenBody:
                    <<IfNode>> (if_test.cb:4)
                    cond:
                        <<IntegerLiteralNode>> (if_test.cb:4)
                        typeNode: int
                        value: 2
                    thenBody:
                        <<BlockNode>> (if_test.cb:4)
                        variables:
                        stmts:
                            <<ExprStmtNode>> (if_test.cb:4)
                            expr:
                                <<FuncallNode>> (if_test.cb:4)
                                expr:
                                    <<VariableNode>> (if_test.cb:4)
                                    name: "puts"
                                args:
                                    <<StringLiteralNode>> (if_test.cb:4)
                                    value: "OK"
                    elseBody:
                        <<IfNode>> (if_test.cb:5)
                        cond:
                            <<IntegerLiteralNode>> (if_test.cb:5)
                            typeNode: int
                            value: 3
                        thenBody:
                            <<BlockNode>> (if_test.cb:5)
                            variables:
                            stmts:
                                <<ExprStmtNode>> (if_test.cb:5)
                                expr:
                                    <<FuncallNode>> (if_test.cb:5)
                                    expr:
                                        <<VariableNode>> (if_test.cb:5)
                                        name: "puts"
                                    args:
                                        <<StringLiteralNode>> (if_test.cb:5)
                                        value: "NO"
                        elseBody:
                            <<BlockNode>> (if_test.cb:6)
                            variables:
                            stmts:
                                <<ExprStmtNode>> (if_test.cb:6)
                                expr:
                                    <<FuncallNode>> (if_test.cb:6)
                                    expr:
                                        <<VariableNode>> (if_test.cb:6)
                                        name: "puts"
                                    args:
                                        <<StringLiteralNode>> (if_test.cb:6)
                                        value: "ERR"
                elseBody: null
                <<ReturnNode>> (if_test.cb:7)
                expr:
                    <<IntegerLiteralNode>> (if_test.cb:7)
                    typeNode: int
                    value: 0
    
    

    stmts, block

    
    // #@@range/stmts{
    /**
     * Parses zero or more statements and returns them in source order.
     *
     * A null result from stmt() is skipped rather than stored, so the
     * returned list never contains null.
     */
    List<StmtNode> stmts():
    {
        List<StmtNode> collected = new ArrayList<StmtNode>();
        StmtNode each;
    }
    {
        (
            each=stmt()
                {
                    if (each != null) {
                        collected.add(each);
                    }
                }
        )*
        { return collected; }
    }
    // #@@}
    
    // #@@range/stmt{
    /**
     * Parses a single statement and returns its node.
     *
     * Returns null for the empty statement ";", because n is never
     * assigned in that alternative. Every other alternative assigns n.
     */
    StmtNode stmt():
    {
        StmtNode n = null;
        ExprNode e = null;
    }
    {
        // Empty statement: no node is built, n stays null.
        ( ";"
        // Two tokens of lookahead are used to choose a labeled statement
        // before trying an expression statement.
        // NOTE(review): presumably the pair is an identifier followed by ":";
        // labeled_stmt() is not visible here -- confirm.
        | LOOKAHEAD(2) n=labeled_stmt()
        // Expression statement: the node takes the expression's own location.
        | e=expr() ";" { n = new ExprStmtNode(e.location(), e); }
        | n=block()
        | n=if_stmt()
        | n=while_stmt()
        | n=dowhile_stmt()
        | n=for_stmt()
        | n=switch_stmt()
        | n=break_stmt()
        | n=continue_stmt()
        | n=goto_stmt()
        | n=return_stmt()
        )
            {
                return n;
            }
    }
    // #@@}
    // #@@range/block{
    /**
     * Parses a brace-delimited block: variable definitions first, then
     * statements. The BlockNode is located at the opening brace.
     */
    BlockNode block():
    {
        Token open;
        List<DefinedVariable> locals;
        List<StmtNode> body;
    }
    {
        open="{" locals=defvar_list() body=stmts() "}"
            { return new BlockNode(location(open), locals, body); }
    }
    // #@@}
    

    8.3 声明的AST

    声明变量

    // #@@range/defvar_list{
    /**
     * Parses zero or more variable definition statements and returns all
     * defined variables as one flat list, in source order.
     */
    List<DefinedVariable> defvar_list():
    {
        List<DefinedVariable> all = new ArrayList<DefinedVariable>();
        List<DefinedVariable> group;
    }
    {
        (
            group=defvars()
                { all.addAll(group); }
        )*
        { return all; }
    }
    // #@@}
    
    
    // #@@range/defvars{
    /**
     * Parses one variable definition statement, which may declare several
     * comma-separated names, each with an optional "=" initializer.
     *
     * The storage flag and the type are read once and shared by every name
     * in the statement. A name without an initializer gets null.
     */
    List<DefinedVariable> defvars():
    {
        List<DefinedVariable> declared = new ArrayList<DefinedVariable>();
        boolean isPrivate;
        TypeNode varType;
        String varName;
        ExprNode initializer = null;
    }
    {
        isPrivate=storage() varType=type() varName=name()
        [ "=" initializer=expr() ]
            {
                declared.add(new DefinedVariable(isPrivate, varType, varName, initializer));
                // Reset so the next declarator does not inherit this initializer.
                initializer = null;
            }
        (
            "," varName=name()
            [ "=" initializer=expr() ]
                {
                    declared.add(new DefinedVariable(isPrivate, varType, varName, initializer));
                    initializer = null;
                }
        )*
        ";"
            { return declared; }
    }
    // #@@}
    

    声明函数

    
    // #@@range/defun{
    /**
     * Parses a function definition: storage, return type, name,
     * parenthesized parameters and a block body.
     *
     * The function's type is a FunctionTypeRef built from the return type
     * and the parameters' type reference, wrapped in a TypeNode.
     */
    DefinedFunction defun():
    {
        boolean isPrivate;
        TypeRef returnType;
        String funcName;
        Params paramList;
        BlockNode funcBody;
    }
    {
        isPrivate=storage() returnType=typeref() funcName=name()
        "(" paramList=params() ")"
        funcBody=block()
            {
                TypeRef funcType =
                    new FunctionTypeRef(returnType, paramList.parametersTypeRef());
                return new DefinedFunction(
                    isPrivate, new TypeNode(funcType), funcName, paramList, funcBody);
            }
    }
    // #@@}
    
    // #@@range/storage{
    /**
     * Parses an optional "static" storage specifier.
     *
     * Returns true when a STATIC token was consumed, false otherwise.
     * The token variable starts as null and is only assigned when the
     * optional part matches, so a null check is enough.
     */
    boolean storage():
    { Token t = null; }
    {
        [t=<STATIC>] { return t != null; }
    }
    // #@@}
    
    // #@@range/params{
    /**
     * Parses a function parameter list (the part between the parentheses).
     *
     * Either a lone "void", which yields a Params with an empty parameter
     * list, or one or more fixed parameters optionally followed by ", ...".
     */
    Params params():
    {
        Token t;
        Params params;
    }
    {
          // Only take this branch when <VOID> is immediately followed by ")".
          // NOTE(review): presumably this keeps parameters whose type starts
          // with void on the fixedparams() branch -- confirm.
          LOOKAHEAD(<VOID> ")")
          t=<VOID>
            {
                return new Params(location(t), new ArrayList<CBCParameter>());
            }
        | params=fixedparams()
                // A trailing ", ..." marks the parameter list as variadic.
                ["," "..." { params.acceptVarargs(); }]
            {
                return params;
            }
    }
    // #@@}
    
    // #@@range/fixedparams{
    /**
     * Parses one or more comma-separated parameters.
     *
     * The resulting Params is located at the first parameter.
     * Two tokens of lookahead are required before consuming a ",", so the
     * loop only continues when the comma is followed by another parameter
     * (in params() a "," may instead be followed by "...").
     */
    Params fixedparams():
    {
        List<CBCParameter> collected = new ArrayList<CBCParameter>();
        CBCParameter head;
        CBCParameter next;
    }
    {
        head=param()
            { collected.add(head); }
        (
            LOOKAHEAD(2) "," next=param()
                { collected.add(next); }
        )*
        { return new Params(head.location(), collected); }
    }
    // #@@}
    
    // #@@range/param{
    /**
     * Parses a single parameter: a type followed by a name.
     */
    CBCParameter param():
    {
        TypeNode paramType;
        String paramName;
    }
    {
        paramType=type() paramName=name()
            { return new CBCParameter(paramType, paramName); }
    }
    // #@@}
    
    // #@@range/block{
    /**
     * Parses a brace-delimited block: variable definitions first, then
     * statements. The BlockNode is located at the opening brace.
     */
    BlockNode block():
    {
        Token open;
        List<DefinedVariable> locals;
        List<StmtNode> body;
    }
    {
        open="{" locals=defvar_list() body=stmts() "}"
            { return new BlockNode(location(open), locals, body); }
    }
    // #@@}
    

    声明列表本身

    
    // #@@range/top_defs{
    /**
     * Parses zero or more top-level definitions and collects them into a
     * single Declarations object.
     *
     * Accepted definitions: functions, variables, constants, structs,
     * unions and typedefs. Each is added through the matching add* method.
     */
    Declarations top_defs():
    {
        Declarations decls = new Declarations();
        DefinedFunction defun;
        List<DefinedVariable> defvars;
        Constant defconst;
        StructNode defstruct;
        UnionNode defunion;
        TypedefNode typedef;
    }
    {
        // Function and variable definitions both start with storage and a
        // type, so syntactic lookahead up to the "(" after the name selects
        // the function definition.
        ( LOOKAHEAD(storage() typeref() <IDENTIFIER> "(")
          defun=defun()         { decls.addDefun(defun); }
        // Three tokens of lookahead select a variable definition.
        // NOTE(review): presumably needed to tell it apart from the
        // struct/union definitions below -- confirm.
        | LOOKAHEAD(3)
          defvars=defvars()     { decls.addDefvars(defvars); }
        | defconst=defconst()   { decls.addConstant(defconst); }
        | defstruct=defstruct() { decls.addDefstruct(defstruct); }
        | defunion=defunion()   { decls.addDefunion(defunion); }
        | typedef=typedef()     { decls.addTypedef(typedef); }
        )*
            {
                return decls;
            }
    }
    // #@@}
    

    import

    // #@@range/import_stmts{
    /**
     * Parses zero or more import statements and returns the merged
     * declarations of every imported library.
     *
     * For each import, the library is loaded through the loader. When the
     * loader returns non-null declarations they are merged into the result
     * and their typedefs are registered with addKnownTypedefs(), so later
     * parsing can see those type names.
     *
     * Throws ParseException when loading a library raises CompileException;
     * the original exception is attached as the cause so its stack trace is
     * not lost.
     */
    Declarations import_stmts():
    {
        String libid;
        Declarations impdecls = new Declarations();
    }
    {
        (libid=import_stmt()
            {
                try {
                    Declarations decls = loader.loadLibrary(libid, errorHandler);
                    // A null result is skipped: there is nothing to merge.
                    if (decls != null) {
                        impdecls.add(decls);
                        addKnownTypedefs(decls.typedefs());
                    }
                }
                catch (CompileException ex) {
                    // Same message as before, but keep the underlying failure
                    // reachable through getCause().
                    ParseException wrapped = new ParseException(ex.getMessage());
                    wrapped.initCause(ex);
                    throw wrapped;
                }
            }
        )*
            {
                return impdecls;
            }
    }
    // #@@}
    
    // #@@range/import_stmt{
    /**
     * Parses one import statement, "import a.b.c;", and returns the
     * dot-joined library id (for example "a.b.c").
     *
     * The buffer is local to this call, so an unsynchronized StringBuilder
     * is used instead of StringBuffer.
     */
    String import_stmt():
    {
        StringBuilder buf = new StringBuilder();
        String n;
    }
    {
        <IMPORT> n=name()   { buf.append(n); }
        ("." n=name()       { buf.append('.').append(n); } )*
        ";"
            {
                return buf.toString();
            }
    }
    // #@@}
    

    8.4 cbc解析器启动

    1. --debug-parser或者enable_tracing能够输出log
    2. 注意javaCC可能会抛出LookaheadSuccess异常
  • 相关阅读:
    php下 MVC实现的基本思路
    Apache 文件路径中“/”和“\”的问题
    PHP isset 函数作用
    适用于 php5.2 的 php.ini 中文版
    fedora17 用hostapd搭建无线wifi
    从看雪的一个沙箱代码中扣出的InlineHook代码
    添加psapi.h头文件之前要先添加Windows.h
    virtualbox中的window xp如何共享linux主机的文件
    Linux驱动开发之LDD3中第三章scull注释详解【转】
    Linux内核代码 结构体初始化【转】
  • 原文地址:https://www.cnblogs.com/xuesu/p/14380329.html
Copyright © 2011-2022 走看看