zoukankan      html  css  js  c++  java
  • angularjs-1.3代码学习-$parse

    这次我们来看一下angular的Sandboxing Angular Expressions。关于内置方法的,核心有两块:Lexer和Parser。其中大家对$parse可能更了解一点。好了不多废话,先看Lexer的内部结构:

    1.Lexer

    /**
     * Lexer constructor: stores the options object it is given.
     */
    function Lexer(options) {
      this.options = options;
    }

    // Prototype outline: every method below is an empty stub that only names
    // the responsibility; the article quotes the real body of `lex` later on.
    Lexer.prototype = {
      constructor: Lexer,

      lex: function() {},

      is: function() {},

      // Returns the data at the next position of the expression, or false if there is none.
      peek: function() {},

      // Tells whether the current character is a digit.
      isNumber: function() {},

      // Tells whether the current character is whitespace.
      isWhitespace: function() {},

      // Tells whether the current character is an English letter (including _ and $).
      isIdent: function() {},

      // Tells whether the current character is -, + or a digit.
      isExpOperator: function() {},

      // Throws an exception.
      throwError: function() {},

      // Reads a number.
      readNumber: function() {},

      // Reads an identifier.
      readIdent: function() {},

      // Reads a string wrapped in '' or "".
      readString: function() {}
    };

     这里指出一点,因为是表达式。所以类似"123"这类的东西,在Lexer看来应该算是数字而非字符串。表达式中的字符串必须使用单引号或者双引号来标识。Lexer的核心逻辑在lex方法中:

    // Tokenizes `text`: walks it one character at a time, dispatches on the
    // current character, and returns the accumulated `this.tokens` array.
    lex: function(text) {
        this.text = text;
        this.index = 0;
        this.tokens = [];
    
        while (this.index < this.text.length) {
          var ch = this.text.charAt(this.index);
          if (ch === '"' || ch === "'") {
            /* a quote character starts a string literal */
            this.readString(ch);
          } else if (this.isNumber(ch) || ch === '.' && this.isNumber(this.peek())) {
            /* a digit, or a '.' whose next character is a digit, starts a number */
            this.readNumber();
          } else if (this.isIdent(ch)) {
            /* an identifier character starts an identifier */
            this.readIdent();
          } else if (this.is(ch, '(){}[].,;:?')) {
            /* one of (){}[].,;:? becomes a one-character token */
            this.tokens.push({index: this.index, text: ch});
            this.index++;
          } else if (this.isWhitespace(ch)) {
            /* whitespace is skipped without producing a token */
            this.index++;
          } else {
            /* otherwise try to match an operator of one, two or three characters */
            var ch2 = ch + this.peek();
            var ch3 = ch2 + this.peek(2);
            var op1 = OPERATORS[ch];
            var op2 = OPERATORS[ch2];
            var op3 = OPERATORS[ch3];
            if (op1 || op2 || op3) {
              // the longest candidate that is a known operator wins
              var token = op3 ? ch3 : (op2 ? ch2 : ch);
              this.tokens.push({index: this.index, text: token, operator: true});
              this.index += token.length;
            } else {
              this.throwError('Unexpected next character ', this.index, this.index + 1);
            }
          }
        }
        return this.tokens;
      }

    主要看一下匹配操作运算。这里源码中会调用OPERATORS。看一下OPERATORS:

    // Lookup table from operator text to its evaluator. Each evaluator receives
    // the scope (`self`), `locals`, and the operand functions, which it invokes
    // to obtain the operand values.
    var OPERATORS = extend(createMap(), {
        // Addition: an undefined operand is ignored instead of producing NaN.
        '+': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          if (!isDefined(lhs)) {
            return isDefined(rhs) ? rhs : undefined;
          }
          return isDefined(rhs) ? lhs + rhs : lhs;
        },
        // Subtraction: an undefined operand counts as 0.
        '-': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          var minuend = isDefined(lhs) ? lhs : 0;
          var subtrahend = isDefined(rhs) ? rhs : 0;
          return minuend - subtrahend;
        },
        '*': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs * rhs;
        },
        '/': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs / rhs;
        },
        '%': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs % rhs;
        },
        '===': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs === rhs;
        },
        '!==': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs !== rhs;
        },
        // Loose (in)equality is what the expression operators `==` / `!=` map to.
        '==': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs == rhs;
        },
        '!=': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs != rhs;
        },
        '<': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs < rhs;
        },
        '>': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs > rhs;
        },
        '<=': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs <= rhs;
        },
        '>=': function(self, locals, a, b) {
          var lhs = a(self, locals);
          var rhs = b(self, locals);
          return lhs >= rhs;
        },
        // The logical operators keep short-circuiting: the right operand
        // function is only invoked when its value is needed.
        '&&': function(self, locals, a, b) {
          return a(self, locals) && b(self, locals);
        },
        '||': function(self, locals, a, b) {
          return a(self, locals) || b(self, locals);
        },
        '!': function(self, locals, a) {
          var operand = a(self, locals);
          return !operand;
        },

        //Tokenized as operators but parsed as assignment/filters
        '=': true,
        '|': true
    });

    可以看到OPERATORS实际上存储的是操作符和操作符函数的键值对。根据操作符返回对应的操作符函数。我们看一下调用例子:

    // Tokenize a small assignment expression and print what lex() returns.
    var _l = new Lexer({});
    var a = _l.lex("a = a + 1");
    console.log(a);

     结合之前的lex方法,我们来回顾下代码执行过程:

    1.index指向'a'是一个字母。匹配isIdent成功。将生成的token存入tokens中

    2.index指向空格符,匹配isWhitespace成功,同上

    3.index指向=,匹配操作运算符成功,同上

    4.index指向空格符,匹配isWhitespace成功,同上

    5.index指向'a'是一个字母。匹配isIdent成功。同上

    6.index指向空格符,匹配isWhitespace成功,同上

    7.index指向+,匹配操作运算符成功,同上

    8.index指向空格符,匹配isWhitespace成功,同上

    9.index指向1,匹配数字成功,同上

    以上则是"a = a + 1"的代码执行过程。9步执行结束之后,跳出while循环。刚才我们看到了,每次匹配成功,源码会生成一个token。因为匹配类型的不同,生成出来的token的键值对略有不同:

    // Illustrative outline (not runnable code) of the token object produced
    // for each kind of match.
    number:{
          index: start,
          text: number,
          constant: true,
          value: Number(number)
        },
    string: {
              index: start,
              text: rawString,
              constant: true,
              value: string
            },
    ident: {
          index: start,
          text: this.text.slice(start, this.index),
          identifier: true /* marks the token as an identifier */ 
        },
    '(){}[].,;:?': {
        index: this.index,
        text: ch
    },
    "操作符": {
         index: this.index, 
         text: token, 
         operator: true
    }
    // text is the raw expression text, while value is the actual value

    number和string其实都有相对应的真实值,意味着如果我们表达式是2e2,那number生成的token的值value就应该是200。到此我们通过Lexer类获得了一个由token组成的数组。从外部看,实际上Lexer是将我们输入的表达式解析成了token json。这一步属于词法分析:得到的是token序列,还不是真正的语法树(AST)。但是目前来看,我们依旧还没有能获得我们定义表达式的结果。那就需要用到Parser了。

    2.Parser

    先看一下Parser的内部结构:

    /**
     * Parser constructor: stores the lexer, the $filter service and the options
     * it is given.
     */
    function Parser(lexer, $filter, options) {
      this.lexer = lexer;
      this.$filter = $filter;
      this.options = options;
    }

    // Prototype outline: every method below is an empty stub that only names
    // the responsibility; selected bodies are quoted later in the article.
    Parser.prototype = {
      constructor: Parser,

      parse: function() {},

      primary: function() {},

      // Raises a syntax error.
      throwError: function() {},

      peekToken: function() {},

      // Returns the first token in tokens.
      peek: function() {},

      // Returns the token at the given position in tokens, otherwise false.
      peekAhead: function() {},

      // Removes and returns the first token in tokens, otherwise false.
      expect: function() {},

      // Removes the first token; calls expect underneath.
      consume: function() {},

      // Unary operation.
      unaryFn: function() {},

      // Binary operation.
      binaryFn: function() {},

      identifier: function() {},

      constant: function() {},

      statements: function() {},

      filterChain: function() {},

      filter: function() {},

      expression: function() {},

      assignment: function() {},

      ternary: function() {},

      // Logical OR.
      logicalOR: function() {},

      // Logical AND.
      logicalAND: function() {},

      // Equality.
      equality: function() {},

      // Relational comparison.
      relational: function() {},

      // Addition and subtraction.
      additive: function() {},

      // Multiplication, division and remainder.
      multiplicative: function() {},

      // Unary operators.
      unary: function() {},

      fieldAccess: function() {},

      objectIndex: function() {},

      functionCall: function() {},

      arrayDeclaration: function() {},

      object: function() {}
    };

    Parser的入口方法是parse,内部执行了statements方法。来看下statements:

    statements: function() {
        var statements = [];
        while (true) {
          if (this.tokens.length > 0 && !this.peek('}', ')', ';', ']'))
            statements.push(this.filterChain());
          if (!this.expect(';')) {
            // optimize for the common case where there is only one statement.
            // TODO(size): maybe we should not support multiple statements?
            return (statements.length === 1)
                ? statements[0]
                : function $parseStatements(self, locals) {
                    var value;
                    for (var i = 0, ii = statements.length; i < ii; i++) {
                      value = statements[i](self, locals);
                    }
                    return value;
                  };
          }
        }
      }

    这里的tokens可以理解为表达式本身,实际上它就是表达式经过lexer转换得到的token数组。在statements中,如果当前token不是},),;,]之一,就会执行filterChain方法。当tokens检索完成之后:如果只有一条语句,直接返回该语句对应的函数;否则返回一个$parseStatements方法。其实Parser中很多方法都返回了类似的函数,意味着返回的内容需要执行后才能得到结果。

    看一下filterChain:

    filterChain: function() {
        /* 针对angular语法的filter */
        var left = this.expression();
        var token;
        while ((token = this.expect('|'))) {
          left = this.filter(left);
        }
        return left;
      }

    其中filterChain是针对angular表达式独有的"|"filter写法设计的。我们先绕过这块,进入expression

    expression: function() {
        return this.assignment();
      }

    再看assignment:

    assignment: function() {
        var left = this.ternary();
        var right;
        var token;
        if ((token = this.expect('='))) {
          if (!left.assign) {
            this.throwError('implies assignment but [' +
                this.text.substring(0, token.index) + '] can not be assigned to', token);
          }
          right = this.ternary();
          return extend(function $parseAssignment(scope, locals) {
            return left.assign(scope, right(scope, locals), locals);
          }, {
            inputs: [left, right]
          });
        }
        return left;
      }

    我们看到了ternary方法。这是一个解析三目操作的方法。与此同时,assignment将表达式以=划分成left和right两块。并且两块都尝试执行ternary。

    ternary: function() {
        var left = this.logicalOR();
        var middle;
        var token;
        if ((token = this.expect('?'))) {
          middle = this.assignment();
          if (this.consume(':')) {
            var right = this.assignment();
    
            return extend(function $parseTernary(self, locals) {
              return left(self, locals) ? middle(self, locals) : right(self, locals);
            }, {
              constant: left.constant && middle.constant && right.constant
            });
          }
        }
    
        return left;
      }

    在解析三目运算之前,又根据?将表达式划分成left和right两块。左侧再去尝试执行logicalOR,实际上这是一个逻辑或的解析,按照这个执行流程,我们一下就有了思路。这有点类似我们平时写三目运算时代码的执行情况,比如: 2 > 2 ? 1 : 0。如果把这个当成表达式,那根据?划分left和right,left就应该是2 > 2,right应该就是 1 : 0。然后尝试在left看是否有逻辑或的操作。也就是说,Parser里面的方法调用的嵌套级数越深,其方法的优先级则越高。好,那我们一口气看看这个最高的优先级在哪?

    logicalOR -> logicalAND -> equality -> relational -> additive -> multiplicative ->  unary

    好吧,嵌套级数确实有点多。那么我们看下unary。

    unary: function() {
        var token;
        if (this.expect('+')) {
          return this.primary();
        } else if ((token = this.expect('-'))) {
          return this.binaryFn(Parser.ZERO, token.text, this.unary());
        } else if ((token = this.expect('!'))) {
          return this.unaryFn(token.text, this.unary());
        } else {
          return this.primary();
        }
      }

    这边需要看两个主要的方法:binaryFn和primary。如果判断是-,则必须通过binaryFn去添加函数。看下binaryFn

    binaryFn: function(left, op, right, isBranching) {
        var fn = OPERATORS[op];
        return extend(function $parseBinaryFn(self, locals) {
          return fn(self, locals, left, right);
        }, {
          constant: left.constant && right.constant,
          inputs: !isBranching && [left, right]
        });
      }

    其中OPERATORS是之前聊Lexer也用到过,它根据操作符存储相应的操作函数。看一下fn(self, locals, left, right)。而我们随便取OPERATORS中的一个例子:

    '-':function(self, locals, a, b) {
              a=a(self, locals); b=b(self, locals);
              return (isDefined(a) ? a : 0) - (isDefined(b) ? b : 0);
            }

    其中a和b就是left和right,他们其实都是跟之前的$parseStatements类似的函数,默认存储着token中的value。经过事先解析好的四则运算来生成最终答案。其实这就是Parser的基本功能。至于嵌套,我们可以把它理解为js的操作符的优先级。这样就一目了然了。至于primary方法,它筛选出 { ( 等token,对相应的对象做进一步的解析。

    Parser的代码并不复杂,只是函数方法间调用密切,让我们再看一个例子:

    // Parse an arithmetic expression, then call the returned function to evaluate it.
    var _l = new Lexer({});
    var _p = new Parser(_l);
    var a = _p.parse("1 + 1 + 2");
    console.log(a()); //4

    我们看下1+1+2生成的token是什么样的:

    [
    {"index":0,"text":"1","constant":true,"value":1},{"index":2,"text":"+","operator":true},{"index":4,"text":"1","constant":true,"value":1},{"index":6,"text":"+","operator":true},{"index":8,"text":"2","constant":true,"value":2}
    ]

    Parser根据lexer生成的tokens尝试解析。tokens每一个成员都会生成一个函数,其先后执行逻辑按照用户输入的1+1+2的顺序执行。注意像1和2这类constants为true的token,parser会通过constant生成需要的函数$parseConstant,也就是说1+1+2中的两个1和一个2都是返回$parseConstant函数,通过$parseBinaryFn管理加法逻辑。

    constant: function() {
        var value = this.consume().value;
    
        return extend(function $parseConstant() {
          return value; //这个函数执行之后,就是将value值返回。
        }, {
          constant: true,
          literal: true
        });
      },
    binaryFn: function(left, op, right, isBranching) {
        var fn = OPERATORS[op];//加法逻辑
        return extend(function $parseBinaryFn(self, locals) {
          return fn(self, locals, left, right);//left和right分别表示生成的对应函数
        }, {
          constant: left.constant && right.constant,
          inputs: !isBranching && [left, right]
        });
      }

    那我们demo中的a应该返回什么函数呢?当然是$parseBinaryFn。其中的left是1+1的$parseBinaryFn,right就是2的$parseConstant。

    再来一个例子:

    // Parse an object-literal expression. Note that this prints the value
    // returned by parse itself, not the result of calling it.
    var _l = new Lexer({});
    var _p = new Parser(_l);
    var a = _p.parse('{"name": "hello"}');
    console.log(a);

    这边我们传入一个json,理论上我们执行完a函数,应该返回一个{name: "hello"}的对象。它调用了Parser中的object

    object: function() {
        var keys = [], valueFns = [];
        if (this.peekToken().text !== '}') {
          do {
            if (this.peek('}')) {
              // Support trailing commas per ES5.1.
              break;
            }
            var token = this.consume();
            if (token.constant) {
              //把key取出来
              keys.push(token.value);
            } else if (token.identifier) {
              keys.push(token.text);
            } else {
              this.throwError("invalid key", token);
            }
            this.consume(':');
            //冒号之后,则是值,将值存在valueFns中
            valueFns.push(this.expression());
            //根据逗号去迭代下一个
          } while (this.expect(','));
        }
        this.consume('}');
    
        return extend(function $parseObjectLiteral(self, locals) {
          var object = {};
          for (var i = 0, ii = valueFns.length; i < ii; i++) {
            object[keys[i]] = valueFns[i](self, locals);
          }
          return object;
        }, {
          literal: true,
          constant: valueFns.every(isConstant),
          inputs: valueFns
        });
      }

    比方我们的例子{"name": "hello"},object会将name存在keys中,hello则会生成$parseConstant函数存在valueFns中,最终返回$parseObjectLiteral函数。

    下一个例子:

    // Same object literal as before, immediately indexed with ["name"].
    var a = _p.parse('{"name": "hello"}["name"]');

    这个跟上一个例子的差别在于后面尝试去读取name的值,这边则调用parser中的objectIndex方法。

    // Parses the `[index]` part of `obj[index]` (up to the closing `]`) and
    // returns a getter function that also carries an `assign` for writing
    // `obj[index] = value`.
    objectIndex: function(obj) {
        var expression = this.text;
    
        var indexFn = this.expression();
        this.consume(']');
    
        return extend(function $parseObjectIndex(self, locals) {
          var o = obj(self, locals), // the object being indexed ($parseObjectLiteral in the article's example)
              i = indexFn(self, locals), // the index value ($parseConstant yielding "name" in the example)
              v;
    
          // the member name is checked before the object is touched
          ensureSafeMemberName(i, expression);
          if (!o) return undefined;
          v = ensureSafeObject(o[i], expression);
          return v;
        }, {
          assign: function(self, value, locals) {
            var key = ensureSafeMemberName(indexFn(self, locals), expression);
            // prevent overwriting of Function.constructor which would break ensureSafeObject check
            var o = ensureSafeObject(obj(self, locals), expression);
            // when the target object is missing, create an empty one through obj.assign first
            if (!o) obj.assign(self, o = {}, locals);
            return o[key] = value;
          }
        });
      }

    很简单吧,obj[xx]和obj.x类似。大家自行阅读,我们再看一个函数调用的demo

    // Parse a function-call expression and evaluate it with `demo` passed in
    // as the first argument (the scope the name `test` is looked up on).
    var _l = new Lexer({});
    var _p = new Parser(_l, '', {});
    var demo = {
      "test": function(){
        alert("welcome");
      }
    };
    var a = _p.parse('test()');
    console.log(a(demo));

    我们传入一个test的调用。这边调用了parser中的functionCall方法和identifier方法

    identifier: function() {
        var id = this.consume().text;
    
        //Continue reading each `.identifier` unless it is a method invocation
        while (this.peek('.') && this.peekAhead(1).identifier && !this.peekAhead(2, '(')) {
          id += this.consume().text + this.consume().text;
        }
    
        return getterFn(id, this.options, this.text);
      }

    看一下getterFn方法

    ...
    // Excerpt of getterFn: appends one guarded lookup per path segment to
    // `code`, the source text of the getter that is generated below.
    forEach(pathKeys, function(key, index) {
          ensureSafeMemberName(key, fullExp);
          var lookupJs = (index
                          // we simply dereference 's' on any .dot notation
                          ? 's'
                          // but if we are first then we check locals first, and if so read it first
                          : '((l&&l.hasOwnProperty("' + key + '"))?l:s)') + '.' + key;
          if (expensiveChecks || isPossiblyDangerousMemberName(key)) {
            lookupJs = 'eso(' + lookupJs + ', fe)';
            needsEnsureSafeObject = true;
          }
          // Each generated statement ends with an escaped newline; a raw line
          // break inside the quotes would be a syntax error.
          code += 'if(s == null) return undefined;\n' +
                  's=' + lookupJs + ';\n';
        });
        code += 'return s;';
    
        // NOTE(review): the getter is compiled from a string; every key spliced
        // into it has been passed to ensureSafeMemberName above.
        /* jshint -W054 */
        var evaledFnGetter = new Function('s', 'l', 'eso', 'fe', code); // s=scope, l=locals, eso=ensureSafeObject
        /* jshint +W054 */
        // toString returns the generated source text
        evaledFnGetter.toString = valueFn(code);
    ...

    这是通过字符串创建一个匿名函数的方法。我们看下demo的test生成了一个什么匿名函数:

    // The getter generated for the path "test". The strings 's', 'l', 'eso' and
    // 'fe' given to `new Function` become ordinary parameter names, and a
    // function created that way is named `anonymous`.
    // s = scope, l = locals; eso and fe are not used for this path.
    function anonymous(s, l, eso, fe) {
    // no scope at all: nothing to look up
    if(s == null) return undefined;
    // read from locals when it has its own "test" property, otherwise from the scope
    s=((l&&l.hasOwnProperty("test"))?l:s).test;
    return s;
    }

    这个匿名函数的意思,需要传入一个上下文,匿名函数通过查找上下文中是否有test属性,如果没有传上下文则直接返回未定义。这也就是为什么我们在生成好的a函数在执行它时需要传入demo对象的原因。最后补一个functionCall

    // Parses the argument list of a call (up to the closing `)`) and returns a
    // function that resolves the callee and its context, evaluates the
    // arguments, invokes the callee and returns the checked result.
    functionCall: function(fnGetter, contextGetter) {
        var argsFn = [];
        if (this.peekToken().text !== ')') {
          /* the call has at least one argument */
          do {
            // collect the compiled argument expressions in argsFn
            argsFn.push(this.expression());
          } while (this.expect(','));
        }
        this.consume(')');
    
        var expressionText = this.text;
        // we can safely reuse the array across invocations
        var args = argsFn.length ? [] : null;
    
        return function $parseFunctionCall(scope, locals) {
          // context: the contextGetter's result when it is truthy; undefined when it is
          // defined but falsy; the scope itself when no contextGetter was given
          var context = contextGetter ? contextGetter(scope, locals) : isDefined(contextGetter) ? undefined : scope;
          // fetch the callee through the previously generated getter; fall back to noop
          var fn = fnGetter(scope, locals, context) || noop;
    
          if (args) {
            var i = argsFn.length;
            while (i--) {
              args[i] = ensureSafeObject(argsFn[i](scope, locals), expressionText);
            }
          }
    
          ensureSafeObject(context, expressionText);
          ensureSafeFunction(fn, expressionText);
    
          // IE doesn't have apply for some native functions
          // the context is passed along as `this` when the function is invoked
          var v = fn.apply
                ? fn.apply(context, args)
                : fn(args[0], args[1], args[2], args[3], args[4]);
    
          if (args) {
            // Free-up the memory (arguments of the last function call).
            args.length = 0;
          }
    
          return ensureSafeObject(v, expressionText);
          };
      }

    下面我们看一下$ParseProvider,这是一个基于Lexer和Parser的angular内置provider。它对scope的api提供了基础支持。

    ...
    // The $parse service function. A string is compiled (with caching) into a
    // function; a function is passed through; anything else becomes noop. In
    // every case the result goes through addInterceptor with interceptorFn.
    return function $parse(exp, interceptorFn, expensiveChecks) {
          var parsedExpression, oneTime, cacheKey;
    
          switch (typeof exp) {
            case 'string':
              // the trimmed text, still including any leading '::', is the cache key
              cacheKey = exp = exp.trim();
    
              var cache = (expensiveChecks ? cacheExpensive : cacheDefault);
              parsedExpression = cache[cacheKey];
    
              if (!parsedExpression) {
                // a leading '::' sets oneTime and is stripped before parsing
                if (exp.charAt(0) === ':' && exp.charAt(1) === ':') {
                  oneTime = true;
                  exp = exp.substring(2);
                }
    
                var parseOptions = expensiveChecks ? $parseOptionsExpensive : $parseOptions;
                // run the expression through Lexer and Parser
                var lexer = new Lexer(parseOptions);
                var parser = new Parser(lexer, $filter, parseOptions);
                parsedExpression = parser.parse(exp);
                // attach a $$watchDelegate, which provides support for the scope side
                if (parsedExpression.constant) {
                  parsedExpression.$$watchDelegate = constantWatchDelegate;
                } else if (oneTime) {
                  //oneTime is not part of the exp passed to the Parser so we may have to
                  //wrap the parsedExpression before adding a $$watchDelegate
                  parsedExpression = wrapSharedExpression(parsedExpression);
                  parsedExpression.$$watchDelegate = parsedExpression.literal ?
                    oneTimeLiteralWatchDelegate : oneTimeWatchDelegate;
                } else if (parsedExpression.inputs) {
                  parsedExpression.$$watchDelegate = inputsWatchDelegate;
                }
                // store the compiled expression in the cache
                cache[cacheKey] = parsedExpression;
              }
              return addInterceptor(parsedExpression, interceptorFn);
    
            case 'function':
              return addInterceptor(exp, interceptorFn);
    
            default:
              return addInterceptor(noop, interceptorFn);
          }
        };

    总结:Lexer和Parser的实现确实让我大开眼界。通过这两个函数,实现了angular自己的语法解析器。逻辑部分还是相对复杂

    时间不多,内容刚好,以上是个人阅读源码的一些理解,有不对或者偏差的地方,还希望园友们斧正。共同进步。

  • 相关阅读:
    linux 学习(二)防火墙
    linux学习(一)开始
    ajax和sap以及网络安全
    仿苹果导航菜单js问题
    基本类型和引用类型调用是的区别(Object.create)
    箴言
    思维的宽度
    笔记
    循环传值_闭包
    一个问题的解法(兔子三个月之后每月都生兔子的问题)
  • 原文地址:https://www.cnblogs.com/wumadi/p/6629452.html
Copyright © 2011-2022 走看看