zoukankan      html  css  js  c++  java
  • Nodejs之require加载机制(模块可以污染全局空间)

    以前就觉得Nodejs的MooTools库很奇怪,因为用他的时候,不需要把require的返回值保存起来,今天实在憋不住,就研究了下,对NodeJs的require机制又有了几分深刻的理解。

    MooTools库的“奇怪”用法:

    require('mootools');
    
      // `Class` is not imported or assigned anywhere in this snippet: it is the
      // global that require('mootools') installed as a side effect.
      var QueryCommand = new Class({
            // Invoked by the MooTools-generated constructor with the arguments
            // given to `new QueryCommand(...)`; stores them on the instance.
            initialize: function (product, week, action) {
                this.product = product;
                this.weeknum = week;
                this.action = action;
            },
    
            // Serializes the instance with JSON.stringify.
            toJsonString: function () {
                return JSON.stringify(this);
            }
        });

    看到没,把require当作C++的include用了,之后的代码可以直接使用Class类型了。其实,MooTools的做法是把Class这个类型“附加”到了全局作用域。我觉得MooTools可能就是这么设计的,因为他是一个OO库嘛。

    那么,MooTools是怎么把他自己附加到全局作用域的呢?其实很巧妙,是通过函数字面量做的:

    (function(){
    
    var Class = this.Class = new Type('Class', function(params){
    	if (instanceOf(params, Function)) params = {initialize: params};
    
    	var newClass = function(){
    		reset(this);
    		if (newClass.$prototyping) return this;
    		this.$caller = null;
    		var value = (this.initialize) ? this.initialize.apply(this, arguments) : this;
    		this.$caller = this.caller = null;
    		return value;
    	}.extend(this).implement(params);
    
    	newClass.$constructor = Class;
    	newClass.prototype.$constructor = newClass;
    	newClass.prototype.parent = parent;
    
    	return newClass;
    });
    
    ......
    
    })();
    

    他通过函数字面量定义了一个匿名函数,然后立即执行之(即IIFE),这是JS的惯用法。有一点要注意的是,这个函数是被直接调用的,没有指定调用者(receiver);在非严格模式下,函数内部的this就会指向全局对象(Node.js里就是global),也就是那个“始祖对象”!

    再看一个更加简单的例子

    app.js

    // Run a.js for its side effect only; the returned exports object is ignored.
    require('./a.js')
    // `a` is not declared in this file: it resolves to the global created by a.js.
    console.log(a);

    a.js

    a = 3; // no `var`: this creates a global variable, so app.js prints 3

    注意,上面不是var a=3;所以该语句是在全局对象上附加了a这个属性(注意,这里说的不是模块顶层的this——模块顶层的this是module.exports——而是始祖对象global),然后呢,在app.js模块里就能使用了。其实,在其他的模块里也能访问到。


    接下来说说,为什么会污染到全局作用域,nodejs不是用require来加载每个模块的嘛,模块应该互相独立的呀,其实nodeJs的模块加载顺序是这样的:

    Module._load("xxx.js") --> var module = new Module(); --> module.load("xxx.js") --> module._compile() --> 最终调用的是被包装(wrap)过的模块代码,上面那个例子的话,最终执行的是:

    // The source of a.js after wrapping: the module body becomes the body of a
    // function whose parameters are exports, require, module, __filename and
    // __dirname. `a = 3` still has no `var`, so it is not local to this function.
    (function (exports, require, module, __filename, __dirname) { 
        a = 3;
    });

    看一下Module.prototype._compile()的代码就清楚了:

    /**
     * Runs a module's source text.
     *
     * Builds the module-specific `require` function, then executes `content`
     * either inside a wrapper function (the normal case) or, when
     * Module._contextLoad is set, directly in a context.
     *
     * @param {string} content  Module source code.
     * @param {string} filename File name passed to the script runner and used
     *                          to derive __dirname.
     * @return Whatever executing the module code returns.
     */
    Module.prototype._compile = function(content, filename) {
      var self = this;
      // remove shebang
      content = content.replace(/^#!.*/, '');
    
      // The `require` seen by the module: a closure over this Module instance,
      // so lookups go through self.require().
      function require(path) {
        return self.require(path);
      }
    
      require.resolve = function(request) {
        return Module._resolveFilename(request, self);
      };
    
      // Reading require.paths throws instead of returning a value.
      Object.defineProperty(require, 'paths', { get: function() {
        throw new Error('require.paths is removed. Use ' +
                        'node_modules folders, or the NODE_PATH ' +
                        'environment variable instead.');
      }});
    
      require.main = process.mainModule;
    
      // Enable support to add extra extension types
      require.extensions = Module._extensions;
      require.registerExtension = function() {
        throw new Error('require.registerExtension() removed. Use ' +
                        'require.extensions instead.');
      };
    
      // Same object as Module._cache, not a copy.
      require.cache = Module._cache;
    
      var dirname = path.dirname(filename);
    
      // Context mode: the source is run unwrapped, so the module variables are
      // supplied as properties of the context's global object instead of as
      // function arguments.
      if (Module._contextLoad) {
        if (self.id !== '.') {
          debug('load submodule');
          // not root module
          // Shallow-copy the enumerable properties of `global` into a sandbox
          // (for...in, so inherited enumerable properties are copied too).
          var sandbox = {};
          for (var k in global) {
            sandbox[k] = global[k];
          }
          sandbox.require = require;
          sandbox.exports = self.exports;
          sandbox.__filename = filename;
          sandbox.__dirname = dirname;
          sandbox.module = self;
          sandbox.global = sandbox;
          // NOTE(review): `root` is not declared in this function; presumably
          // it is a global alias of `global` - confirm.
          sandbox.root = root;
    
          return runInNewContext(content, sandbox, filename, true);
        }
    
        debug('load root module');
        // root module
        // The root module runs in the current context, so its module variables
        // are written onto `global` itself.
        global.require = require;
        global.exports = self.exports;
        global.__filename = filename;
        global.__dirname = dirname;
        global.module = self;
    
        return runInThisContext(content, filename, true);
      }
    
      // create wrapper function
      var wrapper = Module.wrap(content);
    
      // `wrapper` is source text; running it yields the wrapper function.
      var compiledWrapper = runInThisContext(wrapper, filename, true);
      if (global.v8debug) {
        // Resolve the entry script once and remember it in resolvedArgv.
        if (!resolvedArgv) {
          // we enter the repl if we're not given a filename argument.
          if (process.argv[1]) {
            resolvedArgv = Module._resolveFilename(process.argv[1], null);
          } else {
            resolvedArgv = 'repl';
          }
        }
    
        // Set breakpoint on module start
        if (filename === resolvedArgv) {
          global.v8debug.Debug.setBreakPoint(compiledWrapper, 0, 0);
        }
      }
      // Argument order matches the wrapper's parameter list:
      // (exports, require, module, __filename, __dirname).
      var args = [self.exports, require, self, filename, dirname];
      // `this` inside the module body is self.exports.
      return compiledWrapper.apply(self.exports, args);
    };

    Module._contextLoad默认是false(只有设置了环境变量NODE_MODULE_CONTEXTS为大于0的数字时才为true),所以if (Module._contextLoad)分支里的代码可以忽略掉,但里面有几个重点:

    1. 在函数入口处,定义了require函数,require函数还有一些静态的属性、成员函数。最终,require作为调用compiledWrapper.apply()的一个实参。
    2. var wrapper其实是一个字符串类型的变量,值是 “(function (exports, require, module, __filename, __dirname) { ” + 你的模块代码 + "});",就是上面我贴出来的那包装过的函数。最终,他通过调用runInThisContext()把字符串转成一个真正的函数。
    3. 最终通过调用被包装好的函数,执行你的模块代码。this指针指向的是self.exports,也就是module.exports,也就是exports。所以在你的模块顶层,this、exports和module.exports三者指向同一个对象(即 this === exports 且 exports === module.exports)。
    4. args是参数,再次重申:之所以你能在模块内部使用require(),是因为他是作为一个参数传入的。

    exports其实是一个空的对象,所以exports的初值是空的,可以看这段代码:

    /**
     * Constructor for the object that represents one module.
     *
     * @param {string} id     Identifier of the module.
     * @param {Module} parent The module that required this one, if any.
     */
    function Module(id, parent) {
      this.id = id;
      // Starts out as an empty object; this is what `exports` refers to.
      this.exports = {};
      this.parent = parent;
      // Register this module in the parent's children list when it has one.
      if (parent && parent.children) {
        parent.children.push(this);
      }
    
      this.filename = null;
      this.loaded = false;
      this.children = [];
    }

    最终是怎么把exports返回出来的呢,是在这个函数里面:

    /**
     * Loads a module and returns its exports.
     *
     * Order of checks: the module cache, then native modules, and finally a
     * fresh Module that is created, cached and loaded from disk.
     *
     * @param {string} request Module name or path as given to require().
     * @param {Module} parent  The requiring module (used for resolution).
     * @param {boolean} isMain True when loading the program's entry module.
     * @return The module's exports.
     */
    Module._load = function(request, parent, isMain) {
      if (parent) {
        debug('Module._load REQUEST  ' + (request) + ' parent: ' + parent.id);
      }
    
      // The resolved filename is the cache key.
      var filename = Module._resolveFilename(request, parent);
    
      var cachedModule = Module._cache[filename];
      if (cachedModule) {
        return cachedModule.exports;
      }
    
      if (NativeModule.exists(filename)) {
        // REPL is a special case, because it needs the real require.
        if (filename == 'repl') {
          var replModule = new Module('repl');
          replModule._compile(NativeModule.getSource('repl'), 'repl.js');
          NativeModule._cache.repl = replModule;
          return replModule.exports;
        }
    
        debug('load native module ' + request);
        return NativeModule.require(filename);
      }
    
      var module = new Module(filename, parent);
    
      if (isMain) {
        process.mainModule = module;
        module.id = '.';
      }
    
      // Cached before load() runs, so a require of the same file made while it
      // is still loading gets this (possibly incomplete) module from the cache.
      Module._cache[filename] = module;
    
      var hadException = true;
    
      // try/finally with a flag (rather than catch/rethrow): if load() throws,
      // the cache entry is removed and the exception keeps propagating.
      try {
        module.load(filename);
        hadException = false;
      } finally {
        if (hadException) {
          delete Module._cache[filename];
        }
      }
    
      return module.exports;
    };

    NodeJs调试一次不容易,把所有的代码,都贴上来(看看还蛮有收获的,有一个stripBOM工具函数,呵呵):

    (function (exports, require, module, __filename, __dirname) { // Copyright Joyent, Inc. and other Node contributors.
    //
    // Permission is hereby granted, free of charge, to any person obtaining a
    // copy of this software and associated documentation files (the
    // "Software"), to deal in the Software without restriction, including
    // without limitation the rights to use, copy, modify, merge, publish,
    // distribute, sublicense, and/or sell copies of the Software, and to permit
    // persons to whom the Software is furnished to do so, subject to the
    // following conditions:
    //
    // The above copyright notice and this permission notice shall be included
    // in all copies or substantial portions of the Software.
    //
    // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
    // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
    // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
    // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
    // USE OR OTHER DEALINGS IN THE SOFTWARE.
    
    var NativeModule = require('native_module');
    var Script = process.binding('evals').NodeScript;
    var runInThisContext = Script.runInThisContext;
    var runInNewContext = Script.runInNewContext;
    var assert = require('assert').ok;
    
    
    /**
     * Safe own-property test.
     *
     * Calling obj.hasOwnProperty(prop) directly breaks when the object has
     * overridden hasOwnProperty, so the Object.prototype implementation is
     * borrowed instead.
     * See: https://github.com/joyent/node/issues/1707
     *
     * @param {Object} obj  Object to inspect.
     * @param {string} prop Property name.
     * @return {boolean} True when `prop` is an own property of `obj`.
     */
    function hasOwnProperty(obj, prop) {
      var nativeCheck = Object.prototype.hasOwnProperty;
      return nativeCheck.call(obj, prop);
    }
    
    
    /**
     * Constructor for the object that represents one module.
     *
     * @param {string} id     Identifier of the module.
     * @param {Module} parent The module that required this one, if any.
     */
    function Module(id, parent) {
      this.id = id;
      this.exports = {};  // what the module body sees as `exports`
      this.parent = parent;

      // Register with the parent, provided it keeps a children list.
      var siblings = parent ? parent.children : null;
      if (siblings) {
        siblings.push(this);
      }

      this.filename = null;
      this.loaded = false;
      this.children = [];
    }
    // The Module constructor is this file's export.
    module.exports = Module;
    
    // Set the environment variable NODE_MODULE_CONTEXTS=1 to make node load all
    // modules in their own context.
    Module._contextLoad = (+process.env['NODE_MODULE_CONTEXTS'] > 0);
    // Loaded modules, keyed by resolved filename (see Module._load).
    Module._cache = {};
    // Results of Module._findPath, keyed by JSON of {request, paths}.
    Module._pathCache = {};
    // File extension -> loader function(module, filename).
    Module._extensions = {};
    // Global search paths; filled in by Module._initPaths.
    var modulePaths = [];
    Module.globalPaths = [];
    
    Module.wrapper = NativeModule.wrapper;
    Module.wrap = NativeModule.wrap;
    
    var path = NativeModule.require('path');
    
    // Module._debug writes its argument to stderr when the NODE_DEBUG
    // environment variable contains "module"; otherwise it does nothing.
    Module._debug =
        (process.env.NODE_DEBUG && /module/.test(process.env.NODE_DEBUG)) ?
        function(x) {
          console.error(x);
        } :
        function() {};


    // We use this alias for the preprocessor that filters it out
    var debug = Module._debug;
    
    
    // given a module name, and a list of paths to test, returns the first
    // matching file in the following precedence.
    //
    // require("a.<ext>")
    //   -> a.<ext>
    //
    // require("a")
    //   -> a
    //   -> a.<ext>
    //   -> a/index.<ext>
    
    /**
     * Stats a path without throwing.
     *
     * @param {string} path File system path.
     * @return The fs.statSync() result, or false when statSync throws.
     */
    function statPath(path) {
      var fs = NativeModule.require('fs');
      var result = false;
      try {
        result = fs.statSync(path);
      } catch (ex) {
        // Deliberately ignored: a failed stat is reported as `false`.
      }
      return result;
    }
    
    // Parsed package.json contents, keyed by the directory they were read from.
    var packageCache = {};

    /**
     * Reads and parses <requestPath>/package.json.
     *
     * Successful parses are cached in packageCache; a missing or unreadable
     * file is not cached.
     *
     * @param {string} requestPath Directory to look in.
     * @return The parsed package.json, or false when it cannot be read.
     * @throws The JSON.parse error, with `path` set and the file name
     *         prepended to `message`, when the file is not valid JSON.
     */
    function readPackage(requestPath) {
      if (hasOwnProperty(packageCache, requestPath)) {
        return packageCache[requestPath];
      }

      var fs = NativeModule.require('fs');
      var jsonPath;
      var json;
      try {
        jsonPath = path.resolve(requestPath, 'package.json');
        json = fs.readFileSync(jsonPath, 'utf8');
      } catch (e) {
        // No readable package.json in this directory.
        return false;
      }

      var pkg;
      try {
        pkg = JSON.parse(json);
        packageCache[requestPath] = pkg;
      } catch (e) {
        e.path = jsonPath;
        e.message = 'Error parsing ' + jsonPath + ': ' + e.message;
        throw e;
      }
      return pkg;
    }
    
    /**
     * Resolves a directory through the "main" field of its package.json.
     *
     * Tries, in order: the main path itself, the main path plus each
     * extension, then <main>/index plus each extension.
     *
     * @param {string} requestPath Directory that may contain a package.json.
     * @param {Array} exts         Extensions to try.
     * @return The resolved file name, or false.
     */
    function tryPackage(requestPath, exts) {
      var pkg = readPackage(requestPath);
      var declaresMain = pkg && pkg.main;
      if (!declaresMain) {
        return false;
      }

      var mainPath = path.resolve(requestPath, pkg.main);

      var found = tryFile(mainPath);
      if (!found) {
        found = tryExtensions(mainPath, exts);
      }
      if (!found) {
        found = tryExtensions(path.resolve(mainPath, 'index'), exts);
      }
      return found;
    }
    
    // In order to minimize unnecessary lstat() calls,
    // this cache is a list of known-real paths.
    // Set to an empty object to reset.
    Module._realpathCache = {};

    /**
     * Resolves a path to its real path if it exists and is not a directory.
     *
     * @param {string} requestPath Candidate path.
     * @return The real path, or false when the path is missing or a directory.
     */
    function tryFile(requestPath) {
      var fs = NativeModule.require('fs');
      var stats = statPath(requestPath);
      if (!stats || stats.isDirectory()) {
        return false;
      }
      return fs.realpathSync(requestPath, Module._realpathCache);
    }
    
    /**
     * Given a path, checks whether a file exists with any of the given
     * extensions appended, in order.
     *
     * @param {string} p   Path without extension.
     * @param {Array} exts Extensions to append, e.g. ['.js', '.json'].
     * @return The first match as returned by tryFile(), or false.
     */
    function tryExtensions(p, exts) {
      var total = exts.length;
      var idx = 0;
      while (idx < total) {
        var candidate = tryFile(p + exts[idx]);
        if (candidate) {
          return candidate;
        }
        idx++;
      }
      return false;
    }
    
    
    /**
     * Finds the file that `request` refers to by probing each search path.
     *
     * For every path the candidates are tried in this order: the exact file,
     * the file with each registered extension, a package directory (via its
     * package.json "main"), and finally <dir>/index with each extension.
     * A request ending in '/' skips the two file candidates.
     * Hits are memoized in Module._pathCache.
     *
     * @param {string} request Module name or path.
     * @param {Array} paths    Directories to search; replaced by [''] when
     *                         `request` starts with '/'.
     * @return The resolved file name, or false when nothing matches.
     */
    Module._findPath = function(request, paths) {
      var exts = Object.keys(Module._extensions);

      if (request.charAt(0) === '/') {
        paths = [''];
      }

      var trailingSlash = (request.slice(-1) === '/');

      var cacheKey = JSON.stringify({request: request, paths: paths});
      var remembered = Module._pathCache[cacheKey];
      if (remembered) {
        return remembered;
      }

      // For each path
      for (var i = 0, PL = paths.length; i < PL; i++) {
        var basePath = path.resolve(paths[i], request);
        var filename = false;

        if (!trailingSlash) {
          // the exact file first, then the file with each extension
          filename = tryFile(basePath) || tryExtensions(basePath, exts);
        }

        // then a package directory, then "index" with each extension
        filename = filename ||
                   tryPackage(basePath, exts) ||
                   tryExtensions(path.resolve(basePath, 'index'), exts);

        if (filename) {
          Module._pathCache[cacheKey] = filename;
          return filename;
        }
      }
      return false;
    };
    
    /**
     * Lists the node_modules directories to search for a module located in
     * `from`, starting with the deepest directory and ending at the root.
     *
     * @param {string} from The __dirname of the requiring module.
     * @return {Array} Candidate "<ancestor>/node_modules" paths.
     */
    Module._nodeModulePaths = function(from) {
      // guarantee that 'from' is absolute.
      from = path.resolve(from);

      // note: this approach *only* works when the path is guaranteed
      // to be absolute.  Doing a fully-edge-case-correct path.split
      // that works on both Windows and Posix is non-trivial.
      // Windows paths may use either separator; Posix paths only use '/'.
      var splitRe = process.platform === 'win32' ? /[\/\\]/ : /\//;
      // yes, '/' works on both, but let's be a little canonical.
      var joiner = process.platform === 'win32' ? '\\' : '/';
      var paths = [];
      var parts = from.split(splitRe);

      for (var tip = parts.length - 1; tip >= 0; tip--) {
        // don't search in .../node_modules/node_modules
        if (parts[tip] === 'node_modules') continue;
        var dir = parts.slice(0, tip + 1).concat('node_modules').join(joiner);
        paths.push(dir);
      }

      return paths;
    };
    
    
    /**
     * Computes the module id and the list of directories to search for a
     * request.
     *
     * - Native modules: [request, []].
     * - Requests not starting with './' or '..': the parent's own paths
     *   followed by the global module paths.
     * - Relative requests without a usable parent (e.g. --eval): the
     *   node_modules chain of '.', then '.', then the global module paths.
     * - Relative requests with a parent: only the parent file's directory.
     *
     * @param {string} request Module name or path as given to require().
     * @param {Module} parent  The requiring module, if any.
     * @return {Array} A two-element array: [id, paths].
     */
    Module._resolveLookupPaths = function(request, parent) {
      if (NativeModule.exists(request)) {
        return [request, []];
      }

      var start = request.substring(0, 2);
      if (start !== './' && start !== '..') {
        var paths = modulePaths;
        if (parent) {
          if (!parent.paths) parent.paths = [];
          paths = parent.paths.concat(paths);
        }
        return [request, paths];
      }

      // with --eval, parent.id is not set and parent.filename is null
      if (!parent || !parent.id || !parent.filename) {
        // make require('./path/to/foo') work - normally the path is taken
        // from realpath(__filename) but with eval there is no filename
        var mainPaths = ['.'].concat(modulePaths);
        mainPaths = Module._nodeModulePaths('.').concat(mainPaths);
        return [request, mainPaths];
      }

      // Is the parent an index module?
      // We can assume the parent has a valid extension,
      // as it already has been accepted as a module.
      // The dot and the word class must be escaped: the pattern has to match
      // names such as "index.js", i.e. "index" + "." + extension.
      var isIndex = /^index\.\w+?$/.test(path.basename(parent.filename));
      var parentIdPath = isIndex ? parent.id : path.dirname(parent.id);
      var id = path.resolve(parentIdPath, request);

      // make sure require('./path') and require('path') get distinct ids, even
      // when called from the toplevel js file
      if (parentIdPath === '.' && id.indexOf('/') === -1) {
        id = './' + id;
      }

      debug('RELATIVE: requested:' + request +
            ' set ID to: ' + id + ' from ' + parent.id);

      return [id, [path.dirname(parent.filename)]];
    };
    
    
    /**
     * Loads a module and returns its exports.
     *
     * Order of checks: the module cache, then native modules, and finally a
     * fresh Module that is created, cached and loaded from disk.
     *
     * @param {string} request Module name or path as given to require().
     * @param {Module} parent  The requiring module (used for resolution).
     * @param {boolean} isMain True when loading the program's entry module.
     * @return The module's exports.
     */
    Module._load = function(request, parent, isMain) {
      if (parent) {
        debug('Module._load REQUEST  ' + (request) + ' parent: ' + parent.id);
      }

      // The resolved filename doubles as the cache key.
      var filename = Module._resolveFilename(request, parent);

      var hit = Module._cache[filename];
      if (hit) {
        return hit.exports;
      }

      if (NativeModule.exists(filename)) {
        if (filename != 'repl') {
          debug('load native module ' + request);
          return NativeModule.require(filename);
        }

        // REPL is a special case, because it needs the real require.
        var replModule = new Module('repl');
        replModule._compile(NativeModule.getSource('repl'), 'repl.js');
        NativeModule._cache.repl = replModule;
        return replModule.exports;
      }

      var module = new Module(filename, parent);

      if (isMain) {
        process.mainModule = module;
        module.id = '.';
      }

      // Cache before loading so that a require of this file made while it is
      // still loading is served from the cache.
      Module._cache[filename] = module;

      var loadedCleanly = false;

      // try/finally with a flag: on failure the cache entry is dropped and the
      // exception continues to propagate untouched.
      try {
        module.load(filename);
        loadedCleanly = true;
      } finally {
        if (!loadedCleanly) {
          delete Module._cache[filename];
        }
      }

      return module.exports;
    };
    
    /**
     * Resolves a request to the file name that will be loaded.
     *
     * @param {string} request Module name or path as given to require().
     * @param {Module} parent  The requiring module, if any.
     * @return {string} `request` itself for native modules, otherwise the
     *                  file name found by Module._findPath.
     * @throws {Error} With code 'MODULE_NOT_FOUND' when nothing is found.
     */
    Module._resolveFilename = function(request, parent) {
      if (NativeModule.exists(request)) {
        return request;
      }

      var lookup = Module._resolveLookupPaths(request, parent);
      var id = lookup[0];
      var paths = lookup[1];

      // look up the filename first, since that's the cache key.
      debug('looking for ' + JSON.stringify(id) +
            ' in ' + JSON.stringify(paths));

      var filename = Module._findPath(request, paths);
      if (filename) {
        return filename;
      }

      var err = new Error("Cannot find module '" + request + "'");
      err.code = 'MODULE_NOT_FOUND';
      throw err;
    };
    
    
    /**
     * Loads `filename` into this module using the loader registered for its
     * extension. Unknown or missing extensions fall back to the '.js' loader.
     *
     * @param {string} filename Resolved file name.
     */
    Module.prototype.load = function(filename) {
      debug('load ' + JSON.stringify(filename) +
            ' for module ' + JSON.stringify(this.id));

      // A module may only be loaded once.
      assert(!this.loaded);
      this.filename = filename;
      this.paths = Module._nodeModulePaths(path.dirname(filename));

      var ext = path.extname(filename);
      if (!ext || !Module._extensions[ext]) {
        ext = '.js';
      }
      // Invoked as a member of Module._extensions on purpose, so the loader
      // keeps the same `this` it has always received.
      Module._extensions[ext](this, filename);
      this.loaded = true;
    };
    
    
    /**
     * Loads another module on behalf of this one.
     *
     * @param {string} path Module name or path; must be a non-empty string.
     * @return The exports of the requested module.
     */
    Module.prototype.require = function(path) {
      var isString = typeof path === 'string';
      assert(isString, 'path must be a string');
      assert(path, 'missing path');
      // `this` is passed as the parent so resolution is relative to it.
      return Module._load(path, this);
    };
    
    
    // Resolved path to process.argv[1] will be lazily placed here
    // (needed for setting breakpoint when called with --debug-brk)
    var resolvedArgv;
    
    
    /**
     * Runs a module's source text.
     *
     * Builds the module-specific `require` function, then executes `content`
     * either inside a wrapper function (the normal case) or, when
     * Module._contextLoad is set, directly in a context.
     *
     * @param {string} content  Module source code.
     * @param {string} filename File name passed to the script runner and used
     *                          to derive __dirname.
     * @return Whatever executing the module code returns.
     */
    Module.prototype._compile = function(content, filename) {
      var self = this;
      // remove shebang
      content = content.replace(/^#!.*/, '');
    
      // The `require` seen by the module: a closure over this Module instance,
      // so lookups go through self.require().
      function require(path) {
        return self.require(path);
      }
    
      require.resolve = function(request) {
        return Module._resolveFilename(request, self);
      };
    
      // Reading require.paths throws instead of returning a value.
      Object.defineProperty(require, 'paths', { get: function() {
        throw new Error('require.paths is removed. Use ' +
                        'node_modules folders, or the NODE_PATH ' +
                        'environment variable instead.');
      }});
    
      require.main = process.mainModule;
    
      // Enable support to add extra extension types
      require.extensions = Module._extensions;
      require.registerExtension = function() {
        throw new Error('require.registerExtension() removed. Use ' +
                        'require.extensions instead.');
      };
    
      // Same object as Module._cache, not a copy.
      require.cache = Module._cache;
    
      var dirname = path.dirname(filename);
    
      // Context mode: the source is run unwrapped, so the module variables are
      // supplied as properties of the context's global object instead of as
      // function arguments.
      if (Module._contextLoad) {
        if (self.id !== '.') {
          debug('load submodule');
          // not root module
          // Shallow-copy the enumerable properties of `global` into a sandbox
          // (for...in, so inherited enumerable properties are copied too).
          var sandbox = {};
          for (var k in global) {
            sandbox[k] = global[k];
          }
          sandbox.require = require;
          sandbox.exports = self.exports;
          sandbox.__filename = filename;
          sandbox.__dirname = dirname;
          sandbox.module = self;
          sandbox.global = sandbox;
          // NOTE(review): `root` is not declared in this function; presumably
          // it is a global alias of `global` - confirm.
          sandbox.root = root;
    
          return runInNewContext(content, sandbox, filename, true);
        }
    
        debug('load root module');
        // root module
        // The root module runs in the current context, so its module variables
        // are written onto `global` itself.
        global.require = require;
        global.exports = self.exports;
        global.__filename = filename;
        global.__dirname = dirname;
        global.module = self;
    
        return runInThisContext(content, filename, true);
      }
    
      // create wrapper function
      var wrapper = Module.wrap(content);
    
      // `wrapper` is source text; running it yields the wrapper function.
      var compiledWrapper = runInThisContext(wrapper, filename, true);
      if (global.v8debug) {
        // Resolve the entry script once and remember it in resolvedArgv.
        if (!resolvedArgv) {
          // we enter the repl if we're not given a filename argument.
          if (process.argv[1]) {
            resolvedArgv = Module._resolveFilename(process.argv[1], null);
          } else {
            resolvedArgv = 'repl';
          }
        }
    
        // Set breakpoint on module start
        if (filename === resolvedArgv) {
          global.v8debug.Debug.setBreakPoint(compiledWrapper, 0, 0);
        }
      }
      // Argument order matches the wrapper's parameter list:
      // (exports, require, module, __filename, __dirname).
      var args = [self.exports, require, self, filename, dirname];
      // `this` inside the module body is self.exports.
      return compiledWrapper.apply(self.exports, args);
    };
    
    
    /**
     * Removes a leading byte order marker from a string.
     *
     * This catches EF BB BF (the UTF-8 BOM) because the buffer-to-string
     * conversion in `fs.readFileSync()` translates it to FEFF, the UTF-16 BOM.
     *
     * @param {string} content Text that may start with U+FEFF.
     * @return {string} `content` without the leading BOM.
     */
    function stripBOM(content) {
      var startsWithBOM = content.charCodeAt(0) === 0xFEFF;
      return startsWithBOM ? content.slice(1) : content;
    }
    
    
    // Native extension for .js: read the file as UTF-8, drop a leading BOM,
    // and hand the source to the module's _compile().
    Module._extensions['.js'] = function(module, filename) {
      var fs = NativeModule.require('fs');
      var source = fs.readFileSync(filename, 'utf8');
      module._compile(stripBOM(source), filename);
    };
    
    
    // Native extension for .json: the parsed file becomes module.exports.
    // Errors raised while parsing get the file name prepended to their message
    // before being rethrown.
    Module._extensions['.json'] = function(module, filename) {
      var fs = NativeModule.require('fs');
      var text = fs.readFileSync(filename, 'utf8');
      try {
        var data = JSON.parse(stripBOM(text));
        module.exports = data;
      } catch (err) {
        err.message = filename + ': ' + err.message;
        throw err;
      }
    };
    
    
    // Native extension for .node: process.dlopen itself is registered as the
    // loader, so it is called with (module, filename) like the other loaders.
    Module._extensions['.node'] = process.dlopen;
    
    
    /**
     * Bootstraps the main module: loads the script named by the first
     * command line argument as the main module.
     */
    Module.runMain = function() {
      // Load the main module--the command line argument.
      var entryScript = process.argv[1];
      Module._load(entryScript, null, true);
      // Handle any nextTicks added in the first tick of the program
      process._tickCallback();
    };
    
    /**
     * Computes the global module search paths and stores them in
     * `modulePaths`, with a copy exposed as Module.globalPaths.
     *
     * Resulting order: NODE_PATH entries, <home>/.node_modules,
     * <home>/.node_libraries, then <execPath>/../../lib/node.
     */
    Module._initPaths = function() {
      var isWindows = process.platform === 'win32';
      var homeDir = isWindows ? process.env.USERPROFILE : process.env.HOME;

      var paths = [path.resolve(process.execPath, '..', '..', 'lib', 'node')];

      if (homeDir) {
        paths.unshift(path.resolve(homeDir, '.node_modules'),
                      path.resolve(homeDir, '.node_libraries'));
      }

      var nodePath = process.env['NODE_PATH'];
      if (nodePath) {
        // NODE_PATH uses the platform's list separator.
        paths = nodePath.split(isWindows ? ';' : ':').concat(paths);
      }

      modulePaths = paths;

      // clone as a read-only copy, for introspection.
      Module.globalPaths = modulePaths.slice(0);
    };
    
    // bootstrap repl: load the 'repl' module, passing '.' as the parent
    // argument, and return its exports.
    Module.requireRepl = function() {
      var replExports = Module._load('repl', '.');
      return replExports;
    };
    
    // Compute the global search paths once, when this file is first evaluated.
    Module._initPaths();
    
    // backwards compatibility
    // (the constructor is also reachable as a property of itself)
    Module.Module = Module;
    
    });


    References:

  • 相关阅读:
    .net大文件上传
    java文件上传和下载
    文件上传系统
    plupload上传大文件
    代码坏味道之夸夸其谈的未来性
    freemarker中的substring取子串
    【翻译自mos文章】在Oracle GoldenGate中循环使用ggserr.log的方法
    3种浏览器性能測试
    SDUTOJ 2772 KMP简单应用
    C++库研究笔记--用__attribute__((deprecated)) 管理过时代码
  • 原文地址:https://www.cnblogs.com/puncha/p/3876883.html
Copyright © 2011-2022 走看看