zoukankan      html  css  js  c++  java
  • Lua GC 之 Ephemeron

    lua 在 5.2 中对弱键表(table with weak keys)引入了 ephemeron 机制,来解决弱表的循环引用问题。我们先来解释下几个基本概念:

    弱引用(weak reference):可以访问对象,但不会阻止对象被收集。

    弱表(weak table):键或(和)值是弱引用。

    ephemeron:指的是键值对(pair),键是弱引用,但键对值的 mark 有如下影响。如果键可达(reachable),则 mark 其值;如果键不可达,则不必 mark 其值。

    先来看看一个循环引用的例子:

    et = {}
    mt = {}
    setmetatable(et, mt)
    -- sets the table to be an ephemerons table,
    -- in the new implementation.
    mt.__mode = "k"
    a = {}
    b = {}
    et[a] = b
    et[b] = a
    a = nil
    b = nil
    
    collectgarbage()
    
    count = 0
    for k,v in pairs(et) do
        count = count + 1
    end
    print(count) -- 0
    在5.1下运行输出: 2
    在5.2下运行输出: 0

    也就是 5.1 下表et的2组键值对永远不会被收集了。

    一、Lua 5.1 实现

    先来简单看看5.1的实现:

    static int traversetable (global_State *g, Table *h) {
      int i;
      int weakkey = 0;
      int weakvalue = 0;
      const TValue *mode;
      if (h->metatable)
        markobject(g, h->metatable);
      mode = gfasttm(g, h->metatable, TM_MODE);
      if (mode && ttisstring(mode)) {  /* is there a weak mode? */
        weakkey = (strchr(svalue(mode), 'k') != NULL);
        weakvalue = (strchr(svalue(mode), 'v') != NULL);
        if (weakkey || weakvalue) {  /* is really weak? */
          h->marked &= ~(KEYWEAK | VALUEWEAK);  /* clear bits */
          h->marked |= cast_byte((weakkey << KEYWEAKBIT) |
                                 (weakvalue << VALUEWEAKBIT));
          // 将弱表挂入weak链表,等待 atomic 阶段重新扫描
          h->gclist = g->weak;  /* must be cleared after GC, ... */
          g->weak = obj2gco(h);  /* ... so put in the appropriate list */
        }
      }
      if (weakkey && weakvalue) return 1; // 弱键弱值表,无需 mark
      if (!weakvalue) { // mark 弱键强值表的数组部分
        i = h->sizearray;
        while (i--)
          markvalue(g, &h->array[i]);
      }
      i = sizenode(h);
      while (i--) {
        Node *n = gnode(h, i);
        lua_assert(ttype(gkey(n)) != LUA_TDEADKEY || ttisnil(gval(n)));
        if (ttisnil(gval(n)))
          removeentry(n);  /* remove empty entries */ // 移除空键值对
        else {
          lua_assert(!ttisnil(gkey(n)));
          if (!weakkey) markvalue(g, gkey(n)); // 强键直接mark
          if (!weakvalue) markvalue(g, gval(n)); // 强值直接mark
        }
      }
      return weakkey || weakvalue;
    }
    static void atomic (lua_State *L) {
      global_State *g = G(L);
    
      propagateall(g);
      /* remark weak tables */ // weak 链 放入 gray 链,为 mark 弱表作准备
      g->gray = g->weak;
      g->weak = NULL;
    
      markobject(g, L);  /* mark running thread */
      markmt(g);  /* mark basic metatables (again) */
      propagateall(g); // 重新 mark 弱表
    
      cleartable(g->weak);  /* remove collected objects from weak tables */
    }

    扫描(mark) 弱表时很简单,将弱表放入 weak 链(因为 atomic 阶段,才可以真正断定一个弱引用是否还有效。即扫描阶段的 unreachable 不是真正的unreachable,扫描完成后的 unreachable 才是真正的 unreachable),然后 mark 掉强引用的键或值。

    二、Lua 5.2 实现

    相比之下 5.2 的实现就复杂不少。weak 链被分成了3个单独的链 allweak,ephemeron,weak。但和 5.1 一样依然是分为扫描阶段(propagate)和 atomic 阶段

    1. 扫描阶段

    扫描弱键表的时候,有条件的去 mark 它的值——如果键可达,则 mark 其值,否则不 mark 其值。

    static int traverseephemeron (global_State *g, Table *h) {
      int marked = 0;  /* true if an object is marked in this traversal */
      int hasclears = 0;  /* true if table has white keys */
      int prop = 0;  /* true if table has entry "white-key -> white-value" */
      Node *n, *limit = gnodelast(h);
      int i;
      /* traverse array part (numeric keys are 'strong') */
      for (i = 0; i < h->sizearray; i++) { // mark 数组部分(弱键强值表)
        if (valiswhite(&h->array[i])) {
          marked = 1;
          reallymarkobject(g, gcvalue(&h->array[i]));
        }
      }
      /* traverse hash part */
      for (n = gnode(h, 0); n < limit; n++) {
        checkdeadkey(n);
        if (ttisnil(gval(n)))  /* entry is empty? */
          removeentry(n);  /* remove it */
        else if (iscleared(g, gkey(n))) {  /* key is not marked (yet)? */
          hasclears = 1;  /* table must be cleared */
          if (valiswhite(gval(n)))  /* value not marked yet? */
            prop = 1;  /* must propagate again */ // 键无效,值同时也没mark,则放入ephemeron链(ephemeron链用途:如果键在后面的 atomic 阶段发现是有效的,则需 mark 其值)
        }
        else if (valiswhite(gval(n))) {  /* value not marked yet? */ // 键有效,但值还没mark,则mark值(原则:键有效则值也有效)
          marked = 1;
          reallymarkobject(g, gcvalue(gval(n)));  /* mark it now */
        }
      }
      if (prop)
        linktable(h, &g->ephemeron);  /* have to propagate again */ // 键值均不可达,则需要重新扫描
      else if (hasclears)  /* does table have white keys? */
        linktable(h, &g->allweak);  /* may have to clean white keys */ // 键不可达,值可达,后期需要清理掉不可达的键
      else  /* no white keys */
        linktable(h, &g->grayagain);  /* no need to clean */
      return marked;
    }

    2. atomic阶段

    ephemeron 链上的表的值是可以被连锁复活的,例如:有键值对 a->b 和 b->c,如果 a 在后来确认是可达的,则 b 应该被 mark,而 b 的mark,又会导致 c 需要被 mark ,这就需要反复遍历 ephemeron 链作值的 mark 操作,直到ephemeron 链上的无对象可 mark 为止。见  convergeephemerons 函数。

    static l_mem atomic (lua_State *L) {
      global_State *g = G(L);
    
      /* traverse objects caught by write barrier and by 'remarkupvals' */
      retraversegrays(g);
      work -= g->GCmemtrav;  /* restart counting */
      convergeephemerons(g); // 键是否可达已最终确定,mark 掉其可达键所关联的值
    
      /* at this point, all strongly accessible objects are marked. */
      /* clear values from weak tables, before checking finalizers */
      clearvalues(g, g->weak, NULL);
      clearvalues(g, g->allweak, NULL);
      origweak = g->weak; origall = g->allweak;
      work += g->GCmemtrav;  /* stop counting (objects being finalized) */
    
      separatetobefnz(L, 0);  /* separate objects to be finalized */
      markbeingfnz(g);  /* mark objects that will be finalized */ // 复活需 finalizer 的对象
      propagateall(g);  /* remark, to propagate `preserveness' */
    
      convergeephemerons(g); // 复活需 finalizer 的对象及这些对象所关联的对象后,重新mark ephemeron
      /* at this point, all resurrected objects are marked. */
      /* remove dead objects from weak tables */
      clearkeys(g, g->ephemeron, NULL);  /* clear keys from all ephemeron tables */
      clearkeys(g, g->allweak, NULL);  /* clear keys from all allweak tables */
      /* clear values from resurrected weak tables */
      clearvalues(g, g->weak, origweak);
      clearvalues(g, g->allweak, origall);
      g->currentwhite = cast_byte(otherwhite(g));  /* flip current white */
      work += g->GCmemtrav;  /* complete counting */
      return work;  /* estimate of memory marked by 'atomic' */
    }
    
    
    static void convergeephemerons (global_State *g) {
      int changed; // 关键值,指示对 g->ephemeron 的一次遍历中是否有对象被重新mark
      do {
        GCObject *w;
        GCObject *next = g->ephemeron;  /* get ephemeron list */
        g->ephemeron = NULL;  /* tables will return to this list when traversed */
        changed = 0;
        while ((w = next) != NULL) {
          next = gco2t(w)->gclist;
          if (traverseephemeron(g, gco2t(w))) {  /* traverse marked some value? */
            propagateall(g);  /* propagate changes */
            changed = 1;  /* will have to revisit all ephemeron tables */ 
          }
        }
      } while (changed); // 无对象可 mark 时,退出
    }

     参考资料:

    Alexandra Barros 和 Roberto Ierusalimschy 编写的 Eliminating Cycles inWeak Tables

  • 相关阅读:
    AppCrawler自动化遍历使用详解(版本2.1.0 )
    python 接口测试1 --如何创建和打印日志文件
    通过Xshell登录远程服务器实时查看log日志
    java基础知识6-- 抽象类,抽象方法,接口,构造方法,类方法等易混淆的知识点
    java基础知识5--集合类(Set,List,Map)和迭代器Iterator的使用
    java基础知识4--数组的常用方法(Array)
    java基础知识3--如何获取资源文件(Java中获取资源文件的url)
    java基础知识2--String,StringBufffer,StringBuilder的区别
    java基础知识1--String常用方法总结
    javascript正则表达式验证密码(必须含数字字符特殊符号,长度4-16位之间)
  • 原文地址:https://www.cnblogs.com/JesseFang/p/2851552.html
Copyright © 2011-2022 走看看