lua 的 __gc元方法,被称为 finalizer 。这里根据其特点结合源码进行分析:
1. 设置 __gc 元方法
要想让对象的 __gc 元方法生效,必须在为对象设置元表时,元表中已经存在 __gc 方法(即调用 setmetatable 时,元表已经存在 __gc 方法)。此后即使再向元表添加 __gc 元方法,也不会生效。见下面的代码:
mt = {}
obj = setmetatable({}, mt)
mt.__gc = function() print "do gc" end
-- __gc was added to mt only AFTER setmetatable(obj, mt) was called, so it is
-- NOT run before obj's memory is released, even though inspecting obj's
-- metatable shows a __gc field.

setmetatable(obj, mt)
-- setmetatable is now called while mt already contains __gc, so obj's
-- finalizer takes effect and will be called before obj's memory is released.
原因在于只有 lua_setmetatable 才会调用 luaC_checkfinalizer 函数。luaC_checkfinalizer 函数检查元表中是否有 __gc 方法,如果有,则将对象从 allgc 链移到 finobj 链中(lua 5.2 中 Table 和 Userdata 类型均支持 __gc 元方法,但 lua 5.1 仅 Userdata 类型支持)。其实现如下:
/*
** Pops the value on top of the stack and installs it as the metatable of the
** value at 'objindex'. A nil on top of the stack clears the metatable
** (mt == NULL); anything else must be a table (checked with api_check).
** Always returns 1.
*/
LUA_API int lua_setmetatable (lua_State *L, int objindex) {
  TValue *obj;
  Table *mt;
  lua_lock(L);
  api_checknelems(L, 1);
  obj = index2addr(L, objindex);
  api_checkvalidindex(L, obj);
  if (ttisnil(L->top - 1))
    mt = NULL;
  else {
    api_check(L, ttistable(L->top - 1), "table expected");
    mt = hvalue(L->top - 1);
  }
  switch (ttypenv(obj)) {
    case LUA_TTABLE: {
      hvalue(obj)->metatable = mt;
      if (mt)
        luaC_objbarrierback(L, gcvalue(obj), mt);
      /* called even when mt is NULL; this is the only point where the
         table is examined for a __gc finalizer */
      luaC_checkfinalizer(L, gcvalue(obj), mt);
      break;
    }
    case LUA_TUSERDATA: {
      uvalue(obj)->metatable = mt;
      if (mt) {
        luaC_objbarrier(L, rawuvalue(obj), mt);
        /* for userdata the finalizer check runs only with a non-NULL mt */
        luaC_checkfinalizer(L, gcvalue(obj), mt);
      }
      break;
    }
    default: {
      /* other types have no per-object metatable: store it in the
         per-type slot of the global state; no finalizer check here */
      G(L)->mt[ttypenv(obj)] = mt;
      break;
    }
  }
  L->top--;  /* pop the metatable argument */
  lua_unlock(L);
  return 1;
}

/*
** if object 'o' has a finalizer, remove it from 'allgc' list (must
** search the list to find it) and link it in 'finobj' list.
**
** Does nothing when 'o' already has the SEPARATED bit set, is already
** finalized, or when 'mt' yields no TM_GC entry at the time of the call.
*/
void luaC_checkfinalizer (lua_State *L, GCObject *o, Table *mt) {
  global_State *g = G(L);
  if (testbit(gch(o)->marked, SEPARATED) ||  /* obj. is already separated... */
      isfinalized(o) ||                      /* ... or is finalized... */
      gfasttm(g, mt, TM_GC) == NULL)         /* or has no finalizer? */
    return;  /* nothing to be done */
  else {  /* move 'o' to 'finobj' list */
    GCObject **p;
    GCheader *ho = gch(o);
    if (g->sweepgc == &ho->next) {  /* avoid removing current sweep object */
      lua_assert(issweepphase(g));
      g->sweepgc = sweeptolive(L, g->sweepgc, NULL);
    }
    /* search for pointer pointing to 'o' */
    /* (walks 'allgc' from its head until the link that points at 'o') */
    for (p = &g->allgc; *p != o; p = &gch(*p)->next) { /* empty */ }
    *p = ho->next;  /* remove 'o' from root list */
    ho->next = g->finobj;  /* link it in list 'finobj' */
    g->finobj = o;
    l_setbit(ho->marked, SEPARATED);  /* mark it as such */
    if (!keepinvariant(g))  /* not keeping invariant? */
      makewhite(g, o);  /* "sweep" object */
    else
      resetoldbit(o);  /* see MOVE OLD rule */
  }
}
也就是说你单独对 metatable 的修改并不会触发 luaC_checkfinalizer 函数的对象分类操作。
为什么会这样?原因很简单,性能。表的操作太频繁了,所以不能在对表的赋值操作中插入 luaC_checkfinalizer 函数。
2. __gc 元方法执行
__gc 元方法执行的时候,弱引用特点:
弱键强值表可访问(以对象自身为键)
弱值表不可访问(以对象自身为值)
见下面代码:
-- Finalizer: reports whether the dying object can still be found in each of
-- the three weak tables defined below.
mt = {__gc = function(self)
    print("self as key:", tk[self])
    print("self as value:", tv[self.i])
    print("self as key and value:", tkv[self])
  end
}

tk  = setmetatable({}, {__mode='k'})   -- weak keys, strong values
tv  = setmetatable({}, {__mode='v'})   -- strong keys, weak values
tkv = setmetatable({}, {__mode='kv'})  -- weak keys and weak values

obj = setmetatable({}, mt)
obj.i = 1

tk[obj] = 'obj value'  -- obj used as a key
tv[obj.i] = obj        -- obj used as a value
tkv[obj] = obj         -- obj used as both key and value

collectgarbage()       -- obj is still referenced by the global 'obj'

print '----'

obj = nil              -- drop the last strong reference

collectgarbage()       -- obj is now collectable, so its __gc is called
print '----'
输出如下:
---- self as key: obj value self as value: nil self as key and value: nil ----
从上面的输出可以看到,__gc 元方法调用时,仅仅 tk 表中以自身为键的值还可以访问。为什么仅仅是键可访问,而不是值?主要是基于这点:通常我们用表以该对象为弱键,来保存该对象的备注信息。可参考 PIL 中的 17.2 关联对象属性 和 lua 邮件组中的讨论。
筛选可释放对象,实现细节(有删减):
/*
** Abridged excerpt of the collector's atomic step, reduced to the calls that
** touch weak tables and objects with finalizers. The order of the three
** phases below is what decides which weak-table entries a __gc metamethod
** can still see. 'work' is declared and accumulated in the part of the
** function that this excerpt omits.
*/
static l_mem atomic (lua_State *L) {
  global_State *g = G(L);

  /* Phase 1: clear unreachable VALUES from weak tables
     (the weak-value list 'weak' and the weak-key/weak-value list 'allweak') */
  /* at this point, all strongly accessible objects are marked. */
  /* clear values from weak tables, before checking finalizers */
  clearvalues(g, g->weak, NULL);
  clearvalues(g, g->allweak, NULL);

  /* Phase 2a: move unreachable objects from list 'finobj' to list 'tobefnz' */
  separatetobefnz(L, 0);  /* separate objects to be finalized */

  /* Phase 2b: mark every object in 'tobefnz', and everything reachable from
     it, as reachable again */
  markbeingfnz(g);  /* mark objects that will be finalized */
  propagateall(g);  /* remark, to propagate `preserveness' */

  /* Phase 3: clear unreachable KEYS from weak tables
     (the weak-key list 'ephemeron' and the weak-key/weak-value list 'allweak') */
  /* at this point, all resurrected objects are marked. */
  /* remove dead objects from weak tables */
  clearkeys(g, g->ephemeron, NULL);  /* clear keys from all ephemeron tables */
  clearkeys(g, g->allweak, NULL);  /* clear keys from all allweak tables */
  return work;  /* estimate of memory marked by 'atomic' */
}
可以看到,先清理弱表中的值,再复活待执行 __gc 的对象,这时才开始清理弱表中的键。因此在 __gc 元方法执行时,以对象自身为弱键的条目仍然可以访问,而以对象自身为弱值的条目已经被清除。
g->allweak、g->weak、g->ephemeron 这三个链表存放的是弱表,下篇文章会详细介绍弱表
执行 __gc 元方法,实现细节:
/* forward declaration: this excerpt lists GCTM after its caller */
static void GCTM (lua_State *L, int propagateerrors);

/*
** performs a basic GC step
*/
void luaC_forcestep (lua_State *L) {
  global_State *g = G(L);
  int i;
  if (isgenerational(g)) generationalcollection(L);
  else incstep(L);
  /* At most GCFINALIZENUM __gc metamethods are run per step, unless the
     collector state is GCSpause, in which case the loop keeps going until
     list 'tobefnz' is empty. */
  /* run a few finalizers (or all of them at the end of a collect cycle) */
  for (i = 0; g->tobefnz && (i < GCFINALIZENUM || g->gcstate == GCSpause); i++)
    GCTM(L, 1);  /* call one finalizer */
}

/*
** Calls the __gc metamethod of one pending object, if it has one.
** When 'propagateerrors' is non-zero, an error raised by the finalizer is
** re-thrown; a LUA_ERRRUN status is first rewrapped as LUA_ERRGCMM with the
** message "error in __gc metamethod (...)". When it is zero, a failing
** finalizer is silently ignored.
*/
static void GCTM (lua_State *L, int propagateerrors) {
  global_State *g = G(L);
  const TValue *tm;
  TValue v;
  /* udata2finalize (not shown in this excerpt) supplies the next object to
     finalize; per the accompanying article it moves that object from list
     'tobefnz' back to list 'allgc' */
  setgcovalue(L, &v, udata2finalize(g));
  tm = luaT_gettmbyobj(L, &v, TM_GC);
  if (tm != NULL && ttisfunction(tm)) {  /* is there a finalizer? */
    int status;
    lu_byte oldah = L->allowhook;
    int running  = g->gcrunning;
    L->allowhook = 0;  /* stop debug hooks during GC metamethod */
    g->gcrunning = 0;  /* avoid GC steps */
    setobj2s(L, L->top, tm);  /* push finalizer... */
    setobj2s(L, L->top + 1, &v);  /* ... and its argument */
    L->top += 2;  /* and (next line) call the finalizer */
    /* protected call: this is where the __gc metamethod actually runs */
    status = luaD_pcall(L, dothecall, NULL, savestack(L, L->top - 2), 0);
    L->allowhook = oldah;  /* restore hooks */
    g->gcrunning = running;  /* restore state */
    if (status != LUA_OK && propagateerrors) {  /* error while running __gc? */
      if (status == LUA_ERRRUN) {  /* is there an error object? */
        const char *msg = (ttisstring(L->top - 1))
                            ? svalue(L->top - 1)
                            : "no message";
        luaO_pushfstring(L, "error in __gc metamethod (%s)", msg);
        status = LUA_ERRGCMM;  /* error in __gc metamethod */
      }
      luaD_throw(L, status);  /* re-throw error */
    }
  }
}
GCTM 函数会从 tobefnz 链中移出一个对象,并放回 allgc 链,然后执行该对象的 __gc 元方法。
最后总结下对象的流向:
           lua_setmetatable                   separatetobefnz                       GCTM
allgc --------------------------> finobj ------------------------> tobefnz ---------------------> allgc