The common entry point for the different slab allocation interfaces is __cache_alloc():
- First should_failslab() is consulted; it is the slab fault-injection hook and decides whether this allocation should be failed artificially (allocations marked __GFP_NOFAIL are never failed this way);
- __do_cache_alloc(cachep, flags, &this_cpu) is called to do the actual allocation;
- cache_alloc_debugcheck_after(cachep, flags, objp, caller) performs the post-allocation debug checks;
- prefetchw(objp) prefetches the object's first cache line for writing, so the caller's first store to the object is cheaper;
- if __GFP_ZERO was requested, the object is zeroed with memset();
- return objp.
The code is as follows:
static __always_inline void * __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
    unsigned long save_flags;
    int this_cpu;
    void *objp;

    if (should_failslab(cachep, flags))
        return NULL;

    cache_alloc_debugcheck_before(cachep, flags);

    slab_irq_save(save_flags, this_cpu);
    objp = __do_cache_alloc(cachep, flags, &this_cpu);
    slab_irq_restore(save_flags, this_cpu);

    objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
    prefetchw(objp);

    if (unlikely((flags & __GFP_ZERO) && objp))
        memset(objp, 0, obj_size(cachep));

    return objp;
}
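For reference, this entry point is normally reached through the public kmem_cache API: kmem_cache_alloc() is a thin wrapper that ends up in __cache_alloc(). Below is a minimal usage sketch; the struct and cache names are invented for illustration, and the exact signature of kmem_cache_create() has varied slightly between kernel versions:

#include <linux/init.h>
#include <linux/slab.h>

struct my_obj {
    int id;
    char name[32];
};

static struct kmem_cache *my_cachep;

static int __init my_cache_init(void)
{
    /* create a cache of my_obj objects, default alignment, no constructor */
    my_cachep = kmem_cache_create("my_obj_cache", sizeof(struct my_obj), 0, 0, NULL);
    if (!my_cachep)
        return -ENOMEM;
    return 0;
}

static void my_cache_use(void)
{
    /* this call goes through __cache_alloc() internally */
    struct my_obj *obj = kmem_cache_alloc(my_cachep, GFP_KERNEL);

    if (!obj)
        return;
    obj->id = 1;
    kmem_cache_free(my_cachep, obj);
}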
The most important part of this path is clearly __do_cache_alloc(cachep, flags, &this_cpu):
- If the current task has PF_SPREAD_SLAB or PF_MEMPOLICY set, alternate_node_alloc() is tried first, so the object may deliberately come from another NUMA node according to the task's memory policy;
- ____cache_alloc(cache, flags, this_cpu) then tries to take an object from the per-CPU array cache, refilling the cache if it is empty;
- If that still fails, ____cache_alloc_node(cache, flags, cpu_to_node(*this_cpu), this_cpu) falls back to allocating straight from the node's slab lists (slabs_partial, slabs_free and slabs_full);
- return objp.
Here is the code; the two main helpers are covered in detail below:
static __always_inline void * __do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu)
{
    void *objp;

    if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
        objp = alternate_node_alloc(cache, flags, this_cpu);
        if (objp)
            goto out;
    }
    objp = ____cache_alloc(cache, flags, this_cpu);
    if (!objp)
        objp = ____cache_alloc_node(cache, flags, cpu_to_node(*this_cpu), this_cpu);
out:
    return objp;
}
____cache_alloc(cache, flags, this_cpu) takes an object from the per-CPU cache. The process is quite simple: pop an object from the corresponding array_cache:
- Call cpu_cache_get(cachep, *this_cpu) to get this CPU's array_cache;
- If there are objects left, objp = ac->entry[--ac->avail];
- Otherwise call cache_alloc_refill(cachep, flags, this_cpu) to refill the array;
- return objp.
The code is as follows:
static inline void * ____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu)
{
    void *objp;
    struct array_cache *ac;

    check_irq_off();
    ac = cpu_cache_get(cachep, *this_cpu);
    if (likely(ac->avail)) {
        STATS_INC_ALLOCHIT(cachep);
        ac->touched = 1;
        objp = ac->entry[--ac->avail];
    } else {
        STATS_INC_ALLOCMISS(cachep);
        objp = cache_alloc_refill(cachep, flags, this_cpu);
    }
    return objp;
}
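To make ac->entry[--ac->avail] concrete, here is a simplified view of struct array_cache as it looks in the 2.6-era slab implementation; the field set is trimmed and may differ slightly in this (-rt patched) version:

struct array_cache {
    unsigned int avail;      /* number of object pointers currently cached */
    unsigned int limit;      /* maximum number of cached object pointers */
    unsigned int batchcount; /* how many objects to refill or flush at once */
    unsigned int touched;    /* set when the cache was used recently */
    void *entry[];           /* the cached pointers, used as a LIFO stack */
};

Allocation simply pops the last pointer (entry[--avail]) and freeing pushes it back, so the most recently used objects stay at the top of the stack.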
The key piece in the code above is the refill path, cache_alloc_refill(cachep, flags, this_cpu). Its steps are:
- The per-CPU array is protected simply by having interrupts disabled (the caller did slab_irq_save()); check_irq_off() verifies this, while the per-node lists are protected by l3->list_lock;
- cpu_cache_get(cachep, *this_cpu) gets this CPU's array_cache;
- node = numa_node_id() gets the current node;
- cachep->nodelists[cpu_to_node(*this_cpu)] gets the node's kmem_list3;
- First try to transfer some objects over from the node's shared array;
- Then loop to refill up to batchcount objects;
- Each iteration looks at slabs_partial first and then slabs_free for a slab with free objects; if neither list has one, cache_grow() has to be called, which is discussed in detail below;
- ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, cpu_to_node(*this_cpu)) pulls objects out of the chosen slab into the array cache;
- The slab is then moved to slabs_full or slabs_partial depending on whether it still has free objects;
- If the array is still empty at the end, cache_grow() is tried; should cache_grow() fail with the array still empty, return NULL; if it succeeded but the array is still empty (objects may also have been refilled by an interrupt in the meantime), goto retry;
- return ac->entry[--ac->avail] to hand one object back.
The process is fairly straightforward; the code is as follows:
static void * cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu)
{
    int batchcount;
    struct kmem_list3 *l3;
    struct array_cache *ac;
    int node;

retry:
    check_irq_off();
    node = numa_node_id();
    ac = cpu_cache_get(cachep, *this_cpu);
    batchcount = ac->batchcount;
    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
        /* the array has not been used recently, so refill conservatively */
        batchcount = BATCHREFILL_LIMIT;
    }
    l3 = cachep->nodelists[cpu_to_node(*this_cpu)];

    BUG_ON(ac->avail > 0 || !l3);
    spin_lock(&l3->list_lock);

    /* first try to grab objects from the per-node shared array */
    if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
        goto alloc_done;

    while (batchcount > 0) {
        struct list_head *entry;
        struct slab *slabp;

        /* prefer partially used slabs, fall back to completely free slabs */
        entry = l3->slabs_partial.next;
        if (entry == &l3->slabs_partial) {
            l3->free_touched = 1;
            entry = l3->slabs_free.next;
            if (entry == &l3->slabs_free)
                goto must_grow;
        }

        slabp = list_entry(entry, struct slab, list);
        check_slabp(cachep, slabp);
        check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu));
        BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);

        while (slabp->inuse < cachep->num && batchcount--) {
            STATS_INC_ALLOCED(cachep);
            STATS_INC_ACTIVE(cachep);
            STATS_SET_HIGH(cachep);

            ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, cpu_to_node(*this_cpu));
        }
        check_slabp(cachep, slabp);

        /* move the slab to the list that matches its new state */
        list_del(&slabp->list);
        if (slabp->free == BUFCTL_END)
            list_add(&slabp->list, &l3->slabs_full);
        else
            list_add(&slabp->list, &l3->slabs_partial);
    }

must_grow:
    l3->free_objects -= ac->avail;
alloc_done:
    spin_unlock(&l3->list_lock);

    if (unlikely(!ac->avail)) {
        int x;
        x = cache_grow(cachep, flags | GFP_THISNODE, cpu_to_node(*this_cpu), NULL, this_cpu);

        /* cache_grow can reenable interrupts, so ac may have changed */
        ac = cpu_cache_get(cachep, *this_cpu);
        if (!x && ac->avail == 0)   /* no objects in sight? abort */
            return NULL;

        if (!ac->avail)             /* objects refilled by interrupt? */
            goto retry;
    }
    ac->touched = 1;
    return ac->entry[--ac->avail];
}
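The three lists that the refill path walks live in the per-node struct kmem_list3. A simplified view of it (fields trimmed, and again the exact layout may differ slightly between versions):

struct kmem_list3 {
    struct list_head slabs_partial;  /* slabs with some objects allocated */
    struct list_head slabs_full;     /* slabs with every object allocated */
    struct list_head slabs_free;     /* slabs with no objects allocated */
    unsigned long free_objects;      /* total free objects on this node */
    unsigned int colour_next;        /* colour to use for the next new slab */
    spinlock_t list_lock;            /* protects the lists above */
    struct array_cache *shared;      /* objects shared by the CPUs of this node */
    struct array_cache **alien;      /* objects that actually belong to other nodes */
};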
When slabs_partial and slabs_free contain no usable slab, cache_grow() is called to add a new one:
- It begins with much the same checks as the callers above, then computes the colour offset for the new slab from l3->colour_next and cachep->colour_off;
- kmem_flagcheck(cachep, flags) sanity-checks the GFP flags;
- kmem_getpages(cachep, local_flags, nodeid) asks the buddy system for the pages backing the new slab;
- alloc_slabmgmt(cachep, objp, offset, local_flags & ~GFP_CONSTRAINT_MASK, nodeid) allocates and sets up the slab management data;
- slab_map_pages(cachep, slabp, objp) links the pages back to the given cache and slab;
- cache_init_objs(cachep, slabp) initializes the objects;
- list_add_tail(&slabp->list, &(l3->slabs_free)) puts the freshly built slab, which is entirely free, onto slabs_free;
- return 1 on success, 0 on failure.
The overall flow is clear enough, but the details are worth reading carefully; here is the code:
static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, void *objp, int *this_cpu)
{
    struct slab *slabp;
    size_t offset;
    gfp_t local_flags;
    struct kmem_list3 *l3;

    BUG_ON(flags & GFP_SLAB_BUG_MASK);
    local_flags = flags & (GFP_CONSTRAINT_MASK | GFP_RECLAIM_MASK);

    check_irq_off();
    l3 = cachep->nodelists[nodeid];
    spin_lock(&l3->list_lock);

    /* pick the colour offset for the new slab and advance colour_next */
    offset = l3->colour_next;
    l3->colour_next++;
    if (l3->colour_next >= cachep->colour)
        l3->colour_next = 0;
    spin_unlock(&l3->list_lock);

    offset *= cachep->colour_off;

    if (local_flags & __GFP_WAIT)
        slab_irq_enable_nort(*this_cpu);
    slab_irq_enable_rt(*this_cpu);

    kmem_flagcheck(cachep, flags);

    /* get the backing pages from the buddy allocator */
    if (!objp)
        objp = kmem_getpages(cachep, local_flags, nodeid);
    if (!objp)
        goto failed;

    /* allocate and set up the slab management structure */
    slabp = alloc_slabmgmt(cachep, objp, offset, local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
    if (!slabp)
        goto opps1;

    slab_map_pages(cachep, slabp, objp);
    cache_init_objs(cachep, slabp);

    slab_irq_disable_rt(*this_cpu);
    if (local_flags & __GFP_WAIT)
        slab_irq_disable_nort(*this_cpu);
    check_irq_off();
    spin_lock(&l3->list_lock);

    /* the new slab is completely free: put it on slabs_free */
    list_add_tail(&slabp->list, &(l3->slabs_free));
    STATS_INC_GROWN(cachep);
    l3->free_objects += cachep->num;
    spin_unlock(&l3->list_lock);
    return 1;

opps1:
    kmem_freepages(cachep, objp);
failed:
    slab_irq_disable_rt(*this_cpu);
    if (local_flags & __GFP_WAIT)
        slab_irq_disable_nort(*this_cpu);
    return 0;
}
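The colour handling at the top of cache_grow() is easier to see with concrete numbers. The following standalone sketch reproduces just that calculation; the values colour = 4 and colour_off = 64 are made up for illustration (64 being a typical L1 cache line size):

#include <stdio.h>

int main(void)
{
    unsigned int colour = 4;       /* cachep->colour: number of distinct offsets */
    unsigned int colour_off = 64;  /* cachep->colour_off: offset step in bytes */
    unsigned int colour_next = 0;  /* l3->colour_next */
    int i;

    /*
     * Each new slab places its first object at a different offset, so
     * objects from different slabs do not all map to the same cache lines.
     */
    for (i = 0; i < 6; i++) {
        unsigned int offset = colour_next * colour_off;

        colour_next = (colour_next + 1) % colour;
        printf("slab %d: colour offset = %u bytes\n", i, offset);
    }
    return 0;
}

With these numbers successive slabs get offsets 0, 64, 128, 192, 0, 64, ... which is exactly what the colour_next / colour / colour_off logic in cache_grow() produces.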
A diagram would make the whole flow even clearer.
------------------------
This is my personal understanding; corrections are welcome.