zoukankan      html  css  js  c++  java
  • slab分配object

    在numa架构下,slab分配object:

    3192static __always_inline void *
    3193__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
    3194{
    3195    void *objp;

    ...
    3202    objp = ____cache_alloc(cache, flags);
    3203
    3204    /*
    3205     * We may just have run out of memory on the local node.
    3206     * ____cache_alloc_node() knows how to locate memory on other nodes
    3207     */
    3208    if (!objp)
    3209        objp = ____cache_alloc_node(cache, flags, numa_mem_id());
    3210
    3211  out:
    3212    return objp;
    3213}

    首先,调用____cache_alloc来分配,该函数实现如下:

    /*
     * Fast-path object allocation from the current CPU's array cache.
     * On a hit (ac->avail != 0) the object comes straight from the
     * per-CPU array; on a miss it falls back to cache_alloc_refill().
     * Must run with interrupts off (asserted by check_irq_off()).
     */
    2920static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
    2921{
    2922    void *objp;
    2923    struct array_cache *ac;
    2924    bool force_refill = false;
    2925
    2926    check_irq_off();
    2927
    2928    ac = cpu_cache_get(cachep);
    2929    if (likely(ac->avail)) {
    2930        ac->touched = 1;
    2931        objp = ac_get_obj(cachep, ac, flags, false);
    2932
    2933        /*
    2934         * Allow for the possibility all avail objects are not allowed
    2935         * by the current flags
    2936         */
    2937        if (objp) {
    2938            STATS_INC_ALLOCHIT(cachep);
    2939            goto out;
    2940        }
    2941        /* Cache had objects but none were usable: force a grow below. */
    2942        force_refill = true;
    2943    }
    2944
    2945    STATS_INC_ALLOCMISS(cachep);
    2946    objp = cache_alloc_refill(cachep, flags, force_refill);
    2947    /*
    2948     * the 'ac' may be updated by cache_alloc_refill(),
    2949     * and kmemleak_erase() requires its correct value.
    2950     */
    2951    ac = cpu_cache_get(cachep);
    2952
    2953out:
    2954    /*
    2955     * To avoid a false negative, if an object that is in one of the
    2956     * per-CPU caches is leaked, we need to make sure kmemleak doesn't
    2957     * treat the array pointers as a reference to the object.
    2958     */
    2959    if (objp)
    2960        kmemleak_erase(&ac->entry[ac->avail]);
    2961    return objp;
    2962}

    1. 先从array cache里面去找,如果找到,返回,如果没找到,走到2.

    2.调用cache_alloc_refill来从node的shared里去找object,或者slab的partial/free list里面获取object然后填充到cpu的array cache.

    cache_alloc_refill实现如下:

    /*
     * Refill the per-CPU array cache and return one object.
     * Order of attempts: (1) transfer from the node's shared array,
     * (2) pull up to 'batchcount' objects off the node's partial/free
     * slab lists, (3) grow the cache with a fresh slab via cache_grow().
     * 'force_refill' skips straight to growing (set when the array held
     * only objects the current flags disallow).
     * Called with interrupts off; takes n->list_lock internally.
     */
    2751static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
    2752                            bool force_refill)
    2753{
    2754    int batchcount;
    2755    struct kmem_cache_node *n;
    2756    struct array_cache *ac;
    2757    int node;
    2758
    2759    check_irq_off();
    2760    node = numa_mem_id();
    2761    if (unlikely(force_refill))
    2762        goto force_grow;
    2763retry:
    2764    ac = cpu_cache_get(cachep);
    2765    batchcount = ac->batchcount;
    2766    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
    2767        /*
    2768         * If there was little recent activity on this cache, then
    2769         * perform only a partial refill.  Otherwise we could generate
    2770         * refill bouncing.
    2771         */
    2772        batchcount = BATCHREFILL_LIMIT;
    2773    }
    2774    n = get_node(cachep, node);
    2775
    2776    BUG_ON(ac->avail > 0 || !n);
    2777    spin_lock(&n->list_lock);
    2778
    2779    /* See if we can refill from the shared array */
    2780    if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
    2781        n->shared->touched = 1;
    2782        goto alloc_done;
    2783    }
    2784
    2785    while (batchcount > 0) {
    2786        struct list_head *entry;
    2787        struct page *page;
    2788        /* Get slab alloc is to come from. */
    2789        entry = n->slabs_partial.next;
    2790        if (entry == &n->slabs_partial) {
    2791            /* Partial list empty: fall back to the free list. */
    2792            n->free_touched = 1;
    2793            entry = n->slabs_free.next;
    2794            if (entry == &n->slabs_free)
    2795                goto must_grow;
    2796        }
    2797
    2798        page = list_entry(entry, struct page, lru);
    2799        check_spinlock_acquired(cachep);
    2800
    2801        /*
    2802         * The slab was either on partial or free list so
    2803         * there must be at least one object available for
    2804         * allocation.
    2805         */
    2806        BUG_ON(page->active >= cachep->num);
    2807
    2808        /* page->active counts objects already in use on this slab. */
    2809        while (page->active < cachep->num && batchcount--) {
    2810            STATS_INC_ALLOCED(cachep);
    2811            STATS_INC_ACTIVE(cachep);
    2812            STATS_SET_HIGH(cachep);
    2813
    2814            ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
    2815                                    node));
    2816        }
    2817
    2818        /* move slabp to correct slabp list: */
    2819        list_del(&page->lru);
    2820        if (page->active == cachep->num)
    2821            list_add(&page->lru, &n->slabs_full);
    2822        else
    2823            list_add(&page->lru, &n->slabs_partial);
    2824    }
    2825
    2826must_grow:
    2827    /*
    2828     * Objects moved into the array cache are accounted as allocated
    2829     * from the node's point of view.
    2830     */
    2831    n->free_objects -= ac->avail;
    2832alloc_done:
    2833    spin_unlock(&n->list_lock);
    2834
    2835    if (unlikely(!ac->avail)) {
    2836        int x;
    2837force_grow:
    2838        x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
    2839
    2840        /* cache_grow can reenable interrupts, then ac could change. */
    2841        ac = cpu_cache_get(cachep);
    2842        node = numa_mem_id();
    2843
    2844        /* no objects in sight? abort */
    2845        if (!x && (ac->avail == 0 || force_refill))
    2846            return NULL;
    2847
    2848        if (!ac->avail)        /* objects refilled by interrupt? */
    2849            goto retry;
    2850    }
    2851    ac->touched = 1;
    2852
    2853    return ac_get_obj(cachep, ac, flags, force_refill);
    2854}

    3. 若从n->shared里面可以transfer nr(nr>0)个object,返回,分配成功。

    4. 若n->shared也没有可用的object,则从slab的partial/free list里获取object,填充ac.

    page->active是该slab里面已经使用的object的数量。

    ac->avail是ac里面可用的object的数量,同时也是下一次取object时的index,递减使用。

    注意2825 n->free_objects -= ac->avail;  说明当ac被填充后,该ac里面的object就认为被分配出去了。

    如果3和4均未成功transfer object到ac,只能重新申请slab。如cache_grow的实现:

    /*
     * Grow the cache by one new slab on 'nodeid': pick the next colour
     * offset, allocate backing pages (unless 'page' was supplied),
     * set up the freelist management structure, initialize the objects,
     * and put the new slab on the node's free list.
     * Returns 1 on success, 0 on failure.  May enable interrupts around
     * the page allocation when __GFP_WAIT is allowed.
     */
    2588static int cache_grow(struct kmem_cache *cachep,
    2589        gfp_t flags, int nodeid, struct page *page)
    2590{
    2591    void *freelist;
    2592    size_t offset;
    2593    gfp_t local_flags;
    2594    struct kmem_cache_node *n;
    2595
    2596    /*
    2597     * Be lazy and only check for valid flags here,  keeping it out of the
    2598     * critical path in kmem_cache_alloc().
    2599     */
    2600    BUG_ON(flags & GFP_SLAB_BUG_MASK);
    2601    local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
    2602
    2603    /* Take the node list lock to change the colour_next on this node */
    2604    check_irq_off();
    2605    n = get_node(cachep, nodeid);
    2606    spin_lock(&n->list_lock);
    2607
    2608    /* Get colour for the slab, and calculate the next value. */
    2609    offset = n->colour_next;
    2610    n->colour_next++;
    2611    if (n->colour_next >= cachep->colour)
    2612        n->colour_next = 0;
    2613    spin_unlock(&n->list_lock);
    2614
    2615    offset *= cachep->colour_off;
    2616
    2617    if (local_flags & __GFP_WAIT)
    2618        local_irq_enable();
    2619
    2620    /*
    2621     * The test for missing atomic flag is performed here, rather than
    2622     * the more obvious place, simply to reduce the critical path length
    2623     * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
    2624     * will eventually be caught here (where it matters).
    2625     */
    2626    kmem_flagcheck(cachep, flags);
    2627
    2628    /*
    2629     * Get mem for the objs.  Attempt to allocate a physical page from
    2630     * 'nodeid'.
    2631     */
    2632    if (!page)
    2633        page = kmem_getpages(cachep, local_flags, nodeid);
    2634    if (!page)
    2635        goto failed;
    2636
    2637    /* Get slab management. */
    2638    freelist = alloc_slabmgmt(cachep, page, offset,
    2639            local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
    2640    if (!freelist)
    2641        goto opps1;
    2642
    2643    slab_map_pages(cachep, page, freelist);
    2644
    2645    cache_init_objs(cachep, page);
    2646
    2647    if (local_flags & __GFP_WAIT)
    2648        local_irq_disable();
    2649    check_irq_off();
    2650    spin_lock(&n->list_lock);
    2651
    2652    /* Make slab active. */
    2653    list_add_tail(&page->lru, &(n->slabs_free));
    2654    STATS_INC_GROWN(cachep);
    2655    n->free_objects += cachep->num;
    2656    spin_unlock(&n->list_lock);
    2657    return 1;
    2658opps1:
    2659    kmem_freepages(cachep, page);
    2660failed:
    2661    if (local_flags & __GFP_WAIT)
    2662        local_irq_disable();
    2663    return 0;
    2664}

    申请完pages之后,申请slabmgmt.如下:

    /*
     * Allocate the slab management structure (the freelist) for a new
     * slab.  Off-slab caches get it from a separate freelist_cache;
     * on-slab caches carve it out of the slab page itself, right after
     * the colour offset.  Also resets page->active and records where
     * the objects start (page->s_mem).  Returns NULL only when the
     * off-slab allocation fails.
     */
    2445static void *alloc_slabmgmt(struct kmem_cache *cachep,
    2446                   struct page *page, int colour_off,
    2447                   gfp_t local_flags, int nodeid)
    2448{
    2449    void *freelist;
    2450    void *addr = page_address(page);
    2451
    2452    if (OFF_SLAB(cachep)) {
    2453        /* Slab management obj is off-slab. */
    2454        freelist = kmem_cache_alloc_node(cachep->freelist_cache,
    2455                          local_flags, nodeid);
    2456        if (!freelist)
    2457            return NULL;
    2458    } else {
    2459        /* On-slab: freelist lives in the page, after the colour gap. */
    2460        freelist = addr + colour_off;
    2461        colour_off += cachep->freelist_size;
    2462    }
    2463    /* New slab: no objects in use yet; objects begin at s_mem. */
    2464    page->active = 0;
    2465    page->s_mem = addr + colour_off;
    2466    return freelist;
    2467}

    slabmgmt可以放在slab内部,也可以放在slab外部。放在slab外部的条件如下:

    2194    /*
    2195     * Determine if the slab management is 'on' or 'off' slab.
    2196     * (bootstrapping cannot cope with offslab caches so don't do
    2197     * it too early on. Always use on-slab management when
    2198     * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
    2199     */
    2200    if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
    2201        !(flags & SLAB_NOLEAKTRACE))
    2202        /*
    2203         * Size is large, assume best to place the slab management obj
    2204         * off-slab (should allow better packing of objs).
    2205         */
    2206        flags |= CFLGS_OFF_SLAB;
    2207

    on-slab 情况下,slab 页内部的布局大致为:

        | colour_off | freelist (freelist_size) | obj | obj | obj | ... |

    如果管理结构(slabmgmt)在slab内部,结构布局如上。如果开启了CONFIG_DEBUG_SLAB_LEAK宏,freelist_size后面还会有每个object的状态。

    然后初始化page和object。

  • 相关阅读:
    linux下将可执行程序进行打包
    多节点OpenMPI集群的搭建和使用
    快排
    git 出现:warning: LF will be replaced by CRLF in
    String 根据 第一个逗号去掉逗好后面的内容
    SpringBoot项目中想去掉数据库查询到的数组双括号
    mybatis-plus多条件 or 的使用
    从linux上的docker持久化mysql数据恢复到本地mysql数据库
    @Repository和@Mapper
    JavaScript动画实例:爆裂的粒子
  • 原文地址:https://www.cnblogs.com/chaozhu/p/10149430.html
Copyright © 2011-2022 走看看