zoukankan      html  css  js  c++  java
  • GRO(Generic Receive Offload)

    在处理 ngbe 驱动问题时, 目前发现 hook netifrecv 会使用GRO 功能:

      GRO(Generic Receive Offload)的功能将多个 TCP 数据聚合在一个skb结构,然后作为一个大数据包交付给上层的网络协议栈,以减少上层协议栈处理skb的开销,提高系统接收TCP数据包的性能。这个功能需要网卡驱动程序的支持。合并了多个skb的超级 skb能够一次性通过网络协议栈,从而减轻CPU负载。
         GRO是针对网络收包流程进行改进的,并且只有NAPI类型的驱动才支持此功能。因此如果要支持GRO,不仅要内核支持,驱动也必须调用相应的接口来开启此功能。用ethtool -K gro on来开启GRO,如果报错就说明网卡驱动本身就不支持GRO。
         GRO与TSO类似,但TSO只支持发送数据包。支持GRO的驱动会在NAPI的回调poll方法中读取数据包,然后调用GRO的接口napi_gro_receive或者napi_gro_frags来将数据包送进协议栈。

       GRO将数据送进协议栈的点有两处,一个是在napi_skb_finish里,它会通过判断dev_gro_receive的返回值,来决定是否需要将数据包送入进协议栈;还有一个点是当napi的循环执行完毕执行napi_complete或者主动调用napi_gro_complete 的时候

    gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    {
        /* Entry point NAPI drivers call from their poll() routine to hand a
         * received packet to GRO instead of netif_receive_skb() directly. */
        trace_napi_gro_receive_entry(skb);

        /* Reset the GRO parse offset so header parsing starts at the front
         * of the packet. */
        skb_gro_reset_offset(skb);

        /* dev_gro_receive() tries to merge skb into napi->gro_list and
         * returns a verdict; napi_skb_finish() acts on that verdict
         * (deliver to the stack, hold, or free the skb). */
        return napi_skb_finish(dev_gro_receive(napi, skb), skb);
    }
    static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
    {
        /* Act on the verdict returned by dev_gro_receive(). */
        switch (ret) {
        case GRO_NORMAL:/* not merged: deliver the packet into the stack as-is */
            if (netif_receive_skb_internal(skb))
                ret = GRO_DROP;
            break;

        case GRO_DROP:/* packet rejected by GRO: free it without delivering */
            kfree_skb(skb);
            break;

        case GRO_MERGED_FREE:/* skb's data was merged into a held skb, so the
                              * skb itself can now be freed */
            if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
                napi_skb_free_stolen_head(skb);
            else
                __kfree_skb(skb);
            break;
       /* GRO_HELD: skb was stored on gro_list without being merged, so it
        * must be kept alive. GRO_MERGED: nothing further to do here. */
        case GRO_HELD:
        case GRO_MERGED:
            break;
        }

        return ret;
    }

    dev_gro_receive函数用于合并skb,并决定是否将合并后的大skb送入网络协议栈

    static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    {
        struct sk_buff **pp = NULL;
        struct packet_offload *ptype;
        __be16 type = skb->protocol;
        struct list_head *head = &offload_base;/* list of L3 protocol offload handlers */
        int same_flow;
        enum gro_result ret;
        int grow;

        /* Device has not enabled GRO: hand the packet straight to the stack. */
        if (!(skb->dev->features & NETIF_F_GRO))
            goto normal;
       /* GRO does not handle GSO / frag-listed packets: IP fragment
        * reassembly is done later by the IP layer anyway, so merging them
        * here would add complexity for little gain.
        */
        if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
            goto normal;
        /* Walk napi->gro_list and pre-set each held skb's same_flow flag.
         * same_flow only marks a CANDIDATE for merging; it is refined at
         * every layer (link, IP, TCP). At this (link) level the checks are:
         *   1. same input device
         *   2. identical MAC header
         */
        /* If a held skb belongs to the same flow, skb will later be merged
         * into it; otherwise skb becomes the head packet of a new flow and
         * is linked onto napi->gro_list.
         */
        gro_list_prepare(napi, skb);

        rcu_read_lock();/* walk the registered protocol offload table */
        list_for_each_entry_rcu(ptype, head, list) {
            if (ptype->type != type || !ptype->callbacks.gro_receive)
                continue;

            skb_set_network_header(skb, skb_gro_offset(skb));
            skb_reset_mac_len(skb);
            /* Reset the per-packet GRO control block state. */
            NAPI_GRO_CB(skb)->same_flow = 0;
            NAPI_GRO_CB(skb)->flush = 0;
            NAPI_GRO_CB(skb)->free = 0;
            NAPI_GRO_CB(skb)->encap_mark = 0;
            NAPI_GRO_CB(skb)->recursion_counter = 0;
            NAPI_GRO_CB(skb)->gro_remcsum_start = 0;

            /* Setup for GRO checksum validation */
            switch (skb->ip_summed) {
            case CHECKSUM_COMPLETE:
                NAPI_GRO_CB(skb)->csum = skb->csum;
                NAPI_GRO_CB(skb)->csum_valid = 1;
                NAPI_GRO_CB(skb)->csum_cnt = 0;
                break;
            case CHECKSUM_UNNECESSARY:
                NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
                NAPI_GRO_CB(skb)->csum_valid = 0;
                break;
            default:
                NAPI_GRO_CB(skb)->csum_cnt = 0;
                NAPI_GRO_CB(skb)->csum_valid = 0;
            }
            /* Invoke the matching protocol's gro_receive callback,
             * e.g. inet_gro_receive() for IPv4. */
            pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
            break;
        }
        rcu_read_unlock();

        if (&ptype->list == head)/* no offload handler found for this protocol */
            goto normal;
        /* gro_receive has run; pick up the verdicts it left in the CB. */
        same_flow = NAPI_GRO_CB(skb)->same_flow;
        /* Decide whether the merged skb's own memory can be freed. */
        ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;

        if (pp) {/* non-NULL pp: that held skb must be flushed to the stack now */
            struct sk_buff *nskb = *pp;

            *pp = nskb->next;
            nskb->next = NULL;
            napi_gro_complete(nskb);
            napi->gro_count--;
        }
        /* same_flow set: gro_receive already merged skb into gro_list,
         * nothing more to do here. */
        if (same_flow)
            goto ok;
        /* flush set: this skb must go straight to the stack and must not
         * be held on gro_list. */
        if (NAPI_GRO_CB(skb)->flush)
            goto normal;
        /* Already holding MAX_GRO_SKBS (8) flows: before starting a new
         * flow, push the oldest held flow (tail of the list) up the stack. */
        if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
            struct sk_buff *nskb = napi->gro_list;

            /* locate the end of the list to select the 'oldest' flow */
            while (nskb->next) {
                pp = &nskb->next;
                nskb = *pp;
            }
            *pp = NULL;
            nskb->next = NULL;
            napi_gro_complete(nskb);
        } else {
            napi->gro_count++;
        }
       /* skb is the first packet of a new flow (nothing on gro_list could
        * merge with it), so hold it by linking it at the head of gro_list.
        */
        NAPI_GRO_CB(skb)->count = 1;
        NAPI_GRO_CB(skb)->age = jiffies;
        NAPI_GRO_CB(skb)->last = skb;
        skb_shinfo(skb)->gso_size = skb_gro_len(skb);
        skb->next = napi->gro_list;
        napi->gro_list = skb;
        ret = GRO_HELD;

    pull:
        /*
    Some packets keep part of their headers in skb->frag[0]; during merging
    skb_gro_header_slow() pulled those headers into the linear area, so the
    header bytes still left in the non-linear frag[0] must be trimmed here.
    */
        grow = skb_gro_offset(skb) - skb_headlen(skb);
        if (grow > 0)
            gro_pull_from_frag0(skb, grow);
    ok:
        return ret;

    normal:
        ret = GRO_NORMAL;
        goto pull;
    }

     inet_gro_receive 函数是网络层skb聚合处理函数:

    /* inet_gro_receive is the IPv4 gro_receive callback: it fetches the IP
     * header (copying it out of frag0 into the linear area if necessary),
     * validates it, classifies the held packets by flow, then hands off to
     * the transport-layer gro_receive.
     */
    static struct sk_buff **inet_gro_receive(struct sk_buff **head,
                         struct sk_buff *skb)
    {
        const struct net_offload *ops;
        struct sk_buff **pp = NULL;
        struct sk_buff *p;
        const struct iphdr *iph;
        unsigned int hlen;
        unsigned int off;
        unsigned int id;
        int flush = 1;
        int proto;

        off = skb_gro_offset(skb);/* current GRO parse offset */
        hlen = off + sizeof(*iph);/* offset of the end of the IP header */
        iph = skb_gro_header_fast(skb, off);/* IP header if it sits in frag0 */
        if (skb_gro_header_hard(skb, hlen)) {/* header not fully there: copy it */
            iph = skb_gro_header_slow(skb, hlen, off);
            if (unlikely(!iph))
                goto out;
        }

        proto = iph->protocol;

        rcu_read_lock();
        ops = rcu_dereference(inet_offloads[proto]);
        if (!ops || !ops->callbacks.gro_receive)
            goto out_unlock;/* the L4 protocol has no gro_receive support */
        /* Header must be plain IPv4: iph->version = 4, iph->ihl = 5. */
        if (*(u8 *)iph != 0x45)
            goto out_unlock;
        /* IP header checksum must be valid. */
        if (unlikely(ip_fast_csum((u8 *)iph, 5)))
            goto out_unlock;

        id = ntohl(*(__be32 *)&iph->id);
        flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
        id >>= 16;
    /* Core part: walk the whole gro_list deciding same_flow and flush.
    At the IP layer same_flow requires all of:
      1. the same L4 protocol
      2. the same tos
      3. the same source and destination addresses
    If any of the three differs, same_flow is cleared to 0.
    A held skb is additionally marked for flush (pushed out of GRO) when:
      1. the ttl differs
      2. the IP id is out of sequence
      3. it is a fragment
    */
        for (p = *head; p; p = p->next) {
            struct iphdr *iph2;
            /* A lower layer already ruled this one out: skip it. */
            if (!NAPI_GRO_CB(p)->same_flow)
                continue;

            iph2 = (struct iphdr *)(p->data + off);
            /* The above works because, with the exception of the top
             * (inner most) layer, we only aggregate pkts with the same
             * hdr length so all the hdrs we'll need to verify will start
             * at the same offset.
             *//* same-flow check: protocol, saddr and daddr must all match */
            if ((iph->protocol ^ iph2->protocol) |
                ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
                ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
                NAPI_GRO_CB(p)->same_flow = 0;
                continue;
            }
           /* Flow matches: now decide whether it must be flushed.
            * (No point checking flush for skbs that are not same_flow.) */
            /* All fields must match except length and checksum. */
            NAPI_GRO_CB(p)->flush |=
                (iph->ttl ^ iph2->ttl) |
                (iph->tos ^ iph2->tos) |
                ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));

            /* Save the IP ID check to be included later when we get to
             * the transport layer so only the inner most IP ID is checked.
             * This is because some GSO/TSO implementations do not
             * correctly increment the IP ID for the outer hdrs.
             */
            NAPI_GRO_CB(p)->flush_id =
                    ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
            NAPI_GRO_CB(p)->flush |= flush;
        }

        NAPI_GRO_CB(skb)->flush |= flush;
        skb_set_network_header(skb, off);
        /* The above will be needed by the transport layer if there is one
         * immediately following this IP hdr.
         */

        /* Note : No need to call skb_gro_postpull_rcsum() here,
         * as we already checked checksum over ipv4 header was 0
         */
        skb_gro_pull(skb, sizeof(*iph));/* advance past the IP header */
        skb_set_transport_header(skb, skb_gro_offset(skb));
        /* Hand off to the transport-layer gro_receive callback, e.g.
         * tcp4_gro_receive/tcp_gro_receive for TCP, which walks gro_list
         * the same way and finally merges matching skbs. */
        pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);

    out_unlock:
        rcu_read_unlock();

    out:
        NAPI_GRO_CB(skb)->flush |= flush;

        return pp;
    }

     tcp4_gro_receive/tcp_gro_receive函数 是传输层skb聚合处理函数:其会调用 skb_gro_receive 用于合并同流的skb

    2942 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
    2943 {
    2944     struct sk_buff *p = *head;
    2945     struct sk_buff *nskb;
    2946     struct skb_shared_info *skbinfo = skb_shinfo(skb);
    2947     struct skb_shared_info *pinfo = skb_shinfo(p);
    2948     unsigned int headroom;
    2949     unsigned int len = skb_gro_len(skb);
    2950     unsigned int offset = skb_gro_offset(skb);
    2951     unsigned int headlen = skb_headlen(skb);
    2952     unsigned int delta_truesize;
    2953
    2954     if (p->len + len >= 65536)
    2955         return -E2BIG;
    2956
    2957     if (pinfo->frag_list)  //frag_list中有skb,证明不支持分散-聚集IO
    2958         goto merge;
    2959     else if (headlen <= offset) {//有一部分头在page中
    2960         skb_frag_t *frag;
    2961         skb_frag_t *frag2;
    2962         int i = skbinfo->nr_frags;
    2963         int nr_frags = pinfo->nr_frags + i;
    2964
    2965         offset -= headlen;
    2966
    2967         if (nr_frags > MAX_SKB_FRAGS)
    2968             return -E2BIG;
    2969
    2970         pinfo->nr_frags = nr_frags;
    2971         skbinfo->nr_frags = 0;
    2972
    2973         frag = pinfo->frags + nr_frags;
    2974         frag2 = skbinfo->frags + i;
    2975         do {//遍历赋值,将skb的frag加到pinfo的frgas后面
    2976             *--frag = *--frag2;
    2977         } while (--i);
    2978
    2979         frag->page_offset += offset;//去除剩余的头,只保留数据部分
    2980         skb_frag_size_sub(frag, offset);
    2981
    2982         /* all fragments truesize : remove (head size + sk_buff) */
    2983         delta_truesize = skb->truesize -
    2984                  SKB_TRUESIZE(skb_end_offset(skb));
    2985
    2986         skb->truesize -= skb->data_len;
    2987         skb->len -= skb->data_len;
    2988         skb->data_len = 0;
    2989
    2990         NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
    2991         goto done;
    2992     } else if (skb->head_frag) {//支持分散-聚集IO
    2993         int nr_frags = pinfo->nr_frags;
    2994         skb_frag_t *frag = pinfo->frags + nr_frags;
    2995         struct page *page = virt_to_head_page(skb->head);
    2996         unsigned int first_size = headlen - offset;
    2997         unsigned int first_offset;
    2998
    2999         if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
    3000             return -E2BIG;
    3001
    3002         first_offset = skb->data -
    3003                    (unsigned char *)page_address(page) +
    3004                    offset;
    3005
    3006         pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
    3007
    3008         frag->page.p      = page;
    3009         frag->page_offset = first_offset;
    3010         skb_frag_size_set(frag, first_size);
    3011
    3012         memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
    3013         /* We dont need to clear skbinfo->nr_frags here */
    3014
    3015         delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
    3016         NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
    3017         goto done;
    3018     } else if (skb_gro_len(p) != pinfo->gso_size)
    3019         return -E2BIG;
    3020 //不支持分散-聚集IO,则网卡不会将数据放在skb的frags数组中
    3021     headroom = skb_headroom(p);
    3022     nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);//申请新的skb
    3023     if (unlikely(!nskb))
    3024         return -ENOMEM;
    3025
    3026     __copy_skb_header(nskb, p);
    3027     nskb->mac_len = p->mac_len;
    3028
    3029     skb_reserve(nskb, headroom);
    3030     __skb_put(nskb, skb_gro_offset(p));
    3031
    3032     skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
    3033     skb_set_network_header(nskb, skb_network_offset(p));
    3034     skb_set_transport_header(nskb, skb_transport_offset(p));
    3035
    3036     __skb_pull(p, skb_gro_offset(p));
    3037     memcpy(skb_mac_header(nskb), skb_mac_header(p),
    3038            p->data - skb_mac_header(p));
    3039
    3040     skb_shinfo(nskb)->frag_list = p;//将旧GRO队列头放入frag_list队列中
    3041     skb_shinfo(nskb)->gso_size = pinfo->gso_size;
    3042     pinfo->gso_size = 0;
    3043     skb_header_release(p);
    3044     NAPI_GRO_CB(nskb)->last = p;
    3045
    3046     nskb->data_len += p->len;
    3047     nskb->truesize += p->truesize;
    3048     nskb->len += p->len;
    3049
    3050     *head = nskb;
    3051     nskb->next = p->next;
    3052     p->next = NULL;
    3053
    3054     p = nskb;
    3055
    3056 merge:
    3057     delta_truesize = skb->truesize;
    3058     if (offset > headlen) {
    3059         unsigned int eat = offset - headlen;
    3060
    3061         skbinfo->frags[0].page_offset += eat;
    3062         skb_frag_size_sub(&skbinfo->frags[0], eat);
    3063         skb->data_len -= eat;
    3064         skb->len -= eat;
    3065         offset = headlen;
    3066     }
    3067
    3068     __skb_pull(skb, offset);
    3069
    3070     NAPI_GRO_CB(p)->last->next = skb;//将包放入GRO队列中
    3071     NAPI_GRO_CB(p)->last = skb;
    3072     skb_header_release(skb);
    3073
    3074 done:
    3075     NAPI_GRO_CB(p)->count++;
    3076     p->data_len += len;
    3077     p->truesize += delta_truesize;
    3078     p->len += len;
    3079
    3080     NAPI_GRO_CB(skb)->same_flow = 1;  //标识当前skb已经找到同流的skb并进行了合并
    3081     return 0;
    3082 }

       当网卡支持分散-聚集IO时,GRO会将多个skb合并到一个skb的frag page数组中,否则会合并到skb的frag_list中:

       即使在上述流程中skb被放入GRO队列中保存而没有被立即送入协议栈,它们也不会在队列中滞留太长时间,因为在收包软中断中会调用napi_gro_flush函数将GRO队列中的包送入协议栈:

    /* napi->gro_list contains packets ordered by age.
     * youngest packets at the head of it.
     * Complete skbs in reverse order to reduce latencies.
     */
    void napi_gro_flush(struct napi_struct *napi, bool flush_old)
    {
        struct sk_buff *skb, *prev = NULL;

        /* scan list and build reverse chain */
        for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
            skb->prev = prev;
            prev = skb;/* after the loop, prev points at the oldest packet */
        }

        /* Walk the reversed chain, i.e. from oldest to youngest. */
        for (skb = prev; skb; skb = prev) {
            skb->next = NULL;

            /* With flush_old, packets queued during the current jiffy are
             * still "young" and stay on the list; everything older than
             * that has been reached, so stop here. */
            if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
                return;

            prev = skb->prev;
            napi_gro_complete(skb);
            napi->gro_count--;
        }

        napi->gro_list = NULL;
    }

      包加入GRO队列的时间比当前仅早1个jiffies也会被视作旧包并交付协议栈处理(napi_gro_flush中仅保留age等于当前jiffies的包),可见如果软中断每个jiffies都调用一次napi_gro_flush函数的话,开启GRO功能最多增加1个jiffies(1ms或10ms)的延迟 .

      GRO的基本原理是将MAC层、IP层和TCP层都能合并的包的头只留一个,数据部分在frag数组或frag_list中存储,这样大大提高了包携带数据的效率。

      在完成GRO处理后,skb会被交付到Linux网络协议栈入口进行协议处理。聚合后的skb在被送入到网络协议栈后,在网络层协议、TCP协议处理函数中会调用pskb_may_pull函数将GRO skb的数据整合到线性空间:

    pskb_may_pull的整合保证了TCP首部数据全部被放入线性空间,从而使GRO不影响TCP协议的处理

    int tcp_v4_rcv(struct sk_buff *skb)
    {
        const struct iphdr *iph;
        const struct tcphdr *th;
        struct sock *sk;
        int ret;
        struct net *net = dev_net(skb->dev);
    ------------------------------------------------------------
        /* (excerpt) Ensure the whole TCP header (th->doff words of 4 bytes)
         * sits in the linear area; for an aggregated GRO skb this pulls the
         * header bytes out of the fragments via __pskb_pull_tail(). */
        if (!pskb_may_pull(skb, th->doff * 4))
            goto discard_it;

    ---------------------------------------
    }
    /**
     *    __pskb_pull_tail - advance tail of skb header
     *    @skb: buffer to reallocate
     *    @delta: number of bytes to advance tail
     *
     *    The function makes a sense only on a fragmented &sk_buff,
     *    it expands header moving its tail forward and copying necessary
     *    data from fragmented part.
     *
     *    &sk_buff MUST have reference count of 1.
     *
     *    Returns %NULL (and &sk_buff does not change) if pull failed
     *    or value of new tail of skb in the case of success.
     *
     *    All the pointers pointing into skb header may change and must be
     *    reloaded after call to this function.
     */
    
    /* Moves tail of skb head forward, copying data from fragmented part,
     * when it is necessary.
     * 1. It may fail due to malloc failure.
     * 2. It may change skb pointers.
     *
     * It is pretty complicated. Luckily, it is called only in exceptional cases.
     */
    /* Pull 'delta' bytes of fragmented data into skb's linear area.
     * (The original snippet had a stray pasted token "skb_shared_info"
     * after "if (eat) {", which broke the code; it has been removed.)
     */
    unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
    {
        /* If skb has not enough free space at tail, get new one
         * plus 128 bytes for future expansions. If we have enough
         * room at tail, reallocate without expansion only if skb is cloned.
         */
        int i, k, eat = (skb->tail + delta) - skb->end;
    /* eat > 0 means advancing tail by delta would run past end, i.e. the
     * linear area is short by 'eat' bytes. A cloned skb (shared data) must
     * also be reallocated before it can be modified. */
        if (eat > 0 || skb_cloned(skb)) {
            if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
                         GFP_ATOMIC))
                return NULL;
        }
        /* The linear area now has room. Layout reminder:
         *   head---data----tail----end----frags
         * Copy delta bytes of fragment data — starting right after the
         * current linear data, offset skb_headlen(skb) — into the tail room. */
        if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
                BUG();

        /* Optimization: no fragments, no reasons to preestimate
         * size of pulled pages. Superb.
         */
        if (!skb_has_frag_list(skb))
            goto pull_pages;

        /* The data was copied into skb->data, so the copied portion must be
         * released from frags[] and frag_list. First work out how much of
         * delta is covered by the frags array. */
        /* Estimate size of pulled pages. */
        eat = delta; 
        /* Find the first frag page that fully covers the remainder. */
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
            int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

            if (size >= eat)
                goto pull_pages;
            eat -= size;
        }

        /* If we need update frag list, we are in troubles.
         * Certainly, it possible to add an offset to skb data,
         * but taking into account that pulling is expected to
         * be very rare operation, it is worth to fight against
         * further bloating skb head and crucify ourselves here instead.
         * Pure masohism, indeed. 8)8)
         */
        /* eat still non-zero: part of the data came from frag_list, so the
         * consumed skbs there must be released. */
        if (eat) {
            struct sk_buff *list = skb_shinfo(skb)->frag_list;
            struct sk_buff *clone = NULL;
            struct sk_buff *insp = NULL;

            do {
                BUG_ON(!list);

                if (list->len <= eat) {
                    /* Eaten as whole. */
                    eat -= list->len;
                    list = list->next;
                    insp = list;
                } else {
                    /* Eaten partially. */

                    if (skb_shared(list)) {
                        /* Sucks! We need to fork list. :-( */
                        clone = skb_clone(list, GFP_ATOMIC);
                        if (!clone)
                            return NULL;
                        insp = list->next;
                        list = clone;
                    } else {
                        /* This may be pulled without
                         * problems. */
                        insp = list;
                    }
                    if (!pskb_pull(list, eat)) {
                        kfree_skb(clone);
                        return NULL;
                    }
                    break;
                }
            } while (eat);

            /* Free pulled out fragments: every frag_list skb that comes
             * before insp was consumed entirely. */
            while ((list = skb_shinfo(skb)->frag_list) != insp) {
                skb_shinfo(skb)->frag_list = list->next;
                kfree_skb(list);
            }
            /* And insert new clone at head. */
            if (clone) {
                clone->next = list;
                skb_shinfo(skb)->frag_list = clone;
            }
        }
        /* Success! Now we may commit changes to skb data. */

    pull_pages:
        /* Release the frag pages whose data was copied out, compacting the
         * remaining frags to the front of the array. */
        eat = delta;
        k = 0;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
            int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

            if (size <= eat) {
                skb_frag_unref(skb, i);
                eat -= size;
            } else {
                skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
                if (eat) {
                    skb_shinfo(skb)->frags[k].page_offset += eat;
                    skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
                    eat = 0;
                }
                k++;
            }
        }
        skb_shinfo(skb)->nr_frags = k;

        skb->tail     += delta;
        skb->data_len -= delta;

        return skb_tail_pointer(skb);
    }
    View Code
    http代理服务器(3-4-7层代理)-网络事件库公共组件、内核kernel驱动 摄像头驱动 tcpip网络协议栈、netfilter、bridge 好像看过!!!! 但行好事 莫问前程 --身高体重180的胖子
  • 相关阅读:
    DRF JWT认证基础
    Shell简单脚本
    DockerFile案例
    Shell基础知识
    DockerFile基础知识
    tensorflow
    使用cv2将图片分割成相等的部分
    序列化心得
    python正则化表达式
    python--匿名函数(lambda)
  • 原文地址:https://www.cnblogs.com/codestack/p/14863202.html
Copyright © 2011-2022 走看看