zoukankan      html  css  js  c++  java
  • sk_buff

    它里面包含了各层协议的头部,比如ethernet、ip、tcp、udp等等。熟悉它是进一步了解Linux网络协议栈的基础。
    此结构定义头文件

    <include/linux/skbuff.h>
    /*
     * Head of an sk_buff list: next/prev pointers plus an element count
     * and a lock for the list.
     */
    struct sk_buff_head {
          /* These two members must be first. */
          struct sk_buff *next;
          struct sk_buff *prev;
          __u32       qlen; // number of element nodes in the list
          spinlock_t      lock; // lock preventing concurrent access to the list
    };

    sk_buff_head

    管理函数

    int skb_headroom(const struct sk_buff *skb)
    int skb_tailroom(const struct sk_buff *skb)

    skb_headroom通常比较小(只有48),用的时候小心越界
    skb_tailroom大小1500,足够自由调整
    skb

    //同时增加len和tail。用于向数据报尾部追加数据。返回原来tail所在位置
    unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 

    skb_put

    //将data指针上移并增加len长度。这个函数用来向头部添加一些数据。当然前提是有足够的headroom
    unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 

    skb_push

    //将data指针下移,并减小len的值。这个函数一般用来除去某个头部
    unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) 

    skb_pull

    //将data指针和tail指针同时下移。这个操作在存储空间的头部预留len长度的空隙
    void skb_reserve(struct sk_buff *skb, unsigned int len) 

    skb_reserve

    //将网络报文的长度缩减到len。这个操作丢弃了网络报文尾部的填充值
    void skb_trim(struct sk_buff *skb, unsigned int len) 

    skb_trim
    下图是穿过协议栈从tcp层向下到链路层的过程
    tcp

    分配内存

    alloc_skb 分配缓冲区和一个sk_buff结构
    dev_alloc_skb 设备驱动程序使用的缓冲区分配函数

    释放内存

    kfree_skb 将skb->users减1,只有当引用计数降为0(即调用前users为1)时才真正释放skb
    dev_kfree_skb

    缓冲区克隆

    skb_clone

    sk_buff

    /** 
     *   struct sk_buff - socket buffer
     *   @next: Next buffer in list
     *   @prev: Previous buffer in list
     *   @tstamp: Time we arrived
     *   @sk: Socket we are owned by
     *   @dev: Device we arrived on/are leaving by
     *   @cb: Control buffer. Free for use by every layer. Put private vars here
     *   @_skb_refdst: destination entry (with norefcount bit)
     *   @sp: the security path, used for xfrm
     *   @len: Length of actual data
     *   @data_len: Data length
     *   @mac_len: Length of link layer header
     *   @hdr_len: writable header length of cloned skb
     *   @csum: Checksum (must include start/offset pair)
     *   @csum_start: Offset from skb->head where checksumming should start
     *   @csum_offset: Offset from csum_start where checksum should be stored
     *   @priority: Packet queueing priority
     *   @local_df: allow local fragmentation
     *   @cloned: Head may be cloned (check refcnt to be sure)
     *   @ip_summed: Driver fed us an IP checksum
     *   @nohdr: Payload reference only, must not modify header
     *   @nfctinfo: Relationship of this skb to the connection
     *   @pkt_type: Packet class
     *   @fclone: skbuff clone status
     *   @ipvs_property: skbuff is owned by ipvs
     *   @peeked: this packet has been seen already, so stats have been
     *        done for it, don't do them again
     *   @nf_trace: netfilter packet trace flag
     *   @protocol: Packet protocol from driver
     *   @nfct: Associated connection, if any
     *   @nfct_reasm: netfilter conntrack re-assembly pointer
     *   @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
     *   @skb_iif: ifindex of device we arrived on
     *   @tc_index: Traffic control index
     *   @tc_verd: traffic control verdict
     *   @rxhash: the packet hash computed on receive
     *   @queue_mapping: Queue mapping for multiqueue devices
     *   @ndisc_nodetype: router type (from link layer)
     *   @ooo_okay: allow the mapping of a socket to a queue to be changed
     *   @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
     *        ports.
     *   @wifi_acked_valid: wifi_acked was set
     *   @wifi_acked: whether frame was acked on wifi or not
     *   @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
     *   @dma_cookie: a cookie to one of several possible DMA operations
     *        done by skb DMA functions
     *   @secmark: security marking
     *   @mark: Generic packet mark
     *   @dropcount: total number of sk_receive_queue overflows
     *   @vlan_tci: vlan tag control information
     *   @inner_transport_header: Inner transport layer header (encapsulation)
     *   @inner_network_header: Network layer header (encapsulation)
     *   @transport_header: Transport layer header
     *   @network_header: Network layer header
     *   @mac_header: Link layer header
     *   @tail: Tail pointer
     *   @end: End pointer
     *   @head: Head of buffer
     *   @data: Data head pointer
     *   @truesize: Buffer size
     *   @users: User count - see {datagram,tcp}.c
     *   @destructor: Destruct function
     */
    
    /*
     * Socket buffer descriptor. Carries list linkage, ownership (socket and
     * device), per-layer control data, checksum/classification flags, the
     * header positions and the pointers delimiting the data buffer.
     */
    struct sk_buff {
         /* These two members must be first. */
         struct sk_buff      *next;
         struct sk_buff      *prev;
    
         ktime_t             tstamp; // time the packet was received
    
         struct sock         *sk; // socket that owns this skb; used by layer 4
         struct net_device   *dev; // network device: output device when sending, input device when receiving
    
         /*
          * This is the control buffer. It is free to use for every
          * layer. Please put your private variables there. If you
          * want to keep them across layers you have to do a skb_clone()
          * first. This is owned by whoever has the skb queued ATM.
          */
         char           cb[48] __aligned(8); // per-layer control information
    
         unsigned long       _skb_refdst; // used by the routing subsystem; holds routing-related information
    #ifdef CONFIG_XFRM
         struct    sec_path  *sp;
    #endif
         unsigned int        len, // length of data currently in the skb; changes as the skb moves between layers
                        data_len; // length of the fragmented (paged) data
         __u16               mac_len, // length of the MAC header
                        hdr_len; // header length of a cloned skb
         union {
              __wsum         csum;
              struct {
                   __u16     csum_start;
                   __u16     csum_offset;
              };
         };
         __u32               priority; // priority, used for QoS
         kmemcheck_bitfield_begin(flags1);
         __u8           local_df:1, // whether local fragmentation is allowed
                        cloned:1, // whether the head may be cloned
                        ip_summed:2, // whether the driver/hardware already handled the checksum
                        nohdr:1, // payload reference only; the header must not be modified
                        nfctinfo:3; 
         __u8           pkt_type:3, // packet class
                        fclone:2, // clone status; set by the fast-clone path
                        ipvs_property:1, // skb is owned by ipvs
                        peeked:1,
                        nf_trace:1;
         kmemcheck_bitfield_end(flags1);
         __be16              protocol; // layer-3 protocol
    
    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
         struct nf_conntrack *nfct;
    #endif
    #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
         struct sk_buff      *nfct_reasm;
    #endif
    #ifdef CONFIG_BRIDGE_NETFILTER
         struct nf_bridge_info    *nf_bridge;
    #endif
    
         int            skb_iif; // ifindex of the receiving device
    
         __u32               rxhash;
    
         __u16               vlan_tci;
    
    // traffic-control related fields
    #ifdef CONFIG_NET_SCHED
         __u16               tc_index; /* traffic control index */
    #ifdef CONFIG_NET_CLS_ACT
         __u16               tc_verd;  /* traffic control verdict */
    #endif
    #endif
    
         __u16               queue_mapping; // queue mapping for multiqueue devices: which queue this skb maps to
         kmemcheck_bitfield_begin(flags2);
    #ifdef CONFIG_IPV6_NDISC_NODETYPE
         __u8           ndisc_nodetype:2;
    #endif
         __u8           pfmemalloc:1;
         __u8           ooo_okay:1;
         __u8           l4_rxhash:1;
         __u8           wifi_acked_valid:1;
         __u8           wifi_acked:1;
         __u8           no_fcs:1;
         __u8           head_frag:1;
         /* Encapsulation protocol and NIC drivers should use
          * this flag to indicate to each other if the skb contains
          * encapsulated packet or not and maybe use the inner packet
          * headers if needed
          */
         __u8           encapsulation:1;
         /* 7/9 bit hole (depending on ndisc_nodetype presence) */
         kmemcheck_bitfield_end(flags2);
    
    #ifdef CONFIG_NET_DMA
         dma_cookie_t        dma_cookie;
    #endif
    #ifdef CONFIG_NETWORK_SECMARK
         __u32               secmark;
    #endif
         union {
              __u32          mark; // generic packet mark
              __u32          dropcount;
              __u32          reserved_tailroom;
         };
    
         sk_buff_data_t      inner_transport_header;
         sk_buff_data_t      inner_network_header;
         sk_buff_data_t      transport_header; // transport layer header
         sk_buff_data_t      network_header; // network layer header
         sk_buff_data_t      mac_header; // link layer header
         /* These elements must be at the end, see alloc_skb() for details.  */
         sk_buff_data_t      tail; // tail of the data; moved by the skb data operations
         sk_buff_data_t      end;
         unsigned char       *head,
                        *data;
         unsigned int        truesize; // size of the whole skb
         atomic_t       users; // reference count of the skb
    
         void           (*destructor)(struct sk_buff *skb); // destructor for the skb (sock_rfree and sock_wfree)
    };

    skb_put

    /*
     * Grow the data area at its tail by len bytes.
     * Returns the tail position as it was before the growth, i.e. where the
     * caller should write the appended bytes.
     */
    static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
    {
        unsigned char *old_tail = skb_tail_pointer(skb);

        SKB_LINEAR_ASSERT(skb);

        /* Account for the new bytes in both the length and the tail marker. */
        skb->len  += len;
        skb->tail += len;

        return old_tail;
    }

    skb_push

    /*
     * Grow the data area at its front by len bytes: data moves back by len
     * and len is added to the length. Returns the new data pointer.
     */
    static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
    {
        unsigned char *new_start = skb->data - len;

        skb->len  += len;
        skb->data  = new_start;

        return new_start;
    }

    skb_pull

    /*
     * Drop len bytes from the front of the data area: the length shrinks by
     * len and data advances by len. Returns the new data pointer.
     */
    static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
    {
        skb->len -= len;
        /* The remaining length must never fall below data_len. */
        BUG_ON(skb->len < skb->data_len);

        skb->data += len;
        return skb->data;
    }

    alloc_skb

    /*
     * Allocate an sk_buff descriptor together with its data buffer.
     *
     * size:     requested size of the data area (aligned up before use)
     * gfp_mask: allocation flags
     * fclone:   non-zero to allocate from the fclone cache
     * node:     node passed to the node-aware allocators
     *
     * Returns the new skb, or NULL if either allocation fails.
     */
    struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                    int fclone, int node)
    {
        struct kmem_cache *cache;
        struct skb_shared_info *shinfo;
        struct sk_buff *skb;
        u8 *data;
    
    // fclone selects whether the descriptor comes from the fclone cache or the head cache
        cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
    
    // first allocate the skb descriptor itself
        skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
        if (!skb)
            goto out;
    // align size first (per the original note, to the L1 cache line size -- confirm against SKB_DATA_ALIGN)
        size = SKB_DATA_ALIGN(size);
    // then allocate the data area: size plus sizeof(struct skb_shared_info)
        data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
                gfp_mask, node);
        if (!data)
            goto nodata;
    
    // zero every field that precedes 'tail'; the fields from 'tail' on are set explicitly below
        memset(skb, 0, offsetof(struct sk_buff, tail));
    // truesize is the aligned data size plus the size of the descriptor
        skb->truesize = size + sizeof(struct sk_buff);
    // the reference count starts at 1
        atomic_set(&skb->users, 1);
    // initially head and data both point at the start of the buffer
        skb->head = data;
        skb->data = data;
    // set the tail pointer
        skb_reset_tail_pointer(skb);
    // end lies size bytes past tail
        skb->end = skb->tail + size;
        kmemcheck_annotate_bitfield(skb, flags1);
        kmemcheck_annotate_bitfield(skb, flags2);
    #ifdef NET_SKBUFF_DATA_USES_OFFSET
        skb->mac_header = ~0U;
    #endif
    
    // initialise the shared info (the original author covers this structure in an earlier post on fragmentation)
        shinfo = skb_shinfo(skb);
        atomic_set(&shinfo->dataref, 1);
        shinfo->nr_frags  = 0;
        shinfo->gso_size = 0;
        shinfo->gso_segs = 0;
        shinfo->gso_type = 0;
        shinfo->ip6_frag_id = 0;
        shinfo->tx_flags.flags = 0;
        skb_frag_list_init(skb);
        memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
    
    // fclone set: an extra block of memory was allocated, so the fclone fields must be set up
        if (fclone) {
    // the extra sk_buff sits directly after this one, followed by the shared atomic counter
            struct sk_buff *child = skb + 1;
            atomic_t *fclone_ref = (atomic_t *) (child + 1);
    
            kmemcheck_annotate_bitfield(child, flags1);
            kmemcheck_annotate_bitfield(child, flags2);
    // note that the original skb and its companion get different fclone values
            skb->fclone = SKB_FCLONE_ORIG;
            atomic_set(fclone_ref, 1);
    
            child->fclone = SKB_FCLONE_UNAVAILABLE;
        }
    out:
        return skb;
    nodata:
        /* Data allocation failed: give the descriptor back and return NULL. */
        kmem_cache_free(cache, skb);
        skb = NULL;
        goto out;
    }

    skb_linearize

    int skb_linearize(struct sk_buff *skb, gfp_t gfp) 

    将frag_list链表里面的数据包整合成一个报文

    skb_clone

    
    /*
     * Produce a second descriptor for skb.
     *
     * If skb is marked SKB_FCLONE_ORIG and the descriptor right behind it is
     * still SKB_FCLONE_UNAVAILABLE, that neighbour is used and the counter
     * stored after it is incremented. Otherwise a descriptor is taken from
     * skbuff_head_cache. The chosen descriptor is handed to __skb_clone().
     *
     * Returns the clone, or NULL when the cache allocation fails.
     */
    struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
    {
        struct sk_buff *companion = skb + 1;
        struct sk_buff *clone;

        if (skb->fclone == SKB_FCLONE_ORIG &&
            companion->fclone == SKB_FCLONE_UNAVAILABLE) {
            /* No allocation needed: claim the neighbouring descriptor. */
            atomic_t *fclone_ref = (atomic_t *) (companion + 1);

            companion->fclone = SKB_FCLONE_CLONE;
            atomic_inc(fclone_ref);
            clone = companion;
        } else {
            /* Take a fresh descriptor from the cache. */
            clone = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
            if (!clone)
                return NULL;

            kmemcheck_annotate_bitfield(clone, flags1);
            kmemcheck_annotate_bitfield(clone, flags2);
            /* A separately allocated descriptor is marked as not fast-clonable. */
            clone->fclone = SKB_FCLONE_UNAVAILABLE;
        }

        return __skb_clone(clone, skb);
    }

    只是复制sk_buff结构,并不复制skb的数据缓冲区。Clone后的sk_buff结构与原始的sk_buff指向同一数据缓冲区。原始的和clone后的skb描述符的cloned值都会被置1,clone的skb描述符的users值置1,同时数据缓冲区的引用计数dataref增加1
    skb_clone()操作的skb结构的数据缓冲区是不能被修改的

    pskb_copy

    /*
     * Copy skb into a newly allocated buffer: the linear data is copied,
     * while each page fragment is only referenced again (its descriptor is
     * copied and get_page() is called on its page).
     *
     * Returns the new skb, or NULL if the allocation fails.
     */
    struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
    {
        struct sk_buff *copy;
        int frag;

        /* Size the new buffer like the original head..end area. */
    #ifdef NET_SKBUFF_DATA_USES_OFFSET
        copy = alloc_skb(skb->end, gfp_mask);
    #else
        copy = alloc_skb(skb->end - skb->head, gfp_mask);
    #endif
        if (!copy)
            return NULL;

        /* Same headroom as the source, then room for the linear part. */
        skb_reserve(copy, skb->data - skb->head);
        skb_put(copy, skb_headlen(skb));

        /* Copy the linear data. */
        skb_copy_from_linear_data(skb, copy->data, copy->len);

        /* Account for the paged data that is about to be shared. */
        copy->truesize += skb->data_len;
        copy->data_len  = skb->data_len;
        copy->len       = skb->len;

        /* Fragments: copy the descriptors and take a page reference each. */
        if (skb_shinfo(skb)->nr_frags) {
            for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
                skb_shinfo(copy)->frags[frag] = skb_shinfo(skb)->frags[frag];
                get_page(skb_shinfo(copy)->frags[frag].page);
            }
            skb_shinfo(copy)->nr_frags = frag;
        }

        copy_skb_header(copy, skb);
        return copy;
    }

    pskb_copy()与skb_copy()都是重量级拷贝,除了sk_buff结构之外还会拷贝skb->data指向的线性数据区;区别在于pskb_copy()对分片(frags)只复制页指针并增加页的引用计数,而skb_copy()会把所有数据(包括分片数据)都拷贝到新的缓冲区中

    skb_copy

    /*
     * Make a full copy of skb: a new buffer large enough for the original
     * head..end area plus data_len is allocated, and headroom and all data
     * are copied into it with skb_copy_bits().
     *
     * Returns the new skb, or NULL if the allocation fails.
     */
    struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
    {
        const int headroom = skb->data - skb->head;
        unsigned int alloc_size;
        struct sk_buff *copy;

    #ifdef NET_SKBUFF_DATA_USES_OFFSET
        alloc_size = skb->end + skb->data_len;
    #else
        alloc_size = skb->end - skb->head + skb->data_len;
    #endif
        copy = alloc_skb(alloc_size, gfp_mask);
        if (!copy)
            return NULL;

        /* Position data after the same headroom, then open up len bytes. */
        skb_reserve(copy, headroom);
        skb_put(copy, skb->len);

        /* Copy headroom and data in one go, starting at the source's head. */
        if (skb_copy_bits(skb, -headroom, copy->head, headroom + skb->len))
            BUG();

        copy_skb_header(copy, skb);
        return copy;
    }

    kfree_skb

    /*
     * Drop one reference to skb and free it once no other reference remains.
     * A NULL skb is ignored.
     */
    void kfree_skb(struct sk_buff *skb)
    {
        if (unlikely(!skb))
            return;
        /* users == 1: skip the atomic decrement and fall through to the free. */
        if (likely(atomic_read(&skb->users) == 1))
            smp_rmb();
    // otherwise decrement users; if it did not reach zero the skb is still referenced, so just return
        else if (likely(!atomic_dec_and_test(&skb->users)))
            return;
        trace_kfree_skb(skb, __builtin_return_address(0));
        __kfree_skb(skb);
    }

    其他函数

    #ifdef NET_SKBUFF_DATA_USES_OFFSET
    /* Offset variant: the transport header lies transport_header bytes past skb->head. */
    static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
    {
        return &skb->head[skb->transport_header];
    }
    /* Offset variant: record the current data position as the transport header. */
    static inline void skb_reset_transport_header(struct sk_buff *skb)
    {
        const long from_head = skb->data - skb->head;

        skb->transport_header = from_head;
    }
    /* Offset variant: place the transport header offset bytes from the current data position. */
    static inline void skb_set_transport_header(struct sk_buff *skb,
             const int offset)
    {
        /* Start from the data position, then shift by the requested amount. */
        skb_reset_transport_header(skb);
        skb->transport_header = skb->transport_header + offset;
    }
    /* Offset variant: the network header lies network_header bytes past skb->head. */
    static inline unsigned char *skb_network_header(const struct sk_buff *skb)
    {
        return &skb->head[skb->network_header];
    }
    /* Offset variant: record the current data position as the network header. */
    static inline void skb_reset_network_header(struct sk_buff *skb)
    {
        const long from_head = skb->data - skb->head;

        skb->network_header = from_head;
    }
    /* Offset variant: place the network header offset bytes from the current data position. */
    static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
    {
        /* Start from the data position, then shift by the requested amount. */
        skb_reset_network_header(skb);
        skb->network_header = skb->network_header + offset;
    }
    /* Offset variant: the link-layer header lies mac_header bytes past skb->head. */
    static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
    {
        return &skb->head[skb->mac_header];
    }
    /* Offset variant: ~0U in mac_header means "not set"; returns 1 if set, 0 otherwise. */
    static inline int skb_mac_header_was_set(const struct sk_buff *skb)
    {
        if (skb->mac_header == ~0U)
            return 0;
        return 1;
    }
    /* Offset variant: record the current data position as the link-layer header. */
    static inline void skb_reset_mac_header(struct sk_buff *skb)
    {
        const long from_head = skb->data - skb->head;

        skb->mac_header = from_head;
    }
    /* Offset variant: place the link-layer header offset bytes from the current data position. */
    static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
    {
        /* Start from the data position, then shift by the requested amount. */
        skb_reset_mac_header(skb);
        skb->mac_header = skb->mac_header + offset;
    }
    #else /* NET_SKBUFF_DATA_USES_OFFSET */
    /* 不使用相对偏移的情况:各头部字段直接保存指针 */
    /* Pointer variant: transport_header already holds the header address. */
    static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
    {
        unsigned char *hdr = skb->transport_header;

        return hdr;
    }
    /* Pointer variant: make the transport header point at the current data position. */
    static inline void skb_reset_transport_header(struct sk_buff *skb)
    {
        unsigned char *start = skb->data;

        skb->transport_header = start;
    }
    /* Pointer variant: make the transport header point offset bytes from the current data position. */
    static inline void skb_set_transport_header(struct sk_buff *skb,
             const int offset)
    {
        skb->transport_header = &skb->data[offset];
    }
    /* Pointer variant: network_header already holds the header address. */
    static inline unsigned char *skb_network_header(const struct sk_buff *skb)
    {
        unsigned char *hdr = skb->network_header;

        return hdr;
    }
    /* Pointer variant: make the network header point at the current data position. */
    static inline void skb_reset_network_header(struct sk_buff *skb)
    {
        unsigned char *start = skb->data;

        skb->network_header = start;
    }
    /* Pointer variant: make the network header point offset bytes from the current data position. */
    static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
    {
        skb->network_header = &skb->data[offset];
    }
    /* Pointer variant: mac_header already holds the header address. */
    static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
    {
        unsigned char *hdr = skb->mac_header;

        return hdr;
    }
    /* Pointer variant: a NULL mac_header means "not set"; returns 1 if set, 0 otherwise. */
    static inline int skb_mac_header_was_set(const struct sk_buff *skb)
    {
        if (skb->mac_header == NULL)
            return 0;
        return 1;
    }
    /* Pointer variant: make the link-layer header point at the current data position. */
    static inline void skb_reset_mac_header(struct sk_buff *skb)
    {
        unsigned char *start = skb->data;

        skb->mac_header = start;
    }
    /* Pointer variant: make the link-layer header point offset bytes from the current data position. */
    static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
    {
        skb->mac_header = &skb->data[offset];
    }
    #endif /* NET_SKBUFF_DATA_USES_OFFSET */
    /* Return the skb's transport header viewed as a TCP header. */
    static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
    {
        unsigned char *raw = skb_transport_header(skb);

        return (struct tcphdr *)raw;
    }
    
    //获得TCP头的长度
    static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
    {
        return tcp_hdr(skb)->doff * 4;
    }
    
    //获取tcp option的长度
    static inline unsigned int tcp_optlen(const struct sk_buff *skb)
    {
        return (tcp_hdr(skb)->doff - 5) * 4;
    }
    
    /* Return the skb's network header viewed as an IP header. */
    static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
    {
        unsigned char *raw = skb_network_header(skb);

        return (struct iphdr *)raw;
    }
    
    /* Return the IPIP header: the skb's transport header viewed as an IP header. */
    static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
    {
        unsigned char *raw = skb_transport_header(skb);

        return (struct iphdr *)raw;
    }
    
    /* Return the skb's link-layer header viewed as an 802.3 header. */
    static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
    {
        unsigned char *raw = skb_mac_header(skb);

        return (struct ebt_802_3_hdr *)raw;
    }
    
    /* Return the skb's link-layer header viewed as an Ethernet header. */
    static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
    {
        unsigned char *raw = skb_mac_header(skb);

        return (struct ethhdr *)raw;
    }

    skb_copy_expand

    /*
     * Copy skb into a new buffer that has newheadroom bytes in front of the
     * data and newtailroom bytes behind it.
     *
     * skb:         buffer to copy
     * newheadroom: headroom of the new buffer
     * newtailroom: tailroom of the new buffer
     * gfp_mask:    allocation flags passed to alloc_skb()
     *
     * Returns the new skb, or NULL if the allocation fails.
     *
     * The body shows the 2.4 and the 2.6 copy step side by side. The 2.4
     * step copies only the data; the 2.6 step additionally carries over as
     * much of the old headroom as fits into the new one.
     */
    struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
                                    int newheadroom,
                                    int newtailroom,
                                    int gfp_mask)
    {
        struct sk_buff *n;
        /* Locals of the 2.6 copy step; the original listing used them undeclared. */
        int head_copy_len, head_copy_off;
    
        /*
         *        Allocate the copy buffer
         */
    
        n = alloc_skb(newheadroom + skb->len + newtailroom,
                      gfp_mask);
        if (n == NULL)
                return NULL;
    
        skb_reserve(n, newheadroom);
    
        /* Set the tail pointer and length */
        skb_put(n, skb->len);
        /* 2.4 code ---- start ---------*/
        /* Copy the data only. */
        if (skb_copy_bits(skb, 0, n->data, skb->len))
                BUG();
        /* 2.4 code ---- end ---------*/
        /* 2.6 code ---- start ---------*/
        /*
         * Copy at most newheadroom bytes of the old headroom. If the new
         * headroom is larger than the old one, the copy starts that much
         * further into the new buffer so the data still lands at n->data.
         */
        head_copy_len = skb_headroom(skb);
        head_copy_off = 0;
        if (newheadroom <= head_copy_len)
                head_copy_len = newheadroom;
        else
                head_copy_off = newheadroom - head_copy_len;
    
        /* Copy the linear header and data. */
        if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
                          skb->len + head_copy_len))
                BUG();
        /* 2.6 code ---- end ---------*/
    
        copy_skb_header(n, skb);
        return n;
    }

    改变skb的数据域

    先判断skb的tailroom,如果空间够大,则我们可以把需要添加的数据放在skb的tailroom里。如果tailroom不够大,则需要调用skb_copy_expand函数来扩充tailroom或者headroom

    if(skb_tailroom(skb) < 16)  
    {  
        nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + 16, GFP_ATOMIC);  
        if(!nskb)  
        {  
            printk("low memory..../n");          
            dev_kfree_skb(skb);  
            return -1;  
        }  
    
        else  
        {  
            kfree_skb(skb);
            skb = nskb;  
        }  
    
        memcpy(skb_put(skb, 16), data, 16); //拷贝数据到skb tail  
    }
  • 相关阅读:
    2017ccpc全国邀请赛(湖南湘潭) E. Partial Sum
    Codeforces Round #412 C. Success Rate (rated, Div. 2, base on VK Cup 2017 Round 3)
    2017 中国大学生程序设计竞赛 女生专场 Building Shops (hdu6024)
    51nod 1084 矩阵取数问题 V2
    Power收集
    红色的幻想乡
    Koishi Loves Segments
    Wood Processing
    整数对
    Room and Moor
  • 原文地址:https://www.cnblogs.com/zhangxuechao/p/11709834.html
Copyright © 2011-2022 走看看