zoukankan      html  css  js  c++  java
  • IP输入 之 ip_rcv && ip_rcv_finish

    ip层收包流程概述:

    (1) 在inet_init中为ETH_P_IP类型的数据包注册了接收回调ip_rcv

    (2) 当二层数据包接收完毕,会调用netif_receive_skb根据协议进行向上层分发

    (3) 类型为ETH_P_IP的数据包被传递到三层,调用ip_rcv函数

    (4) ip_rcv完成基本的校验和处理工作后,经过PRE_ROUTING钩子点

    (5) 经过PRE_ROUTING钩子点之后,调用ip_rcv_finish完成数据包接收,包括选项处理和路由查询,并根据路由结果决定数据包是发往本机还是转发

    以下为源码分析:

    1 static struct packet_type ip_packet_type __read_mostly = { /* packet-type entry that ties IPv4 frames to ip_rcv */
    2     .type = cpu_to_be16(ETH_P_IP), /* protocol id ETH_P_IP, converted with cpu_to_be16 */
    3     .func = ip_rcv, /* receive callback for matching packets */
    4 };
      1 /*
      2  *     Main IP Receive routine (the .func handler of ip_packet_type). Validates the IPv4 header (length, version, checksum, total length), trims the skb to the datagram length and hands it to the NF_INET_PRE_ROUTING hook together with ip_rcv_finish. Every failure path returns NET_RX_DROP.
      3  */
      4 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
      5 {
      6     const struct iphdr *iph;
      7     struct net *net;
      8     u32 len;
      9 
     10     /* When the interface is in promisc. mode, drop all the crap
     11      * that it receives, do not try to analyse it.
     12      */
     13     /* Frame addressed to another host (seen in promiscuous mode) */
     14     if (skb->pkt_type == PACKET_OTHERHOST)
     15         goto drop;
     16 
     17 
     18     /* Get the struct net for the receiving device */
     19     net = dev_net(dev);
     20     __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
     21 
     22     /* Shared-skb check; a NULL result is counted as a discard */
     23     skb = skb_share_check(skb, GFP_ATOMIC);
     24     if (!skb) {
     25         __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
     26         goto out; /* skb is NULL here, so the kfree_skb under "drop" is skipped */
     27     }
     28 
     29     /* Make sure a minimal IP header (sizeof(struct iphdr)) can be pulled */
     30     if (!pskb_may_pull(skb, sizeof(struct iphdr)))
     31         goto inhdr_error;
     32 
     33     /* Fetch the IP header */
     34     iph = ip_hdr(skb);
     35 
     36     /*
     37      *    RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
     38      *
     39      *    Is the datagram acceptable?
     40      *
     41      *    1.    Length at least the size of an ip header
     42      *    2.    Version of 4
     43      *    3.    Checksums correctly. [Speed optimisation for later, skip loopback checksums]
     44      *    4.    Doesn't have a bogus length
     45      */
     46 
     47     /* Header shorter than 20 bytes (ihl < 5) or version is not 4 */
     48     if (iph->ihl < 5 || iph->version != 4)
     49         goto inhdr_error;
     50 
     51     BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
     52     BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
     53     BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
     54     __IP_ADD_STATS(net,
     55                IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
     56                max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
     57 
     58     /* Make sure the full header (ihl*4 bytes, options included) can be pulled */
     59     if (!pskb_may_pull(skb, iph->ihl*4))
     60         goto inhdr_error;
     61 
     62     /* Fetch the IP header again after the second pull */
     63     iph = ip_hdr(skb);
     64 
     65     /* Header checksum does not verify */
     66     if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
     67         goto csum_error;
     68 
     69     /* Total length from the header, converted to host byte order */
     70     len = ntohs(iph->tot_len);
     71 
     72     /* skb holds fewer bytes than the header's total length claims */
     73     if (skb->len < len) {
     74         __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
     75         goto drop;
     76     } 
     77     /* Total length is smaller than the header length itself */
     78     else if (len < (iph->ihl*4))
     79         goto inhdr_error;
     80 
     81     /* Our transport medium may have padded the buffer out. Now we know it
     82      * is IP we can trim to the true length of the frame.
     83      * Note this now means skb->len holds ntohs(iph->tot_len).
     84      */
     85     /* Trim the skb to the IP datagram length; a failure is counted as a discard */
     86     if (pskb_trim_rcsum(skb, len)) {
     87         __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
     88         goto drop;
     89     }
     90 
     91     /* Transport header starts right after the IP header (ihl*4 bytes) */
     92     skb->transport_header = skb->network_header + iph->ihl*4;
     93 
     94     /* Remove any debris in the socket control block */
     95     /* Zero the IP control block */
     96     memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
     97 
     98     /* Save the input interface (skb_iif) in the control block */
     99     IPCB(skb)->iif = skb->skb_iif;
    100 
    101     /* Must drop socket now because of tproxy. */
    102     skb_orphan(skb);
    103 
    104     /* Traverse the PRE_ROUTING hook; ip_rcv_finish is passed as the function that completes reception */
    105     return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
    106                net, NULL, skb, dev, NULL,
    107                ip_rcv_finish);
    108 
    109 csum_error:
    110     __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); /* falls through: a checksum error is also counted as a header error */
    111 inhdr_error:
    112     __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
    113 drop:
    114     kfree_skb(skb);
    115 out:
    116     return NET_RX_DROP;
    117 }
      /*
       * ip_rcv_finish: completes IPv4 reception after the PRE_ROUTING hook.
       * Optionally runs the upper-layer protocol's early_demux handler, makes
       * sure the skb has a valid dst (performing an input route lookup when it
       * does not), processes IP options, updates multicast/broadcast counters
       * and finally calls dst_input().
       * Returns NET_RX_SUCCESS when l3mdev_ip_rcv() returns NULL, NET_RX_DROP
       * when the skb is freed here, otherwise the result of dst_input().
       */
      1 static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
      2 {
      3     const struct iphdr *iph = ip_hdr(skb);
      4     struct rtable *rt;
      5     struct net_device *dev = skb->dev;
      6     void (*edemux)(struct sk_buff *skb);
      7 
      8     /* if ingress device is enslaved to an L3 master device pass the
      9      * skb to its handler for processing
     10      */
     11     skb = l3mdev_ip_rcv(skb);
     12     if (!skb)
     13         return NET_RX_SUCCESS;
     14 
     15     /* Attempt early demux only when all of these hold:
     16         early_demux is enabled (sysctl_ip_early_demux)
     17         the skb has no dst (route) attached yet
     18         the skb has no socket attached
     19         the packet is not a fragment
     20     */
     21     if (net->ipv4.sysctl_ip_early_demux &&
     22         !skb_dst(skb) &&
     23         !skb->sk &&
     24         !ip_is_fragment(iph)) {
     25         const struct net_protocol *ipprot;
     26 
     27         /* Upper-layer protocol number from the IP header */
     28         int protocol = iph->protocol;
     29 
     30         /* Look up that protocol's net_protocol entry */
     31         ipprot = rcu_dereference(inet_protos[protocol]);
     32 
     33         /* Use the protocol's early_demux handler if it has one (e.g. tcp_v4_early_demux) */
     34         if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
     35 
     36             /* Call it; expected to cache route information in skb->refdst (not visible in this function) */
     37             edemux(skb);
     38             /* must reload iph, skb->head might have changed */
     39             /* Re-fetch the IP header */
     40             iph = ip_hdr(skb);
     41         }
     42     }
     43 
     44     /*
     45      *    Initialise the virtual path cache for the packet. It describes
     46      *    how the packet travels inside Linux networking.
     47      */
     48     /* The skb has no valid dst attached */
     49     if (!skb_valid_dst(skb)) {
     50         /* Do the input route lookup; any error drops the packet, -EXDEV is additionally counted in LINUX_MIB_IPRPFILTER */
     51         int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
     52                            iph->tos, dev);
     53         if (unlikely(err)) {
     54             if (err == -EXDEV)
     55                 __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
     56             goto drop;
     57         }
     58     }
     59 
     60 #ifdef CONFIG_IP_ROUTE_CLASSID
     61     if (unlikely(skb_dst(skb)->tclassid)) { /* per-CPU ip_rt_acct accounting keyed by the dst's tclassid */
     62         struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
     63         u32 idx = skb_dst(skb)->tclassid;
     64         st[idx&0xFF].o_packets++; /* low byte of tclassid indexes the o_* counters */
     65         st[idx&0xFF].o_bytes += skb->len;
     66         st[(idx>>16)&0xFF].i_packets++; /* bits 16-23 of tclassid index the i_* counters */
     67         st[(idx>>16)&0xFF].i_bytes += skb->len;
     68     }
     69 #endif
     70 
     71     /* Process IP options when the header is longer than 20 bytes (ihl > 5) */
     72     if (iph->ihl > 5 && ip_rcv_options(skb))
     73         goto drop;
     74 
     75     /* Get the route entry attached to the skb */
     76     rt = skb_rtable(skb);
     77     if (rt->rt_type == RTN_MULTICAST) {
     78         __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
     79     } else if (rt->rt_type == RTN_BROADCAST) {
     80         __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
     81     } else if (skb->pkt_type == PACKET_BROADCAST ||
     82            skb->pkt_type == PACKET_MULTICAST) {
     83         struct in_device *in_dev = __in_dev_get_rcu(dev);
     84 
     85         /* RFC 1122 3.3.6:
     86          *
     87          *   When a host sends a datagram to a link-layer broadcast
     88          *   address, the IP destination address MUST be a legal IP
     89          *   broadcast or IP multicast address.
     90          *
     91          *   A host SHOULD silently discard a datagram that is received
     92          *   via a link-layer broadcast (see Section 2.4) but does not
     93          *   specify an IP multicast or broadcast destination address.
     94          *
     95          * This doesn't explicitly say L2 *broadcast*, but broadcast is
     96          * in a way a form of multicast and the most common use case for
     97          * this is 802.11 protecting against cross-station spoofing (the
     98          * so-called "hole-196" attack) so do it for both.
     99          */
    100         if (in_dev &&
    101             IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
    102             goto drop;
    103     }
    104 
    105     /* Invoke the dst's input handler, which may be e.g. ip_local_deliver or ip_forward */
    106     return dst_input(skb);
    107 
    108 drop:
    109     kfree_skb(skb);
    110     return NET_RX_DROP;
    111 }
  • 相关阅读:
    AO-XXXX
    最基础的rpm命令
    yum插件
    adjtimex修改tick值用法举例
    [工具]iostat
    chrony配置介绍
    CentOS / RHEL 7 : Chrony V/s NTP (Differences Between ntpd and chronyd)
    NTP测试1
    shell使用eval进行赋值bc计算,bad substitution
    Free中的buffer和cache理解
  • 原文地址:https://www.cnblogs.com/wanpengcoder/p/7577398.html
Copyright © 2011-2022 走看看