zoukankan      html  css  js  c++  java
  • TCP连接建立系列 — 服务端接收SYN段

    本文主要分析:服务器端接收到SYN包时的处理路径。

    内核版本:3.6

    Author:zhangskd @ csdn blog

    接收入口

    1. 状态为ESTABLISHED时,用tcp_rcv_established()接收处理。

    2. 状态为LISTEN时,说明这个sock处于监听状态,用于被动打开的接收处理,包括SYN和ACK。

    3. 当状态不为ESTABLISHED或TIME_WAIT时,用tcp_rcv_state_process()处理。

    /*
     * tcp_v4_do_rcv - dispatch a received IPv4 TCP segment according to socket state.
     * @sk:  socket the segment belongs to
     * @skb: the received segment
     *
     * ESTABLISHED sockets are handled by tcp_rcv_established(). For a LISTEN
     * socket tcp_v4_hnd_req() is consulted first; if it yields a different sock
     * the segment is passed to tcp_child_process(). Every remaining case goes to
     * tcp_rcv_state_process(). When one of those handlers returns non-zero a RST
     * is sent and the skb is freed.
     *
     * Always returns 0.
     */
    int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
    {
        struct sock *rsk;

    #ifdef CONFIG_TCP_MD5SIG
        /* We really want to reject the packet as early as possible if :
         * We're expecting an MD5'd packet and this is no MD5 tcp option.
         * There is an MD5 option and we're not expecting one.
         */
        if (tcp_v4_inbound_md5_hash(sk, skb))
            goto discard;
    #endif

        /* ESTABLISHED state: receive through tcp_rcv_established(). */
        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
            struct dst_entry *dst = sk->sk_rx_dst;

            sock_rps_save_rxhash(sk, skb);

            /* Drop the cached input route if it no longer matches this skb. */
            if (dst) {
                if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
                    dst->ops->check(dst, 0) == NULL) {
                    dst_release(dst);
                    sk->sk_rx_dst = NULL;
                }
            }

            /* Processing path for an already established connection. */
            if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
                rsk = sk;
                goto reset;
            }
            return 0;
        }

        /* Validate the segment length and checksum. */
        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
            goto csum_err;

        /* Listening socket: passive-open handling, covers both SYN and ACK. */
        if (sk->sk_state == TCP_LISTEN) {
            /* Result of tcp_v4_hnd_req():
             *   NULL       - error
             *   nsk == sk  - a SYN was received
             *   nsk != sk  - an ACK was received
             */
            struct sock *nsk = tcp_v4_hnd_req(sk, skb);

            if (!nsk)
                goto discard;

            if (nsk != sk) { /* an ACK was received */
                sock_rps_save_rxhash(nsk, skb);

                if (tcp_child_process(sk, nsk, skb)) { /* process the new sock */
                    rsk = nsk;
                    goto reset;
                }
                return 0;
            }
        } else {
            /* Same helper as the two paths above; the excerpt's
             * "sock_rps_save_rx" is not the function used anywhere else here.
             */
            sock_rps_save_rxhash(sk, skb);
        }

        /* Handles every state except ESTABLISHED and TIME_WAIT. */
        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
            rsk = sk;
            goto reset;
        }
        return 0;

    reset:
        tcp_v4_send_reset(rsk, skb); /* send a RST */

    discard:
        kfree_skb(skb);
        return 0;

    csum_err:
        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
        goto discard;
    }
    

    当收到客户端发送的SYN包时,会进入tcp_rcv_state_process()进行处理。

    /*
     * This function implements the receiving procedure of RFC 793 for all states except
     * ESTABLISHED and TIME_WAIT.
     * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be address independent.
     */
    
    int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, 
                  unsigned int len)
    {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        int queued = 0;
    
        tp->rx_opt.saw_tstamp = 0;
    
        switch(sk->sk_state) {
            case TCP_CLOSE:
                goto discard;
    
            case TCP_LISTEN:
                /* A SYN gets past this check, an ACK does not.
                 * So an ACK sent straight to a listening server is answered with a RST
                 * (except when SYN cookies are in use).
                 * Returning non-zero makes tcp_v4_do_rcv() send the RST.
                 */
                if (th->ack) 
                    return 1;
    
                if (th->rst)
                    goto discard;
    
                if (th->syn) {
                    if (th->fin)
                        goto discard;
    
                    /* For IPv4 the ops table is ipv4_specific, so tcp_v4_conn_request() handles the received SYN. */
                    if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
                        return 1;
    
                    /* Now we have several options: In theory there is nothing else in the frame.
                     * KA9Q has an option to send data with the syn, BSD accepts data with the syn up to
                     * the [to be] advertised window and Solaris 2.1 gives you a protocol error. For now
                     * we just ignore it, that fits the spec precisely and avoids incompatibilities. It would
                     * be nice in future to drop through and process the data.
                     *
                     * Now that TTCP is starting to be used we ought to queue this data.
                     * But, this leaves one open to an easy denial of service attack, and SYN cookies can't
                     * defend against this problem. So, we drop the data in the interest of security over
                     * speed unless it's still in use.
                     */
                     /* The comment above discusses SYN segments that carry data. */
    
                     kfree_skb(skb);
                     return 0;
                }
    
                goto discard;
            ...
        }
        ...
    /* NOTE(review): the closing brace after __kfree_skb() has no visible opener in this
     * excerpt; presumably it closes a conditional hidden by the elision above - confirm
     * against the full source.
     */
    discard:
            __kfree_skb(skb);
        }
        return 0;
    }
    

    处理SYN包

    SYN包的处理是地址族相关的,我们要研究的是IPv4。

    /*
     * Pointers to address related TCP functions
     * (i.e. things that depend on the address family)
     */
    struct inet_connection_sock_af_ops {
        ...
        /* Handler for an incoming connection request on a listening socket;
         * tcp_rcv_state_process() treats a negative return value as failure.
         */
        int (*conn_request) (struct sock *sk, struct sk_buff *skb);
        ...
    };
    
    /* Address-family operations used for TCP over IPv4. */
    const struct inet_connection_sock_af_ops ipv4_specific = {
        ...
        .conn_request = tcp_v4_conn_request, /* handles a received SYN for IPv4 */
        ...
    };

    服务器端处理接收到的SYN包。

    /*
     * tcp_v4_conn_request - handle a SYN received on a listening IPv4 socket.
     * @sk:  the listening socket
     * @skb: the received SYN segment
     *
     * Allocates a request_sock, fills it in from the SYN, sends a SYN-ACK and,
     * unless a SYN cookie was used, links the request into the listener's SYN
     * queue. A SYN that cannot be accepted is dropped without further notice
     * to the caller.
     *
     * Always returns 0.
     */
    int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
    {
        struct tcp_extend_values tmp_ext;
        struct tcp_options_received tmp_opt;
        const u8 *hash_location;
        struct request_sock *req;
        struct inet_request_sock *ireq;
        struct tcp_sock *tp = tcp_sk(sk);
        struct dst_entry *dst = NULL;
        __be32 saddr = ip_hdr(skb)->saddr;
        __be32 daddr = ip_hdr(skb)->daddr;
        __u32 isn = TCP_SKB_CB(skb)->when;
        bool want_cookie = false;

        /* Never answer to SYNs send to broadcast or multicast. */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
            goto drop;

        /* The SYN queue is full.
         * (The 'when' field read into isn above is zeroed in tcp_v4_rcv().)
         */
        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
            /* Decide between dropping outright and answering with a SYN cookie. */
            want_cookie = tcp_syn_flood_action(sk, skb, "TCP");

            if (!want_cookie)
                goto drop; /* SYN cookies not permitted: drop */
        }

        /* Accept backlog is full. If we have already queued enough of warm entries in
         * syn queue, drop request. It is better than clogging syn queue with openreqs with
         * exponentially increasing timeout.
         */
        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
            goto drop;

        /* Allocate a request_sock whose operations are tcp_request_sock_ops. */
        req = inet_reqsk_alloc(&tcp_request_sock_ops);
        if (!req)
            goto drop;

    #ifdef CONFIG_TCP_MD5SIG
        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
    #endif

        tcp_clear_options(&tmp_opt);                 /* reset the parsed TCP options */
        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;         /* default MSS (536 per the article) */
        tmp_opt.user_mss = tp->rx_opt.user_mss;      /* mss requested by user in ioctl */
        tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); /* parse and store all TCP options */

        /* Note: this part implements the TCP Cookie Transactions (TCPCT) option.
         * TCPCT was added in 2009, is similar in purpose to SYN cookies, and was
         * removed from the kernel in March 2013.
         */
        if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && !tp->rx_opt.cookie_out_never &&
            (sysctl_tcp_cookie_size > 0 || (tp->cookie_values != NULL &&
              tp->cookie_values->cookie_desired > 0))) {
            u8 *c;
            u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
            int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; /* cookie length */

            if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
                goto drop_and_release;

            /* Secret recipe starts with IP addresses */
            *mess++ ^= (__force u32) daddr;
            *mess++ ^= (__force u32) saddr;

            /* plus variable length Initiator Cookie */
            c = (u8 *) mess;
            while (l-- > 0)
                *c++ ^= *hash_location++;

            want_cookie = false; /* not our kind of cookie */
            tmp_ext.cookie_out_never = 0; /* false */
            tmp_ext.cookie_plus = tmp_opt.cookie_plus;
        } else if (!tp->rx_opt.cookie_in_always) {
            /* redundant indications, but ensure initialization. */
            tmp_ext.cookie_out_never = 1; /* true */
            tmp_ext.cookie_plus = 0;
        } else {
            goto drop_and_release;
        }

        tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
        /* Code above have already been removed in mainstream. */

        /* SYN cookie in use and the peer sent no TIMESTAMP option: forget the options. */
        if (want_cookie && !tmp_opt.saw_tstamp)
            tcp_clear_options(&tmp_opt);

        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

        /* Initialise the request block and record the connection details. */
        tcp_openreq_init(req, &tmp_opt, skb);
        ireq = inet_rsk(req); /* must point at the new request before its fields are written */
        ireq->loc_addr = daddr; /* local IP address */
        ireq->rmt_addr = saddr; /* peer IP address */
        ireq->no_srccheck = inet_sk(sk)->transparent;
        ireq->opt = tcp_v4_save_options(sk, skb); /* keep the IP options */

        if (security_inet_conn_request(sk, skb, req)) /* security-module hook (SELinux per the article) */
            goto drop_and_free;

        /* No SYN cookie, or the TIMESTAMP option is in use. */
        if (!want_cookie || tmp_opt.tstamp_ok)
            TCP_ECN_create_request(req, skb); /* decide whether the connection uses ECN */

        if (want_cookie) {
            isn = cookie_v4_init_sequence(sk, skb, &req->mss); /* compute the cookie value */
            req->cookie_ts = tmp_opt.tstamp_ok;
        } else if (!isn) {
            struct flowi4 fl4;

            /* VJ's idea. We save last timestamp seen from destination in peer table,
             * when entering state TIME-WAIT, and check against it before accepting new
             * connection request.
             * If isn is not zero, this request hit alive timewait bucket, so that all the necessary
             * checks are made in the function processing timewait state.
             */
            /* TIME-WAIT check: decide whether the request must be rejected by PAWS. */
            if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle &&
                (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && fl4.daddr == saddr) {
                if (!tcp_peer_is_proven(req, dst, true)) {
                    NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
                    goto drop_and_release;
                }
            } else if (!sysctl_tcp_syncookies &&
                       (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                        (sysctl_max_syn_backlog >> 2)) &&
                       !tcp_peer_is_proven(req, dst, false)) {
                /* Without syncookies last quarter of backlog is filled with destinations, proven to be alive.
                 * It means that we continue to communicate to destinations, already remembered to the
                 * moment of synflood.
                 */
                LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
                               &saddr, ntohs(tcp_hdr(skb)->source));
                /* The request really is dropped here, as the message says. */
                goto drop_and_release;
            }

            isn = tcp_v4_init_sequence(skb); /* our initial sequence number */
        }

        tcp_rsk(req)->snt_isn = isn;               /* remember our initial sequence number */
        tcp_rsk(req)->snt_synack = tcp_time_stamp; /* time the SYN-ACK was sent */

        /* Send the SYN-ACK. With a SYN cookie the request is never linked into
         * the SYN queue, so it is freed whether or not the send succeeded.
         */
        if (tcp_v4_send_synack(sk, dst, req, (struct request_values *)&tmp_ext,
                  skb_get_queue_mapping(skb), want_cookie) || want_cookie)
            goto drop_and_free;

        /* Link the request into the SYN queue with its timeout and start the timer. */
        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
        return 0;

    drop_and_release:
        dst_release(dst);

    drop_and_free:
        reqsk_free(req);

    drop:
        return 0;
    }
    

    队列长度

    判断半连接队列是否满了。

    /* Non-zero when the request queue of listening socket @sk is full. */
    static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
    {
        const int full = reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);

        return full;
    }
    

    半连接队列的最大长度为:2^max_qlen_log。

    /*
     * Non-zero once qlen has reached 2^max_qlen_log: shifting qlen right by
     * max_qlen_log leaves zero for every smaller value.
     */
    static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
    {
        const int overflow = queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;

        return overflow;
    }

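    判断全连接队列是否满了。全连接队列的长度上限由sk->sk_max_ack_backlog决定;注意判断条件是sk_ack_backlog > sk_max_ack_backlog,所以只有当队列长度超过sk_max_ack_backlog时才算满,队列中实际最多可以有sk_max_ack_backlog + 1个连接。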

    /* True when the accept backlog of @sk exceeds sk_max_ack_backlog. */
    static inline bool sk_acceptq_is_full(const struct sock *sk)
    {
        const bool over_limit = sk->sk_ack_backlog > sk->sk_max_ack_backlog;

        return over_limit;
    }
    

    获取未重传过SYNACK的半连接个数。

    /* Count of "young" entries in the request queue of listening socket @sk. */
    static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
    {
        const int young = reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);

        return young;
    }
    
    static inline int reqsk_queue_len_young(const struct requst_sock_queue *queue)
    {
        return queue->listen_opt->qlen_young;
    } 

    初始序列号

    根据源IP、目的IP、源端口、目的端口,连同随机密钥net_secret一起做MD5运算,再叠加一个随时间递增的偏移量(见seq_scale()),计算出本端的初始序列号isn。

    static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
    {
        return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, 
                                tcp_hdr(skb)->dest, tcp_hdr(skb)->source);
    }
    
    /*
     * secure_tcp_sequence_number - derive an initial sequence number for a connection.
     * @saddr: source address
     * @daddr: destination address
     * @sport: source port
     * @dport: destination port
     *
     * Packs the addresses, the two ports and one word of net_secret into hash[],
     * runs md5_transform() over it with net_secret as input, and returns the
     * first hash word after seq_scale() has been applied.
     */
    __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport)
    {
        u32 hash[MD5_DIGEST_WORDS];

        hash[0] = (__force u32) saddr;
        hash[1] = (__force u32) daddr;
        /* Widen to u32 before shifting: a u16 is promoted to signed int, and
         * shifting a port >= 0x8000 left by 16 would overflow it.
         */
        hash[2] = ((__force u32) sport << 16) + (__force u16) dport;
        hash[3] = net_secret[15]; /* last word of the random secret */

        md5_transform(hash, net_secret); /* MD5 transform */

        return seq_scale(hash[0]);
    }
    /* Number of u32 words in the hash[] array used by secure_tcp_sequence_number(). */
    #define MD5_DIGEST_WORDS 4
    /* Size in bytes used to dimension net_secret below. */
    #define MD5_MESSAGE_BYTES 64
    
    /* Secret of MD5_MESSAGE_BYTES / 4 = 16 words; net_secret_init() fills it with random bytes. */
    static u32 net_secret[MD5_MESSAGE_BYTES / 4] __cacheline_aligned;
    
    /* Fill the whole net_secret array with random bytes. Always returns 0. */
    static int __init net_secret_init(void)
    {
        get_random_bytes(&net_secret[0], sizeof net_secret);

        return 0;
    }
    
    /*
     * This function is the exported kernel interface.
     * It returns some number of good random numbers, suitable for key generation,
     * seeding TCP sequence numbers, etc. It does not use the hw random number
     * generator, if available; use get_random_bytes_arch() for that.
     *
     * NOTE: the implementation is elided in this excerpt. The empty body below
     * is only a placeholder and leaves @buf untouched.
     */
    void get_random_bytes(void *buf, int bytes)
    {
    }
    
    /*
     * Offset @seq by the value of ktime_get_real() converted with ktime_to_ns()
     * and shifted right by 6 (i.e. divided by 64), then return the result.
     */
    static u32 seq_scale(u32 seq)
    {
        seq += ktime_to_ns(ktime_get_real()) >> 6;

        return seq;
    }

    最终使用MD5。

    Message Digest Algorithm 5,消息摘要算法第五版。是一种散列函数,用于提供消息的完整性保护。

    除了MD5外,比较著名的还有SHA1。

    /* Body elided in this excerpt: this placeholder performs no transformation. */
    void md5_transform(__u32 *hash, __u32 const *in) {}

  • 相关阅读:
    设置MYSQL允许用IP访问
    EasyUI中那些不容易被发现的坑——EasyUI重复请求2次的问题
    Oracle初级性能优化总结
    Asp.Net MVC3.0网站统计登录认证的在线人数
    App.config和Web.config配置文件的配置节点的解析
    App.config和Web.config配置文件的自定义配置节点
    Asp.Net Web API 2第十八课——Working with Entity Relations in OData
    win7凭据管理、win7多用户远程登录、主机头设置、nuget.org无法访问
    Asp.Net Web API 2第十七课——Creating an OData Endpoint in ASP.NET Web API 2(OData终结点)
    C#基础知识系列八(const和readonly关键字)
  • 原文地址:https://www.cnblogs.com/aiwz/p/6333308.html
Copyright © 2011-2022 走看看