zoukankan      html  css  js  c++  java
  • TCP连接建立系列 — 服务端接收ACK段(一)

    本文主要分析:三次握手中最后一个ACK段到达时,服务器端的处理路径。

    内核版本:3.6

    Author:zhangskd @ csdn blog

    函数路径

    以下是第三次握手时,服务端接收到ACK后的处理路径。

    接收入口

    1. 状态为ESTABLISHED时,用tcp_rcv_established()接收处理。

    2. 状态为LISTEN时,说明这个sock处于监听状态,用于被动打开的接收处理,包括SYN和ACK。

    3. 当状态不为ESTABLISHED或TIME_WAIT时,用tcp_rcv_state_process()处理。

    /*
     * Receive path for a TCP segment delivered to socket sk.
     *
     * Dispatch, as written below:
     *   - TCP_ESTABLISHED: fast path through tcp_rcv_established().
     *   - TCP_LISTEN: passive open; tcp_v4_hnd_req() handles both SYN and ACK.
     *   - remaining states handled here: tcp_rcv_state_process().
     *
     * Always returns 0. When a handler reports failure, a RST is sent through
     * tcp_v4_send_reset() and the skb is freed on the way out.
     */
    int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
    {
        struct sock *rsk;

    #ifdef CONFIG_TCP_MD5SIG
        /* We really want to reject the packet as early as possible if :
         * We're expecting an MD5'd packet and this is no MD5 tcp option.
         * There is an MD5 option and we're not expecting one.
         */
        if (tcp_v4_inbound_md5_hash(sk, skb))
            goto discard;
    #endif

        /* Established connection: receive through tcp_rcv_established(). */
        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
            struct dst_entry *dst = sk->sk_rx_dst;
            sock_rps_save_rxhash(sk, skb);

            /* Drop the cached rx dst when the input interface changed or
             * the dst no longer passes its check() callback.
             */
            if (dst) {
                if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, 0) == NULL) {
                    dst_release(dst);
                    sk->sk_rx_dst = NULL;
                }
            }

            /* Processing path for an already established connection. */
            if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
                rsk = sk;
                goto reset;
            }
            return 0;
        }

        /* Validate the segment length and checksum. */
        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
            goto csum_err;

        /* Listening socket: passive-open handling, for an incoming SYN or ACK. */
        if (sk->sk_state == TCP_LISTEN) {
            /* Return value of tcp_v4_hnd_req():
             * NULL       - error, drop the segment
             * nsk == sk  - a SYN was received
             * nsk != sk  - an ACK was received
             */
            struct sock *nsk = tcp_v4_hnd_req(sk, skb);

            if (! nsk)
                goto discard;

            if (nsk != sk) { /* an ACK was received */
                sock_rps_save_rxhash(nsk, skb);

                if (tcp_child_process(sk, nsk, skb)) { /* process the new sock */
                    rsk = nsk;
                    goto reset;
                }
                return 0;
            }
        } else {
            /* Same helper as the two calls above; the original text named a
             * non-matching "sock_rps_save_rx" here.
             */
            sock_rps_save_rxhash(sk, skb);
        }

        /* Handle every state other than ESTABLISHED and TIME_WAIT. */
        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
            rsk = sk;
            goto reset;
        }
        return 0;

    reset:
        tcp_v4_send_reset(rsk, skb); /* send a RST segment */

    discard:
        kfree_skb(skb);
        return 0;

    csum_err:
        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
        goto discard;
    }
    

    收到SYN段后,服务器端会分配一个连接请求块,并初始化这个连接请求块。

    构造和发送SYNACK段。

    然后把这个连接请求块链入半连接队列中,启动超时定时器。

    之后如果再收到ACK,就能完成三次握手了。

    /*
     * Passive-open demultiplexing for a segment received on a listening socket.
     *
     * Lookup order:
     *   1. the listener's SYN (half-open) queue - a hit means this may be the
     *      final ACK of the three-way handshake and is handed to tcp_check_req();
     *   2. the established hash table;
     *   3. with CONFIG_SYN_COOKIES, cookie validation for non-SYN segments.
     *
     * Returns NULL, the listener sk itself, or a different sock (see the callers
     * for how each case is interpreted).
     */
    static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
    {
        const struct iphdr *iph = ip_hdr(skb);
        struct tcphdr *th = tcp_hdr(skb);
        struct request_sock **prev;
        struct request_sock *req;
        struct sock *nsk;

        /* A matching entry in the SYN queue: let tcp_check_req() validate
         * the segment as the last ACK of the handshake.
         */
        req = inet_csk_search_req(sk, &prev, th->source, iph->saddr, iph->daddr);
        if (req != NULL)
            return tcp_check_req(sk, skb, req, prev);

        /* Nothing in the SYN queue: try the established hash table. */
        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source,
                  iph->daddr, th->dest, inet_iif(skb));

        if (nsk != NULL) {
            if (nsk->sk_state == TCP_TIME_WAIT) {
                /* TIME_WAIT entry: release the timewait sock and give up. */
                inet_twsk_put(inet_twsk(nsk));
                return NULL;
            }

            /* Found a sock that is not in TIME_WAIT: return it locked. */
            bh_lock_sock(nsk);
            return nsk;
        }

    #ifdef CONFIG_SYN_COOKIES
        /* With SYN cookies, a non-SYN segment has its cookie checked; a valid
         * cookie completes the handshake directly.
         */
        if (!th->syn)
            sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
    #endif

        return sk;
    }
    

    在表示半连接队列的哈希表中,寻找符合条件的连接请求块。

    /*
     * Search the listener's SYN (half-open) queue hash table for a connection
     * request matching the given remote port, remote address and local address.
     *
     * @sk:    listening socket whose accept queue is searched
     * @prevp: on a match, receives the address of the link that points at the
     *         returned request
     * @rport: remote port
     * @raddr: remote address
     * @laddr: local address
     *
     * Returns the matching request_sock, or NULL when the bucket holds no match
     * (in that case *prevp is not written).
     */
    struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp,
                    const __be16 rport, const __be32 raddr, const __be32 laddr)
    {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; /* SYN queue */
        struct request_sock *req, **prev;

        /* Hash to a bucket, then walk its chain looking for a matching request. */
        for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, lopt->nr_table_entries)];
             (req = *prev) != NULL; prev = &req->dl_next) {
            const struct inet_request_sock *ireq = inet_rsk(req);

            /* All three address fields are compared; the local address test
             * is an equality check, not an assignment.
             */
            if (ireq->rmt_port == rport && ireq->rmt_addr == raddr && ireq->loc_addr == laddr
                && AF_INET_FAMILY(req->rsk_ops->family)) {
                WARN_ON(req->sk); /* connection not established yet, so sk should be NULL */
                *prevp = prev; /* hand back the pointer to the link holding req */
                break;
            }
        }

        return req;
    }
    

    第三次握手

    inet_csk_search_req()在半连接队列中查找是否已有符合的连接请求块,如果有,则说明这可能是三次握手的最后一个ACK。

    接着调用tcp_check_req()来进行验证,如果合法,则完成三次握手。

    /*
     * Process an incoming packet for SYN_RECV sockets represented as a request_sock.
     *
     * @sk:   listening socket
     * @skb:  incoming segment
     * @req:  matching connection request found in the SYN queue
     * @prev: link pointing at req, used when the request is unlinked or dropped
     *
     * Returns the newly created child sock when the handshake completes, sk itself
     * when the segment carries ACK with a wrong acknowledgment number, and NULL in
     * every other case.
     */
    struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req,
        struct request_sock **prev)
    {
        struct tcp_options_received tmp_opt;
        const u8 *hash_location;
        struct sock *child;
        const struct tcphdr *th = tcp_hdr(skb);
        __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_ACK);
        bool paws_reject = false;
    
        tmp_opt.saw_tstamp = 0;
    
        /* The segment carries TCP options (header longer than a bare tcphdr) */
        if (th->doff > (sizeof(struct tcphdr) >> 2)) {
            tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); /* parse the TCP options into tmp_opt */
            if (tmp_opt.saw_tstamp) {
                tmp_opt.ts_recent = req->ts_recent; /* timestamp stored in the request (per the original notes: from the client's SYN) */
    
                /* We do not store true stamp, but it is not required,
                 * it can be estimated (approximately) from another data.
                 */
                tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ) << req->retrans);
                paws_reject = tcp_paws_reject(&tmp_opt, th->rst); /* PAWS check of the client's timestamp */
            }
        }
    
        /* Check for pure retransmitted SYN. */
        if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn && flg == TCP_FLAG_SYN
            && ! paws_reject) {
            /* Resend the SYNACK.
             * The ops instance is tcp_request_sock_ops, so this calls tcp_v4_rtx_synack().
             */
            req->rsk_ops->rtx_syn_ack(sk, req, NULL);
            return NULL;
        }
     
    
        /* If the segment has the ACK flag but a wrong acknowledgment number,
         * return the listening sock. Per the original notes, tcp_v4_do_rcv()
         * then ends up sending a RST.
         */
        if ((flg & TCP_FLAG_ACK) && (TCP_SKB_CB(skb)->ack_seq != 
             tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
            return sk;
    
        /* PAWS rejected the segment, or its sequence range is outside the receive window */
        if (paws_reject || ! tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
              tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
            /* Out of window: send ACK and drop. */
            if (! (flg & TCP_FLAG_RST))
                /* Send an ACK.
                 * The ops instance is tcp_request_sock_ops, so this calls tcp_v4_reqsk_send_ack().
                 */
                req->rsk_ops->send_ack(sk, skb, req);
    
            if (paws_reject)
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
    
            return NULL;
        }
    
        /* In sequence, PAWS is ok. */
        if (tmp_opt.saw_tstamp && ! after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
            req->ts_recent = tmp_opt.rcv_tsval; /* remember this segment's timestamp */
    
        if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
            /* Truncate SYN, it is out of window starting at tcp_rsk(req)->rcv_isn + 1 */
            flg &= ~TCP_FLAG_SYN;
        }
    
        if (flg & (TCP_FLAG_RST | TCP_FLAG_SYN)) {
            TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
            goto embryonic_reset;
        }
    
        /* ACK sequence verified above, just make sure ACK is set.
         * If ACK not set, just silently drop the packet.
         */
        if (! (flg & TCP_FLAG_ACK))
            return NULL;
    
        /* With TCP_DEFER_ACCEPT set, a bare ACK is not accepted: the connection
         * is only created once a segment carrying payload arrives, so the bare
         * ACK is dropped here (the request is just marked as acked).
         */
        if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
            TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
            inet_rsk(req)->acked = 1;
            NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
            return NULL;
        }
    
        if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
            tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
        else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
            tcp_rsk(req)->snt_synack = 0;
    
        /* OK, ACK is valid, create big socket and feed this segment to it.
         * This segment must move socket to established state. If it will be dropped
         * after socket is created, wait for troubles.
         */
        /* Handshake complete: per the original notes, syn_recv_sock resolves to
         * tcp_v4_syn_recv_sock(), which creates and initializes the new sock.
         */
        child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
    
        if (child == NULL)
            goto listen_overflow;
    
        inet_csk_reqsk_queue_unlink(sk, req, prev); /* unlink the request from the SYN queue */
        inet_csk_reqsk_queue_removed(sk, req); /* update the SYN queue length; per the original notes the timer is removed when it reaches 0 */
    
        /* Associate the completed request with the new sock and move it to the accept queue */
        inet_csk_reqsk_queue_add(sk, req, child);
    
        return child;
    
    listen_overflow:
        /* sysctl_tcp_abort_on_overflow selects what happens when the accept queue
         * is full: send a RST to the client, or silently drop.
         */
        if (! sysctl_tcp_abort_on_overflow) {
            inet_rsk(req)->acked = 1;
            return NULL;
        }
    
    embryonic_reset:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
        if (! (flg & TCP_FLAG_RST))
            /* The ops instance is tcp_request_sock_ops, so this calls tcp_v4_send_reset(). */
            req->rsk_ops->send_reset(sk, skb);
    
        /* Remove the request from the SYN queue and update the queue */
        inet_csk_reqsk_queue_drop(sk, req, prev);
    
        return NULL;
    } 

    tcp_paws_reject()用于判断PAWS检查是否失败,即时间戳是否发生了回绕;返回true表示应当拒绝该段。

    /*
     * Decide whether a segment must be rejected by the PAWS timestamp check.
     *
     * @rx_opt: parsed options providing ts_recent, ts_recent_stamp and rcv_tsval
     * @rst:    non-zero when the segment carries the RST flag
     *
     * Returns false when tcp_paws_check() passes, or when the segment is a RST
     * and ts_recent_stamp is at least TCP_PAWS_MSL seconds old; true otherwise.
     */
    static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, int rst)
    {
        bool rst_exempt;

        if (tcp_paws_check(rx_opt, 0))
            return false;

        /* The clock is only consulted for segments carrying RST. */
        rst_exempt = rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL;

        return !rst_exempt;
    }
    

    检查客户端的时间戳是否合法。

    要求:客户端SYN段携带的时间戳 <= 客户端重传的SYN段、以及随后发送的ACK段所携带的时间戳。

    static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt, int paws_win)
    {
        if ((s32) (rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
            return true;
    
        /* 重传时间超过24天?*/
        if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
            return true;
    
        /* Some OSes send SYN and SYNACK messages with tsval = 0 tsecr = 0,
         * then following tcp messages have valid values. Ignore 0 value, or else 'negative'
         * tsval might forbid us to accept their packets.
         */
        if (! rx_opt->ts_recent)
            return true;
    
        return false;
    } 

    检查序号是否合法。

    /*
     * Check whether a segment is acceptable with respect to a receive window.
     *
     * @seq:     sequence number of the received segment
     * @end_seq: end sequence number of the received segment
     * @s_win:   first sequence number of the receive window
     * @e_win:   end sequence number of the receive window
     *
     * Returns true when the segment starts exactly at s_win, when it ends after
     * s_win and starts before e_win, or when it is empty (seq == end_seq) and
     * sits exactly at e_win.
     */
    static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
    {
        return seq == s_win ||
               (after(end_seq, s_win) && before(seq, e_win)) ||
               (seq == e_win && seq == end_seq);
    }
    

    连接请求块操作

    request_sock_ops为处理连接请求块的函数指针表,对于TCP,它的实例为tcp_request_sock_ops。

    /* request_sock operations table used for TCP connection requests. */
    struct request_sock_ops tcp_request_sock_ops __read_mostly = {
        /* identity and sizing */
        .family          = PF_INET,
        .obj_size        = sizeof(struct tcp_request_sock),

        /* segment transmission callbacks */
        .rtx_syn_ack     = tcp_v4_rtx_synack,       /* retransmit the SYNACK */
        .send_ack        = tcp_v4_reqsk_send_ack,   /* send an ACK */
        .send_reset      = tcp_v4_send_reset,       /* send a RST */

        /* lifetime callbacks */
        .syn_ack_timeout = tcp_syn_ack_timeout,     /* SYNACK timeout hook */
        .destructor      = tcp_v4_reqsk_destructor,
    };
    

    (1) 重传SYNACK段

    /*
     * Retransmit the SYNACK for a pending connection request.
     *
     * Bumps the TCP_MIB_RETRANSSEGS counter, then hands off to
     * tcp_v4_send_synack() and returns its result.
     */
    static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp)
    {
        int rc;

        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);

        rc = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
        return rc;
    }

    我们在上一篇中已分析过tcp_v4_send_synack(),它主要用于构造和发送SYNACK段。

    (2) 发送ACK段 

    在tcp_check_req()中,如果接收到的ACK段时间戳不合法、或者序号不在接收窗口内,且不含RST标志,

    则需要给客户端发送一个ACK。

    static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req)
    {
        tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
            req->rcv_wnd, req->ts_recent, 0, 
            tcp_md5_do_lookup(sk, (union tcp_md5_addr *) &ip_hdr(skb)->daddr, AF_INET),
            inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos);
    }
    
    /*
     * Build a bare ACK in reply to skb and send it through the IP layer.
     *
     * @skb:         segment being answered; ports and addresses are taken from it
     * @seq:         sequence number to place in the reply
     * @ack:         acknowledgment number to place in the reply
     * @win:         window to advertise
     * @ts:          timestamp value to echo; 0 means no timestamp option is added
     * @oif:         output interface index; 0 leaves arg.bound_dev_if unset
     * @key:         MD5 signature key, or NULL for no MD5 option
     * @reply_flags: flags stored in ip_reply_arg.flags
     * @tos:         IP TOS for the reply
     */
    static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, int oif,
        struct tcp_md5sig_key *key, int reply_flags, u8 tos)
    {
        const struct tcphdr *th = tcp_hdr(skb);
        struct {
            struct tcphdr th;
            __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
    #ifdef CONFIG_TCP_MD5SIG
                + (TCPOLEN_MD5SIG_ALIGNED >> 2)
    #endif
                ];
        } rep;

        struct ip_reply_arg arg;
        struct net *net = dev_net(skb_dst(skb)->dev);
        memset(&rep.th, 0, sizeof(struct tcphdr));
        memset(&arg, 0, sizeof(arg));

        arg.iov[0].iov_base = (unsigned char *) &rep;
        arg.iov[0].iov_len = sizeof(rep.th);

        if (ts) { /* timestamp option */
            rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) |
                               TCPOLEN_TIMESTAMP);
            rep.opt[1] = htonl(tcp_time_stamp);
            rep.opt[2] = htonl(ts);
            arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
        }

        /* Swap the send and the receive. */
        rep.th.dest = th->source;
        rep.th.source = th->dest;
        rep.th.doff = arg.iov[0].iov_len / 4;
        rep.th.seq = htonl(seq);
        rep.th.ack_seq = htonl(ack);
        rep.th.ack = 1;
        rep.th.window = htons(win);

    #ifdef CONFIG_TCP_MD5SIG
        if (key) {
            /* The MD5 option follows the timestamp option when one is present. */
            int offset = (ts) ? 3 : 0;
            rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                    (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
            arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
            rep.th.doff = arg.iov[0].iov_len / 4;

            /* Hash the reply header; the helper is tcp_v4_md5_hash_hdr(). */
            tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], key, ip_hdr(skb)->saddr,
                ip_hdr(skb)->daddr, &rep.th);
        }
    #endif

        arg.flags = reply_flags;
        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr,
                        arg.iov[0].iov_len, IPPROTO_TCP, 0); /* pseudo-header sum */
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
        if (oif)
            arg.bound_dev_if = oif;
        arg.tos = tos;

        /* Hand the ACK to the IP layer for transmission. */
        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
    }
    /* Arguments describing a reply handed to ip_send_unicast_reply(). */
    struct ip_reply_arg {
       struct kvec iov[1]; /* buffer holding the reply; callers here point it at the TCP header plus options */
       int flags; /* reply flags; callers here pass IP_REPLY_ARG_NOSRCCHECK or 0 */
       __wsum csum; /* checksum seed; callers here store the pseudo-header sum */
       int csumoffset; /* u16 offset of csum in iov[0].iov_base */
       int bound_dev_if; /* device index to bind the reply to; left 0 by callers that do not set it */
       u8 tos; /* IP TOS for the reply */
    };
    
    /* One buffer described by its base address and length. */
    struct kvec {
       void *iov_base; /* start of the buffer */
       size_t iov_len; /* buffer length; used in bytes by the callers shown here */
    
    };
    

    (3) 发送RST段

    检测到对端异常时,发送RST段。

    /*
     * Send a RST in reply to skb.
     *
     * @sk:  socket the segment was received for; may be NULL (see the checks on
     *       sk below), in which case no binding information is applied
     * @skb: offending segment; ports and addresses for the reply come from it
     *
     * Nothing is sent when the incoming segment itself carries RST, or when its
     * route type is not RTN_LOCAL. The MD5 handling of the original function is
     * elided in this excerpt.
     */
    static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
    {
        const struct tcphdr *th = tcp_hdr(skb);
        struct {
            struct tcphdr th;
    #ifdef CONFIG_TCP_MD5SIG
            __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2) ];
    #endif
        } rep;
        struct ip_reply_arg arg; /* control information for the reply datagram */
    #ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
    #endif
        struct net *net;
    
        /* Never send a reset in response to a reset. */
        if (th->rst)
            return;
    
        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
            return;
    
        /* Swap the send and the receive. */
        memset(&rep, 0, sizeof(rep));
        rep.th.dest = th->source;
        rep.th.source = th->dest;
        rep.th.doff = sizeof(struct tcphdr) / 4;
        rep.th.rst = 1;
    
        /* If the incoming segment has ACK, reuse its ack_seq as our seq.
         * Otherwise acknowledge everything it occupied in sequence space:
         * SYN and FIN count as one each, plus the payload length.
         */
        if (th->ack) {
            rep.th.seq = th->ack_seq;
        } else {
            rep.th.ack = 1;
            rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2));
        }
    
        memset(&arg, 0, sizeof(arg));
        arg.iov[0].iov_base = (unsigned char *) &rep;
        arg.iov[0].iov_len = sizeof(rep.th);
    
    #ifdef CONFIG_TCP_MD5SIG
        /* MD5 option handling omitted from this excerpt */
        ...
    #endif
    
        arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr,
                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
        arg.csumoffset = offsetof(struct tcphdr, check) / 2;
        arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
        /* When socket is gone, all binding information is lost.
         * routing might fail in this case. No choice here, if we choose to force input interface,
         * we will misroute in case of asymmetric route.
         */
        if (sk)
            arg.bound_dev_if = sk->sk_bound_dev_if;
        net = dev_net(skb_dst(skb)->dev);
        arg.tos = ip_hdr(skb)->tos;
    
        /* Hand the RST to the IP layer for transmission */
        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
    
        TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
        TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
    
    #ifdef CONFIG_TCP_MD5SIG
        /* MD5 handling omitted from this excerpt */
        ...
    #endif 
    }

    (4) 析构函数

    释放request_sock实例前调用。

    /* IPv4 request_sock destructor. */
    static void tcp_v4_reqsk_destructor(struct request_sock *req)
    {
        kfree(inet_rsk(req)->opt); /* 释放IP选项实例 */
    }
    

    (5) 超时处理函数

    不是真正的SYNACK超时处理函数,简单更新下统计变量。

    /*
     * SYNACK timeout hook.
     *
     * Only accounts for the event by bumping LINUX_MIB_TCPTIMEOUTS; req is unused.
     */
    void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
    {
        struct net *net = sock_net(sk);

        NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS);
    }
    
  • 相关阅读:
    JBoss+Ant实现EJB无状态会话bean实例
    Nginx简单介绍
    SVN版本号管理工具使用中常见的代码提交冲突问题的解决方法
    深入分析Java中的I/O类的特征及适用场合
    ZOJ 3689 Digging(贪心+dp)
    uva 10641 (来当雷锋的这回....)
    Java编程中“为了性能”尽量要做到的一些地方
    wikioi 1306 机智Trie树
    PE文件结构(三) 输入表
    初始化的数值(int、double等)(一)
  • 原文地址:https://www.cnblogs.com/aiwz/p/6333304.html
Copyright © 2011-2022 走看看