zoukankan      html  css  js  c++  java
  • tcp 客户端 synack的接收 以及 相互connect

    接收入口

    tcp_v4_rcv

        |--> tcp_v4_do_rcv

                   |-> tcp_rcv_state_process

                             |-> tcp_rcv_synsent_state_process

    1. 状态为ESTABLISHED时,用tcp_rcv_established()接收处理。
    2. 状态为LISTEN时,说明这个sock处于监听状态,用于被动打开的接收处理,包括SYN和ACK。
    3. 当状态不为ESTABLISHED或TIME_WAIT时,用tcp_rcv_state_process()处理。

    /* The socket must have its spinlock held when we get
     * here.
     *
     * We have a potential double-lock case here, so even when
     * doing backlog processing we use the BH locking scheme.
     * This is because we cannot sleep with the original spinlock
     * held.
     */
    /*
     * tcp_v4_do_rcv - per-socket receive handler for an IPv4 TCP segment.
     * @sk:  socket the segment was matched to
     * @skb: the received segment
     *
     * Dispatches on sk->sk_state:
     *   - TCP_ESTABLISHED: fast path through tcp_rcv_established();
     *   - TCP_LISTEN: tcp_v4_hnd_req() selects the socket; when it returns a
     *     socket other than sk, the segment goes to tcp_child_process();
     *   - anything else (and a listener for which tcp_v4_hnd_req() returned
     *     sk itself): tcp_rcv_state_process().
     * A non-zero result from any of those handlers makes this function send
     * a reset via tcp_v4_send_reset() and free the skb.
     *
     * Original annotation: once the TCP layer has received a segment, done
     * basic validation and located the owning transmission control block,
     * the segment is handed to tcp_v4_do_rcv() for the real processing,
     * unless the connection is in FIN_WAIT_2 or TIME_WAIT (that dispatch is
     * in the caller, which is not shown here).
     *
     * Return: always 0.
     */
    int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
    {
        struct sock *rsk;
    #ifdef CONFIG_TCP_MD5SIG
        /*
         * We really want to reject the packet as early as possible
         * if:
         *  o We're expecting an MD5'd packet and this is no MD5 tcp option
         *  o There is an MD5 option and we're not expecting one
         */
        if (tcp_v4_inbound_md5_hash(sk, skb))
            goto discard;
    #endif
    
        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 
            sock_rps_save_rxhash(sk, skb->rxhash);
            TCP_CHECK_TIMER(sk);
            if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
                rsk = sk;
                goto reset;
            }
            TCP_CHECK_TIMER(sk);
            return 0;
        }
    
        /* Slow path only: reject truncated headers and bad checksums. */
        if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
            goto csum_err;
    
        if (sk->sk_state == TCP_LISTEN) { /* server side: the segment is the first-handshake SYN or the third-handshake ACK */
            struct sock *nsk = tcp_v4_hnd_req(sk, skb);
            if (!nsk)
                goto discard;
    
            if (nsk != sk) { /* first-handshake SYN: nsk is the "parent" (listening) sk; third-handshake ACK: nsk is the "child" sk */
                if (tcp_child_process(sk, nsk, skb)) { /* per the original annotation this also ends up in tcp_rcv_state_process() */
                    rsk = nsk;
                    goto reset;
                }
                return 0; /* third step of the handshake: done, return here */
            } /* first-handshake SYN: fall out of the block and continue below */
        } else
            sock_rps_save_rxhash(sk, skb->rxhash);
    
        /*
         * Reached by a listener whose tcp_v4_hnd_req() returned sk itself
         * (the first SYN, per the note above) and by every state other than
         * ESTABLISHED and LISTEN, e.g. a client in SYN_SENT receiving SYN+ACK.
         */
        TCP_CHECK_TIMER(sk);
        if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
            rsk = sk;
            goto reset;
        }
        TCP_CHECK_TIMER(sk);
        return 0;
    
    reset:
        /* rsk is the socket whose handler asked for the reset (sk or nsk). */
        tcp_v4_send_reset(rsk, skb);
    discard:
        kfree_skb(skb);
        /* Be careful here. If this function gets more complicated and
         * gcc suffers from register pressure on the x86, sk (in %ebx)
         * might be destroyed here. This current version compiles correctly,
         * but you have been warned.
         */
        return 0;
    
    csum_err:
        /* Count the malformed segment, then drop it without a reset. */
        TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
        goto discard;
    }

    当客户端connect()之后,sock进入TCP_SYN_SENT状态,并插入到ehash中, 如果是阻塞socket则connect()等待握手完成
    本文考虑收到服务端synack的过程,也就是客户端握手的第二阶段;

    发送SYN段后,连接的状态变为SYN_SENT。此时如果收到SYNACK段,处理函数为tcp_rcv_state_process()。

    对于协议栈的接收路径,

    • tcp_v4_rcv
      • ->__inet_lookup_skb() //在ehash中找到TCP_SYN_SENT状态的sk
      • ->!sock_owned_by_user() //connect()即使阻塞也不占有锁
        • ->!tcp_prequeue() //对于synack,不会排入prequeue队列
        • ->tcp_v4_do_rcv()
          • ->tcp_rcv_state_process() //进入TCP_SYN_SENT状态处理逻辑
            • -> tcp_rcv_synsent_state_process

    整体代码先折叠

    /*
     * tcp_rcv_state_process - receive-side state machine for the TCP states
     * handled outside the established fast path.
     * @sk:  socket the segment belongs to
     * @skb: the received segment
     *
     * Layout of the function:
     *   1. TCP_CLOSE, TCP_LISTEN and TCP_SYN_SENT are fully handled in the
     *      first switch and return from there.
     *   2. For the remaining states the segment goes through tcp_check_req()
     *      (only when a Fast Open request sock is attached),
     *      tcp_validate_incoming() and tcp_ack().
     *   3. The second switch performs the ACK-driven state transitions
     *      (SYN_RECV, FIN_WAIT1, CLOSING, LAST_ACK).
     *   4. tcp_urg(), then the third switch queues segment data.
     *
     * Return: 0 when the segment was consumed, queued or dropped here;
     * non-zero on the `return 1` paths (or when
     * tcp_rcv_synsent_state_process() returns a positive value). The caller
     * tcp_v4_do_rcv() shown in this document answers a non-zero return with
     * a reset.
     */
    int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
    {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        const struct tcphdr *th = tcp_hdr(skb);
        struct request_sock *req;
        int queued = 0;
        bool acceptable;
    
        switch (sk->sk_state) {
        case TCP_CLOSE:
            goto discard;
    
        case TCP_LISTEN:
            /* Server side: a SYN arrives on the listener. */
            /*
             * In LISTEN only SYN segments are processed. An ACK here means
             * no connection has been started yet, so return 1; the caller of
             * tcp_rcv_state_process() then sends a RST to the peer. A RST
             * segment is simply dropped.
             */
            if (th->ack)
                return 1;
    
            if (th->rst)
                goto discard;
    
            if (th->syn) {
                if (th->fin)
                    goto discard;
                /*
                 * The SYN is handled through the conn_request hook
                 * (tcp_v4_conn_request for TCP over IPv4). Per the original
                 * annotation, icsk_af_ops is set up when the socket is
                 * created, see tcp_v4_init_sock().
                 */
                 /* First step of the three-way handshake: per the original
                    annotation, tcp_v4_conn_request creates the request_sock
                    connection-request block.
                    */
                if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) /* ipv4_specific ---> tcp_v4_conn_request */
                    return 1;
    
                consume_skb(skb);
                return 0;
            }
            goto discard;
    
        case TCP_SYN_SENT: /* client side: SYN+ACK (or a crossed SYN) arrives */
        /*
    A socket in TCP_SYN_SENT is handled by tcp_rcv_synsent_state_process().
    Summary from the original annotation:
      - parse the TCP options to learn what the server supports (SACK, TFO,
        wscale, MSS, timestamps, ...);
      - if ACK is set, run tcp_ack(); Fast Open data may get acknowledged here;
      - call tcp_finish_connect(): TCP_SYN_SENT -> TCP_ESTABLISHED;
      - store the Fast Open cookie if one is present;
      - decide between an immediate and a delayed ACK;
      - if the segment has SYN but no ACK it is a simultaneous connect:
        TCP_SYN_SENT -> TCP_SYN_RECV, and a SYN+ACK is sent.
        */
            tp->rx_opt.saw_tstamp = 0;
            queued = tcp_rcv_synsent_state_process(sk, skb, th);
            /* A non-negative result is final and is handed to the caller. */
            if (queued >= 0)
                return queued;
    
            /* Do step6 onward by hand. */
            tcp_urg(sk, skb, th);
            __kfree_skb(skb);
            tcp_data_snd_check(sk);
            return 0;
        }
    
        tp->rx_opt.saw_tstamp = 0;
        req = tp->fastopen_rsk;
        if (req) {
            /* A Fast Open request sock is only expected in these two states. */
            WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
                sk->sk_state != TCP_FIN_WAIT1);
    
            if (!tcp_check_req(sk, skb, req, true))
                goto discard;
        }
    
        /* A segment carrying none of ACK, RST or SYN is dropped. */
        if (!th->ack && !th->rst && !th->syn)
            goto discard;
    
        if (!tcp_validate_incoming(sk, skb, th, 0))
            return 0;
    /*
             * Process the ACK flag. `acceptable` is true when tcp_ack()
             * returns a value greater than zero; per the original annotation
             * that is the normal third-handshake segment.
             */
        /* step 5: check the ACK field */
        acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
                          FLAG_UPDATE_TS_RECENT) > 0;
    /*
    ACK handling for FIN_WAIT_1 (see that case below): once all sent data has
    been acknowledged the socket moves to FIN_WAIT_2. If no wait is wanted in
    that state (linger2 < 0), or the segment carries data beyond rcv_nxt, the
    connection is closed right away. Otherwise the FIN_WAIT_2 wait time is
    compared with TCP_TIMEWAIT_LEN: when it is larger, the FIN_WAIT_2 timer is
    armed; when it is not, the socket is handed over to TIME_WAIT handling
    (with FIN_WAIT_2 kept as its sub-state), which arms the TIME_WAIT timer.
    */
        switch (sk->sk_state) {
        case TCP_SYN_RECV: /* initial state of a new connection while the handshake completes */
            if (!acceptable)
                return 1;
    
            if (!tp->srtt_us)
                tcp_synack_rtt_meas(sk, req);
    /* Per the original annotation, this point is reached from
     * tcp_child_process() in tcp_v4_do_rcv(); before tcp_child_process() runs,
     * tcp_check_req() has created the new struct sock.
     *
     * Once we leave TCP_SYN_RECV, we no longer need req
     * so release it.
     */
            if (req) {
                tp->total_retrans = req->num_retrans;
                reqsk_fastopen_remove(sk, req, false);    /* release the Fast Open req */
            } else {
                /* Make sure socket is routed, for correct metrics. */
                icsk->icsk_af_ops->rebuild_header(sk);
                /* Initialise the congestion control module. */
                tcp_init_congestion_control(sk);
    
                /* Initialise the path-MTU related members. */
                tcp_mtup_init(sk);
                tp->copied_seq = tp->rcv_nxt;
                tcp_init_buffer_space(sk);
            }
            smp_mb();
            tcp_set_state(sk, TCP_ESTABLISHED); /* TCP_SYN_RECV -> TCP_ESTABLISHED */
            sk->sk_state_change(sk); /* sock_def_wakeup by default (see the list below); wakes waiters such as epoll */
    /*
    Default callbacks installed by sock_init_data(), per the original annotation:
    sk->sk_state_change    =    sock_def_wakeup;
    sk->sk_data_ready    =    sock_def_readable;
    sk->sk_write_space    =    sock_def_write_space;
    sk->sk_error_report    =    sock_def_error_report;
    sk->sk_destruct        =    sock_def_destruct;
    */
    /* epoll side, per the original annotation:
     * ep_send_events -> ep_scan_ready_list -> ep_send_events_proc -> ep_item_poll -> tcp_poll
     */
     /*
                     * The "child" transmission control block is now in the
                     * ESTABLISHED state.
                     */
            /* Note, that this wakeup is only for marginal crossed SYN case.
             * Passively open sockets are not waked up, because
             * sk->sk_sleep == NULL and sk->sk_socket == NULL.
             */
             /*
                     * Signal the processes that will send data through this
                     * socket that it is now writable. Call chain as given by
                     * the original annotation (none of it is visible here):
                     * sk_state_change() -> sock_def_wakeup() -> ep_poll_callback()
                     * adds the socket to the epoll ready list and wakes the
                     * blocked epoll, which then runs ep_send_events ->
                     * ep_scan_ready_list -> ep_send_events_proc ->
                     * ep_item_poll -> tcp_poll.
                     */
                     
            if (sk->sk_socket)
                sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
         /*
                     * Initialise the send-side fields from this ACK: snd_una,
                     * the send window (shifted by snd_wscale) and the window
                     * update sequence via tcp_init_wl().
                     */
            tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
            tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
            tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
    
            /* With timestamps negotiated, advmss shrinks by the option size. */
            if (tp->rx_opt.tstamp_ok)
                tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
    
            if (req) {
                /* Re-arm the timer because data may have been sent out.
                 * This is similar to the regular data transmission case
                 * when new data has just been ack'ed.
                 *
                 * (TFO) - we could try to be more aggressive and
                 * retransmitting any data sooner based on when they
                 * are sent out.
                 */
                tcp_rearm_rto(sk);
            } else
                tcp_init_metrics(sk);
    /*
                     * Refresh the pacing rate (tcp_update_pacing_rate() is
                     * not shown in this document).
                     */
            tcp_update_pacing_rate(sk);
    /*
                     * Record the time of the most recent transmission.
                     */
            /* Prevent spurious tcp_cwnd_restart() on first data packet */
            tp->lsndtime = tcp_time_stamp;
    
            tcp_initialize_rcv_mss(sk);
            /*
                     * Compute the header-prediction flags.
                     */
            tcp_fast_path_on(tp);
            break;
    
        case TCP_FIN_WAIT1: {
            struct dst_entry *dst;
            int tmo;
    
            /* If we enter the TCP_FIN_WAIT1 state and we are a
             * Fast Open socket and this is the first acceptable
             * ACK we have received, this would have acknowledged
             * our SYNACK so stop the SYNACK timer.
             */
            if (req) {
                /* Return RST if ack_seq is invalid.
                 * Note that RFC793 only says to generate a
                 * DUPACK for it but for TCP Fast Open it seems
                 * better to treat this case like TCP_SYN_RECV
                 * above.
                 */
                if (!acceptable)
                    return 1;
                /* We no longer need the request sock. */
                reqsk_fastopen_remove(sk, req, false);
                tcp_rearm_rto(sk);
            }
            /* Sent data is not fully acknowledged yet: stay in FIN_WAIT1. */
            if (tp->snd_una != tp->write_seq)
                break;
    
            tcp_set_state(sk, TCP_FIN_WAIT2); /* enter FIN_WAIT_2 */
            sk->sk_shutdown |= SEND_SHUTDOWN; /* shut down the sending side */
    
            dst = __sk_dst_get(sk);
            if (dst) /* confirm the cached route */
                dst_confirm(dst);
    
            if (!sock_flag(sk, SOCK_DEAD)) {
                /* Wake up lingering close() */
                sk->sk_state_change(sk); /* socket is not DEAD: the state changed, wake the waiting process */
                break;
            }
     /* linger2 < 0: no waiting in FIN_WAIT_2 */
            if (tp->linger2 < 0 || /* or the segment carries data (data, FIN) beyond the expected sequence number */
                (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
                 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
                tcp_done(sk); /* close the connection */
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
                return 1;
            }
    
            tmo = tcp_fin_time(sk); /* FIN_WAIT_2 wait time */
            if (tmo > TCP_TIMEWAIT_LEN) {  /* longer than TCP_TIMEWAIT_LEN: arm the FIN_WAIT_2 timer for the excess */
                inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
            } else if (th->fin || sock_owned_by_user(sk)) {
                /* Bad case. We could lose such FIN otherwise.
                 * It is not a big problem, but it looks confusing
                 * and not so rare event. We still can lose it now,
                 * if it spins in bh_lock_sock(), but it is really
                 * marginal case.
                 */ /* FIN present, or socket locked by a user process: arm the FIN_WAIT_2 timer */
                inet_csk_reset_keepalive_timer(sk, tmo);
            } else { /* wait time is not above TCP_TIMEWAIT_LEN: hand over to TIME_WAIT handling */
                tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                goto discard;
            }
            break;
        }
    
        case TCP_CLOSING:
            /* Everything sent (including our FIN) is acknowledged: TIME_WAIT. */
            if (tp->snd_una == tp->write_seq) {
                tcp_time_wait(sk, TCP_TIME_WAIT, 0);
                goto discard;
            }
            break;
    
        case TCP_LAST_ACK:
            /* Everything sent is acknowledged: the connection is finished. */
            if (tp->snd_una == tp->write_seq) {
                tcp_update_metrics(sk);
                tcp_done(sk);
                goto discard;
            }
            break;
        }
    
        /* step 6: check the URG bit */
        tcp_urg(sk, skb, th);
    /*
    What moves a socket on from FIN_WAIT_2, per the original annotation:
    (1) a segment arriving before the FIN_WAIT_2 timer expires;
    (2) the FIN_WAIT_2 timer expiring;
    (3) a segment arriving before the TIME_WAIT timer expires;
    (4) the TIME_WAIT timer expiring.
    */
        /* step 7: process the segment text */
        switch (sk->sk_state) {
        case TCP_CLOSE_WAIT:
        case TCP_CLOSING:
        case TCP_LAST_ACK:
            if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                break;
            /* seq is before rcv_nxt: fall through to the checks below */
        case TCP_FIN_WAIT1:
        case TCP_FIN_WAIT2: /* segment received before the FIN_WAIT_2 timer expires; per the original annotation, a FIN here takes the socket straight to TIME_WAIT */
            /* RFC 793 says to queue data in these states,
             * RFC 1122 says we MUST send a reset.
             * BSD 4.4 also does reset.
             */
            if (sk->sk_shutdown & RCV_SHUTDOWN) {
                if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
                    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
                    NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
                    tcp_reset(sk);
                    return 1;
                }
            }
            /* Fall through */
        case TCP_ESTABLISHED:
            tcp_data_queue(sk, skb); /* queue any payload the segment carries, e.g. when the client used TCP_DEFER_ACCEPT (per the original annotation) */
            queued = 1;
            break;
        }
    
        /* tcp_data could move socket to TIME-WAIT */
        if (sk->sk_state != TCP_CLOSE) {
            tcp_data_snd_check(sk); /* give pending data a chance to go out (tcp_push_pending_frame, per the original annotation) */
            tcp_ack_snd_check(sk); /* check whether an ACK was deferred and must be sent now */
        }
    
        /* The skb is dropped here unless tcp_data_queue() took it. */
        if (!queued) {
    discard:
            tcp_drop(sk, skb);
        }
        return 0;
    }
    EXPORT_SYMBOL(tcp_rcv_state_process);
    View Code

    参考:https://blog.csdn.net/zhangskd/article/details/47380761

    http://www.cnhalo.net/2016/06/13/linux-tcp-synack-rcv/

    tcp_rcv_synsent_state_process()用于SYN_SENT状态的处理,具体又分两种场景。
    (1) 接收到SYNACK
    一般情况下会收到服务端的SYNACK,处理如下:
    检查ack_seq是否合法。如果使用了时间戳选项,检查回显的时间戳是否合法。检查TCP的标志位是否合法。如果SYNACK是合法的,更新sock的各种信息。 

      把连接的状态设置为TCP_ESTABLISHED,唤醒调用connect()的进程。判断是马上发送ACK,还是延迟发送。

    (2) 接收到SYN
    本端之前发送出一个SYN,现在又接收到了一个SYN,双方同时向对端发起建立连接的请求。
    处理如下:把连接状态置为SYN_RECV。更新sock的各种信息。构造和发送SYNACK。接着对端也会回应SYNACK,之后的处理流程和服务器端接收ACK类似。

    当tcp_rcv_synsent_state_process()的返回值大于0时,会导致上层调用函数发送一个被动的RST。 

    Q:那么什么情况下此函数的返回值会大于0?

    A:收到一个ACK段,但ack_seq的序号不正确,或者回显的时间戳不正确。

    分析:tcp_rcv_synsent_state_process

    对于TCP_SYN_SENT状态的sock,会调用tcp_rcv_synsent_state_process来进行处理

    • 解析tcp选项,获取服务端的支持情况, 比如sack, TFO, wscale, MSS, timestamp等
    • 如果有ack, 进行tcp_ack, 这时候可能fastopen确认了之前的数据
    • 调用tcp_finish_connect,TCP_SYN_SENT->TCP_ESTABLISHED
    • 如果包含fastopen cookie则保存
    • 判断是否需要立即ack还是延时ack
    • 如果包里没有ack,只有syn,则表示相互connect, TCP_SYN_SENT->TCP_SYN_RECV, 并发送synack
    /*
     * tcp_rcv_synsent_state_process - handle a segment received in SYN_SENT.
     * @sk:  the connecting (client) socket
     * @skb: the received segment
     * @th:  TCP header of @skb
     *
     * Two scenarios are handled:
     *   - SYN+ACK: validate ack_seq and the echoed timestamp, update the
     *     socket from the segment, call tcp_finish_connect() and decide
     *     between an immediate and a delayed ACK;
     *   - SYN without ACK (simultaneous open): switch to TCP_SYN_RECV and
     *     send a SYN+ACK.
     *
     * Return:
     *    1  the ACK is unacceptable (bad ack_seq or bad echoed timestamp);
     *       the parsed options are undone first;
     *    0  the segment was dropped here via tcp_drop();
     *   -1  the caller has to continue processing the segment itself.
     */
    static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                         const struct tcphdr *th)
    {
        struct inet_connection_sock *icsk = inet_csk(sk);    /* client-side sk */
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_fastopen_cookie foc = { .len = -1 };
        /* Saved so the *_undo exits can restore it after clearing options. */
        int saved_clamp = tp->rx_opt.mss_clamp;

        /* Parse the TCP options; they may carry a Fast Open cookie. */
        tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
        if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
            tp->rx_opt.rcv_tsecr -= tp->tsoffset; /* timestamp correction used in repair mode (per the original annotation) */

        if (th->ack) {
            /* rfc793:
             * "If the state is SYN-SENT then
             *    first check the ACK bit
             *      If the ACK bit is set
             *      If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
             *        a reset (unless the RST bit is set, if so drop
             *        the segment and return)"
             */
            /* Per the original annotation, snd_una starts at the SYN's
             * sequence number, so a valid ACK is SYN+1, or larger with
             * Fast Open. An ACK beyond snd_nxt covers data never sent.
             */
            if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
                after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
                goto reset_and_undo;

            /* The echoed timestamp must lie between retrans_stamp (recorded
             * when the SYN was sent, per the original annotation) and now.
             */
            if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
                !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
                     tcp_time_stamp)) {
                NET_INC_STATS(sock_net(sk),
                        LINUX_MIB_PAWSACTIVEREJECTED);
                goto reset_and_undo;
            }

            /* Now ACK is acceptable.
             *
             * "If the RST bit is set
             *    If the ACK was acceptable then signal the user "error:
             *    connection reset", drop the segment, enter CLOSED state,
             *    delete TCB, and return."
             */
            if (th->rst) {
                tcp_reset(sk);    /* enters TCP_CLOSE (per the original annotation) */
                goto discard;     /* drop the segment */
            }

            /* rfc793:
             *   "fifth, if neither of the SYN or RST bits is set then
             *    drop the segment and return."
             *
             *    See note below!
             *                                        --ANK(990513)
             */
            if (!th->syn)    /* neither RST nor SYN is set: drop and return */
                goto discard_and_undo;

            /* rfc793:
             *   "If the SYN bit is on ...
             *    are acceptable then ...
             *    (our SYN has been ACKed), change the connection
             *    state to ESTABLISHED..."
             */
            tcp_ecn_rcv_synack(tp, th);

            tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
            tcp_ack(sk, skb, FLAG_SLOWPATH);    /* process the ACK; Fast Open data may be acknowledged here */

            /* Ok.. it's good. Set up sequence numbers and
             * move to established.
             */
            tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
            tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

            /* RFC1323: The window in SYN & SYN/ACK segments is
             * never scaled.
             */
            tp->snd_wnd = ntohs(th->window);    /* take over the advertised window */

            if (!tp->rx_opt.wscale_ok) {    /* the peer does not support window scaling */
                tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
                /* Our own advertised window must then fit in 16 bits too. */
                tp->window_clamp = min(tp->window_clamp, 65535U);
            }

            if (tp->rx_opt.saw_tstamp) {    /* the SYN+ACK carries a timestamp option */
                tp->rx_opt.tstamp_ok = 1;
                /* The TCP header grows by the timestamp option ... */
                tp->tcp_header_len =
                    sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
                /* ... and the advertised MSS shrinks by the same amount. */
                tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
                tcp_store_ts_recent(tp);    /* remember the value to echo */
            } else {
                tp->tcp_header_len = sizeof(struct tcphdr);
            }

            /* Server supports SACK and the sysctl allows FACK: enable FACK. */
            if (tcp_is_sack(tp) && sysctl_tcp_fack)
                tcp_enable_fack(tp);

            /* The peer's MSS option is known now: per the original
             * annotation, initialise the MTU probing range, then update
             * the range and MSS from the cached PMTU, then refresh the
             * estimate of the peer's MSS (which will not exceed
             * TCP_MSS_DEFAULT = 536).
             */
            tcp_mtup_init(sk);
            tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
            tcp_initialize_rcv_mss(sk);

            /* Remember, tcp_poll() does not lock socket!
             * Change state from SYN-SENT only after copied_seq
             * is initialized.
             */
            tp->copied_seq = tp->rcv_nxt;    /* sequence number user space reads next */

            smp_mb();

            /* TCP_SYN_SENT -> TCP_ESTABLISHED plus the related
             * initialisation; the client may send data afterwards.
             */
            tcp_finish_connect(sk, skb);

            /* Fast Open handling (saves the cookie). Per the original
             * annotation, a non-zero result means part of the SYN data was
             * not acknowledged and has been retransmitted.
             */
            if ((tp->syn_fastopen || tp->syn_data) &&
                tcp_rcv_fastopen_synack(sk, skb, &foc))
                return -1;

            if (sk->sk_write_pending ||    /* data is waiting to be written */
                icsk->icsk_accept_queue.rskq_defer_accept ||    /* TCP_DEFER_ACCEPT set on the client: hold the ACK until there is data to send */
                icsk->icsk_ack.pingpong) {    /* pingpong mode: quick ACK is off */
                /* Delayed ACK: wait so the ACK can travel with data. */
                /* Save one ACK. Data will be ready after
                 * several ticks, if write_pending is set.
                 *
                 * It may be deleted, but with this feature tcpdumps
                 * look so _wonderfully_ clever, that I was not able
                 * to stand against the temptation 8)     --ANK
                 */
                inet_csk_schedule_ack(sk);    /* mark an ACK as deferred */
                icsk->icsk_ack.lrcvtime = tcp_time_stamp;    /* record the receive time */
                tcp_enter_quickack_mode(sk);    /* enter quick-ACK mode to speed up slow start (per the original annotation) */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,    /* re-arm the delayed-ACK timer */
                              TCP_DELACK_MAX, TCP_RTO_MAX);

    discard:
                tcp_drop(sk, skb);
                return 0;
            } else {
                tcp_send_ack(sk);    /* nothing to wait for: ACK immediately */
            }
            return -1;
        }

        /* No ACK in the segment */

        /* No ACK but RST is set: ignore the segment. */
        if (th->rst) {
            /* rfc793:
             * "If the RST bit is set
             *
             *      Otherwise (no ACK) drop the segment and return."
             */
            goto discard_and_undo;
        }

        /* PAWS check. */
        if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
            tcp_paws_reject(&tp->rx_opt, 0))
            goto discard_and_undo;

        if (th->syn) {    /* a SYN was received: simultaneous open */
            /* We see SYN without ACK. It is attempt of
             * simultaneous connect with crossed SYNs.
             * Particularly, it can be connect to self.
             */
            /* A SYN was sent (state SYN_SENT) and a SYN is received as
             * well, so the state becomes SYN_RECV.
             */
            tcp_set_state(sk, TCP_SYN_RECV);

            if (tp->rx_opt.saw_tstamp) {
                tp->rx_opt.tstamp_ok = 1;
                tcp_store_ts_recent(tp);    /* remember the peer's timestamp as the next echo value */
                tp->tcp_header_len =
                    sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
            } else {
                tp->tcp_header_len = sizeof(struct tcphdr);
            }

            tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;    /* next sequence number expected from the peer */
            tp->copied_seq = tp->rcv_nxt;
            tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;    /* left edge of the receive window */

            /* RFC1323: The window in SYN & SYN/ACK segments is
             * never scaled.
             */
            /* Take over the peer's receive window; no window scale factor
             * is applied during the three-way handshake.
             */
            tp->snd_wnd    = ntohs(th->window);
            tp->snd_wl1    = TCP_SKB_CB(skb)->seq;    /* segment sequence number used for the last window update */
            tp->max_window = tp->snd_wnd;    /* largest window the peer has advertised so far */

            tcp_ecn_rcv_syn(tp, th);

            tcp_mtup_init(sk);    /* MTU probing init, MSS update */
            tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
            tcp_initialize_rcv_mss(sk);    /* initial estimate of the peer's effective send MSS */

            tcp_send_synack(sk);    /* send the SYN+ACK */
    #if 0
            /* Note, we could accept data and URG from this segment.
             * There are no obstacles to make this (except that we must
             * either change tcp_recvmsg() to prevent it from returning data
             * before 3WHS completes per RFC793, or employ TCP Fast Open).
             *
             * However, if we ignore data in ACKless segments sometimes,
             * we have no reasons to accept it sometimes.
             * Also, seems the code doing it in step6 of tcp_rcv_state_process
             * is not flawless. So, discard packet for sanity.
             * Uncomment this return to process the data.
             */
            return -1;
    #else
            goto discard;
    #endif
        }
        /* "fifth, if neither of the SYN or RST bits is set then
         * drop the segment and return."
         */

    discard_and_undo:
        /* Forget the options parsed from this segment, then drop it. */
        tcp_clear_options(&tp->rx_opt);
        tp->rx_opt.mss_clamp = saved_clamp;
        goto discard;

    reset_and_undo:
        /* Forget the parsed options and report the segment as unacceptable. */
        tcp_clear_options(&tp->rx_opt);
        tp->rx_opt.mss_clamp = saved_clamp;
        return 1;
    }

    同时打开时,在SYN_SENT状态,收到SYN段后,状态变为SYN_RECV,然后发送SYNACK。之后如果收到合法的SYNACK后,就能完成连接的建立。

    /*
     * tcp_finish_connect - complete an active open once the handshake
     * segment has been accepted.
     * @sk:  the connecting socket
     * @skb: the segment that completed the handshake; the code accepts NULL,
     *       in which case the receive-route and security hooks are skipped
     *
     * Steps, in this order:
     *   1. set the socket state to TCP_ESTABLISHED;
     *   2. with an skb: cache the receive route through
     *      icsk_af_ops->sk_rx_dst_set() and call
     *      security_inet_conn_established();
     *   3. rebuild the header/route, then initialise the TCP metrics and the
     *      congestion control module;
     *   4. stamp lsndtime with the current tcp_time_stamp;
     *   5. initialise the buffer space;
     *   6. arm the keepalive timer when SOCK_KEEPOPEN is set;
     *   7. set the header-prediction flags: fast path on when snd_wscale is
     *      zero, cleared otherwise;
     *   8. unless the socket is SOCK_DEAD: call sk_state_change() (wakes the
     *      process blocked in connect(), per the original annotation) and
     *      sk_wake_async(SOCK_WAKE_IO, POLL_OUT) for asynchronous waiters.
     */
    void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
    {
        struct tcp_sock *tcp = tcp_sk(sk);
        struct inet_connection_sock *conn = inet_csk(sk);

        /* The connection counts as established from here on. */
        tcp_set_state(sk, TCP_ESTABLISHED);

        if (skb) {
            /* Cache the receive route (inet_sk_rx_dst_set for IPv4, per the
             * original annotation), then notify the security hook.
             */
            conn->icsk_af_ops->sk_rx_dst_set(sk, skb);
            security_inet_conn_established(sk, skb);
        }

        /* Make sure socket is routed, for correct metrics. */
        conn->icsk_af_ops->rebuild_header(sk);
        tcp_init_metrics(sk);
        tcp_init_congestion_control(sk);

        /* Prevent spurious tcp_cwnd_restart() on first data packet. */
        tcp->lsndtime = tcp_time_stamp;

        /* Size the buffers now that the peer's parameters are known. */
        tcp_init_buffer_space(sk);

        /* Keepalive was requested on this socket: start its timer. */
        if (sock_flag(sk, SOCK_KEEPOPEN))
            inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp));

        /* Header prediction is only switched on when no send window scale
         * factor is in use; otherwise the prediction flags are cleared.
         */
        if (tcp->rx_opt.snd_wscale)
            tcp->pred_flags = 0;
        else
            __tcp_fast_path_on(tcp, tcp->snd_wnd);

        /* Nobody is left to notify on a dead socket. */
        if (sock_flag(sk, SOCK_DEAD))
            return;

        sk->sk_state_change(sk);
        sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
    }
  • 相关阅读:
    javascript Date format(js日期格式化)
    hcharts 教程
    UVA 10594 Data Flow
    UVA 10746 Crime Wave
    UVA 753 A Plug for UNIX
    UVA 11045 My T-shirt suits me
    UVA 10273 Eat or not to Eat?
    UVA 10806 Dijkstra, Dijkstra.
    UVA 10330 Power Transmission
    UVA 10803 Thunder Mountain
  • 原文地址:https://www.cnblogs.com/codestack/p/11148495.html
Copyright © 2011-2022 走看看