zoukankan      html  css  js  c++  java
  • 连接建立定时器

    1、连接建立定时器:

      TCP 连接建立时,client 发出 SYN 后等待 ACK;server 收到 SYN 后回复 ACK,同时带上自己的 SYN(即 SYN+ACK),然后等待客户端回复 ACK。如果 server 在超时时间内没有收到该 ACK,会超时重传若干次 SYN+ACK;若最终仍未收到 ACK,则终止本次连接的建立。

    创建request_sock并进入TCP_NEW_SYN_RECV状态后,将其插入ehash表、发送synack,并初始化rsk_timer定时器(处理函数为reqsk_timer_handler),为重传synack做好准备。

    /*
     * Reset a request sock's retransmit bookkeeping, arm its rsk_timer and
     * insert it into the ehash table.
     *
     * @req:     request sock being set up
     * @timeout: delay, in jiffies, before reqsk_timer_handler first fires
     */
    static void reqsk_queue_hash_req(struct request_sock *req,
                     unsigned long timeout)
    {
        req->num_retrans = 0;
        req->num_timeout = 0;
        req->sk = NULL;
    
        /* reqsk_timer_handler gets req back through the timer's data word. */
        setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
        mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
    
        /* Short-lived connections hit the established hash table frequently. */
        inet_ehash_insert(req_to_sk(req), NULL);
        /* before letting lookups find us, make sure all req fields
         * are committed to memory and refcnt initialized.
         */
        smp_wmb();
        /* NOTE(review): presumably one reference each for the hash table,
         * the timer and the caller - confirm.
         */
        atomic_set(&req->rsk_refcnt, 2 + 1);
    }
    
    /*
     * Start the SYN-ACK timer for a request: this is the point at which the
     * SYN-ACK timer is activated. The request is hashed and its timer armed
     * by reqsk_queue_hash_req(), then the listener's pending-request counters
     * are updated. See the earlier article for the three-way handshake details.
     *
     * @sk:      listening socket that owns the accept queue
     * @req:     request sock to hash and time
     * @timeout: initial timer delay, passed through to reqsk_queue_hash_req()
     */
    void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
                       unsigned long timeout)
    {
        reqsk_queue_hash_req(req, timeout);
        inet_csk_reqsk_queue_added(sk);
    }
    /* Account one newly queued connection request on listener @sk. */
    static inline void inet_csk_reqsk_queue_added(struct sock *sk)
    {
        struct request_sock_queue *accept_queue = &inet_csk(sk)->icsk_accept_queue;

        reqsk_queue_added(accept_queue);
    }
    
    /* Count one more pending connection request on @queue. */
    static inline void reqsk_queue_added(struct request_sock_queue *queue)
    {
        atomic_inc(&queue->young);/* requests whose SYN-ACK has not been retransmitted yet */
        atomic_inc(&queue->qlen);/* requests that have not completed the handshake yet */
    }
    /*
     * Container kept in the TCP control block for connection request blocks:
     * requests still in SYN_RECV state as well as connections that are
     * established but have not been accept()ed yet.
     *
     * This structure is the icsk_accept_queue member of
     * struct inet_connection_sock.
     */
    struct request_sock_queue {
        spinlock_t        rskq_lock;
        /*
         * Holds the value of the TCP-level socket option TCP_DEFER_ACCEPT:
         * the number of SYN+ACK retransmissions allowed while
         * TCP_DEFER_ACCEPT is enabled.
         * Note: when TCP_DEFER_ACCEPT is enabled, rskq_defer_accept is used
         * as the maximum number of retransmissions instead of
         * sysctl_tcp_synack_retries. The original note points at
         * inet_csk_reqsk_queue_prune(); in the code shown in this article
         * the override is done in reqsk_timer_handler().
         */
        u8            rskq_defer_accept;
    
        u32            synflood_warned;
        atomic_t        qlen;
        atomic_t        young;
        /*
         * qlen counts requests that have not completed the handshake; young
         * counts those whose SYN-ACK has not been retransmitted yet.
         *
         * rskq_accept_head and rskq_accept_tail delimit the list of
         * connection requests whose connection establishment has completed.
         * Per the original author's notes: the list nodes are
         * tcp_request_sock structures; when the application calls accept(),
         * reqsk_queue_get_child() takes the newly created struct sock from
         * this list, and the tcp_request_sock it was attached to must then
         * be released.
         */
    
        struct request_sock    *rskq_accept_head;
        struct request_sock    *rskq_accept_tail;
        struct fastopen_queue    fastopenq;  /* Check max_qlen != 0 to determine
                             * if TFO is enabled.
                             */
    };

     

    /*
     * Timer callback for a pending connection request. On each expiry it
     * decides whether to retransmit the SYN-ACK and re-arm the timer with a
     * longer timeout, or to drop the request.
     *
     * @data: the struct request_sock pointer, cast to unsigned long
     */
    static void reqsk_timer_handler(unsigned long data)
    {
        struct request_sock *req = (struct request_sock *)data;
        struct sock *sk_listener = req->rsk_listener;
        struct net *net = sock_net(sk_listener);
        struct inet_connection_sock *icsk = inet_csk(sk_listener);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        int qlen, expire = 0, resend = 0;
        int max_retries, thresh;
        u8 defer_accept;
    
        /* The listener is no longer listening: drop the request. */
        if (sk_state_load(sk_listener) != TCP_LISTEN)
            goto drop;
        /* A non-zero icsk_syn_retries takes precedence over the sysctl.
         * NOTE(review): the original note says this is the TCP_SYNCNT socket
         * option - confirm.
         */
        max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
        /* NOTE(review): the original note gives a default of 3-5 - confirm. */
        thresh = max_retries;
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
         * If synack was not acknowledged for 1 second, it means
         * one of the following things: synack was lost, ack was lost,
         * rtt is high or nobody planned to ack (i.e. synflood).
         * When server is a bit loaded, queue is populated with old
         * open requests, reducing effective size of queue.
         * When server is well loaded, queue size reduces to zero
         * after several minutes of work. It is not synflood,
         * it is normal operation. The solution is pruning
         * too old entries overriding normal timeout, when
         * situation becomes dangerous.
         *
         * Essentially, we reserve half of room for young
         * embrions; and abort old ones without pity, if old
         * ones are about to clog our table.
         */
        /* Number of requests that have not completed the handshake yet. */
        qlen = reqsk_queue_len(queue);
        /* Pending requests exceed half of max(8, sk_max_ack_backlog). */
        if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
            /* young counts requests whose SYN-ACK has not been retransmitted;
             * it is doubled here so the comparison needs no division.
             */
            int young = reqsk_queue_len_young(queue) << 1;
            /* Lower thresh step by step, but never below 2. */
            while (thresh > 2) {
                /* Enough young requests relative to the queue length:
                 * the queue is still considered healthy, stop lowering.
                 */
                if (qlen < young)
                    break;
                thresh--;
                young <<= 1;
            }
        }
        /* Retransmit count configured for deferred accept, if any. */
        defer_accept = READ_ONCE(queue->rskq_defer_accept);
        if (defer_accept)
            max_retries = defer_accept;
        /* Work out whether the request expired and whether to resend. */
        syn_ack_recalc(req, thresh, max_retries, defer_accept,
                   &expire, &resend);
        req->rsk_ops->syn_ack_timeout(req);
        /*
         * Keep the request when it has not expired and any one of:
         *  - no resend is needed;
         *  - a resend was needed and inet_rtx_syn_ack() returned 0
         *    (per the original note: the SYN-ACK retransmit, which ends up
         *    in tcp_v4_send_synack, succeeded);
         *  - the retransmit failed but the request was already ACKed,
         *    i.e. the deferred-accept case (ACK received, still no data).
         */
        if (!expire &&
            (!resend ||
             !inet_rtx_syn_ack(sk_listener, req) ||
             inet_rsk(req)->acked)) {
            unsigned long timeo;
    
            /* First timeout: the request stops counting as young. */
            if (req->num_timeout++ == 0)
                atomic_dec(&queue->young);
            /* Timeout grows exponentially, capped at TCP_RTO_MAX. */
            timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
            mod_timer_pinned(&req->rsk_timer, jiffies + timeo);
            return;
        }
    /* Reached when the listener left TCP_LISTEN or the request was not
     * re-armed above. Per the original note: the request is removed from
     * ehash and its timer is cleared.
     */
    drop:
        inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
    }
    /*
     * Compute, for the current timeout count of @req, whether the request
     * must be expired (*expire) and whether the SYN-ACK should be sent
     * again (*resend).
     *
     * @req:               pending connection request
     * @thresh:            timeout count from which the request may expire
     * @max_retries:       timeout limit applied to an already ACKed request
     *                     when accept is deferred
     * @rskq_defer_accept: non-zero when deferred accept is in use
     * @expire:            out, non-zero when the request has expired
     * @resend:            out, non-zero when the SYN-ACK should be resent
     */
    static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
                      const int max_retries,
                      const u8 rskq_defer_accept,
                      int *expire, int *resend)
    {
        if (rskq_defer_accept) {
            /*
             * Deferred accept: nothing expires below thresh. From thresh
             * on, a request that was never ACKed expires at once, while an
             * ACKed one expires only after max_retries timeouts.
             */
            if (req->num_timeout < thresh)
                *expire = 0;
            else
                *expire = !inet_rsk(req)->acked ||
                      req->num_timeout >= max_retries;

            /*
             * Do not resend while waiting for data after ACK,
             * start to resend on end of deferring period to give
             * last chance for data or ACK to create established socket.
             */
            *resend = !inet_rsk(req)->acked ||
                  req->num_timeout >= rskq_defer_accept - 1;
            return;
        }

        /*
         * No deferred accept: the request expires once the timeout count
         * reaches thresh, and the SYN-ACK is always resent.
         */
        *expire = req->num_timeout >= thresh;
        *resend = 1;
    }

    延时ACCEPT功能

    用户层可通过setsockopt的TCP_DEFER_ACCEPT选项开启延时accept功能。

    开启此功能后,处理逻辑位于函数tcp_check_req中:如果仅收到客户端回复的第三次握手ACK报文且不带数据(并且超时次数num_timeout仍小于rskq_defer_accept),则不做进一步处理,只把acked置为1并丢弃该报文;反之,如果ACK报文同时携带数据,则按正常流程处理,不再延时accept。

    /*
     * Excerpt of tcp_check_req(): only the TCP_DEFER_ACCEPT handling is shown.
     *
     * While the request's timeout count is still below rskq_defer_accept, an
     * ACK whose end_seq equals rcv_isn + 1 (a bare ACK) only marks the request
     * as acked and is otherwise dropped: the function returns NULL.
     */
    struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen)
    {
        /* ... earlier processing omitted from this excerpt ... */

        /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
        if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
            TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
            inet_rsk(req)->acked = 1;
            return NULL;
        }

        /* ... normal processing continues here; omitted from this excerpt ... */
    }

    注意:

    /*
     * Reset a request sock's retransmit bookkeeping, arm its rsk_timer and
     * insert it into the ehash table.
     *
     * @req:     request sock being set up
     * @timeout: delay, in jiffies, before reqsk_timer_handler first fires
     */
    static void reqsk_queue_hash_req(struct request_sock *req,
                     unsigned long timeout)
    {
        req->num_retrans = 0;
        req->num_timeout = 0;
        req->sk = NULL;
    
        /* reqsk_timer_handler gets req back through the timer's data word. */
        setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
        mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
    
        /* Short-lived connections hit the established hash table frequently. */
        inet_ehash_insert(req_to_sk(req), NULL);
        /* before letting lookups find us, make sure all req fields
         * are committed to memory and refcnt initialized.
         */
        smp_wmb();
        /* NOTE(review): presumably one reference each for the hash table,
         * the timer and the caller - confirm.
         */
        atomic_set(&req->rsk_refcnt, 2 + 1);
    }

    /*
     * Insert @sk into the ehash table and, when @osk is given, remove @osk
     * from it under the same bucket lock (the other socket can be a SYN_RECV
     * or TIMEWAIT one).
     *
     * Per the original notes: the table is tcp_hashinfo, and
     * tcp_hashinfo.ehash holds not only established TCP sockets but sockets
     * in every state other than LISTEN.
     *
     * Returns true when @sk was added. With a non-NULL @osk, @sk is added
     * only if sk_nulls_del_node_init_rcu(@osk) returned true, and that
     * result is what gets returned.
     */
    bool inet_ehash_insert(struct sock *sk, struct sock *osk)
    {
        struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
        struct inet_ehash_bucket *bucket;
        struct hlist_nulls_head *chain;
        spinlock_t *bucket_lock;
        bool inserted;

        WARN_ON_ONCE(!sk_unhashed(sk));

        sk->sk_hash = sk_ehashfn(sk);
        bucket = inet_ehash_bucket(hinfo, sk->sk_hash);
        chain = &bucket->chain;
        /*
         * Per the original note: every hash bucket has a spinlock in the
         * current Linux TCP implementation, so CPUs adding and removing
         * entries concurrently contend for it.
         */
        bucket_lock = inet_ehash_lockp(hinfo, sk->sk_hash);

        spin_lock(bucket_lock);
        if (!osk) {
            inserted = true;
        } else {
            WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
            inserted = sk_nulls_del_node_init_rcu(osk);
        }
        if (inserted)
            __sk_nulls_add_node_rcu(sk, chain);
        spin_unlock(bucket_lock);

        return inserted;
    }


  • 相关阅读:
    Handle/Body pattern(Wrapper pattern)
    Python: PS 滤镜--万花筒效果
    Java 工程与 Eclipse 高级用法
    更新服务
    Diskpart挂载/卸载VHD
    Ping批量函数
    Sysprep命令详解
    Hash Table构建
    Invoke-Express 执行多个批处理命令的函数
    磁盘扩容
  • 原文地址:https://www.cnblogs.com/codestack/p/12862638.html
Copyright © 2011-2022 走看看