zoukankan      html  css  js  c++  java
  • TCP的核心系列 — SACK和DSACK的实现(三)

    不论是18版,还是37版,一开始都会从TCP的控制块中取出SACK选项的起始地址。

    SACK选项的起始地址是保存在tcp_skb_cb结构的sacked项中的,那么这是在什么时候做的呢?

    SACK块并不是总是合法的,非法的SACK块可能会引起处理错误,所以还需要进行SACK块的合法性检查。

    本文主要内容:TCP首部中SACK选项的解析和地址的获取,SACK块的合法性检查。

    Author:zhangskd @ csdn

    SACK选项的地址

    TCP_SKB_CB(skb)->sacked is initialized to the offset corresponding to the start of the SACK option in the

    TCP header for the segment received.

    处理时机为:

    tcp_rcv_established(),进入慢速路径时调用

        | --> tcp_validate_incoming()

                    | --> tcp_fast_parse_options()

                               | --> tcp_parse_options()


    在慢速路径中,有可能只带有TIMESTAMP选项,因此先用tcp_fast_parse_options()快速解析。

    /*
     * Fast-path option parsing. This hopes to see either no options at all or
     * a single aligned timestamp option; every other layout is handed to
     * tcp_parse_options().
     *
     * Returns 0 when the header carries no options, 1 otherwise.
     */
    static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_sock *tp, u8 **hvpp)
    {
        /* doff is compared for equality against constants, so a too-short
         * doff matches neither case and needs no separate check.
         */
        if (th->doff == (sizeof(*th) / 4)) {
            /* Bare header: no options present. */
            tp->rx_opt.saw_tstamp = 0;
            return 0;
        }

        /* Header followed by exactly one aligned timestamp option. */
        if (tp->rx_opt.tstamp_ok &&
            th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) &&
            tcp_parse_aligned_timestamp(tp, th)) {
            return 1;
        }

        /* The quick checks did not settle it: run the full option parser. */
        tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
        return 1;
    }
    static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
    {
        __be32 *ptr = (__be32 *) (th + 1); /* 指向选项部分 */
     
        /* 如果选项部分的前4个字节分别为:0x 01 01 08 0A */
        if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
             | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
    
            tp->rx_opt.saw_tstamp = 1;
            ++ptr;
    
            tp->rx_opt.rcv_tsval = ntohl(*ptr); /* 提取接收包的时间戳*/
            ++ptr;
    
            tp->rx_opt.rcv_tsecr = ntohl(*ptr); /* 提取接收包的回显值*/
            return 1;
        }
    
        return 0;
    }
    

    在慢速路径中,如果tcp_fast_parse_options()失败,则调用tcp_parse_options()全面解析TCP选项。

    /* Look for tcp options. Normally only called on SYN and SYNACK packets.
     * But, this can also be called on packets in the established flow when the
     * fast version fails.
     *
     * Walks the option area that follows the fixed TCP header of skb. For the
     * option kinds shown in this excerpt it sets opt_rx->sack_ok when a valid
     * SACK-permitted option is seen, and stores the offset of a well-formed
     * SACK option (relative to the start of the TCP header) in
     * TCP_SKB_CB(skb)->sacked. Handling of the other option kinds is omitted
     * from this excerpt.
     */
    void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, u8 **hvpp, int estab)
    {
        unsigned char *ptr;
        struct tcphdr *th = tcp_hdr(skb);
        int length = (th->doff * 4) - sizeof(struct tcphdr); /* total length of the options */

        ptr = (unsigned char *) (th + 1); /* start of the options */
        opt_rx->saw_tstamp = 0; /* not yet known whether a timestamp is present */

        while (length > 0) {
            int opcode = *ptr++; /* option kind */
            int opsize;

            switch (opcode) {
                case TCPOPT_EOL: /* end of option list, rarely seen */
                    return;

                case TCPOPT_NOP: /* padding */
                    length--; /* this option occupies a single byte */
                    continue;

                default:
                    opsize = *ptr++; /* option length, counting the kind and length bytes */

                    if (opsize < 2) /* "silly options" */
                        return; /* length too small */

                    if (opsize > length)
                        return; /* don't parse partial options */

                    switch (opcode) {
                        /* ... (other option kinds omitted in this excerpt) ... */
                        case TCPOPT_SACK_PERM:
                            if (opsize == TCPOLEN_SACK_PERM && th->syn &&
                                 !estab && sysctl_tcp_sack) {

                                opt_rx->sack_ok = 1; /* the SYN advertises SACK support */
                                tcp_sack_reset(opt_rx); /* clear dsack and num_sacks */
                            }
                            break;

                        case TCPOPT_SACK:
                            if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
                               !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
                               opt_rx->sack_ok) {

                                /* Remember where the SACK option starts: ptr is
                                 * two bytes past the kind byte at this point.
                                 */
                                TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *) th;
                            }
                            break;
                        /* ... (other option kinds omitted in this excerpt) ... */
                    }

                    /* Step over this option's payload. Without this the loop
                     * would re-read option data as the next option kind and
                     * length would never shrink for non-NOP options.
                     */
                    ptr += opsize - 2;
                    length -= opsize;
            }
        }
    }
    
    /* TCP option kinds, listed in numeric order */
    #define TCPOPT_EOL 0 /* End of option list */
    #define TCPOPT_NOP 1 /* No-operation, used as padding */
    #define TCPOPT_MSS 2 /* Maximum segment size negotiation */
    #define TCPOPT_WINDOW 3 /* Window scaling */
    #define TCPOPT_SACK_PERM 4 /* SACK permitted */
    #define TCPOPT_SACK 5 /* SACK block */
    #define TCPOPT_TIMESTAMP 8 /* Timestamps: better RTT estimation / PAWS */
    
    /* Reset the SACK bookkeeping in rx_opt by zeroing its num_sacks and dsack
     * fields.
     */
    static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
    {
        rx_opt->num_sacks = 0;
        rx_opt->dsack = 0;
    }
    
    /* This is the max number of SACKS that we'll generate and process.
     * It's safe to increase this, although since:
     * size = TCPOLEN_SACK_BASE_ALIGNED(4) + n * TCPOLEN_SACK_PERBLOCK(8)
     * only four options will fit in a standard TCP header
     */
    #define TCP_NUM_SACKS 4 /* at most 4 SACK blocks */
    

    SACK块合法性检查

    检查SACK块或者DSACK块是否合法。

    2.6.24之前的版本没有检查SACK块的合法性,而某些非法的SACK块可能会触发空指针的引用。

    在3.1版本之前有一个小bug,处理DSACK时会产生问题,修复非常简单:

    把条件 if (! after(end_seq, tp->snd_una)) 中的"!"(取反)去掉,改为 if (after(end_seq, tp->snd_una))。

    符合以下任一条件的SACK块是合法的:

    1. sack块和dsack块:snd_una < start_seq < end_seq <= snd_nxt

    2. dsack块:undo_marker <= start_seq < end_seq <= snd_una

    3. dsack块:start_seq < undo_marker < end_seq <= snd_una 且 end_seq - start_seq <= max_window

    /* SACK block range validation checks that the received SACK block fits to the
     * expected sequence limits, i.e., it is between SND.UNA and SND.NXT.
     *
     * A block is accepted when any of the following holds:
     *   1. SACK or D-SACK: snd_una < start_seq < end_seq <= snd_nxt
     *   2. D-SACK:         undo_marker <= start_seq < end_seq <= snd_una
     *   3. D-SACK:         start_seq < undo_marker < end_seq <= snd_una, and
     *                      end_seq - start_seq <= max_window
     * Cases 2 and 3 also require tp->undo_marker to be non-zero.
     *
     * Returns 1 if the block is valid, 0 otherwise.
     */
    static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, u32 start_seq, u32 end_seq)
    {
        /* Too far in future, or reversed (interpretation is ambiguous):
         * end_seq is beyond snd_nxt, or start_seq >= end_seq.
         */
        if (after(end_seq, tp->snd_nxt) || ! before(start_seq, end_seq))
            return 0;

        /* Nasty start_seq wrap-around check:
         * start_seq is at or beyond snd_nxt.
         */
        if (! before(start_seq, tp->snd_nxt))
            return 0;

        /* In outstanding window? This is valid exit for D-SACKs too.
         * start_seq == snd_una is non-sensical, so it is not accepted here.
         */
        if (after(start_seq, tp->snd_una))
            return 1; /* valid */

        if (! is_dsack || ! tp->undo_marker)
            return 0;

        /* Then it's D-SACK, and must reside below snd_una completely.
         * Note: before Linux 3.1 this test read ! after(end_seq, tp->snd_una),
         * which was a bug.
         */
        if (after(end_seq, tp->snd_una))
            return 0;

        if (! before(start_seq, tp->undo_marker))
            return 1; /* valid D-SACK block */

        /* Too old: the D-SACK block ends at or before undo_marker */
        if (! after(end_seq, tp->undo_marker))
            return 0;

        /* Undo_marker boundary crossing: accept only if the block spans no
         * more than max_window.
         */
        return !before(start_seq, end_seq - tp->max_window);
    }
    
  • 相关阅读:
    2008年Web2.0峰会:发展是绝对的硬道理
    盖茨"接班人":微软产品为何总是挨批
    如何使用命令方式检测mx记录是否生效
    IBM公布未来5年将改变人类生活的五大科技
    谷歌李开复:我的传奇人生源于十句箴言
    VCL已死,RAD已死(3)
    VCL已死,RAD已死(2)
    主要程序设计语言范型综论与概要
    谷歌正式放弃与雅虎的广告合作计划
    模仿google分页代码
  • 原文地址:https://www.cnblogs.com/aiwz/p/6333337.html
Copyright © 2011-2022 走看看