zoukankan      html  css  js  c++  java
  • tcp pmtu探测

    root@ubuntu:~/c++# cat /proc/sys/net/ipv4/tcp_mtu_probing
    0
    root@ubuntu:~/c++# 
    int val = 1;
    setsockopt(sd, IPPROTO_IP, IP_DONTFRAG, &val, sizeof(val));
    

    Here's a page explaining this in further detail.

    For Linux, it appears you have to use the IP_MTU_DISCOVER option with the value IP_PMTUDISC_DO (or IP_PMTUDISC_DONT to turn it off):

    int val = IP_PMTUDISC_DO;
    setsockopt(sd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val));


    /* emsgsize.c: test whether IP_PMTUDISC_PROBE suppresses EMSGSIZE
     *
     * Usage: emsgsize packet_size
     */
    
    #include <arpa/inet.h>
    #include <errno.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    
    /* Report a failed call via perror(w_) and leave main() with status 1
     * when the status value s_ is negative.  Only usable inside main(). */
    #define CHECK(w_, s_) do { if ((s_) < 0) { perror(w_); return 1; }} while (0)
    
    /* Payload: zero-filled buffer; at most sizeof(data) bytes are ever sent. */
    static unsigned char data[64 * 1024];
    
    /*
     * Send one UDP datagram of argv[1] bytes to 127.0.0.1:12345 on a socket
     * configured with IP_PMTUDISC_PROBE, so the caller can observe whether
     * sendto(2) still fails with EMSGSIZE.
     *
     * Returns 0 when the datagram was handed to the kernel, 1 on a usage
     * error or when any system call fails (the failing call is reported on
     * stderr).
     */
    int
    main (int argc, char **argv)
    {
        int fd, on, s;
        long size;
        char *end;
        struct sockaddr_in si;
        ssize_t sent;
    
        if (argc != 2)
        {
            fprintf(stderr, "usage: emsgsize size\n");
            return 1;
        }
    
        /* strtol instead of atoi: reject non-numeric input, and keep the
         * length within the payload buffer so sendto never reads past it. */
        errno = 0;
        size = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || errno == ERANGE
                || size < 0 || (size_t) size > sizeof(data))
        {
            fprintf(stderr, "emsgsize: size must be an integer in [0, %zu]\n",
                                                                sizeof(data));
            return 1;
        }
    
        /* Bind to the wildcard address with an ephemeral port. */
        memset(&si, 0, sizeof(si));
        si.sin_family = AF_INET;
    
        fd = socket(si.sin_family, SOCK_DGRAM, 0);
        CHECK("socket", fd);
    
        s = bind(fd, (struct sockaddr *) &si, sizeof(si));
        CHECK("bind", s);
    
        /* This is supposed to suppress sendmsg(2) returning -1 with
         * errno = EMSGSIZE, see ip(7):
         *
         *   It is possible to implement RFC 4821 MTU probing with SOCK_DGRAM
         *   or SOCK_RAW sockets by setting a value of IP_PMTUDISC_PROBE
         *   (available since Linux 2.6.22).  This is also particularly
         *   useful for diagnostic tools such as tracepath(8) that wish to
         *   deliberately send probe packets larger than the observed Path MTU.
         */
        on = IP_PMTUDISC_PROBE;
        s = setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &on, sizeof(on));
        CHECK("setsockopt", s);
    
        memset(&si, 0, sizeof(si));
        si.sin_family = AF_INET;
        si.sin_port = htons(12345); /* Destination does not matter */
        s = inet_pton(AF_INET, "127.0.0.1", &si.sin_addr);
        CHECK("inet_pton", s);
        if (s == 0)
        {
            /* inet_pton signals an unparsable address with 0, not -1. */
            fprintf(stderr, "inet_pton: invalid address\n");
            return 1;
        }
    
        sent = sendto(fd, data, (size_t) size, 0, (struct sockaddr *) &si,
                                                                sizeof(si));
        CHECK("sendto", sent);
    
        return 0;
    }




    当TCP客户端发起连接建立请求时,在函数tcp_connect_init中调用TCP的MTU探测初始化函数tcp_mtup_init。如上所述默认情况下enabled为零,使用MSS最大限制值mss_clamp加上TCP头部长度和网络层头部长度作为MTU探测的上限值,下限值由函数tcp_mss_to_mtu通过基础MSS值计算得到。
     

    void tcp_mtup_init(struct sock *sk)
    {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
     
        icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
        icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len;
        icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
        icsk->icsk_mtup.probe_size = 0;
        if (icsk->icsk_mtup.enabled)
            icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
    
    }
    /*
     * Black hole detection: adjust the MTU probing state of @sk / @icsk.
     *
     * Does nothing when the tcp_mtu_probing sysctl is 0.  Otherwise:
     *  - if probing is not yet enabled on this connection, turn it on and
     *    record the current timestamp;
     *  - if probing is already enabled, lower search_low (halve the MSS that
     *    corresponds to it, cap it at tcp_base_mss, floor it at
     *    68 - tcp_header_len) so that a smaller size is tried next.
     * In both cases the cached MSS is then refreshed through tcp_sync_mss().
     */
    static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
    {
        struct net *net = sock_net(sk);
        struct tcp_sock *tp;
        int lowered_mss;

        if (!net->ipv4.sysctl_tcp_mtu_probing)
            return;

        if (icsk->icsk_mtup.enabled) {
            /* Probing already active: shrink the lower search bound and retry. */
            tp = tcp_sk(sk);
            lowered_mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
            lowered_mss = min(net->ipv4.sysctl_tcp_base_mss, lowered_mss);
            /* NOTE(review): 68 is presumably the minimum IPv4 MTU -- confirm. */
            lowered_mss = max(lowered_mss, 68 - tp->tcp_header_len);
            icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, lowered_mss);
        } else {
            /* Probing was off for this connection: switch it on now. */
            icsk->icsk_mtup.enabled = 1;
            icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
        }

        /* Update mss_cache from the current path-MTU cookie. */
        tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
    }

    配置及初始化

    想要启用tcp mtu probe, 先要设置ip_no_pmtu_disc=0(默认值), 表示启用pmtu discovery。 这样tcp发送的时候才会设置DF标记。
    通过DF标记,中间路由设备如果需要分片就会返还ICMP消息通知, 但是有可能因为防火墙等原因,发送方收不到ICMP消息,因此发送方一直发送探测包,却一直没收到回应, 这个就称为black hole。
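    tcp_mtu_probing=1 表示默认不启用MTU探测,只有当检测到black hole的时候,才会开启tcp mtu probe;tcp_mtu_probing=2 表示始终启用。注意内核默认值为0(如本文开头 cat /proc/sys/net/ipv4/tcp_mtu_probing 的输出所示),即完全禁用tcp mtu probe。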

    root@ubuntu:~# ping -s 2500  -M do  8.8.8.8 
    PING 8.8.8.8 (8.8.8.8) 2500(2528) bytes of data.
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ping: local error: Message too long, mtu=1500
    ^C
    --- 8.8.8.8 ping statistics ---
    10 packets transmitted, 0 received, +10 errors, 100% packet loss, time 9213ms
    
    root@ubuntu:~# 

    demo2

    root@ubuntu:~# ping -s 1400  -M do  8.8.8.8 
    PING 8.8.8.8 (8.8.8.8) 1400(1428) bytes of data.
    76 bytes from 8.8.8.8: icmp_seq=1 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=2 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=3 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=4 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=5 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=6 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=7 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=8 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=9 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=10 ttl=101 (truncated)
    76 bytes from 8.8.8.8: icmp_seq=11 ttl=101 (truncated)
    ^C
    --- 8.8.8.8 ping statistics ---
    11 packets transmitted, 11 received, 0% packet loss, time 10006ms
    rtt min/avg/max/mdev = 47.750/47.863/48.119/0.285 ms
    root@ubuntu:~# 
    root@ubuntu:~#  tcpdump -i enahisic2i0 icmp and  host 8.8.8.8 -nnvv
    tcpdump: listening on enahisic2i0, link-type EN10MB (Ethernet), capture size 262144 bytes
    16:05:25.061926 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 1428)
        10.10.1.82 > 8.8.8.8: ICMP echo request, id 20914, seq 1, length 1408
    16:05:25.110142 IP (tos 0x0, ttl 101, id 0, offset 0, flags [none], proto ICMP (1), length 96)
        8.8.8.8 > 10.10.1.82: ICMP echo reply, id 20914, seq 1, length 76
    16:05:26.063245 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 1428)
        10.10.1.82 > 8.8.8.8: ICMP echo request, id 20914, seq 2, length 1408
    16:05:26.111113 IP (tos 0x0, ttl 101, id 0, offset 0, flags [none], proto ICMP (1), length 96)
        8.8.8.8 > 10.10.1.82: ICMP echo reply, id 20914, seq 2, length 76
    16:05:27.065189 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 1428)
        10.10.1.82 > 8.8.8.8: ICMP echo request, id 20914, seq 3, length 1408
    16:05:27.113076 IP (tos 0x0, ttl 101, id 0, offset 0, flags [none], proto ICMP (1), length 96)
        8.8.8.8 > 10.10.1.82: ICMP echo reply, id 20914, seq 3, length 76
  • 相关阅读:
    IntelliJ IDEA 2017版 SpringBoot的核心配置详解
    路由追踪程序traceroute/tracert分析与科普
    traceroute追踪路由命令
    ping 命令
    hostname命令,修改主机名及host文件
    net-tools工具arp命令
    ifup 和 ifdown
    net-tools工具ifconfig 命令
    iproute2 对决 net-tools
    什么是带内管理 带外管理
  • 原文地址:https://www.cnblogs.com/dream397/p/14611022.html
Copyright © 2011-2022 走看看