zoukankan      html  css  js  c++  java
  • TCP connect EADDRNOTAVAIL(99)错误原因分析

    转自:http://blog.chinaunix.net/uid-20662820-id-3371081.html

    关于TCP connect返回错误99(EADDRNOTAVAIL),可能大家都会遇到,这里就分析一下这个错误的真正含义:

    基于内核2.6.32
    应用层调用connect,对应的系统调用的套接口实现是inet_stream_connect,TCP协议对应的传输层接口是tcp_v4_connect,这里就以这个函数作为入口,该函数定义在net/ipv4/tcp_ipv4.c文件中

    点击(此处)折叠或打开

    /*
     * tcp_v4_connect() - start an outgoing IPv4 TCP connection on sk.
     *
     * Validates the user-supplied address, resolves a route with
     * ip_route_connect(), fills in the source/destination fields of the
     * socket, switches the socket to TCP_SYN_SENT, calls
     * inet_hash_connect() (per the in-code comment, this is where the
     * source port is selected) and finally calls tcp_connect().
     *
     * Returns 0 on success. On failure returns a negative errno:
     * -EINVAL, -EAFNOSUPPORT or -ENETUNREACH from the checks below, or
     * whatever ip_route_connect(), inet_hash_connect(),
     * ip_route_newports() or tcp_connect() returned.
     */
    1. int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
    2. {
    3.     struct inet_sock *inet = inet_sk(sk);
    4.     struct tcp_sock *tp = tcp_sk(sk);
    5.     struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
    6.     struct rtable *rt;
    7.     __be32 daddr, nexthop;
    8.     int tmp;
    9.     int err;
    10.     if (addr_len < sizeof(struct sockaddr_in)) /* address buffer too short */
    11.         return -EINVAL;
    12.     if (usin->sin_family != AF_INET) /* only AF_INET is accepted here */
    13.         return -EAFNOSUPPORT;
    14.     nexthop = daddr = usin->sin_addr.s_addr;
    15.     if (inet->opt && inet->opt->srr) { /* srr option set: route towards opt->faddr instead */
    16.         if (!daddr)
    17.             return -EINVAL;
    18.         nexthop = inet->opt->faddr;
    19.     }
    20.     tmp = ip_route_connect(&rt, nexthop, inet->saddr,
    21.              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
    22.              IPPROTO_TCP,
    23.              inet->sport, usin->sin_port, sk, 1);
    24.     if (tmp < 0) {
    25.         if (tmp == -ENETUNREACH)
    26.             IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
    27.         return tmp;
    28.     }
    29.     if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { /* multicast/broadcast route: release it and fail */
    30.         ip_rt_put(rt);
    31.         return -ENETUNREACH;
    32.     }
    33.     if (!inet->opt || !inet->opt->srr)
    34.         daddr = rt->rt_dst;
    35.     if (!inet->saddr) /* no source address set yet: take the route's source */
    36.         inet->saddr = rt->rt_src;
    37.     inet->rcv_saddr = inet->saddr;
    38.     if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
    39.         /* Reset inherited state */
    40.         tp->rx_opt.ts_recent     = 0;
    41.         tp->rx_opt.ts_recent_stamp = 0;
    42.         tp->write_seq         = 0;
    43.     }
    44.     if (tcp_death_row.sysctl_tw_recycle &&
    45.      !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
    46.         struct inet_peer *peer = rt_get_peer(rt);
    47.         /*
    48.          * VJ's idea. We save last timestamp seen from
    49.          * the destination in peer table, when entering state
    50.          * TIME-WAIT * and initialize rx_opt.ts_recent from it,
    51.          * when trying new connection.
    52.          */
    53.         if (peer != NULL &&
    54.          peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
    55.             tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
    56.             tp->rx_opt.ts_recent = peer->tcp_ts;
    57.         }
    58.     }
    59.     inet->dport = usin->sin_port;
    60.     inet->daddr = daddr;
    61.     inet_csk(sk)->icsk_ext_hdr_len = 0;
    62.     if (inet->opt)
    63.         inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
    64.     tp->rx_opt.mss_clamp = 536;
    65.     /* Socket identity is still unknown (sport may be zero).
    66.      * However we set state to SYN-SENT and not releasing socket
    67.      * lock select source port, enter ourselves into the hash tables and
    68.      * complete initialization after this.
    69.      */
    70.     tcp_set_state(sk, TCP_SYN_SENT);
    71.     err = inet_hash_connect(&tcp_death_row, sk); /* a nonzero result is returned to the caller via the failure label */
    72.     if (err)
    73.         goto failure;
    74.     err = ip_route_newports(&rt, IPPROTO_TCP,
    75.                 inet->sport, inet->dport, sk);
    76.     if (err)
    77.         goto failure;
    78.     /* OK, now commit destination to socket. */
    79.     sk->sk_gso_type = SKB_GSO_TCPV4;
    80.     sk_setup_caps(sk, &rt->u.dst);
    81.     if (!tp->write_seq) /* pick an initial sequence number only if none is set */
    82.         tp->write_seq = secure_tcp_sequence_number(inet->saddr,
    83.                              inet->daddr,
    84.                              inet->sport,
    85.                              usin->sin_port);
    86.     inet->id = tp->write_seq ^ jiffies;
    87.     err = tcp_connect(sk);
    88.     rt = NULL; /* from here on the failure path calls ip_rt_put(NULL); presumably the route is now owned via sk_setup_caps() - confirm */
    89.     if (err)
    90.         goto failure;
    91.     return 0;
    92. failure:
    93.     /*
    94.      * This unhashes the socket and releases the local port,
    95.      * if necessary.
    96.      */
    97.     tcp_set_state(sk, TCP_CLOSE);
    98.     ip_rt_put(rt);
    99.     sk->sk_route_caps = 0;
    100.     inet->dport = 0;
    101.     return err;
    102. }
    这里进入第71行代码调用的函数inet_hash_connect,查找一个本地可用端口与服务器建立连接

    点击(此处)折叠或打开

    /*
     * inet_hash_connect() - wrapper around __inet_hash_connect().
     *
     * Forwards death_row and sk unchanged, and supplies
     * inet_sk_port_offset(sk) as the port offset,
     * __inet_check_established as the check_established callback and
     * __inet_hash_nolisten as the hash callback. The return value is
     * whatever __inet_hash_connect() returns.
     */
    1. int inet_hash_connect(struct inet_timewait_death_row *death_row,
    2.          struct sock *sk)
    3. {
    4.     return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
    5.             __inet_check_established, __inet_hash_nolisten);
    6. }
    该函数实际上调用__inet_hash_connect

    点击(此处)折叠或打开

    /*
     * __inet_hash_connect() - bind a connecting socket to a local port and
     * hash it.
     *
     * If the socket has no local port yet (inet_sk(sk)->num == 0), walks
     * the range returned by inet_get_local_port_range(), starting at a
     * position derived from port_offset and a static hint, and takes the
     * first port that either has no bind bucket yet or whose bucket passes
     * check_established(). If every candidate is rejected, or a new bind
     * bucket cannot be created, returns -EADDRNOTAVAIL.
     *
     * If the socket already has a local port, either hashes it directly
     * (when sk is the only owner of its bind bucket) or returns the result
     * of check_established() for that port.
     *
     * Returns 0 on success, -EADDRNOTAVAIL as described above, or the
     * value returned by check_established().
     */
    1. int __inet_hash_connect(struct inet_timewait_death_row *death_row,
    2.         struct sock *sk, u32 port_offset,
    3.         int (*check_established)(struct inet_timewait_death_row *,
    4.             struct sock *, __u16, struct inet_timewait_sock **),
    5.         void (*hash)(struct sock *sk))
    6. {
    7.     struct inet_hashinfo *hinfo = death_row->hashinfo;
    8.     const unsigned short snum = inet_sk(sk)->num;
    9.     struct inet_bind_hashbucket *head;
    10.     struct inet_bind_bucket *tb;
    11.     int ret;
    12.     struct net *net = sock_net(sk);
    13.     if (!snum) { /* no local port assigned yet: search the local port range */
    14.         int i, remaining, low, high, port;
    15.         static u32 hint; /* static: persists across calls and shifts the next search start */
    16.         u32 offset = hint + port_offset;
    17.         struct hlist_node *node;
    18.         struct inet_timewait_sock *tw = NULL;
    19.         inet_get_local_port_range(&low, &high);
    20.         remaining = (high - low) + 1; /* number of ports in [low, high] */
    21.         local_bh_disable();
    22.         for (i = 1; i <= remaining; i++) { /* at most one attempt per port in the range */
    23.             port = low + (i + offset) % remaining;
    24.             head = &hinfo->bhash[inet_bhashfn(net, port,
    25.                     hinfo->bhash_size)];
    26.             spin_lock(&head->lock);
    27.             /* Does not bother with rcv_saddr checks,
    28.              * because the established check is already
    29.              * unique enough.
    30.              */
    31.             inet_bind_bucket_for_each(tb, node, &head->chain) {
    32.                 if (ib_net(tb) == net && tb->port == port) {
    33.                     if (tb->fastreuse >= 0) /* skip; buckets created at line 42 below get -1, so presumably this one came from bind() - confirm */
    34.                         goto next_port;
    35.                     WARN_ON(hlist_empty(&tb->owners));
    36.                     if (!check_established(death_row, sk,
    37.                                 port, &tw))
    38.                         goto ok; /* callback returned 0: this port can be used */
    39.                     goto next_port;
    40.                 }
    41.             }
    42.             tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
    43.                     net, head, port);
    44.             if (!tb) { /* bucket creation failed: stop searching, falls through to -EADDRNOTAVAIL */
    45.                 spin_unlock(&head->lock);
    46.                 break;
    47.             }
    48.             tb->fastreuse = -1;
    49.             goto ok;
    50.         next_port:
    51.             spin_unlock(&head->lock);
    52.         }
    53.         local_bh_enable();
    54.         return -EADDRNOTAVAIL; /* every candidate port was rejected, or bucket creation failed */
    55. ok:
    56.         hint += i;
    57.         /* Head lock still held and bh's disabled */
    58.         inet_bind_hash(sk, tb, port);
    59.         if (sk_unhashed(sk)) {
    60.             inet_sk(sk)->sport = htons(port);
    61.             hash(sk);
    62.         }
    63.         spin_unlock(&head->lock);
    64.         if (tw) { /* check_established() handed back a timewait socket: deschedule it and drop the reference */
    65.             inet_twsk_deschedule(tw, death_row);
    66.             inet_twsk_put(tw);
    67.         }
    68.         ret = 0;
    69.         goto out; /* out: re-enables bottom halves and returns ret */
    70.     }
    71.     head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; /* socket already has local port snum */
    72.     tb = inet_csk(sk)->icsk_bind_hash;
    73.     spin_lock_bh(&head->lock);
    74.     if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { /* sk is the first and only owner of the bucket */
    75.         hash(sk);
    76.         spin_unlock_bh(&head->lock);
    77.         return 0;
    78.     } else {
    79.         spin_unlock(&head->lock); /* plain unlock: bottom halves stay disabled until local_bh_enable() at out */
    80.         /* No definite answer... Walk to established hash table */
    81.         ret = check_established(death_row, sk, snum, NULL);
    82. out:
    83.         local_bh_enable();
    84.         return ret;
    85.     }
    86. }
    注意第54行代码,这里返回了EADDRNOTAVAIL错误。
    这里分析一下这个函数实现:
    调用inet_get_local_port_range(&low, &high) 获取可用的本地端口范围,这个范围就是/proc/sys/net/ipv4/ip_local_port_range 中的值。

    点击(此处)折叠或打开

    /*
     * inet_get_local_port_range() - read the configured local port range.
     *
     * Stores sysctl_local_ports.range[0] in *low and
     * sysctl_local_ports.range[1] in *high. The two values are read between
     * read_seqbegin() and read_seqretry() on sysctl_local_ports.lock, and
     * the read is repeated for as long as read_seqretry() returns nonzero.
     */
    1. void inet_get_local_port_range(int *low, int *high)
    2. {
    3.     unsigned seq;
    4.     do {
    5.         seq = read_seqbegin(&sysctl_local_ports.lock);
    6.         *low = sysctl_local_ports.range[0];
    7.         *high = sysctl_local_ports.range[1];
    8.     } while (read_seqretry(&sysctl_local_ports.lock, seq)); /* retry until read_seqretry() returns 0 */
    9. }
    然后内核在这个范围内选择一个可用的端口作为本地端口去connect服务器,如果没有可用的端口,比如这个范围内的端口都处于如下状态中的一种:
    1. bind使用的端口
    2. 端口处于非TIME_WAIT状态
    3. 端口处于TIME_WAIT状态,但是没有启用tcp_tw_reuse
    那么就会返回EADDRNOTAVAIL错误。
     
    一般情况下,出现这个错误应该是代码设计的问题,如果确定代码没有问题,那么根据上面的原则,可以使用如下方法解决问题:
    1. 增大可选端口的范围,修改/proc/sys/net/ipv4/ip_local_port_range的值。
    2. 开启tcp_tw_reuse,允许使用TIME_WAIT状态的端口。
  • 相关阅读:
    模板
    总结
    关于log方线段树
    [ICPC2014 WF]Sensor Network
    背包问题总结
    NOIP2020微信步数
    NOIP2020移球游戏
    CF643D Bearish Fanpages
    CF685C Optimal Point
    论恋爱对学习的促进作用
  • 原文地址:https://www.cnblogs.com/yanwei-wang/p/6186947.html
Copyright © 2011-2022 走看看