zoukankan      html  css  js  c++  java
  • TCP connect EADDRNOTAVAIL(99)错误原因分析

    转自:http://blog.chinaunix.net/uid-20662820-id-3371081.html

    关于TCP connect 返回错误99,可能大家都会遇到,这里就分析一下这个错误的真正含义:

    基于内核2.6.32
    应用层调用connect,对应的系统调用的套接口实现是inet_stream_connect,TCP协议对应的传输层接口是tcp_v4_connect, 这里就以这个函数作为入口,该函数定义在net/ipv4/tcp_ipv4.c文件中

    点击(此处)折叠或打开

    1. int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
    2. {
    3.     struct inet_sock *inet = inet_sk(sk);
    4.     struct tcp_sock *tp = tcp_sk(sk);
    5.     struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
    6.     struct rtable *rt;
    7.     __be32 daddr, nexthop;
    8.     int tmp;
    9.     int err;
    10.     if (addr_len < sizeof(struct sockaddr_in))
    11.         return -EINVAL;
    12.     if (usin->sin_family != AF_INET)
    13.         return -EAFNOSUPPORT;
    14.     nexthop = daddr = usin->sin_addr.s_addr;
    15.     if (inet->opt && inet->opt->srr) {
    16.         if (!daddr)
    17.             return -EINVAL;
    18.         nexthop = inet->opt->faddr;
    19.     }
    20.     tmp = ip_route_connect(&rt, nexthop, inet->saddr,
    21.              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
    22.              IPPROTO_TCP,
    23.              inet->sport, usin->sin_port, sk, 1);
    24.     if (tmp < 0) {
    25.         if (tmp == -ENETUNREACH)
    26.             IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
    27.         return tmp;
    28.     }
    29.     if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
    30.         ip_rt_put(rt);
    31.         return -ENETUNREACH;
    32.     }
    33.     if (!inet->opt || !inet->opt->srr)
    34.         daddr = rt->rt_dst;
    35.     if (!inet->saddr)
    36.         inet->saddr = rt->rt_src;
    37.     inet->rcv_saddr = inet->saddr;
    38.     if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
    39.         /* Reset inherited state */
    40.         tp->rx_opt.ts_recent     = 0;
    41.         tp->rx_opt.ts_recent_stamp = 0;
    42.         tp->write_seq         = 0;
    43.     }
    44.     if (tcp_death_row.sysctl_tw_recycle &&
    45.      !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
    46.         struct inet_peer *peer = rt_get_peer(rt);
    47.         /*
    48.          * VJ's idea. We save last timestamp seen from
    49.          * the destination in peer table, when entering state
    50.          * TIME-WAIT * and initialize rx_opt.ts_recent from it,
    51.          * when trying new connection.
    52.          */
    53.         if (peer != NULL &&
    54.          peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
    55.             tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
    56.             tp->rx_opt.ts_recent = peer->tcp_ts;
    57.         }
    58.     }
    59.     inet->dport = usin->sin_port;
    60.     inet->daddr = daddr;
    61.     inet_csk(sk)->icsk_ext_hdr_len = 0;
    62.     if (inet->opt)
    63.         inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
    64.     tp->rx_opt.mss_clamp = 536;
    65.     /* Socket identity is still unknown (sport may be zero).
    66.      * However we set state to SYN-SENT and not releasing socket
    67.      * lock select source port, enter ourselves into the hash tables and
    68.      * complete initialization after this.
    69.      */
    70.     tcp_set_state(sk, TCP_SYN_SENT);
    71.     err = inet_hash_connect(&tcp_death_row, sk);
    72.     if (err)
    73.         goto failure;
    74.     err = ip_route_newports(&rt, IPPROTO_TCP,
    75.                 inet->sport, inet->dport, sk);
    76.     if (err)
    77.         goto failure;
    78.     /* OK, now commit destination to socket. */
    79.     sk->sk_gso_type = SKB_GSO_TCPV4;
    80.     sk_setup_caps(sk, &rt->u.dst);
    81.     if (!tp->write_seq)
    82.         tp->write_seq = secure_tcp_sequence_number(inet->saddr,
    83.                              inet->daddr,
    84.                              inet->sport,
    85.                              usin->sin_port);
    86.     inet->id = tp->write_seq ^ jiffies;
    87.     err = tcp_connect(sk);
    88.     rt = NULL;
    89.     if (err)
    90.         goto failure;
    91.     return 0;
    92. failure:
    93.     /*
    94.      * This unhashes the socket and releases the local port,
    95.      * if necessary.
    96.      */
    97.     tcp_set_state(sk, TCP_CLOSE);
    98.     ip_rt_put(rt);
    99.     sk->sk_route_caps = 0;
    100.     inet->dport = 0;
    101.     return err;
    102. }
    这里进入第71行代码调用的函数inet_hash_connect,查找一个本地可用端口与服务器建立连接

    点击(此处)折叠或打开

    1. int inet_hash_connect(struct inet_timewait_death_row *death_row,
    2.          struct sock *sk)
    3. {
    4.     return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
    5.             __inet_check_established, __inet_hash_nolisten);
    6. }
    该函数实际上调用__inet_hash_connect

    点击(此处)折叠或打开

    1. int __inet_hash_connect(struct inet_timewait_death_row *death_row,
    2.         struct sock *sk, u32 port_offset,
    3.         int (*check_established)(struct inet_timewait_death_row *,
    4.             struct sock *, __u16, struct inet_timewait_sock **),
    5.         void (*hash)(struct sock *sk))
    6. {
    7.     struct inet_hashinfo *hinfo = death_row->hashinfo;
    8.     const unsigned short snum = inet_sk(sk)->num;
    9.     struct inet_bind_hashbucket *head;
    10.     struct inet_bind_bucket *tb;
    11.     int ret;
    12.     struct net *net = sock_net(sk);
    13.     if (!snum) {
    14.         int i, remaining, low, high, port;
    15.         static u32 hint;
    16.         u32 offset = hint + port_offset;
    17.         struct hlist_node *node;
    18.         struct inet_timewait_sock *tw = NULL;
    19.         inet_get_local_port_range(&low, &high);
    20.         remaining = (high - low) + 1;
    21.         local_bh_disable();
    22.         for (i = 1; i <= remaining; i++) {
    23.             port = low + (i + offset) % remaining;
    24.             head = &hinfo->bhash[inet_bhashfn(net, port,
    25.                     hinfo->bhash_size)];
    26.             spin_lock(&head->lock);
    27.             /* Does not bother with rcv_saddr checks,
    28.              * because the established check is already
    29.              * unique enough.
    30.              */
    31.             inet_bind_bucket_for_each(tb, node, &head->chain) {
    32.                 if (ib_net(tb) == net && tb->port == port) {
    33.                     if (tb->fastreuse >= 0)
    34.                         goto next_port;
    35.                     WARN_ON(hlist_empty(&tb->owners));
    36.                     if (!check_established(death_row, sk,
    37.                                 port, &tw))
    38.                         goto ok;
    39.                     goto next_port;
    40.                 }
    41.             }
    42.             tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
    43.                     net, head, port);
    44.             if (!tb) {
    45.                 spin_unlock(&head->lock);
    46.                 break;
    47.             }
    48.             tb->fastreuse = -1;
    49.             goto ok;
    50.         next_port:
    51.             spin_unlock(&head->lock);
    52.         }
    53.         local_bh_enable();
    54.         return -EADDRNOTAVAIL;
    55. ok:
    56.         hint += i;
    57.         /* Head lock still held and bh's disabled */
    58.         inet_bind_hash(sk, tb, port);
    59.         if (sk_unhashed(sk)) {
    60.             inet_sk(sk)->sport = htons(port);
    61.             hash(sk);
    62.         }
    63.         spin_unlock(&head->lock);
    64.         if (tw) {
    65.             inet_twsk_deschedule(tw, death_row);
    66.             inet_twsk_put(tw);
    67.         }
    68.         ret = 0;
    69.         goto out;
    70.     }
    71.     head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
    72.     tb = inet_csk(sk)->icsk_bind_hash;
    73.     spin_lock_bh(&head->lock);
    74.     if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
    75.         hash(sk);
    76.         spin_unlock_bh(&head->lock);
    77.         return 0;
    78.     } else {
    79.         spin_unlock(&head->lock);
    80.         /* No definite answer... Walk to established hash table */
    81.         ret = check_established(death_row, sk, snum, NULL);
    82. out:
    83.         local_bh_enable();
    84.         return ret;
    85.     }
    86. }
    注意第54行代码,这里返回了EADDRNOTAVAIL错误。
    这里分析一下这个函数实现:
    调用inet_get_local_port_range(&low, &high) 获取可用的端口范围,这个值就是/proc/sys/net/ipv4/ip_local_port_range 中的值。

    点击(此处)折叠或打开

    1. void inet_get_local_port_range(int *low, int *high)
    2. {
    3.     unsigned seq;
    4.     do {
    5.         seq = read_seqbegin(&sysctl_local_ports.lock);
    6.         *low = sysctl_local_ports.range[0];
    7.         *high = sysctl_local_ports.range[1];
    8.     } while (read_seqretry(&sysctl_local_ports.lock, seq));
    9. }
    然后内核在这个范围内选择一个可用的端口作为本地端口去connect服务器,如果没有可用的端口,比如这个范围内的端口都处于如下状态中的一种:
    1. bind使用的端口
    2. 端口处于非TIME_WAIT状态
    3. 端口处于TIME_WAIT状态,但是没有启用tcp_tw_reuse
    那么就会返回EADDRNOTAVAIL错误。
     
    一般情况下,出现这个错误应该是代码设计的问题,如果确定代码没有问题,那么根据上面的原则,可以使用如下方法解决问题:
    1. 增大可选端口的范围,修改/proc/sys/net/ipv4/ip_local_port_range的值。
    2. 开启tcp_tw_reuse,允许使用TIME_WAIT状态的端口。
  • 相关阅读:
    springmvc log4j 配置
    intellij idea maven springmvc 环境搭建
    spring,property not found on type
    intellij idea maven 工程生成可执行的jar
    device eth0 does not seem to be present, delaying initialization
    macos ssh host配置及免密登陆
    centos7 搭建 docker 环境
    通过rest接口获取自增id (twitter snowflake算法)
    微信小程序开发体验
    gitbook 制作 beego 参考手册
  • 原文地址:https://www.cnblogs.com/yanwei-wang/p/6186947.html
Copyright © 2011-2022 走看看