zoukankan      html  css  js  c++  java
  • TCP连接的建立(二)

    被动打开

    SYN cookies

    TCP协议开辟了一个比较大的内存空间作为请求连接队列来存储连接请求块,当SYN请求不断增加、请求连接数目到达上限时,系统会丢弃SYN连接请求。SYN cookies技术能够使server在半连接队列已满的情况下仍能处理新的SYN请求。

    当半连接队列满时,SYN cookies并不丢弃SYN请求,而是通过加密技术来标识半连接状态。在TCP实现中,当收到client的SYN请求时,server需要回复SYN+ACK包给client,然后client再发送确认包给server。通常,server的初始序列号是由server按照一定的规律计算得到的随机数,而在SYN cookies中,server的初始序列号是由client IP地址、client端口号、server IP地址和server端口号、接收到的client初始序列号以及其它一些安全数值进行hash运算,并加密后得到的,称之为cookies。

    当server遭受SYN攻击使得请求连接队列满时,server并不拒绝新的SYN请求,而是回复一个初始序列号为cookies的SYN+ACK包给client。如果收到client的ACK段,server将client的ACK序列号减1得到的值,与用上述那些要素hash运算得到的值比较,如果相等,直接完成三次握手。注意:此时并不必查看此连接是否属于请求连接队列。

    启用SYN cookies是通过在启动环境中设置以下命令来完成:

    echo 1 > /proc/sys/net/ipv4/tcp_syncookies

    第一次握手:接收SYN段

    传输控制块接收处理的段都由tcp_v4_do_rcv()处理,在该函数中再依据不同的状态由不同的函数处理。

    /* The socket must have it's spinlock held when we get
     * here.
     *
     * We have a potential double-lock case here, so even when
     * doing backlog processing we use the BH locking scheme.
     * This is because we cannot sleep with the original spinlock
     * held.
     */
    int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
    {
    	struct sock *rsk;
    #ifdef CONFIG_TCP_MD5SIG
    	/*
    	 * We really want to reject the packet as early as possible
    	 * if:
    	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
    	 *  o There is an MD5 option and we're not expecting one
    	 */
    	if (tcp_v4_inbound_md5_hash(sk, skb))
    		goto discard;
    #endif
    
    	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
    		TCP_CHECK_TIMER(sk);
    		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
    			rsk = sk;
    			goto reset;
    		}
    		TCP_CHECK_TIMER(sk);
    		return 0;
    	}
    
    	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
    		goto csum_err;
    
    	if (sk->sk_state == TCP_LISTEN) {
    		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
    		if (!nsk)
    			goto discard;
    
    		if (nsk != sk) {
    			if (tcp_child_process(sk, nsk, skb)) {
    				rsk = nsk;
    				goto reset;
    			}
    			return 0;
    		}
    	}
    
    	TCP_CHECK_TIMER(sk);
    	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
    		rsk = sk;
    		goto reset;
    	}
    	TCP_CHECK_TIMER(sk);
    	return 0;
    
    reset:
    	tcp_v4_send_reset(rsk, skb);
    discard:
    	kfree_skb(skb);
    	/* Be careful here. If this function gets more complicated and
    	 * gcc suffers from register pressure on the x86, sk (in %ebx)
    	 * might be destroyed here. This current version compiles correctly,
    	 * but you have been warned.
    	 */
    	return 0;
    
    csum_err:
    	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
    	goto discard;
    }
    
    第二次握手:发送SYN+ACK段

    tcp_v4_send_synack()用来为服务端构造回应client连接请求SYN段的SYN+ACK段,并将其封装在IP数据报中发送给client。

    /*
     *	Send a SYN-ACK after having received a SYN.
     *	This still operates on a request_sock only, not on a big
     *	socket.
     *
     *	@sk:  socket passed to the route lookup, tcp_make_synack() and
     *	      ip_build_and_send_pkt().
     *	@req: connection request; its inet_request_sock supplies the
     *	      local/remote addresses and IP options.
     *	@dst: route to use, or NULL to look one up with
     *	      inet_csk_route_req().
     *
     *	Returns -1 when no route or no skb could be obtained, otherwise
     *	the transmit status filtered through net_xmit_eval().
     */
    static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
    				struct dst_entry *dst)
    {
    	const struct inet_request_sock *ireq = inet_rsk(req);
    	struct sk_buff *synack;
    	struct tcphdr *th;
    	int rc = -1;

    	/* First, grab a route. */
    	if (dst == NULL) {
    		dst = inet_csk_route_req(sk, req);
    		if (dst == NULL)
    			return -1;
    	}

    	synack = tcp_make_synack(sk, dst, req);
    	if (synack == NULL)
    		goto release;

    	/* Fill in the TCP checksum over the finished segment. */
    	th = tcp_hdr(synack);
    	th->check = tcp_v4_check(synack->len, ireq->loc_addr, ireq->rmt_addr,
    				 csum_partial(th, synack->len, synack->csum));

    	/*
    	 * Keep the send and the evaluation as two statements so the send
    	 * call is never passed as the argument of net_xmit_eval().
    	 */
    	rc = ip_build_and_send_pkt(synack, sk, ireq->loc_addr,
    				   ireq->rmt_addr, ireq->opt);
    	rc = net_xmit_eval(rc);

    release:
    	dst_release(dst);
    	return rc;
    }
    /*
     * Send a SYN-ACK for @req without a pre-resolved route: forwards to
     * __tcp_v4_send_synack() with a NULL dst and returns its result.
     */
    static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
    {
    	struct dst_entry *no_dst = NULL;

    	return __tcp_v4_send_synack(sk, req, no_dst);
    }
    第三次握手:接收ACK段

    服务端接收到SYN段后,会为将建立的连接创建一个连接请求块,同时发送SYN+ACK段给client作为回应,然后启动建立连接定时器,等待client最后一次握手的ACK段。

    connect系统调用的实现

    inet_stream_connect()是connect系统调用的套接口层实现,首先校验设置的地址族,然后校验套接口状态,套接口状态为SS_UNCONNECTED时调用传输层接口,TCP中为tcp_v4_connect()。最后,等待连接的完成或失败。

    /*
     *	Connect to a remote host. There is regrettably still a little
     *	TCP 'magic' in here.
     *
     *	Socket-layer connect: takes the socket lock, dispatches on
     *	sock->state, calls sk->sk_prot->connect() for an unconnected
     *	socket and then, depending on the send timeout, waits for the
     *	connection attempt to finish.
     *
     *	@sock:     socket to connect; sock->state is updated here.
     *	@uaddr:    destination address; sa_family == AF_UNSPEC requests a
     *	           disconnect instead of a connect.
     *	@addr_len: length of @uaddr, passed through to sk_prot->connect().
     *	@flags:    file flags; (flags & O_NONBLOCK) is handed to
     *	           sock_sndtimeo() and the full value to sk_prot->disconnect().
     *
     *	Returns 0 once the socket is SS_CONNECTED, otherwise a negative
     *	errno: -EINVAL, -EISCONN, -EALREADY, -EINPROGRESS, the error from
     *	sk_prot->connect()/disconnect(), sock_intr_errno(), or
     *	sock_error() falling back to -ECONNABORTED.
     */
    int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
    			int addr_len, int flags)
    {
    	struct sock *sk = sock->sk;
    	int err;
    	long timeo;
    
    	lock_sock(sk);
    
    	/* AF_UNSPEC means "disconnect"; the result decides the new state. */
    	if (uaddr->sa_family == AF_UNSPEC) {
    		err = sk->sk_prot->disconnect(sk, flags);
    		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
    		goto out;
    	}
    
    	switch (sock->state) {
    	default:
    		err = -EINVAL;
    		goto out;
    	case SS_CONNECTED:
    		err = -EISCONN;
    		goto out;
    	case SS_CONNECTING:
    		err = -EALREADY;
    		/* Fall out of switch with err, set for this state */
    		break;
    	case SS_UNCONNECTED:
    		/* Only a socket whose sk_state is TCP_CLOSE may start a connect. */
    		err = -EISCONN;
    		if (sk->sk_state != TCP_CLOSE)
    			goto out;
    
    		/* Hand over to the transport protocol's connect operation. */
    		err = sk->sk_prot->connect(sk, uaddr, addr_len);
    		if (err < 0)
    			goto out;
    
    		sock->state = SS_CONNECTING;
    
    		/* Just entered SS_CONNECTING state; the only
    		 * difference is that return value in non-blocking
    		 * case is EINPROGRESS, rather than EALREADY.
    		 */
    		err = -EINPROGRESS;
    		break;
    	}
    
    	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
    
    	/* Handshake still in progress (SYN_SENT or SYN_RECV). */
    	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
    		/* Error code is set above */
    		/* Leave with that code if there is no timeout to wait with
    		 * or inet_wait_for_connect() returns 0.
    		 */
    		if (!timeo || !inet_wait_for_connect(sk, timeo))
    			goto out;
    
    		err = sock_intr_errno(timeo);
    		if (signal_pending(current))
    			goto out;
    	}
    
    	/* Connection was closed by RST, timeout, ICMP error
    	 * or another process disconnected us.
    	 */
    	if (sk->sk_state == TCP_CLOSE)
    		goto sock_error;
    
    	/* sk->sk_err may be not zero now, if RECVERR was ordered by user
    	 * and error was received after socket entered established state.
    	 * Hence, it is handled normally after connect() return successfully.
    	 */
    
    	sock->state = SS_CONNECTED;
    	err = 0;
    out:
    	/* Single exit: every path drops the socket lock here. */
    	release_sock(sk);
    	return err;
    
    sock_error:
    	/* Use the pending socket error if there is one (GNU "?:" form). */
    	err = sock_error(sk) ? : -ECONNABORTED;
    	sock->state = SS_UNCONNECTED;
    	if (sk->sk_prot->disconnect(sk, flags))
    		sock->state = SS_DISCONNECTING;
    	goto out;
    }
    
    调用传输层接口,连接需要三次握手,connect接口只是完成发送SYN段的过程,后续两次握手由协议栈完成。

    SYN段发送成功后,后续只需等待第三次握手结束。

    主动打开

    第一次握手:发送SYN段

    初始化client传输控制块并发送SYN段,通过tcp_v4_connect()完成。

    /*
     * This will initiate an outgoing connection.
     *
     * Validates the destination address, resolves a route, fills in the
     * socket's source and destination fields, moves the socket to
     * TCP_SYN_SENT, hashes it with inet_hash_connect(), picks an initial
     * write sequence number if none is set, and calls tcp_connect().
     *
     * @sk:       socket to connect.
     * @uaddr:    destination, interpreted as a struct sockaddr_in.
     * @addr_len: size of @uaddr; must be at least sizeof(struct sockaddr_in).
     *
     * Returns 0 on success, otherwise -EINVAL, -EAFNOSUPPORT, -ENETUNREACH
     * or the error reported by ip_route_connect(), inet_hash_connect(),
     * ip_route_newports() or tcp_connect().
     */
    int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
    {
    	struct inet_sock *inet = inet_sk(sk);
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
    	struct rtable *rt;
    	__be32 daddr, nexthop;
    	int tmp;
    	int err;
    
    	if (addr_len < sizeof(struct sockaddr_in))
    		return -EINVAL;
    
    	if (usin->sin_family != AF_INET)
    		return -EAFNOSUPPORT;
    
    	/* With IP options that have srr set, route towards opt->faddr
    	 * rather than the final destination; a zero destination is
    	 * rejected in that case.
    	 */
    	nexthop = daddr = usin->sin_addr.s_addr;
    	if (inet->opt && inet->opt->srr) {
    		if (!daddr)
    			return -EINVAL;
    		nexthop = inet->opt->faddr;
    	}
    
    	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
    			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
    			       IPPROTO_TCP,
    			       inet->sport, usin->sin_port, sk, 1);
    	if (tmp < 0) {
    		if (tmp == -ENETUNREACH)
    			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
    		return tmp;
    	}
    
    	/* Refuse routes flagged as multicast or broadcast. */
    	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
    		ip_rt_put(rt);
    		return -ENETUNREACH;
    	}
    
    	/* Without srr, take the destination from the route. */
    	if (!inet->opt || !inet->opt->srr)
    		daddr = rt->rt_dst;
    
    	/* No source address set yet: use the one the route selected. */
    	if (!inet->saddr)
    		inet->saddr = rt->rt_src;
    	inet->rcv_saddr = inet->saddr;
    
    	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
    		/* Reset inherited state */
    		tp->rx_opt.ts_recent	   = 0;
    		tp->rx_opt.ts_recent_stamp = 0;
    		tp->write_seq		   = 0;
    	}
    
    	if (tcp_death_row.sysctl_tw_recycle &&
    	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
    		struct inet_peer *peer = rt_get_peer(rt);
    		/*
    		 * VJ's idea. We save last timestamp seen from
    		 * the destination in peer table, when entering state
    		 * TIME-WAIT and initialize rx_opt.ts_recent from it,
    		 * when trying new connection.
    		 */
    		if (peer != NULL &&
    		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
    			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
    			tp->rx_opt.ts_recent = peer->tcp_ts;
    		}
    	}
    
    	inet->dport = usin->sin_port;
    	inet->daddr = daddr;
    
    	/* Extension header length is the IP option length, if any. */
    	inet_csk(sk)->icsk_ext_hdr_len = 0;
    	if (inet->opt)
    		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
    
    	/* NOTE(review): 536 is presumably the default TCP MSS
    	 * (576-byte datagram minus 40 bytes of headers) - confirm.
    	 */
    	tp->rx_opt.mss_clamp = 536;
    
    	/* Socket identity is still unknown (sport may be zero).
    	 * However we set state to SYN-SENT and not releasing socket
    	 * lock select source port, enter ourselves into the hash tables and
    	 * complete initialization after this.
    	 */
    	tcp_set_state(sk, TCP_SYN_SENT);
    	err = inet_hash_connect(&tcp_death_row, sk);
    	if (err)
    		goto failure;
    
    	/* Update the route now that the final ports are known. */
    	err = ip_route_newports(&rt, IPPROTO_TCP,
    				inet->sport, inet->dport, sk);
    	if (err)
    		goto failure;
    
    	/* OK, now commit destination to socket.  */
    	sk->sk_gso_type = SKB_GSO_TCPV4;
    	sk_setup_caps(sk, &rt->u.dst);
    
    	/* Keep a non-zero write_seq; otherwise generate a fresh one. */
    	if (!tp->write_seq)
    		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
    							   inet->daddr,
    							   inet->sport,
    							   usin->sin_port);
    
    	inet->id = tp->write_seq ^ jiffies;
    
    	err = tcp_connect(sk);
    	/* From here on the failure path must call ip_rt_put() with NULL.
    	 * NOTE(review): presumably the route reference now belongs to
    	 * the socket after sk_setup_caps() - confirm.
    	 */
    	rt = NULL;
    	if (err)
    		goto failure;
    
    	return 0;
    
    failure:
    	/*
    	 * This unhashes the socket and releases the local port,
    	 * if necessary.
    	 */
    	tcp_set_state(sk, TCP_CLOSE);
    	ip_rt_put(rt);
    	sk->sk_route_caps = 0;
    	inet->dport = 0;
    	return err;
    }
    第二次握手:接收SYN+ACK段

    处于SYN_SENT状态的传输控制块,通过tcp_rcv_state_process()来处理。

    /*
     *	This function implements the receiving procedure of RFC 793 for
     *	all states except ESTABLISHED and TIME_WAIT.
     *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
     *	address independent.
     *
     *	@sk:  socket the segment was matched to.
     *	@skb: received segment.
     *	@th:  TCP header of @skb.
     *	@len: segment length, forwarded to tcp_rcv_synsent_state_process().
     *
     *	Returns 0 when the segment was handled (it has then been freed,
     *	queued or otherwise consumed here).  A non-zero return makes
     *	tcp_v4_do_rcv() send a reset and free @skb.
     */
    
    int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
    			  struct tcphdr *th, unsigned len)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct inet_connection_sock *icsk = inet_csk(sk);
    	int queued = 0;
    	int res;
    
    	tp->rx_opt.saw_tstamp = 0;
    
    	/* CLOSE, LISTEN and SYN_SENT are handled completely in this
    	 * switch; every other state falls out of it to the common
    	 * processing below.
    	 */
    	switch (sk->sk_state) {
    	case TCP_CLOSE:
    		goto discard;
    
    	case TCP_LISTEN:
    		/* An ACK to a listening socket is answered with a reset. */
    		if (th->ack)
    			return 1;
    
    		if (th->rst)
    			goto discard;
    
    		if (th->syn) {
    			/* Address-family specific handling of the connection
    			 * request; a negative result asks for a reset.
    			 */
    			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
    				return 1;
    
    			/* Now we have several options: In theory there is
    			 * nothing else in the frame. KA9Q has an option to
    			 * send data with the syn, BSD accepts data with the
    			 * syn up to the [to be] advertised window and
    			 * Solaris 2.1 gives you a protocol error. For now
    			 * we just ignore it, that fits the spec precisely
    			 * and avoids incompatibilities. It would be nice in
    			 * future to drop through and process the data.
    			 *
    			 * Now that TTCP is starting to be used we ought to
    			 * queue this data.
    			 * But, this leaves one open to an easy denial of
    			 * service attack, and SYN cookies can't defend
    			 * against this problem. So, we drop the data
    			 * in the interest of security over speed unless
    			 * it's still in use.
    			 */
    			kfree_skb(skb);
    			return 0;
    		}
    		goto discard;
    
    	case TCP_SYN_SENT:
    		/* A non-negative result is returned to the caller as is;
    		 * a negative one means continue with step 6 here.
    		 */
    		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
    		if (queued >= 0)
    			return queued;
    
    		/* Do step6 onward by hand. */
    		tcp_urg(sk, skb, th);
    		__kfree_skb(skb);
    		tcp_data_snd_check(sk);
    		return 0;
    	}
    
    	/* res <= 0 ends processing here; the negated value is returned. */
    	res = tcp_validate_incoming(sk, skb, th, 0);
    	if (res <= 0)
    		return -res;
    
    	/* step 5: check the ACK field */
    	if (th->ack) {
    		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
    
    		switch (sk->sk_state) {
    		case TCP_SYN_RECV:
    			if (acceptable) {
    				/* The ACK is acceptable: move the socket to
    				 * ESTABLISHED and notify via sk_state_change().
    				 */
    				tp->copied_seq = tp->rcv_nxt;
    				smp_mb();
    				tcp_set_state(sk, TCP_ESTABLISHED);
    				sk->sk_state_change(sk);
    
    				/* Note, that this wakeup is only for marginal
    				 * crossed SYN case. Passively open sockets
    				 * are not waked up, because sk->sk_sleep ==
    				 * NULL and sk->sk_socket == NULL.
    				 */
    				if (sk->sk_socket)
    					sk_wake_async(sk,
    						      SOCK_WAKE_IO, POLL_OUT);
    
    				/* Take snd_una and the scaled send window
    				 * from this segment.
    				 */
    				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
    				tp->snd_wnd = ntohs(th->window) <<
    					      tp->rx_opt.snd_wscale;
    				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
    
    				/* tcp_ack considers this ACK as duplicate
    				 * and does not calculate rtt.
    				 * Force it here.
    				 */
    				tcp_ack_update_rtt(sk, 0, 0);
    
    				/* With timestamps in use, reduce advmss by
    				 * TCPOLEN_TSTAMP_ALIGNED.
    				 */
    				if (tp->rx_opt.tstamp_ok)
    					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
    
    				/* Make sure socket is routed, for
    				 * correct metrics.
    				 */
    				icsk->icsk_af_ops->rebuild_header(sk);
    
    				tcp_init_metrics(sk);
    
    				tcp_init_congestion_control(sk);
    
    				/* Prevent spurious tcp_cwnd_restart() on
    				 * first data packet.
    				 */
    				tp->lsndtime = tcp_time_stamp;
    
    				tcp_mtup_init(sk);
    				tcp_initialize_rcv_mss(sk);
    				tcp_init_buffer_space(sk);
    				tcp_fast_path_on(tp);
    			} else {
    				/* Unacceptable ACK in SYN_RECV: ask for a reset. */
    				return 1;
    			}
    			break;
    
    		case TCP_FIN_WAIT1:
    			/* snd_una has caught up with write_seq: move on
    			 * to FIN_WAIT2.
    			 */
    			if (tp->snd_una == tp->write_seq) {
    				tcp_set_state(sk, TCP_FIN_WAIT2);
    				sk->sk_shutdown |= SEND_SHUTDOWN;
    				dst_confirm(sk->sk_dst_cache);
    
    				if (!sock_flag(sk, SOCK_DEAD))
    					/* Wake up lingering close() */
    					sk->sk_state_change(sk);
    				else {
    					int tmo;
    
    					/* Negative linger2, or new data on a
    					 * dead socket: finish the socket and
    					 * ask for a reset.
    					 */
    					if (tp->linger2 < 0 ||
    					    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
    					     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
    						tcp_done(sk);
    						NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
    						return 1;
    					}
    
    					tmo = tcp_fin_time(sk);
    					if (tmo > TCP_TIMEWAIT_LEN) {
    						inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
    					} else if (th->fin || sock_owned_by_user(sk)) {
    						/* Bad case. We could lose such FIN otherwise.
    						 * It is not a big problem, but it looks confusing
    						 * and not so rare event. We still can lose it now,
    						 * if it spins in bh_lock_sock(), but it is really
    						 * marginal case.
    						 */
    						inet_csk_reset_keepalive_timer(sk, tmo);
    					} else {
    						tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
    						goto discard;
    					}
    				}
    			}
    			break;
    
    		case TCP_CLOSING:
    			if (tp->snd_una == tp->write_seq) {
    				tcp_time_wait(sk, TCP_TIME_WAIT, 0);
    				goto discard;
    			}
    			break;
    
    		case TCP_LAST_ACK:
    			if (tp->snd_una == tp->write_seq) {
    				tcp_update_metrics(sk);
    				tcp_done(sk);
    				goto discard;
    			}
    			break;
    		}
    	} else
    		goto discard;
    
    	/* step 6: check the URG bit */
    	tcp_urg(sk, skb, th);
    
    	/* step 7: process the segment text */
    	switch (sk->sk_state) {
    	case TCP_CLOSE_WAIT:
    	case TCP_CLOSING:
    	case TCP_LAST_ACK:
    		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
    			break;
    		/* Fall through */
    	case TCP_FIN_WAIT1:
    	case TCP_FIN_WAIT2:
    		/* RFC 793 says to queue data in these states,
    		 * RFC 1122 says we MUST send a reset.
    		 * BSD 4.4 also does reset.
    		 */
    		if (sk->sk_shutdown & RCV_SHUTDOWN) {
    			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
    			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
    				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
    				tcp_reset(sk);
    				return 1;
    			}
    		}
    		/* Fall through */
    	case TCP_ESTABLISHED:
    		/* The skb is handed to tcp_data_queue(); 'queued' keeps
    		 * it from being freed below.
    		 */
    		tcp_data_queue(sk, skb);
    		queued = 1;
    		break;
    	}
    
    	/* tcp_data could move socket to TIME-WAIT */
    	if (sk->sk_state != TCP_CLOSE) {
    		tcp_data_snd_check(sk);
    		tcp_ack_snd_check(sk);
    	}
    
    	/* The discard label sits inside this if-body, so every
    	 * "goto discard" above frees the skb regardless of 'queued'.
    	 */
    	if (!queued) {
    discard:
    		__kfree_skb(skb);
    	}
    	return 0;
    }
    第三次握手:发送ACK段

    tcp_send_ack()用来发送一个ACK段,同时更新窗口。

    /*
     * This routine sends an ack and also updates the window.
     *
     * Nothing is sent while the socket is in TCP_CLOSE.  If no skb can be
     * allocated, an ACK is scheduled and the ICSK_TIME_DACK timer is armed
     * instead of transmitting.
     */
    void tcp_send_ack(struct sock *sk)
    {
    	struct sk_buff *ack_skb;

    	/* If we have been reset, we may not send again. */
    	if (sk->sk_state == TCP_CLOSE)
    		return;

    	/* We are not putting this on the write queue, so
    	 * tcp_transmit_skb() will set the ownership to this
    	 * sock.
    	 */
    	ack_skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
    	if (ack_skb != NULL) {
    		/* Reserve space for headers and prepare control bits. */
    		skb_reserve(ack_skb, MAX_TCP_HEADER);
    		tcp_init_nondata_skb(ack_skb, tcp_acceptable_seq(sk),
    				     TCPCB_FLAG_ACK);

    		/* Send it off, this clears delayed acks for us. */
    		TCP_SKB_CB(ack_skb)->when = tcp_time_stamp;
    		tcp_transmit_skb(sk, ack_skb, 0, GFP_ATOMIC);
    		return;
    	}

    	/* Allocation failed: schedule the ACK and arm the timer instead. */
    	inet_csk_schedule_ack(sk);
    	inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
    	inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
    				  TCP_DELACK_MAX, TCP_RTO_MAX);
    }
    发送ACK段时,TCP必须不在CLOSE状态。

    为ACK段分配一个SKB,如果分配失败则在启动延时确认定时器后返回。

  • 相关阅读:
    1.窗体与界面设计-其他技术
    1.窗体与界面设计-窗体控制技术
    1.窗体与界面设计-设置窗体大小
    1.窗体与界面设计-设置窗体位置
    1.窗体与界面设计-标题栏窗体
    1.窗体与界面设计-窗体动画
    1.窗体与界面设计-窗体效果
    远程连接mysql数据库
    linux压缩、解压缩命令
    CSV文件导入Mysql出现的乱码等问题 (转载)
  • 原文地址:https://www.cnblogs.com/mfrbuaa/p/5126446.html
Copyright © 2011-2022 走看看