上一篇文章疑惑的地方是:udp bind 同一个port 是否可行?以及如何查找sk、管理sk_buff
此时疑惑的是:udp没有bind 一个ip:port 去connect 会发生什么,connect 后发出send 调用呢?此过程中涉及到的ip port 如何填充呢? 如果socket没有connect也没有bind,那sendto/send会发生什么?
UDP的connect系统调用
udp虽然不是基于连接的协议,但是内核也提供了connect 系统调用。通过connect,可以在传输控制块中记录目的地址和目的端口,并且根据这个目的ip-port 选择目的路由;这样发送的时候就可以通过write、send发送,而不需要使用sendto,同时接收数据的时候也不必调用recvfrom获取数据报文发送者,而可以用read、recv、recvmsg。
udp的connect核心函数为:
/* * Automatically bind an unbound socket. */ static int inet_autobind(struct sock *sk) { struct inet_sock *inet; /* We may need to bind the socket. */ lock_sock(sk); inet = inet_sk(sk); if (!inet->inet_num) { if (sk->sk_prot->get_port(sk, 0)) { release_sock(sk); return -EAGAIN; } inet->inet_sport = htons(inet->inet_num); } release_sock(sk); return 0; } int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); }
可以看到:udp connect的时候如果没有bind 一个port , 就会自动bind 一个port
最后会调用一个协议相关的connect :ip4_datagram_connect
/*
 * Locked wrapper around __ip4_datagram_connect(): takes the socket lock,
 * performs the IPv4 datagram connect, drops the lock and hands back the
 * inner function's result unchanged.
 */
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int ret;

	lock_sock(sk);
	ret = __ip4_datagram_connect(sk, uaddr, addr_len);
	release_sock(sk);

	return ret;
}
/*
 * Record a peer address on an IPv4 datagram socket.
 *
 * Validates the user address, looks up a route to it, fills in any missing
 * local source address from the route result, stores the destination
 * address/port on the socket and marks it TCP_ESTABLISHED.
 *
 * Returns 0 on success, or a negative errno:
 *   -EINVAL        addr_len is smaller than struct sockaddr_in
 *   -EAFNOSUPPORT  sin_family is not AF_INET
 *   -EACCES        the route is a broadcast route but SOCK_BROADCAST is unset
 *   otherwise the error returned by ip_route_connect()
 *
 * In this article the function is called from ip4_datagram_connect() with
 * the socket lock held.
 */
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
	struct flowi4 *fl4;
	struct rtable *rt;
	__be32 saddr;
	int oif;
	int err;

	if (addr_len < sizeof(*usin))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Reset the socket's cached route entry (presumably dropping any old one). */
	sk_dst_reset(sk);

	oif = sk->sk_bound_dev_if;
	saddr = inet->inet_saddr;
	/*
	 * Multicast destination: if no output interface or source address is
	 * set yet, fall back to the socket's multicast settings.
	 */
	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
		if (!oif)
			oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
	}
	fl4 = &inet->cork.fl.u.ip4;
	/* Look up the output route towards the requested destination. */
	rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
			      RT_CONN_FLAGS(sk), oif,
			      sk->sk_protocol,
			      inet->inet_sport, usin->sin_port, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		goto out;
	}

	/* Broadcast routes require the socket to have SOCK_BROADCAST set. */
	if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
		ip_rt_put(rt);
		err = -EACCES;
		goto out;
	}
	/* No source IP on the socket yet: take the one from the flow/route result. */
	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;	/* Update source address */
	if (!inet->inet_rcv_saddr) {
		inet->inet_rcv_saddr = fl4->saddr;
		/* The receive address changed, so let the protocol rehash the socket. */
		if (sk->sk_prot->rehash)
			sk->sk_prot->rehash(sk);
	}
	/*
	 * Store the destination address and port; the source port is already
	 * set by this point (see inet_autobind() in inet_dgram_connect()).
	 */
	/* NOTE(review): fl4->daddr is used rather than usin->sin_addr; the
	 * original author guessed this is related to multicast - confirm. */
	inet->inet_daddr = fl4->daddr;
	inet->inet_dport = usin->sin_port;
	sk->sk_state = TCP_ESTABLISHED;
	sk_set_txhash(sk);
	inet->inet_id = jiffies;

	/* Cache the route on the socket for later sends. */
	sk_dst_set(sk, &rt->dst);
	err = 0;
out:
	return err;
}
从上述代码可以看到,udp socket 即使没有bind ip port,在connect的时候也会自动填充:src port 由inet_autobind 自动分配,src ip 则根据路由查找结果填充;同时要注意组播的处理方式;
UDP 套接字的send调用
由于对同一个socket 可以连续发起send调用发送到不同目的ip port的报文,所以除非是connect后的socket,每次send 应该是需要重新路由获取出口信息以及源ip port 信息
/*
 * Send a UDP datagram on an IPv4 socket, or append data to a corked one.
 *
 * Outline of what the code below does:
 *  1. Reject oversized payloads (> 0xFFFF) and MSG_OOB.
 *  2. If frames are already pending (corked socket), append to them.
 *  3. Otherwise determine the destination: from msg->msg_name if given,
 *     else from the connected peer (requires sk_state == TCP_ESTABLISHED).
 *  4. Collect per-message control data and IP options.
 *  5. Reuse the socket's cached route when still "connected", else do a
 *     fresh route lookup (which also supplies a source address if needed).
 *  6. Either build and send one skb directly (non-corking fast path) or
 *     cork the socket and append the data under the socket lock.
 *
 * Returns len on success, or a negative errno on failure.
 */
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	/* View the generic sock as its INET and UDP specific representations. */
	struct inet_sock *inet = inet_sk(sk);
	struct udp_sock *up = udp_sk(sk);
	struct flowi4 fl4_stack;
	struct flowi4 *fl4;
	int ulen = len;
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	int free = 0;
	int connected = 0;
	__be32 daddr, faddr, saddr;
	__be16 dport;
	u8 tos;
	int err, is_udplite = IS_UDPLITE(sk);
	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
	struct sk_buff *skb;
	struct ip_options_data opt_copy;

	if (len > 0xFFFF)
		return -EMSGSIZE;

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;

	ipc.opt = NULL;
	ipc.tx_flags = 0;
	ipc.ttl = 0;
	ipc.tos = -1;

	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;

	fl4 = &inet->cork.fl.u.ip4;
	if (up->pending) {
		/*
		 * There are pending frames.
		 * The socket lock must be held while it's corked.
		 */
		/*
		 * Data is already queued on this socket, so this call appends
		 * to it using the flow stored in inet->cork (no new route
		 * lookup on this path).
		 * NOTE(review): the original author notes that applications
		 * sending concurrently on one socket presumably have to
		 * serialize themselves to keep ordering - confirm.
		 */
		lock_sock(sk);
		if (likely(up->pending)) {
			if (unlikely(up->pending != AF_INET)) {
				release_sock(sk);
				return -EINVAL;
			}
			goto do_append_data;
		}
		release_sock(sk);
	}
	ulen += sizeof(struct udphdr);

	/*
	 *	Get and verify the address.
	 */
	if (msg->msg_name) {
		DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
		if (msg->msg_namelen < sizeof(*usin))
			return -EINVAL;
		if (usin->sin_family != AF_INET) {
			if (usin->sin_family != AF_UNSPEC)
				return -EAFNOSUPPORT;
		}
		/* Take the destination IP address and port from the caller. */
		daddr = usin->sin_addr.s_addr;
		dport = usin->sin_port;
		if (dport == 0)
			return -EINVAL;
	} else {
		/* No destination supplied: only valid on a connect()ed socket. */
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = inet->inet_daddr;
		dport = inet->inet_dport;
		/* Open fast path for connected socket.
		   Route will not be used, if at least one option is set.
		 */
		connected = 1;
	}
	/*
	 * Start from the source address and bound device index stored on the
	 * socket, and pick up the socket's TX timestamp flags.
	 */
	ipc.addr = inet->inet_saddr;
	ipc.oif = sk->sk_bound_dev_if;

	sock_tx_timestamp(sk, &ipc.tx_flags);

	if (msg->msg_controllen) {
		/* Process control (ancillary) data carried in the message. */
		err = ip_cmsg_send(sock_net(sk), msg, &ipc,
				   sk->sk_family == AF_INET6);
		if (unlikely(err)) {
			kfree(ipc.opt);
			return err;
		}
		if (ipc.opt)
			free = 1;	/* ipc.opt must be kfree()d at "out" */
		connected = 0;	/* do not use the socket's cached route */
	}
	if (!ipc.opt) {
		/*
		 * The message carried no IP options: copy the options
		 * configured on the socket, if any, into opt_copy.
		 */
		struct ip_options_rcu *inet_opt;

		rcu_read_lock();
		inet_opt = rcu_dereference(inet->inet_opt);
		if (inet_opt) {
			memcpy(&opt_copy, inet_opt,
			       sizeof(*inet_opt) + inet_opt->opt.optlen);
			ipc.opt = &opt_copy.opt;
		}
		rcu_read_unlock();
	}

	saddr = ipc.addr;
	ipc.addr = faddr = daddr;

	if (ipc.opt && ipc.opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		/*
		 * A source-route option is present: route towards the
		 * option's first hop (opt.faddr) instead of the final
		 * destination address.
		 */
		faddr = ipc.opt->opt.faddr;
		connected = 0;	/* force a fresh route lookup */
	}
	tos = get_rttos(&ipc, inet);
	if (sock_flag(sk, SOCK_LOCALROUTE) ||
	    (msg->msg_flags & MSG_DONTROUTE) ||
	    (ipc.opt && ipc.opt->opt.is_strictroute)) {
		tos |= RTO_ONLINK;
		/*
		 * NOTE(review): per the original author, with "don't route"
		 * or strict source routing the destination/next hop must be
		 * on a directly attached subnet - confirm.
		 */
		connected = 0;	/* force a fresh route lookup */
	}

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
		connected = 0;	/* force a fresh route lookup */
	} else if (!ipc.oif)
		ipc.oif = inet->uc_index;

	/* Still on the connected fast path: try the route cached on the socket. */
	if (connected)
		rt = (struct rtable *)sk_dst_check(sk, 0);

	if (!rt) {
		/* No usable cached route: perform a route lookup now. */
		struct net *net = sock_net(sk);
		__u8 flow_flags = inet_sk_flowi_flags(sk);

		fl4 = &fl4_stack;

		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
				   flow_flags,
				   faddr, saddr, dport, inet->inet_sport);

		if (!saddr && ipc.oif) {
			err = l3mdev_get_saddr(net, ipc.oif, fl4);
			if (err < 0)
				goto out;
		}

		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
		rt = ip_route_output_flow(net, fl4, sk);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			if (err == -ENETUNREACH)
				IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
			goto out;
		}

		err = -EACCES;
		if ((rt->rt_flags & RTCF_BROADCAST) &&
		    !sock_flag(sk, SOCK_BROADCAST))
			goto out;
		/* Connected socket: cache the freshly looked-up route on it. */
		if (connected)
			sk_dst_set(sk, dst_clone(&rt->dst));
	}

	/*
	 * MSG_CONFIRM: per the original author, the application knows the
	 * gateway is reachable; handled at do_confirm below.
	 */
	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	/* Use the source address carried in the flow after routing. */
	saddr = fl4->saddr;
	if (!ipc.addr)
		daddr = ipc.addr = fl4->daddr;

	/* Lockless fast path for the non-corking case. */
	if (!corkreq) {
		skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
				  sizeof(struct udphdr), &ipc, &rt,
				  msg->msg_flags);
		err = PTR_ERR(skb);
		if (!IS_ERR_OR_NULL(skb))
			err = udp_send_skb(skb, fl4);
		goto out;
	}

	lock_sock(sk);
	if (unlikely(up->pending)) {
		/* The socket is already corked while preparing it. */
		/* ... which is an evident application bug. --ANK */
		release_sock(sk);

		net_dbg_ratelimited("cork app bug 2\n");
		err = -EINVAL;
		goto out;
	}
	/*
	 *	Now cork the socket to pend data.
	 *	Store destination address/port and source address/port in the
	 *	socket's cork flow and set the pending marker to AF_INET.
	 */
	fl4 = &inet->cork.fl.u.ip4;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->fl4_dport = dport;
	fl4->fl4_sport = inet->inet_sport;
	up->pending = AF_INET;

	/*
	 * Summary so far (translated from the original author's notes):
	 * 1. Destination address/port and source address/port have been
	 *    determined; the destination port must not be 0, and multicast
	 *    destinations are handled differently.
	 * 2. Although UDP is connectionless, sending needs a route to the
	 *    destination; when the cached one cannot be used a new route is
	 *    looked up.
	 * 3. No checksum work appears in this function itself; it only
	 *    assembles data and prepares it for transmission.
	 */
do_append_data:
	up->len += ulen;	/* account this payload in the pending UDP length */
	err = ip_append_data(sk, fl4, getfrag, msg, ulen,
			     sizeof(struct udphdr), &ipc, &rt,
			     corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
	if (err)
		udp_flush_pending_frames(sk);
	else if (!corkreq)
		err = udp_push_pending_frames(sk);
	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
		up->pending = 0;
	release_sock(sk);

out:
	ip_rt_put(rt);
	if (free)
		kfree(ipc.opt);
	if (!err)
		return len;
	/*
	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
	 * we don't have a good statistic (IpOutDiscards but it can be too many
	 * things).  We could add another new stat but at least for now that
	 * seems like overkill.
	 */
	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		UDP_INC_STATS_USER(sock_net(sk),
				   UDP_MIB_SNDBUFERRORS, is_udplite);
	}
	return err;

do_confirm:
	/* With MSG_PROBE set, confirm the neighbour entry for the destination. */
	if (msg->msg_flags & MSG_PROBE)
		dst_confirm_neigh(&rt->dst, &fl4->daddr);
	/* Continue sending unless this is a zero-length MSG_PROBE request. */
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
问题:
- 多线程同时向一个socket send pkt, 如果线程1 lock_sock --->sockfd 线程2 被阻塞!!
- 如果线程1 send 后release sockfd, 但是sock 处于pending状态, 线程2 发送则直接add data 到socket ,那就是直接使用socket的inet 信息发送数据,没有更新路由数据了!
- udp 发送socket 的时候会根据目的ip port 信息来路由,对于源IP port 必须bind 一个吗? 目前看没必要!因为ip_route_output_flow 会根据目的ip地址查找路由,如果saddr为空则会将路由信息填充进去
#include <sys/types.h>
#include <sys/socket.h>
#include <pthread.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>

/*
 * Minimal UDP client.
 *
 * Creates an unbound, unconnected UDP socket and sends every line read from
 * stdin to 10.10.10.10:3702 with sendto(). The socket is never bind()ed or
 * connect()ed, so the kernel picks the source port and source address.
 *
 * Exits with status 0 when stdin reaches end-of-file, and with status 1 if
 * the socket cannot be created or the address is invalid.
 */
int main(int argc, char **argv)
{
	char buff[512];
	struct sockaddr_in addr;
	int sock;

	(void)argc;
	(void)argv;

	printf("This is a UDP client\n");

	if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
		perror("socket");
		exit(1);
	}

	/* Zero the whole structure so padding (sin_zero) is well defined. */
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(3702);
	addr.sin_addr.s_addr = inet_addr("10.10.10.10");
	if (addr.sin_addr.s_addr == INADDR_NONE) {
		printf("Incorrect ip address!");
		close(sock);
		exit(1);
	}

	while (1) {
		/* Read one line of user input. */
		memset(buff, 0, sizeof(buff));
		/*
		 * fgets() returns NULL (not EOF) at end-of-file or on error;
		 * stop cleanly instead of looping forever on empty input.
		 */
		if (fgets(buff, sizeof(buff) - 1, stdin) == NULL) {
			close(sock);
			exit(0);
		}

		if (sendto(sock, buff, strlen(buff), 0,
			   (struct sockaddr *)&addr, sizeof(addr)) < 0) {
			perror("sendto");
			close(sock);
			break;
		} else {
			printf("clinet send success!\n");
		}
	}

	return 0;
}
#include <sys/types.h>
#include <sys/socket.h>
#include <pthread.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>

/*
 * Minimal UDP echo server.
 *
 * Binds a UDP socket to port 3702 on all local addresses, prints every
 * datagram it receives together with the sender's address and port, and
 * sends the same payload back to the sender.
 *
 * Exits with status 1 if the socket cannot be created or bound; leaves the
 * receive loop (and returns 0) when recvfrom() or sendto() fails.
 */
int main(int argc, char **argv)
{
	struct sockaddr_in addr;
	struct sockaddr_in clientAddr;
	char buff[512];
	socklen_t len;
	ssize_t n;
	int sock;

	(void)argc;
	(void)argv;

	printf("Welcome! This is a UDP server, I can only received message from client and reply with same message\n");

	/* Zero the whole structure so padding (sin_zero) is well defined. */
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(3702);
	addr.sin_addr.s_addr = htonl(INADDR_ANY);

	if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
		perror("socket");
		exit(1);
	}

	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("bind");
		close(sock);
		exit(1);
	}

	while (1) {
		/*
		 * recvfrom() overwrites the length with the size of the
		 * returned address, so it must be reset before every call.
		 */
		len = sizeof(clientAddr);
		n = recvfrom(sock, buff, 511, 0,
			     (struct sockaddr *)&clientAddr, &len);
		/*
		 * Only a negative return is an error; a zero-length UDP
		 * datagram is valid and is echoed back like any other.
		 */
		if (n < 0) {
			perror("recv");
			break;
		}

		buff[n] = 0;
		printf("%s %u says: %s\n", inet_ntoa(clientAddr.sin_addr),
		       ntohs(clientAddr.sin_port), buff);

		n = sendto(sock, buff, n, 0,
			   (struct sockaddr *)&clientAddr, sizeof(clientAddr));
		if (n < 0) {
			perror("sendto");
			break;
		}
	}

	close(sock);
	return 0;
}
上述有client 和server 例子 直接测试使用
关于详细ip_append_data /udp_push_pending_frames 发送数据,后续再看