zoukankan      html  css  js  c++  java
  • 套接字之相关系统调用的调用流程

     最近一直在读内核网络协议栈源码,这里以ipv4/tcp为例对socket相关系统调用的流程做一个简要整理,这些相关系统调用的内部细节虽然各有不同,但其调用流程则基本一致;

    调用流程:

    (1)系统调用 –> (2)查找socket –> (3)执行socket的对应操作函数  –> (4)执行传输层协议的对应操作函数;

    中间核心数据结构为inetsw_array[],位于af_inet.c,以第一个元素type=SOCK_STREAM,protocol=IPPROTO_TCP为例,该类型适用于tcp协议,当创建tcp socket时,其操作socket->ops赋值为&inet_stream_ops,对应的传输控制块操作sock->sk_prot赋值为&tcp_prot;

     1 /* Upon startup we insert all the elements in inetsw_array[] into
     2  * the linked list inetsw. Each entry pairs a socket type/protocol with
     3  * its transport-level proto (.prot) and its socket-level ops (.ops). */
     4 static struct inet_protosw inetsw_array[] =
     5 {
     6     {    /* TCP stream sockets */
     7         .type =       SOCK_STREAM,
     8         .protocol =   IPPROTO_TCP,
     9         .prot =       &tcp_prot,
    10         .ops =        &inet_stream_ops,
    11         .flags =      INET_PROTOSW_PERMANENT |
    12                   INET_PROTOSW_ICSK,
    13     },
    14 
    15     {    /* UDP datagram sockets */
    16         .type =       SOCK_DGRAM,
    17         .protocol =   IPPROTO_UDP,
    18         .prot =       &udp_prot,
    19         .ops =        &inet_dgram_ops,
    20         .flags =      INET_PROTOSW_PERMANENT,
    21        },
    22 
    23        {    /* ICMP datagram sockets, served by ping_prot */
    24         .type =       SOCK_DGRAM,
    25         .protocol =   IPPROTO_ICMP,
    26         .prot =       &ping_prot,
    27         .ops =        &inet_sockraw_ops,
    28         .flags =      INET_PROTOSW_REUSE,
    29        },
    30 
    31        {    /* raw sockets; IPPROTO_IP is used as a protocol wildcard */
    32            .type =       SOCK_RAW,
    33            .protocol =   IPPROTO_IP,    /* wild card */
    34            .prot =       &raw_prot,
    35            .ops =        &inet_sockraw_ops,
    36            .flags =      INET_PROTOSW_REUSE,
    37        }
    38 };

    查看inet_stream_ops结构会发现,其中包含了各种socket系统调用的对应的处理函数;

     1 const struct proto_ops inet_stream_ops = {    /* .ops of the SOCK_STREAM/IPPROTO_TCP entry in inetsw_array[] */
     2     .family           = PF_INET,
     3     .owner           = THIS_MODULE,
     4     .release       = inet_release,
     5     .bind           = inet_bind,    /* reached from the bind syscall via sock->ops->bind */
     6     .connect       = inet_stream_connect,
     7     .socketpair       = sock_no_socketpair,
     8     .accept           = inet_accept,
     9     .getname       = inet_getname,
    10     .poll           = tcp_poll,
    11     .ioctl           = inet_ioctl,
    12     .listen           = inet_listen,
    13     .shutdown       = inet_shutdown,
    14     .setsockopt       = sock_common_setsockopt,
    15     .getsockopt       = sock_common_getsockopt,
    16     .sendmsg       = inet_sendmsg,
    17     .recvmsg       = inet_recvmsg,
    18     .mmap           = sock_no_mmap,
    19     .sendpage       = inet_sendpage,
    20     .splice_read       = tcp_splice_read,
    21     .read_sock       = tcp_read_sock,
    22     .peek_len       = tcp_peek_len,
    23 #ifdef CONFIG_COMPAT    /* compat handlers are only present when CONFIG_COMPAT is defined */
    24     .compat_setsockopt = compat_sock_common_setsockopt,
    25     .compat_getsockopt = compat_sock_common_getsockopt,
    26     .compat_ioctl       = inet_compat_ioctl,
    27 #endif
    28 };

    具体实例,以tcp bind系统调用为例:

     1 SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
     2 {    /* bind syscall: look up the socket for fd, copy the address in, run the security hook, then call sock->ops->bind */
     3     struct socket *sock;
     4     struct sockaddr_storage address;
     5     int err, fput_needed;
     6 
     7     /* Look up the socket for fd; fput_needed records whether the file reference count must be dropped afterwards */
     8     sock = sockfd_lookup_light(fd, &err, &fput_needed);
     9     if (sock) {
    10         /* Copy the address from user space into kernel space */
    11         err = move_addr_to_kernel(umyaddr, addrlen, &address);
    12         if (err >= 0) {
    13             /* Security-module check for bind */
    14             err = security_socket_bind(sock,
    15                            (struct sockaddr *)&address,
    16                            addrlen);
    17             if (!err)
    18                 /* Invoke the socket's bind operation */
    19                 err = sock->ops->bind(sock,
    20                               (struct sockaddr *)
    21                               &address, addrlen);
    22         }
    23 
    24         /* Drop the file reference if fput_needed says so */
    25         fput_light(sock->file, fput_needed);
    26     }
    27     return err;    /* when the lookup fails, err is presumably filled in by sockfd_lookup_light via &err */
    28 }

    上面的sock->ops->bind操作实际是调用了inet_stream_ops.bind,即inet_bind:

     1 /* Address binding (excerpt: unrelated parts of the function are omitted) */
     2 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
     3 {
     4     /* unrelated code omitted */
     5     /* If the socket has its own bind function then use it. (RAW) */
     6     /* 
     7         If the transport-level proto supplies its own bind operation,
     8         call it and skip the generic path below
     9     */
    10     if (sk->sk_prot->bind) {
    11         err = sk->sk_prot->bind(sk, uaddr, addr_len);
    12         goto out;
    13     }
    14     
    15     /* unrelated code omitted */
    16 
    17     /* 
    18         If the port is non-zero, or bind_address_no_port is not set,
    19         use the protocol's get_port function to bind the port
    20     */
    21     if ((snum || !inet->bind_address_no_port) &&
    22         sk->sk_prot->get_port(sk, snum)) {
    23 
    24         /* Binding failed: clear the source addresses */
    25         inet->inet_saddr = inet->inet_rcv_saddr = 0;
    26 
    27         /* Port is in use */
    28         err = -EADDRINUSE;
    29         goto out_release_sock;
    30     }
    31 
    32    /* unrelated code omitted */
    33 out_release_sock:
    34     release_sock(sk);
    35 out:
    36     return err;
    37 }

    上面的sk->sk_prot->bind以及sk->sk_prot->get_port为具体传输层实现的对应操作函数,其中只有raw socket实现了bind操作,我们不关注,而以tcp的get_port操作为例,实际上也就是调用了tcp_prot.get_port,具体tcp实现为inet_csk_get_port;(该函数尚未分析,后续补充)

     1 /* Obtain a reference to a local port for the given sock; if snum is zero,
     2  * select any available local port (we try to allocate an odd port and
     3  * leave even ports for connect()). Returns 0 on success and 1 on failure.
     4  */
     5 int inet_csk_get_port(struct sock *sk, unsigned short snum)
     6 {
     7     bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
     8     struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
     9     int ret = 1, port = snum;    /* ret stays 1 (failure) until the very end */
    10     struct inet_bind_hashbucket *head;
    11     struct net *net = sock_net(sk);
    12     struct inet_bind_bucket *tb = NULL;
    13     kuid_t uid = sock_i_uid(sk);
    14 
    15     if (!port) {    /* no port requested: let inet_csk_find_open_port choose one */
    16         head = inet_csk_find_open_port(sk, &tb, &port);
    17         if (!head)
    18             return ret;
    19         if (!tb)
    20             goto tb_not_found;
    21         goto success;
    22     }
    23     head = &hinfo->bhash[inet_bhashfn(net, port,
    24                       hinfo->bhash_size)];
    25     spin_lock_bh(&head->lock);    /* released at fail_unlock */
    26     inet_bind_bucket_for_each(tb, &head->chain)
    27         if (net_eq(ib_net(tb), net) && tb->port == port)
    28             goto tb_found;
    29 tb_not_found:
    30     tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
    31                      net, head, port);
    32     if (!tb)
    33         goto fail_unlock;
    34 tb_found:
    35     if (!hlist_empty(&tb->owners)) {    /* bucket already has owners: check whether we may share it */
    36         if (sk->sk_reuse == SK_FORCE_REUSE)
    37             goto success;
    38 
    39         if ((tb->fastreuse > 0 && reuse) ||
    40             sk_reuseport_match(tb, sk))
    41             goto success;
    42         if (inet_csk_bind_conflict(sk, tb, true, true))
    43             goto fail_unlock;
    44     }
    45 success:
    46     if (hlist_empty(&tb->owners)) {    /* no owners yet: seed the fast-path hints from this socket */
    47         tb->fastreuse = reuse;
    48         if (sk->sk_reuseport) {
    49             tb->fastreuseport = FASTREUSEPORT_ANY;
    50             tb->fastuid = uid;
    51             tb->fast_rcv_saddr = sk->sk_rcv_saddr;
    52             tb->fast_ipv6_only = ipv6_only_sock(sk);
    53 #if IS_ENABLED(CONFIG_IPV6)
    54             tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
    55 #endif
    56         } else {
    57             tb->fastreuseport = 0;
    58         }
    59     } else {    /* bucket already has owners: only tighten the existing hints */
    60         if (!reuse)
    61             tb->fastreuse = 0;
    62         if (sk->sk_reuseport) {
    63             /* We didn't match or we don't have fastreuseport set on
    64              * the tb, but we have sk_reuseport set on this socket
    65              * and we know that there are no bind conflicts with
    66              * this socket in this tb, so reset our tb's reuseport
    67              * settings so that any subsequent sockets that match
    68              * our current socket will be put on the fast path.
    69              *
    70              * If we reset we need to set FASTREUSEPORT_STRICT so we
    71              * do extra checking for all subsequent sk_reuseport
    72              * socks.
    73              */
    74             if (!sk_reuseport_match(tb, sk)) {
    75                 tb->fastreuseport = FASTREUSEPORT_STRICT;
    76                 tb->fastuid = uid;
    77                 tb->fast_rcv_saddr = sk->sk_rcv_saddr;
    78                 tb->fast_ipv6_only = ipv6_only_sock(sk);
    79 #if IS_ENABLED(CONFIG_IPV6)
    80                 tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
    81 #endif
    82             }
    83         } else {
    84             tb->fastreuseport = 0;
    85         }
    86     }
    87     if (!inet_csk(sk)->icsk_bind_hash)    /* attach the socket to the bucket unless it is already bound to one */
    88         inet_bind_hash(sk, tb, port);
    89     WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
    90     ret = 0;
    91 
    92 fail_unlock:    /* shared exit path: success falls through here with ret == 0 */
    93     spin_unlock_bh(&head->lock);
    94     return ret;
    95 }
  • 相关阅读:
    hdu 1849 Rabbit and Grass(nim)
    sg函数模板
    hdu 1848 Fibonacci again and again(sg)
    hdu 1847 Good Luck in CET-4 Everybody!(sg)
    hdu 1846 Brave Game(bash)
    hdu 1517 A Multiplication Game(必胜态,必败态)
    hdu 1536/ hdu 1944 S-Nim(sg函数)
    hdu 2509 Be the Winner(anti nim)
    hdu 1907 John(anti nim)
    zoj 3965 Binary Tree Restoring(搜索)
  • 原文地址:https://www.cnblogs.com/wanpengcoder/p/7623101.html
Copyright © 2011-2022 走看看