zoukankan      html  css  js  c++  java
  • Linux内核Socket实现之------Socket创建(2) 文件描述符

    转载请注明:http://blog.chinaunix.net/uid-20788636-id-4408276.html

    1.2 sock_map_fd函数

             在用户空间创建了一个socket后,返回值是一个文件描述符,下面分析一下创建socket时怎么和文件描述符联系的。在SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)最后调用sock_map_fd进行关联,其中返回的retval就是用户空间获取的文件描述符fd,sock就是调用sock_create创建成功的socket.

             sock_map_fd()主要用于对socket的*file指针初始化,经过sock_map_fd()操作后,socket就通过其*file指针与VFS管理的文件进行了关联,便可以进行文件的各种操作,如read、write、lseek、ioctl等. 

    retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));

    /*
     * sock_map_fd - reserve a file descriptor and attach a socket-backed
     * struct file to it.
     *
     * @sock:  socket to expose through the descriptor
     * @flags: passed to both get_unused_fd_flags() and sock_alloc_file()
     *
     * Returns the new descriptor on success. If no descriptor could be
     * reserved, the negative value from get_unused_fd_flags() is returned.
     * If sock_alloc_file() fails, the reserved descriptor is released and
     * PTR_ERR() of the failed pointer is returned.
     */
    static int sock_map_fd(struct socket *sock, int flags)

    {

             struct file *newfile;

             int fd = get_unused_fd_flags(flags);// obtain an unused fd according to flags; analysed in section 1.2.1

             if (unlikely(fd < 0))

                       return fd;

             newfile = sock_alloc_file(sock, flags, NULL);

             if (likely(!IS_ERR(newfile))) {

                       /* Publish the file under the reserved descriptor. */
                       fd_install(fd, newfile);

                       return fd;

             }

             /* File allocation failed: give the reserved descriptor back. */
             put_unused_fd(fd);

             return PTR_ERR(newfile);

    }

    1.2.1   get_unused_fd_flags函数

             get_unused_fd_flags()函数调用__alloc_fd分配一个新的可用的fd

    /*
     * __alloc_fd - pick a free descriptor number in @files and mark it open.
     *
     * @files: descriptor table owner
     * @start: lowest descriptor number to consider
     * @end:   exclusive upper bound; a candidate >= @end yields -EMFILE
     * @flags: only O_CLOEXEC is examined here, to set or clear the
     *         close-on-exec bit of the new descriptor
     *
     * Runs with files->file_lock held from entry until the "out" label.
     * Returns the allocated descriptor, -EMFILE, or the negative value
     * returned by expand_files().
     */
    int __alloc_fd(struct files_struct *files,

                    unsigned start, unsigned end, unsigned flags)

    {

             unsigned int fd;

             int error;

             struct fdtable *fdt;

             spin_lock(&files->file_lock);

    repeat:

    /* Fetch this process's file descriptor table. */

             fdt = files_fdtable(files);

             fd = start;// begin at start; per the article, start is 0 on this path

    /* files->next_fd is the next free descriptor determined by the previous lookup, which speeds up allocation; when fd is below files->next_fd, next_fd can be used directly. */

             if (fd < files->next_fd)

                       fd = files->next_fd;

    /* When fd is below the largest descriptor number the table currently supports, search the open-descriptor bitmap starting at bit fd for the next zero bit, i.e. the next free descriptor. */

             if (fd < fdt->max_fds)

                       fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);

             /*

              * N.B. For clone tasks sharing a files structure, this test

              * will limit the total number of files that can be opened.

              */

             error = -EMFILE;

             if (fd >= end)

                       goto out;

    /* Expand the file descriptor table if needed. */

             error = expand_files(files, fd);

             if (error < 0)

                       goto out;

             /*

              * If we needed to expand the fs array we

              * might have blocked - try again.

              */

             if (error)

                       goto repeat;

        /*

         Update next_fd so that the next search for a free fd is faster.

         When start is greater than next_fd, next_fd is left untouched so
         that descriptor numbers do not become discontiguous.

         */

             if (start <= files->next_fd)

                       files->next_fd = fd + 1;

      /* Mark fd as in use in the open-descriptor table. */

             __set_open_fd(fd, fdt);

             if (flags & O_CLOEXEC)

                       __set_close_on_exec(fd, fdt);

             else

                       __clear_close_on_exec(fd, fdt);

             error = fd;

    #if 1

             /* Sanity check */

             if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {

                       printk(KERN_WARNING "alloc_fd: slot %d not NULL! ", fd);

                       rcu_assign_pointer(fdt->fd[fd], NULL);

             }

    #endif

    out:

             spin_unlock(&files->file_lock);

             return error;

    }

    1.2.2 sock_alloc_file函数

    /*
     * sock_alloc_file - build the struct file that represents @sock.
     *
     * @sock:  socket the file will refer to
     * @flags: only O_NONBLOCK is carried over into file->f_flags
     * @dname: optional dentry name; when NULL and sock->sk is set, the name
     *         of sock->sk->sk_prot_creator is used instead
     *
     * Returns the new file, ERR_PTR(-ENOMEM) if the dentry cannot be
     * allocated, or the error pointer returned by alloc_file().
     */
    struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)

    {

             struct qstr name = { .name = "" };

             struct path path;

             struct file *file;

             if (dname) {// dname is NULL on the path analysed here

                       name.name = dname;

                       name.len = strlen(name.name);

             } else if (sock->sk) {

     /* Here the name should be "TCP", per struct proto tcp_prot. */

                       name.name = sock->sk->sk_prot_creator->name;

                       name.len = strlen(name.name);

             }

    /* Allocate a new dentry; sock_mnt->mnt_sb, analysed earlier in the series, belongs to the sock_fs_type filesystem mount. */

             path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);

             if (unlikely(!path.dentry))

                       return ERR_PTR(-ENOMEM);

             path.mnt = mntget(sock_mnt);

    /* Bind the file operations to the inode. The dentry side uses sockfs_dentry_operations, set up in sockfs_mount(), which is invoked from sock_init() as analysed earlier. */

             d_instantiate(path.dentry, SOCK_INODE(sock));

             SOCK_INODE(sock)->i_fop = &socket_file_ops;

    /* Allocate a new file and associate it with path. */

             file = alloc_file(&path, FMODE_READ | FMODE_WRITE,

                         &socket_file_ops);

             if (unlikely(IS_ERR(file))) {

                       /* drop dentry, keep inode */

                       ihold(path.dentry->d_inode);

                       path_put(&path);

                       return file;

             }

             sock->file = file;// link sock->file to the newly allocated file

             file->f_flags = O_RDWR | (flags & O_NONBLOCK);// set the file's flags

             file->private_data = sock;// the file's private data pointer refers back to sock

             return file;

    }

    Socket创建流程图

    附录:对于sk_alloc分配的内存大小问题分析

             在分析中经常看到 inet = inet_sk(sk); 这种类型的强制转换,其中inet被定义为struct inet_sock *inet;。从结构体的定义来看,struct sock的大小小于struct inet_sock,按理说不能直接这样进行强制类型转换;但在实际分配时,sk所占的内存是按tcp_sock的大小分配的,该结构足够大,因此这种转换是安全的。

    /*
     * sk_alloc - allocate and minimally initialise a struct sock.
     *
     * @net:      network namespace recorded in the sock via sock_net_set()
     * @family:   stored in sk->sk_family and forwarded to sk_prot_alloc()
     * @priority: allocation flags; __GFP_ZERO is always ORed in
     * @prot:     protocol descriptor, stored in both sk_prot and
     *            sk_prot_creator
     *
     * Returns whatever sk_prot_alloc() returned, i.e. NULL on failure.
     */
    struct sock *sk_alloc(struct net *net, int family, gfp_t priority,

                             struct proto *prot)

    {

             struct sock *sk;

             sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);

             if (sk) {

                       sk->sk_family = family;

                       /*

                        * See comment in struct sock definition to understand

                        * why we need sk_prot_creator -acme

                        */

                       sk->sk_prot = sk->sk_prot_creator = prot;

                       sock_lock_init(sk);

                       sock_net_set(sk, get_net(net));

                       /* The write-memory counter starts at 1, not 0. */
                       atomic_set(&sk->sk_wmem_alloc, 1);

                       sock_update_classid(sk);

                       sock_update_netprioidx(sk);

             }

             return sk;

    }

    /*
     * sk_prot_alloc - obtain the memory for a sock of protocol @prot.
     *
     * @prot:     protocol descriptor; supplies the slab cache, obj_size,
     *            optional clear_sk hook and owning module
     * @priority: allocation flags; __GFP_ZERO requests a cleared object
     * @family:   forwarded to security_sk_alloc()
     *
     * Returns the new sock, or NULL if allocation, the security hook or
     * try_module_get() fails (the object is freed again in the latter two
     * cases).
     *
     * The trailing "-----(1)" and "-----(2)" markers are annotations added by
     * the article, which discusses the two allocation cases under those
     * numbers; they are not part of the kernel source.
     */
    static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,

                       int family)

    {

             struct sock *sk;

             struct kmem_cache *slab;

        /* Memory is allocated in one of two ways: from the protocol's slab cache, or by an ordinary kmalloc(). */

             slab = prot->slab;

             if (slab != NULL) {

                      /* Case (1): slab allocation with __GFP_ZERO masked out; clearing is done explicitly below. */
                      sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);---------------------(1)

                       if (!sk)

                                return sk;

                       if (priority & __GFP_ZERO) {

                                /* Use the protocol's own clear hook when it provides one. */
                                if (prot->clear_sk)

                                         prot->clear_sk(sk, prot->obj_size);

                                else

                                         sk_prot_clear_nulls(sk, prot->obj_size);

                       }

             } else

                       /* Case (2): no slab cache; allocate prot->obj_size bytes with the caller's flags unchanged. */
                       sk = kmalloc(prot->obj_size, priority);---------------------------(2)

             if (sk != NULL) {

                       kmemcheck_annotate_bitfield(sk, flags);

                       if (security_sk_alloc(sk, family, priority))

                                goto out_free;

                       if (!try_module_get(prot->owner))

                                goto out_free_sec;

                       sk_tx_queue_clear(sk);

             }

             return sk;

    /* Error unwinding: undo the security allocation first, then free the object the same way it was allocated. */
    out_free_sec:

             security_sk_free(sk);

    out_free:

             if (slab != NULL)

                       kmem_cache_free(slab, sk);

             else

                       kfree(sk);

             return NULL;

    }

    (1)第一种情况:sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO)。这里的slab来自slab = prot->slab;,而prot就是函数传递过来的struct proto *prot。再看一下这个prot是怎么来的:在inet_create函数中调用了sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);,这里的answer_prot由answer_prot = answer->prot;得到。再看一下answer->prot是如何来的?

             在inet_create函数中通过遍历inetsw数组获取到struct inet_protosw *answer;

    /*
     * Excerpt from inet_create(): walk the inetsw list for this socket type
     * looking for an entry that matches the requested protocol. The loop is
     * left via break with err == 0 on a match; if the list is exhausted
     * without one, err holds -EPROTONOSUPPORT.
     */
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

                       err = 0;

                       /* Check the non-wild match. */

                       if (protocol == answer->protocol) {

                                if (protocol != IPPROTO_IP)

                                         break;

                       } else {

                                /* Check for the two wild cases. */

                                if (IPPROTO_IP == protocol) {

                                         /* Caller passed the wildcard: adopt this entry's protocol. */
                                         protocol = answer->protocol;

                                         break;

                                }

                                if (IPPROTO_IP == answer->protocol)

                                         break;

                       }

                       err = -EPROTONOSUPPORT;

             }

             其中inetsw链表中的元素来自下面这个数组的定义,如果是SOCK_STREAM类型的socket,这里的prot指向tcp_prot

    /*
     * Built-in IPv4 protocol switch entries. inet_init() hands each element
     * to inet_register_protosw(), after which inet_create() finds them by
     * walking inetsw[sock->type]. Each entry pairs a socket type and protocol
     * number with its struct proto (.prot) and socket operations (.ops).
     */
    static struct inet_protosw inetsw_array[] =

    {

             {

                       .type =       SOCK_STREAM,

                       .protocol =   IPPROTO_TCP,

                      .prot =       &tcp_prot,

                       .ops =        &inet_stream_ops,

                       .no_check =   0,

                       .flags =      INET_PROTOSW_PERMANENT |

                                      INET_PROTOSW_ICSK,

             },

             {

                       .type =       SOCK_DGRAM,

                       .protocol =   IPPROTO_UDP,

                       .prot =       &udp_prot,

                       .ops =        &inet_dgram_ops,

                       .no_check =   UDP_CSUM_DEFAULT,

                       .flags =      INET_PROTOSW_PERMANENT,

           },

           {

                       .type =       SOCK_DGRAM,

                       .protocol =   IPPROTO_ICMP,

                       .prot =       &ping_prot,

                       .ops =        &inet_dgram_ops,

                       .no_check =   UDP_CSUM_DEFAULT,

                       .flags =      INET_PROTOSW_REUSE,

           },

           {

                    .type =       SOCK_RAW,

                    .protocol =   IPPROTO_IP,       /* wild card */

                    .prot =       &raw_prot,

                    .ops =        &inet_sockraw_ops,

                    .no_check =   UDP_CSUM_DEFAULT,

                    .flags =      INET_PROTOSW_REUSE,

           }

    };

             再看一下

    /*
     * Protocol descriptor for TCP over IPv4. It is referenced by the
     * SOCK_STREAM entry of inetsw_array and registered in inet_init() via
     * proto_register(&tcp_prot, 1).
     */
    struct proto tcp_prot = {

             /* Used as the slab cache name in proto_register() and as the dentry name in sock_alloc_file(). */
             .name                         = "TCP",

             .owner                        = THIS_MODULE,

             .close                          = tcp_close,

             .connect            = tcp_v4_connect,

             .disconnect                = tcp_disconnect,

             .accept                       = inet_csk_accept,

             .ioctl                            = tcp_ioctl,

             .init                     = tcp_v4_init_sock,

             .destroy            = tcp_v4_destroy_sock,

             .shutdown                 = tcp_shutdown,

             .setsockopt               = tcp_setsockopt,

             .getsockopt               = tcp_getsockopt,

             .recvmsg           = tcp_recvmsg,

             .sendmsg                   = tcp_sendmsg,

             .sendpage                  = tcp_sendpage,

             .backlog_rcv              = tcp_v4_do_rcv,

             .release_cb               = tcp_release_cb,

             .mtu_reduced          = tcp_v4_mtu_reduced,

             .hash                           = inet_hash,

             .unhash                      = inet_unhash,

             .get_port          = inet_csk_get_port,

             .enter_memory_pressure       = tcp_enter_memory_pressure,

             .stream_memory_free    = tcp_stream_memory_free,

             .sockets_allocated  = &tcp_sockets_allocated,

             .orphan_count                   = &tcp_orphan_count,

             .memory_allocated = &tcp_memory_allocated,

             .memory_pressure = &tcp_memory_pressure,

             .sysctl_mem             = sysctl_tcp_mem,

             .sysctl_wmem          = sysctl_tcp_wmem,

             .sysctl_rmem            = sysctl_tcp_rmem,

             .max_header            = MAX_TCP_HEADER,

             /* Object size for every sock of this protocol: passed to kmem_cache_create() in proto_register() and to kmalloc() in sk_prot_alloc(). */
             .obj_size           = sizeof(struct tcp_sock),

             .slab_flags                 = SLAB_DESTROY_BY_RCU,

             .twsk_prot                 = &tcp_timewait_sock_ops,

             .rsk_prot           = &tcp_request_sock_ops,

             .h.hashinfo                = &tcp_hashinfo,

             .no_autobind            = true,

    #ifdef CONFIG_COMPAT

             .compat_setsockopt        = compat_tcp_setsockopt,

             .compat_getsockopt        = compat_tcp_getsockopt,

    #endif

    #ifdef CONFIG_MEMCG_KMEM

             .init_cgroup               = tcp_init_cgroup,

             .destroy_cgroup                = tcp_destroy_cgroup,

             .proto_cgroup          = tcp_proto_cgroup,

    #endif

    };

             在af_inet.c文件的inet_init函数中,通过proto_register对tcp_prot进行了注册:

    /*
     * inet_init - bring up the IPv4 socket layer.
     *
     * In order: allocates sysctl_local_reserved_ports, registers the TCP,
     * UDP, RAW and PING struct proto objects (each with alloc_slab = 1),
     * registers inet_family_ops, adds the base network protocols,
     * initialises the inetsw list heads and fills them from inetsw_array,
     * then calls the individual subsystem init routines.
     *
     * Returns 0 on success. If the initial allocation fails, the initial rc
     * value (-EINVAL) is returned; if a proto_register() call fails, its
     * return value is returned after the earlier registrations and the
     * reserved-ports buffer have been undone.
     */
    static int __init inet_init(void)

    {

             struct inet_protosw *q;

             struct list_head *r;

             int rc = -EINVAL;

             BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));

             sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);

             if (!sysctl_local_reserved_ports)

                       goto out;

        // Registers tcp_prot; proto_register() allocates the slab cache stored in tcp_prot->slab.

             rc = proto_register(&tcp_prot, 1);

             if (rc)

                       goto out_free_reserved_ports;

             rc = proto_register(&udp_prot, 1);

             if (rc)

                       goto out_unregister_tcp_proto;

             rc = proto_register(&raw_prot, 1);

             if (rc)

                       goto out_unregister_udp_proto;

             rc = proto_register(&ping_prot, 1);

             if (rc)

                       goto out_unregister_raw_proto;

             /*

              *     Tell SOCKET that we are alive...

              */

             (void)sock_register(&inet_family_ops);

    #ifdef CONFIG_SYSCTL

             ip_static_sysctl_init();

    #endif

             /*

              *     Add all the base protocols.

              */

             if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)

                       pr_crit("%s: Cannot add ICMP protocol ", __func__);

             if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)

                       pr_crit("%s: Cannot add UDP protocol ", __func__);

             if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)

                       pr_crit("%s: Cannot add TCP protocol ", __func__);

    #ifdef CONFIG_IP_MULTICAST

             if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)

                       pr_crit("%s: Cannot add IGMP protocol ", __func__);

    #endif

             /* Register the socket-side information for inet_create. Initialise the inetsw list heads. */

             for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)

                       INIT_LIST_HEAD(r);

    /* Add each inetsw_array entry to the corresponding inetsw list so that inet_create() can walk them. */

             for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)

                       inet_register_protosw(q);

             /*

              *     Set the ARP module up

              */

             arp_init();

             /*

              *     Set the IP module up

              */

             ip_init();

             tcp_v4_init();

             /* Setup TCP slab cache for open requests. */

             tcp_init();

             /* Setup UDP memory threshold */

             udp_init();

             /* Add UDP-Lite (RFC 3828) */

             udplite4_register();

             ping_init();

             /*

              *     Set the ICMP layer up

              */

             if (icmp_init() < 0)

                       panic("Failed to create the ICMP control socket. ");

             /*

              *     Initialise the multicast router

              */

    #if defined(CONFIG_IP_MROUTE)

             if (ip_mr_init())

                       pr_crit("%s: Cannot init ipv4 mroute ", __func__);

    #endif

             /*

              *     Initialise per-cpu ipv4 mibs

              */

             if (init_ipv4_mibs())

                       pr_crit("%s: Cannot init ipv4 mibs ", __func__);

             ipv4_proc_init();

             ipfrag_init();

             dev_add_pack(&ip_packet_type);

             rc = 0;

    out:

             return rc;

    /* Error unwinding: each label undoes one earlier step and falls through to the next. */
    out_unregister_raw_proto:

             proto_unregister(&raw_prot);

    out_unregister_udp_proto:

             proto_unregister(&udp_prot);

    out_unregister_tcp_proto:

             proto_unregister(&tcp_prot);

    out_free_reserved_ports:

             kfree(sysctl_local_reserved_ports);

             goto out;

    }

             在proto_register函数中,主要关注对prot->slab的初始化。

    /*
     * proto_register - register a protocol descriptor (abridged by the
     * article; only the slab setup is shown and the rest is elided).
     *
     * When @alloc_slab is non-zero, a slab cache named prot->name with
     * objects of prot->obj_size bytes is created and stored in prot->slab;
     * failure to create it is logged and jumps to the (elided) "out" label.
     */
    int proto_register(struct proto *prot, int alloc_slab)

    {

             if (alloc_slab) {

                       prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,

                                                   SLAB_HWCACHE_ALIGN | prot->slab_flags,

                                                   NULL);// for TCP, prot->obj_size is the .obj_size = sizeof(struct tcp_sock) set in tcp_prot

                       if (prot->slab == NULL) {

                                pr_crit("%s: Can't create sock SLAB cache! ",

                                         prot->name);

                                goto out;

                       }

    ……………………..

    }

    (2)对于第二种情况,主要看prot->obj_size,它就是struct proto tcp_prot 中初始化的.obj_size            = sizeof(struct tcp_sock)。sk = kmalloc(prot->obj_size, priority);---------------------------(2)

             下面是五个相关的数据结构,tcp_sock结构体占用的空间是最大的,所以在分配内存空间时,都是分配的tcp_sock的大小,这样在后面进行强制转换的过程中可以保证正确。

  • 相关阅读:
    PythonStudy——数据类型总结 Data type summary
    PythonStudy——可变与不可变 Variable and immutable
    PythonStudy——列表操作 List operatio
    PythonStudy——列表的常用操作 List of common operations
    PythonStudy——列表类型 List type
    PythonStudy——字符串扩展方法 String extension method
    PythonStudy——字符串重要方法 String important method
    AWT,Swing,RCP 开发
    JQuery插件机制
    最新知识网站
  • 原文地址:https://www.cnblogs.com/feng9exe/p/7001308.html
Copyright © 2011-2022 走看看