zoukankan      html  css  js  c++  java
  • 网络协议栈学习(二)创建 socket

      下面通过学习linux 1.2.13源码进一步理解socket通信机制。对该版本源码的学习主要参考《Linux内核网络栈源代码情景分析》(曹桂平 编著)。

      要理解socket的本质,就要理解当调用socket函数时,该函数到底创建了什么?返回了什么?

    int  socket(int family, int type, int protocol);

      socket 函数为用户层函数,该函数对应的内核函数为sock_socket(socket.c文件),源码如下:

    static int sock_socket(int family, int type, int protocol)
    {
        int i, fd;
        struct socket *sock;
        struct proto_ops *ops;
    
        /* Locate the correct protocol family. */
        for (i = 0; i < NPROTO; ++i) 
        {
            if (pops[i] == NULL) continue;
            if (pops[i]->family == family) 
                break;
        }
    
        if (i == NPROTO) 
        {
              return -EINVAL;
        }
    
        ops = pops[i];
    
    /*
     *    Check that this is a type that we know how to manipulate and
     *    the protocol makes sense here. The family can still reject the
     *    protocol later.
     */
      
        if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
            type != SOCK_SEQPACKET && type != SOCK_RAW &&
            type != SOCK_PACKET) || protocol < 0)
                return(-EINVAL);
    
    /*
     *    Allocate the socket and allow the family to set things up. if
     *    the protocol is 0, the family is instructed to select an appropriate
     *    default.
     */
    
        if (!(sock = sock_alloc())) 
        {
            printk("NET: sock_socket: no more sockets
    ");
            return(-ENOSR);    /* Was: EAGAIN, but we are out of
                       system resources! */
        }
    
        sock->type = type;
        sock->ops = ops;
        if ((i = sock->ops->create(sock, protocol)) < 0) 
        {
            sock_release(sock);
            return(i);
        }
    
        if ((fd = get_fd(SOCK_INODE(sock))) < 0) 
        {
            sock_release(sock);
            return(-EINVAL);
        }
    
        return(fd);
    }

        sock_socket 函数完成如下工作:

    (1)分配socket、sock结构,这两个结构在网络栈的不同层次表示一个套接字连接。

    (2)分配inode、file结构用于普通文件操作。

    (3)分配一个文件描述符并返回给应用程序作为以后的操作句柄。

      sock_alloc 函数用于分配一个inode节点,并返回该节点的socket指针

    /*
     * Obtain an empty inode, mark it as a socket inode owned by the current
     * task, and initialise the struct socket embedded in it (u.socket_i).
     *
     * Returns a pointer to that embedded socket, or NULL when
     * get_empty_inode() cannot supply an inode. Each successful call bumps
     * the sockets_in_use counter.
     */
    struct socket *sock_alloc(void)
    {
        struct inode *ino = get_empty_inode();
        struct socket *s;

        if (ino == NULL)
            return NULL;

        /* Inode side: tag as a socket and record the owner's ids. */
        ino->i_mode = S_IFSOCK;
        ino->i_sock = 1;
        ino->i_uid = current->uid;
        ino->i_gid = current->gid;

        /* The socket lives inside the inode's union, not in its own allocation. */
        s = &ino->u.socket_i;

        /* Cross-links between the socket and its inode. */
        s->inode = ino;        /* back-pointer to the containing inode */
        s->wait = &ino->i_wait;

        /* Fresh socket: unconnected, no flags, nothing attached yet. */
        s->state = SS_UNCONNECTED;
        s->flags = 0;
        s->ops = NULL;
        s->data = NULL;
        s->conn = NULL;
        s->iconn = NULL;
        s->next = NULL;
        s->fasync_list = NULL;

        sockets_in_use++;
        return s;
    }

     inode的定义如下

    /* include/fs.h */
    /*
     * In-core inode structure. It carries generic per-inode bookkeeping
     * fields followed by a union `u` with one member per inode flavour
     * (judging by the member names: pipe, several filesystems, socket).
     * For sockets the relevant member is `u.socket_i`.
     */
    struct inode {
        dev_t        i_dev;
        unsigned long    i_ino;
        umode_t        i_mode;        /* sock_alloc() stores S_IFSOCK here */
        nlink_t        i_nlink;
        uid_t        i_uid;        /* sock_alloc() copies current->uid */
        gid_t        i_gid;        /* sock_alloc() copies current->gid */
        dev_t        i_rdev;
        off_t        i_size;
        time_t        i_atime;
        time_t        i_mtime;
        time_t        i_ctime;
        unsigned long    i_blksize;
        unsigned long    i_blocks;
        unsigned long    i_version;
        struct semaphore i_sem;
        struct inode_operations * i_op;
        struct super_block * i_sb;
        struct wait_queue * i_wait;    /* sock_alloc() points sock->wait at this field */
        struct file_lock * i_flock;
        struct vm_area_struct * i_mmap;
        struct inode * i_next, * i_prev;
        struct inode * i_hash_next, * i_hash_prev;
        struct inode * i_bound_to, * i_bound_by;
        struct inode * i_mount;
        unsigned short i_count;        /* incremented by get_fd() when a file is attached */
        unsigned short i_wcount;
        unsigned short i_flags;
        unsigned char i_lock;
        unsigned char i_dirt;
        unsigned char i_pipe;
        unsigned char i_sock;        /* set to 1 by sock_alloc() */
        unsigned char i_seek;
        unsigned char i_update;
        /* Type-specific payload; only one member is meaningful per inode. */
        union {
            struct pipe_inode_info pipe_i;
            struct minix_inode_info minix_i;
            struct ext_inode_info ext_i;
            struct ext2_inode_info ext2_i;
            struct hpfs_inode_info hpfs_i;
            struct msdos_inode_info msdos_i;
            struct umsdos_inode_info umsdos_i;
            struct iso_inode_info isofs_i;
            struct nfs_inode_info nfs_i;
            struct xiafs_inode_info xiafs_i;
            struct sysv_inode_info sysv_i;
            struct socket socket_i;    /* embedded socket; sock_alloc() returns its address */
            void * generic_ip;
        } u;
    };

      inode 结构是文件系统的一个结构体,该结构体中的成员变量u是一个联合体,保存与该inode具体文件类型相关的信息;当inode用于socket通信时,使用的是其中的socket_i成员。sock_alloc 的作用就是创建inode结构体,然后返回socket_i的地址。至于具体如何分配inode涉及到文件系统方面的知识,这里暂不讨论。

      当协议族为AF_INET时,ops->create 将调用inet_create(struct socket*sock, int protocol)函数。该函数将创建一个sock结构体并使得socket的data指针指向该sock结构体。

    /*
     * Create the INET-level `struct sock` for a freshly allocated socket and
     * link the two together (sock->data points at the new sock, sk->socket
     * points back). Per the accompanying text this is reached through
     * ops->create when the family is AF_INET.
     *
     * `protocol` may be 0 for stream/datagram sockets, in which case TCP or
     * UDP is selected; raw and packet sockets require a non-zero protocol
     * and a privileged caller (suser()).
     *
     * Returns 0 on success, or a negative error code:
     *   -ENOBUFS          kmalloc() failed;
     *   -EPROTONOSUPPORT  protocol does not fit the socket type;
     *   -EPERM            raw/packet socket requested without suser();
     *   -ESOCKTNOSUPPORT  socket type not handled here;
     *   other             non-zero value returned by the protocol's init().
     *
     * NOTE(review): the "......" lines mark code elided from this excerpt.
     * The assignment of sk->prot from `prot` is not visible here and is
     * presumably part of the elided text - confirm against the full source.
     */
    static int inet_create(struct socket *sock, int protocol)
    {
        struct sock *sk;
        struct proto *prot;
        int err;
    
        sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
        if (sk == NULL) 
            return(-ENOBUFS);
        sk->num = 0;
        sk->reuse = 0;
        /* Select the protocol by socket type; every rejection frees sk first. */
        switch(sock->type) 
        {
            case SOCK_STREAM:
            case SOCK_SEQPACKET:
                /* Only "default" (0) or an explicit IPPROTO_TCP is accepted. */
                if (protocol && protocol != IPPROTO_TCP) 
                {
                    kfree_s((void *)sk, sizeof(*sk));
                    return(-EPROTONOSUPPORT);
                }
                protocol = IPPROTO_TCP;
                sk->no_check = TCP_NO_CHECK;
                prot = &tcp_prot;
                break;
    
            case SOCK_DGRAM:
                /* Only "default" (0) or an explicit IPPROTO_UDP is accepted. */
                if (protocol && protocol != IPPROTO_UDP) 
                {
                    kfree_s((void *)sk, sizeof(*sk));
                    return(-EPROTONOSUPPORT);
                }
                protocol = IPPROTO_UDP;
                sk->no_check = UDP_NO_CHECK;
                prot=&udp_prot;
                break;
          
            case SOCK_RAW:
                /* Privileged callers only, and the protocol must be explicit. */
                if (!suser()) 
                {
                    kfree_s((void *)sk, sizeof(*sk));
                    return(-EPERM);
                }
                if (!protocol) 
                {
                    kfree_s((void *)sk, sizeof(*sk));
                    return(-EPROTONOSUPPORT);
                }
                prot = &raw_prot;
                sk->reuse = 1;
                sk->no_check = 0;    /*
                             * Doesn't matter no checksum is
                             * performed anyway.
                             */
                sk->num = protocol;
                break;
    
            case SOCK_PACKET:
                /* Same checks as SOCK_RAW, but bound to packet_prot. */
                if (!suser()) 
                {
                    kfree_s((void *)sk, sizeof(*sk));
                    return(-EPERM);
                }
                if (!protocol) 
                {
                    kfree_s((void *)sk, sizeof(*sk));
                    return(-EPROTONOSUPPORT);
                }
                prot = &packet_prot;
                sk->reuse = 1;
                sk->no_check = 0;    /* Doesn't matter no checksum is
                             * performed anyway.
                             */
                sk->num = protocol;
                break;
    
            default:
                kfree_s((void *)sk, sizeof(*sk));
                return(-ESOCKTNOSUPPORT);
        }
        sk->socket = sock;    /* back-pointer from the sock to its socket */
    #ifdef CONFIG_TCP_NAGLE_OFF
        sk->nonagle = 1;
    #else    
        sk->nonagle = 0;
    #endif  
        sk->type = sock->type;
        sk->stamp.tv_sec=0;
        sk->protocol = protocol;
            ...... 
        sk->timer.function = &net_timer;
        skb_queue_head_init(&sk->back_log);
        sk->blog = 0;
        sock->data =(void *) sk; /* the socket now points at its sock */
        sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
           ......
        /* Optional per-protocol initialisation; a non-zero result aborts creation. */
        if (sk->prot->init) 
        {
            err = sk->prot->init(sk);
            if (err != 0) 
            {
                destroy_sock(sk);
                return(err);
            }
        }
        return(0);
    }

      最后调用get_fd 返回一个文件描述符给上层应用。

    /* socket.c */
    static int get_fd(struct inode *inode)
    {
        int fd;
        struct file *file;
    
        /*
         *    Find a file descriptor suitable for return to the user. 
         */
    
        file = get_empty_filp(); // 获取一个闲置的file结构
        if (!file) 
            return(-1);
    
        for (fd = 0; fd < NR_OPEN; ++fd)
            if (!current->files->fd[fd]) 
                break;
        if (fd == NR_OPEN) 
        {
            file->f_count = 0;
            return(-1);
        }
    
        FD_CLR(fd, &current->files->close_on_exec);
            current->files->fd[fd] = file;  
        file->f_op = &socket_file_ops; // socket 文件操作
        file->f_mode = 3;
        file->f_flags = O_RDWR;
        file->f_count = 1;
        file->f_inode = inode;
        if (inode) 
            inode->i_count++;
        file->f_pos = 0;
        return(fd);
    }

      get_fd 用于为网络套接字分配一个文件描述符,分配描述符的同时需要一个file结构,每个file结构都需要一个inode结构对应。内核维护一个file结构数组,get_empty_filp 函数即通过检查该数组,获取一个闲置的成员。f_op 字段的赋值实现了网络操作的普通文件接口。如果调用read、write函数进行操作就会调用相应的sock_read 和 sock_write 函数。

      如何根据文件描述符fd找到相应的sock?

  • 相关阅读:
    (55)ElasticSearch之使用scroll滚动技术实现大数据量搜锁
    (54)ElasticSearch之DocValues解析
    (53)ElasticSearch之如何计算相关度分数
    HDU
    POJ3311 Hie with the Pie
    luoguP2768: 珍珠项链(矩阵乘法优化DP)
    luoguU60884 【模板】动态点分治套线段树
    最小圆覆盖(洛谷 P1742 增量法)
    CodeForces
    HDU
  • 原文地址:https://www.cnblogs.com/happygirl-zjj/p/7120902.html
Copyright © 2011-2022 走看看