zoukankan      html  css  js  c++  java
  • poll实现

    /* One entry of the array user space passes to poll(). */
    struct pollfd {
        int fd;     /* descriptor this entry refers to */
        short events;     /* events the calling process is interested in */
        short revents;    /* events reported back to the caller */
    };

    /*
     * poll() system call entry point.
     *
     * Turns the millisecond timeout supplied by the caller into jiffies and
     * hands the request to do_sys_poll().
     *
     * ufds:          user-space array of pollfd entries
     * nfds:          number of entries in ufds
     * timeout_msecs: > 0 is a timeout in milliseconds; 0 and negative values
     *                are passed through unconverted (0 = do not wait,
     *                negative = wait forever)
     *
     * Returns whatever do_sys_poll() returns.
     */
    asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
                long timeout_msecs)
    {
        s64 timeout_jiffies;

        if (timeout_msecs <= 0) {
            /* Zero or negative: nothing to convert, forward as is. */
            timeout_jiffies = timeout_msecs;
        } else {
    #if HZ > 1000
            /* The conversion can only overflow when HZ > 1000. */
            if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
                timeout_jiffies = -1;
            else
    #endif
                timeout_jiffies = msecs_to_jiffies(timeout_msecs);
        }

        /* The real work happens here. */
        return do_sys_poll(ufds, nfds, &timeout_jiffies);
    }


    /*
     * One chunk of pollfd entries copied in from user space.  Chunks are
     * chained through 'next'; 'len' is the number of valid slots in 'entries'.
     *
     * 'entries' is a C99 flexible array member (instead of the GNU
     * zero-length array extension): sizeof(struct poll_list) still excludes
     * the array, so allocation sites keep using
     * sizeof(struct poll_list) + n * sizeof(struct pollfd).
     */
    struct poll_list {
        struct poll_list *next;
        int len;
        struct pollfd entries[];
    };



    /*
     * Core of poll(): copy the pollfd array in from user space, run do_poll()
     * over it, and copy each entry's revents back out.
     *
     * ufds:    user-space array of pollfd entries
     * nfds:    number of entries in ufds
     * timeout: timeout in jiffies, passed through to do_poll()
     *
     * Returns the value of do_poll() on success; -EINVAL when nfds exceeds the
     * RLIMIT_NOFILE soft limit; -ENOMEM when a chunk cannot be allocated;
     * -EFAULT when copying from or to user space fails; -EINTR when no
     * descriptor was ready and a signal is pending.
     */
    int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
    {
        struct poll_wqueues table;
         int fdcount, err;
         unsigned int i;
        struct poll_list *head;
         struct poll_list *walk;
        /* Allocate small arguments on the stack to save memory and be
           faster - use long to make sure the buffer is aligned properly
           on 64 bit archs to avoid unaligned access */
        long stack_pps[POLL_STACK_ALLOC/sizeof(long)]; // stack allocation is faster
        struct poll_list *stack_pp = NULL;


        // Reject requests with more descriptors than the RLIMIT_NOFILE soft limit
        /* Do a sanity check on nfds ... */
        if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
            return -EINVAL;

        // Initialise the wait table; this mainly installs the poll_table
        // callback function pointer
        poll_initwait(&table);

        head = NULL;
        walk = NULL;
        i = nfds;
        err = -ENOMEM;
        // Copy the pollfd entries from user space into the kernel chunk by
        // chunk and chain the chunks into a linked list. A loop is needed
        // because the entries may not fit into a single chunk; 'i' counts the
        // entries that are still to be copied.
        while(i!=0) {
            struct poll_list *pp;
            int num, size;
            if (stack_pp == NULL)
                num = N_STACK_PPS;
            else
                num = POLLFD_PER_PAGE; // presumably keeps each kmalloc within one page
            if (num > i)
                num = i;
            size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;

            // The first chunk always lives in the on-stack buffer; any
            // further chunks are allocated with kmalloc
            if (!stack_pp)
                stack_pp = pp = (struct poll_list *)stack_pps;
            else {
                pp = kmalloc(size, GFP_KERNEL);
                if (!pp)
                    goto out_fds;
            }
            pp->next=NULL;
            pp->len = num;
            if (head == NULL)
                head = pp;
            else
                walk->next = pp;

            walk = pp;
            // nfds-i is the number of entries already copied
            if (copy_from_user(pp->entries, ufds + nfds-i,
                    sizeof(struct pollfd)*num)) {
                err = -EFAULT;
                goto out_fds;
            }
            i -= pp->len;
        }

        // The actual poll; results end up in the revents fields of the
        // entries chained from head
        fdcount = do_poll(nfds, head, &table, timeout);

        // Nested loop: walk every chunk and every entry in it, copying the
        // events back to user space
        /* OK, now copy the revents fields back to user space. */
        walk = head;
        err = -EFAULT;
        while(walk != NULL) {
            struct pollfd *fds = walk->entries;
            int j;

            for (j=0; j < walk->len; j++, ufds++) {
                if(__put_user(fds[j].revents, &ufds->revents))
                    goto out_fds;
            }
            walk = walk->next;
        }
        err = fdcount;
        if (!fdcount && signal_pending(current))
            err = -EINTR;

        // Release resources: free every chunk except the on-stack one, then
        // tear down the wait table
    out_fds:
        walk = head;
        while(walk!=NULL) {
            struct poll_list *pp = walk->next;
            if (walk != stack_pp)
                kfree(walk);
            walk = pp;
        }
        poll_freewait(&table);
        return err;
    }


    /*
     * Add a new entry: put the current process on the wait queue
     * 'wait_address'.  That queue is supplied by whoever implements the
     * file's poll handler (a driver, a filesystem or the network stack).
     *
     * This function is not called directly; it is installed as the
     * poll_table callback and reached through poll_wait() from the file's
     * ->poll handler.
     *
     * Does nothing when no poll_table_entry can be obtained.
     */
    static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
                    poll_table *p)
    {
        struct poll_table_entry *slot;

        slot = poll_get_entry(p);
        if (slot == NULL)
            return;

        /* Take a reference on the file and remember it in the entry. */
        get_file(filp);
        slot->filp = filp;
        slot->wait_address = wait_address;

        /* Queue the current task on the supplied wait queue. */
        init_waitqueue_entry(&slot->wait, current);
        add_wait_queue(wait_address, &slot->wait);
    }

    /*
     * Prepare a poll_wqueues for use: clear its bookkeeping fields and
     * install __pollwait() as the poll_table callback.  (This is the poll()
     * flavour of the callback; epoll installs a different one.)
     */
    void poll_initwait(struct poll_wqueues *pwq)
    {
        pwq->inline_index = 0;
        pwq->table = NULL;
        pwq->error = 0;
        init_poll_funcptr(&pwq->pt, __pollwait);
    }

    ===========================================

    /*
     * Scan every pollfd in 'list' for ready events, sleeping between scans
     * until something is ready, the timeout runs out, a signal is pending or
     * the wait table reports an error.
     *
     * nfds:    number of descriptors (not used by the loop itself)
     * list:    chained chunks of pollfd entries already copied into the kernel
     * wait:    wait table whose poll_table is handed to the poll handlers
     * timeout: in/out timeout in jiffies; 0 means do not block, a negative
     *          value means wait indefinitely.  A non-negative value is
     *          updated with the time left after each sleep.
     *
     * Returns the number of descriptors with a non-zero revents, or
     * wait->error when that is set and nothing was ready.
     */
    static int do_poll(unsigned int nfds, struct poll_list *list,
               struct poll_wqueues *wait, s64 *timeout)
    {
        int count = 0;
        poll_table *pt = &wait->pt;

        /* Optimise the no-wait case */
        if (!(*timeout))   /* zero timeout: never block, so never register waiters */
            pt = NULL;

        for (;;) {
            struct poll_list *walk;
            long __timeout;

            /* Again a nested loop: visit every chunk and every descriptor in it. */
            set_current_state(TASK_INTERRUPTIBLE);
            for (walk = list; walk != NULL; walk = walk->next) {
                struct pollfd *pfd, *pfd_end;

                pfd = walk->entries;
                pfd_end = pfd + walk->len;
                for (; pfd != pfd_end; pfd++) {
                    /*
                    * Fish for events. If we found one, record it
                    * and kill the poll_table, so we don't
                    * needlessly register any other waiters after
                    * this. They'll get immediately deregistered
                    * when we break out and return.
                    */
                    if (do_pollfd(pfd, pt)) { /* query this descriptor */
                        count++;
                        pt = NULL;
                    }
                }
            }

            /* Timeout handling */
            /*
            * All waiters have already been registered, so don't provide
            * a poll_table to them on the next loop iteration.
            */
            pt = NULL;
            if (count || !*timeout || signal_pending(current))
                break;
            count = wait->error;
            if (count)
                break;

            if (*timeout < 0) {
                /* Wait indefinitely */
                __timeout = MAX_SCHEDULE_TIMEOUT;
            } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
                /*
                * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
                * a loop
                */
                __timeout = MAX_SCHEDULE_TIMEOUT - 1;
                *timeout -= __timeout;
            } else {
                __timeout = *timeout;
                *timeout = 0;
            }
            /* Give up the CPU until woken up or the timeout expires. */
            __timeout = schedule_timeout(__timeout);
            /* Woken up again: add the unused part of the sleep back. */
            if (*timeout >= 0)
                *timeout += __timeout;
        }
        __set_current_state(TASK_RUNNING);
        return count;
    }


    /*
    * Query a single pollfd for pending events.
    *
    * Only events requested in pollfd->events (plus POLLERR and POLLHUP, which
    * are always reported) survive the masking step.  The resulting mask is
    * stored in pollfd->revents and also returned, so a non-zero return means
    * the descriptor has something to report.
    *
    * A negative fd yields 0; an fd that cannot be resolved to a file yields
    * POLLNVAL.  When pwait is non-NULL the file's poll handler may use it to
    * register the caller on its wait queue.
    */
    static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
    {
        unsigned int ready = 0;
        int fput_needed;
        struct file *filp;
        int fd = pollfd->fd;

        if (fd < 0)
            goto done;

        filp = fget_light(fd, &fput_needed);
        if (filp == NULL) {
            ready = POLLNVAL;
            goto done;
        }

        /*
        * Ask the driver or filesystem.  Whether the caller is put on the
        * handler's wait queue depends on pwait being non-NULL.
        */
        if (filp->f_op && filp->f_op->poll)
            ready = filp->f_op->poll(filp, pwait);
        else
            ready = DEFAULT_POLLMASK;

        /* Mask out unneeded events. */
        ready &= pollfd->events | POLLERR | POLLHUP;
        fput_light(filp, fput_needed);

    done:
        pollfd->revents = ready; /* hand the result back through the entry */
        return ready;
    }


    =================================
    驱动或文件系统的poll()实现原型:
    /*
     * Schematic of a driver/filesystem poll handler.  This is a sketch, not
     * compilable code: "..." marks parts that are left out, and 'dev' and
     * 'mask' are not declared here.
     * NOTE(review): a real handler presumably declares mask, starts it at 0
     * and returns it at the end - confirm against an actual driver.
     */
    test_poll(struct file *filep, poll_table *wait)
    {
        ...
        /* Hand the device's wait queue head to poll_wait(); it only acts
           when 'wait' is non-NULL. */
        poll_wait(filep, &dev->wait_queue_head, wait);
        ...

        /* Report readability. */
        if (dev->readable)
            mask |= POLLIN | POLLRDNORM;

        /* Report writability. */
        if (dev->writable)
            mask |= POLLOUT | POLLWRNORM;

        ...

    }


    /*
     * Invoke the poll_table callback for 'wait_address', if there is both a
     * poll_table and a wait queue.  For poll() the callback is __pollwait(),
     * installed by poll_initwait().
     */
    static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
    {
        if (!p || !wait_address)
            return;

        p->qproc(filp, wait_address, p);
    }

  • 相关阅读:
    路由器基础配置之ospf基础配置
    路由器基础配置之广播多路访问链路上的ospf
    路由器基础设置之ospf
    linux命令之文件系统权限操作常用命令
    路由器基础配置之路由重分布
    路由器配置 之 DHCP+DHCP中继服务配置
    路由器配置 之 PAP与CHAP认证
    基于链路的OSPF MD5口令认证
    压缩和归档操作(16个命令)
    基于链路的OSPF简单口令认证
  • 原文地址:https://www.cnblogs.com/oracleloyal/p/5395095.html
Copyright © 2011-2022 走看看