zoukankan      html  css  js  c++  java
  • BIND9的架构与机制笔记1

      BIND9采用的是事件驱动的机制来工作,而事件的源头则是IO。在Linux下IO复用使用的是EPOLL;注意下文代码注册事件时并没有设置EPOLLET,也就是说用的是EPOLL默认的水平触发(level-triggered)模式。

      本篇说的是epoll,BIND9如果创建了watcher线程(宏USE_WATCHER_THREAD控制),这里就讨论有线程的情况,实际上即使不创建

    线程干的也都是一样的活。在lib/isc/socket.c中setup_watcher函数:(所有的代码都是截取的epoll下的片段,因为还有kqueue,devpoll,select等的实现代码,太多了)

    #elif defined(USE_EPOLL)
        /*
         * Excerpt from setup_watcher() in lib/isc/socket.c: allocate the
         * epoll_event array that epoll_wait() will fill, then create the
         * epoll instance itself.
         */
        manager->nevents = ISC_SOCKET_MAXEVENTS;
        /* Result buffer: one slot per event a single wait can return. */
        manager->events = isc_mem_get(mctx, sizeof(struct epoll_event) *
                          manager->nevents);
        if (manager->events == NULL)
            return (ISC_R_NOMEMORY);
        /*
         * Since Linux 2.6.8 the size argument of epoll_create() is
         * ignored (it only has to be > 0); passing nevents is harmless.
         */
        manager->epoll_fd = epoll_create(manager->nevents);
        if (manager->epoll_fd == -1) {
            /* Creation failed: log it, free the event array, bail out. */
            result = isc__errno2result(errno);
            isc__strerror(errno, strbuf, sizeof(strbuf));
            UNEXPECTED_ERROR(__FILE__, __LINE__,
                     "epoll_create %s: %s",
                     isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
                            ISC_MSG_FAILED, "failed"),
                     strbuf);
            isc_mem_put(mctx, manager->events,
                    sizeof(struct epoll_event) * manager->nevents);
            return (result);
        }
    #ifdef USE_WATCHER_THREAD
        /*
         * Register the read end of the internal control pipe so the
         * watcher thread can be poked (e.g. told to exit).  On failure,
         * unwind both resources acquired above.
         */
        result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ);
        if (result != ISC_R_SUCCESS) {
            close(manager->epoll_fd);
            isc_mem_put(mctx, manager->events,
                    sizeof(struct epoll_event) * manager->nevents);
            return (result);
        }
    #endif    /* USE_WATCHER_THREAD */
    View Code

    先是创建了要监视的最大socket fd数目(manager->nevents)对应的epoll_event结构体数组,然后调用epoll_create函数创建一个epoll fd,参数则是指定监视的socket fd

    最大数目。我的内核版本是3.13,man一下epoll_create发现它是这样说的:epoll_create() creates an epoll(7) instance. Since Linux 2.6.8, the size argument is ignored, but must be greater than zero。这个函数在2.6.8内核以后就忽略参数size了,但是传递的参数值一定要大于0。后来找了一下资料,网上高手的博客说得很清楚:http://www.cnblogs.com/apprentice89/p/3234677.html。继续往下说,后面的watch_fd是在创建线程的情况下才有的,作用是将pipe_fds[0]这个管道描述符注册到epoll中——它和上述的socket fd一样,都可以归为可读的流。watch_fd的实现代码:

    #elif defined(USE_EPOLL)
            struct epoll_event event;
    
            /* Map the poke message onto the epoll interest bit. */
            if (msg == SELECT_POKE_READ)
                    event.events = EPOLLIN;
            else
                    event.events = EPOLLOUT;
            /* Zero the data union first so unused bytes are deterministic. */
            memset(&event.data, 0, sizeof(event.data));
            event.data.fd = fd;
            /*
             * Register the fd with the epoll instance.  An fd that is
             * already registered (EEXIST) is deliberately not an error.
             * Note: no EPOLLET here, so this is level-triggered mode.
             */
            if (epoll_ctl(manager->epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1 &&
                errno != EEXIST) {
                    result = isc__errno2result(errno);
            }
    
            return (result);
    View Code

    这是将pipe_fds[0]加入epoll_fd的监听队列,EPOLL_CTL_ADD是操作类型,注册该fd到epoll_fd上。这个管道的目的是接收管理该线程的消息,比如线程退出。

    那么进入线程看:

    /*
     * Watcher thread entry point (excerpt from lib/isc/socket.c).
     * Blocks in the platform's event-wait primitive (epoll_wait() on
     * Linux), dispatches ready descriptors via process_fds(), and exits
     * when a control message on the internal pipe requests it.
     */
    static isc_threadresult_t
    watcher(void *uap) {
        isc__socketmgr_t *manager = uap;
        isc_boolean_t done;
        int ctlfd;
        int cc;             /* number of ready events, or < 0 on error */
    #ifdef USE_KQUEUE
        const char *fnname = "kevent()";
    #elif defined (USE_EPOLL)
        const char *fnname = "epoll_wait()";
    #elif defined(USE_DEVPOLL)
        const char *fnname = "ioctl(DP_POLL)";
        struct dvpoll dvp;
    #elif defined (USE_SELECT)
        const char *fnname = "select()";
        int maxfd;
    #endif
        char strbuf[ISC_STRERRORSIZE];
    #ifdef ISC_SOCKET_USE_POLLWATCH
        pollstate_t pollstate = poll_idle;
    #endif
    
        /*
         * Get the control fd here.  This will never change.
         */
        ctlfd = manager->pipe_fds[0];
        done = ISC_FALSE;
        while (!done) {
            /* Retry the wait until it yields events (soft errors loop). */
            do {
    #ifdef USE_KQUEUE
                cc = kevent(manager->kqueue_fd, NULL, 0,
                        manager->events, manager->nevents, NULL);
    #elif defined(USE_EPOLL)
                /* Timeout -1: block indefinitely until something is ready. */
                cc = epoll_wait(manager->epoll_fd, manager->events,
                        manager->nevents, -1);
    #elif defined(USE_DEVPOLL)
                dvp.dp_fds = manager->events;
                dvp.dp_nfds = manager->nevents;
    #ifndef ISC_SOCKET_USE_POLLWATCH
                dvp.dp_timeout = -1;
    #else
                /*
                 * POLLWATCH workaround: use a finite timeout while
                 * checking for a suspected kernel bug (see below).
                 */
                if (pollstate == poll_idle)
                    dvp.dp_timeout = -1;
                else
                    dvp.dp_timeout = ISC_SOCKET_POLLWATCH_TIMEOUT;
    #endif    /* ISC_SOCKET_USE_POLLWATCH */
                cc = ioctl(manager->devpoll_fd, DP_POLL, &dvp);
    #elif defined(USE_SELECT)
                /* Copy the fd sets under the lock: select() mutates them. */
                LOCK(&manager->lock);
                memcpy(manager->read_fds_copy, manager->read_fds,
                       manager->fd_bufsize);
                memcpy(manager->write_fds_copy, manager->write_fds,
                       manager->fd_bufsize);
                maxfd = manager->maxfd + 1;
                UNLOCK(&manager->lock);
    
                cc = select(maxfd, manager->read_fds_copy,
                        manager->write_fds_copy, NULL, NULL);
    #endif    /* USE_KQUEUE */
    
                /* Hard failure (anything but EINTR & co.) is fatal. */
                if (cc < 0 && !SOFT_ERROR(errno)) {
                    isc__strerror(errno, strbuf, sizeof(strbuf));
                    FATAL_ERROR(__FILE__, __LINE__,
                            "%s %s: %s", fnname,
                            isc_msgcat_get(isc_msgcat,
                                   ISC_MSGSET_GENERAL,
                                   ISC_MSG_FAILED,
                                   "failed"), strbuf);
                }
    
    #if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH)
                /*
                 * Track whether timeouts come back-to-back; a timeout
                 * followed by events while "checking" suggests the
                 * /dev/poll kernel bug fired.
                 */
                if (cc == 0) {
                    if (pollstate == poll_active)
                        pollstate = poll_checking;
                    else if (pollstate == poll_checking)
                        pollstate = poll_idle;
                } else if (cc > 0) {
                    if (pollstate == poll_checking) {
                        /*
                         * XXX: We'd like to use a more
                         * verbose log level as it's actually an
                         * unexpected event, but the kernel bug
                         * reportedly happens pretty frequently
                         * (and it can also be a false positive)
                         * so it would be just too noisy.
                         */
                        manager_log(manager,
                                ISC_LOGCATEGORY_GENERAL,
                                ISC_LOGMODULE_SOCKET,
                                ISC_LOG_DEBUG(1),
                                "unexpected POLL timeout");
                    }
                    pollstate = poll_active;
                }
    #endif
            } while (cc < 0);
    
    #if defined(USE_KQUEUE) || defined (USE_EPOLL) || defined (USE_DEVPOLL)
            /* Dispatch ready fds; returns ISC_TRUE on shutdown request. */
            done = process_fds(manager, manager->events, cc);
    #elif defined(USE_SELECT)
            process_fds(manager, maxfd, manager->read_fds_copy,
                    manager->write_fds_copy);
    
            /*
             * Process reads on internal, control fd.
             */
            if (FD_ISSET(ctlfd, manager->read_fds_copy))
                done = process_ctlfd(manager);
    #endif
        }
    
        manager_log(manager, TRACE, "%s",
                isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
                       ISC_MSG_EXITING, "watcher exiting"));
    
        return ((isc_threadresult_t)0);
    }
    View Code

    无限循环:当epoll_fd上监听的描述符有IO事件发生时,epoll_wait将就绪的socket fd及其事件填入events数组并返回就绪个数。注意这里注册时没有设置EPOLLET,处于水平触发模式,fd在epoll_fd上的注册并不会被清除;只要事件尚未处理完,下次epoll_wait仍会再次返回它。

    process_fds遍历数组,找到对应的socket fd,并判断该fd是不是线程控制管道,如果是则会在执行完其他socket fd上的对应事件后再处理管道中的控制消息。

    /*
     * Dispatch the events returned by epoll_wait(): run the I/O handler
     * for every ready socket, deferring the watcher control pipe until
     * all socket work is done.  Returns ISC_TRUE when the control
     * channel asked the watcher loop to terminate.
     */
    static isc_boolean_t
    process_fds(isc__socketmgr_t *manager, struct epoll_event *events, int nevents)
    {
        int idx;
        isc_boolean_t request_exit = ISC_FALSE;
    #ifdef USE_WATCHER_THREAD
        isc_boolean_t ctl_ready = ISC_FALSE;
    #endif
    
        /*
         * A completely full event array hints that more events may still
         * be pending in the kernel; log it for diagnostics.
         */
        if (nevents == manager->nevents) {
            manager_log(manager, ISC_LOGCATEGORY_GENERAL,
                    ISC_LOGMODULE_SOCKET, ISC_LOG_INFO,
                    "maximum number of FD events (%d) received",
                    nevents);
        }
    
        for (idx = 0; idx < nevents; idx++) {
            struct epoll_event *ev = &events[idx];
    
            REQUIRE(ev->data.fd < (int)manager->maxsocks);
    #ifdef USE_WATCHER_THREAD
            /* Remember the control pipe; handle it after the sockets. */
            if (ev->data.fd == manager->pipe_fds[0]) {
                ctl_ready = ISC_TRUE;
                continue;
            }
    #endif
            if ((ev->events & (EPOLLERR | EPOLLHUP)) != 0) {
                /*
                 * epoll does not set IN/OUT bits on an erroneous
                 * condition, so we need to try both anyway.  This is a
                 * bit inefficient, but should be okay for such rare
                 * events.  Note also that the read or write attempt
                 * won't block because we use non-blocking sockets.
                 */
                ev->events |= (EPOLLIN | EPOLLOUT);
            }
            process_fd(manager, ev->data.fd,
                   (ev->events & EPOLLIN) != 0,
                   (ev->events & EPOLLOUT) != 0);
        }
    
    #ifdef USE_WATCHER_THREAD
        if (ctl_ready)
            request_exit = process_ctlfd(manager);
    #endif
    
        return (request_exit);
    }
    View Code

     待续

  • 相关阅读:
    套接字的工作流程
    信安系统设计基础(个人报告阅读说明)
    1.1Linux 系统简介(学习过程)
    1.12Linux下软件安装(学习过程)
    作业3.5
    作业1
    变量与基本数据类型
    python入门
    计算机基础知识补充
    计算机基础
  • 原文地址:https://www.cnblogs.com/ding-linux-coder/p/4432666.html
Copyright © 2011-2022 走看看