zoukankan      html  css  js  c++  java
  • 爬虫Larbin解析(二)——sequencer()

    分析的函数: void sequencer() 

    //位置:larbin-2.6.3/src/fetch/sequencer.cc
    void
    sequencer() {
        bool testPriority = true;
        if (space == 0) { // uint space = 0
            space = global::inter->putAll();
        }
        int still = space;
        if (still > maxPerCall) // #define maxPerCall 100
            still = maxPerCall;
        while (still) {
            if (canGetUrl(&testPriority)) {
                --space;
                --still;
            } else {
                still = 0;
            }
        }
    }

    所在的文件

    larbin-2.6.3/src/fetch/sequencer.h、larbin-2.6.3/src/fetch/sequencer.cc

    // Larbin
    // Sebastien Ailleret
    // 15-11-99 -> 15-11-99
    
    #ifndef SEQUENCER_H
    #define SEQUENCER_H
    
    /** Number of urls sequencer() may still hand out before it asks
     * global::inter->putAll() for a new allowance.
     * Exported only for debugging, handle with care */
    extern uint space;
    
    /** Call the sequencer: moves at most maxPerCall urls per call from the
     * url queues into their sites */
    void sequencer ();
    
    #endif
    View Code
    // Larbin
    // Sebastien Ailleret
    // 15-11-99 -> 04-01-02
    
    #include <iostream.h>
    
    #include "options.h"
    
    #include "global.h"
    #include "types.h"
    #include "utils/url.h"
    #include "utils/debug.h"
    #include "fetch/site.h"
    
    static bool canGetUrl (bool *testPriority);
    uint space = 0;
    
    #define maxPerCall 100
    
    /** Run one round of the sequencer.
     * Moves urls from the global queues into their sites, in priority order
     * (see canGetUrl). A round handles at most maxPerCall urls and never more
     * than the remaining allowance `space`; when the allowance is exhausted a
     * new one is requested from global::inter->putAll().
     */
    void sequencer()
    {
        // Allowance used up: ask the interval how many urls may be put now.
        if (space == 0)
            space = global::inter->putAll();

        // Work budget of this call, capped by maxPerCall.
        int budget = space;
        if (budget > maxPerCall)
            budget = maxPerCall;

        // canGetUrl clears this flag once the priority queue gave nothing.
        bool testPriority = true;
        for (; budget != 0; --budget, --space) {
            // Stop at the first failure; the counters are only decremented
            // for urls that were actually moved.
            if (!canGetUrl(&testPriority))
                break;
        }
    }
    
    /* Get the next url
     * here is defined how priorities are handled
     * Takes one url from the url queues (URLsPriorityWait, URLsPriority,
     * URLsDiskWait, URLsDisk - tried in that order) and stores it in the
     * NamedSite selected by the hash of the url's host.
     * Returns true when an url was moved, false when URLsDisk->tryGet()
     * returned NULL as well.
     * *testPriority is set to false as soon as neither priority source
     * delivered an url, so later calls that share the flag skip URLsPriority.
     */
    static bool canGetUrl (bool *testPriority) 
    {
        url *u;
        if (global::readPriorityWait)  // urls still to re-read from URLsPriorityWait; starts at 0 in global.cc, refreshed by cron()
        {
            global::readPriorityWait--;
            u = global::URLsPriorityWait->get();
            global::namedSiteList[u->hostHashCode()].putPriorityUrlWait(u);
            return true;
        } 
        else if (*testPriority && (u=global::URLsPriority->tryGet()) != NULL) 
        {
            // We've got one url (priority)
            global::namedSiteList[u->hostHashCode()].putPriorityUrl(u);
            return true;
        } 
        else 
        {
            // No priority url available: stop testing URLsPriority
            *testPriority = false;
            // Try to get an ordinary url
            if (global::readWait) 
            {
              // urls still to re-read from URLsDiskWait
              global::readWait--;
              u = global::URLsDiskWait->get();
              global::namedSiteList[u->hostHashCode()].putUrlWait(u);
              return true;
            } 
            else 
            {
                u = global::URLsDisk->tryGet();
                if (u != NULL) 
                {
                    global::namedSiteList[u->hostHashCode()].putUrl(u);
                    return true;
                }
                else 
                {
                    // every source is empty
                    return false;
                }
            }
        }
    }
    View Code

    一、 对于space = global::inter->putAll();

    1. inter在global.cc(位置:/larbin-2.6.3/src/global.cc)中的定义为

    inter = new Interval(ramUrls);   //#define ramUrls 100000  (位置:larbin-2.6.3/src/types.h)

    批注:区别 inter = new Interval(ramUrls);  和 inter = new Interval[ramUrls];  前一个()内是参数,要传入构造函数的;后一个[]内是开辟数组的个数。

    2. 类 Interval定义(位置:/larbin-2.6.3/src/fetch/site.h)

    /** This class is intended to make sure the sum of the
     * sizes of the fifo included in the different sites
     * are not too big.
     * It is a simple counter: `pos` slots out of `size` are currently
     * handed out; putAll() hands out all the free ones at once and
     * getOne() gives one back.
     */
    class Interval 
    {
        public:
            /** @param sizes total number of slots of the interval */
            Interval (uint sizes) : size(sizes), pos(0) {}
            ~Interval () {}
            /** How many urls can we put. Answer 0: if no urls can be put.
             * All free slots are marked as used by this call. */
            inline uint putAll () 
            { 
                // stay in unsigned arithmetic: no round trip through int
                const uint res = size - pos; 
                pos = size; 
                return res; 
            }
            /** Warn an url has been retrieved (frees one slot).
             * Must only be called while at least one slot is in use,
             * otherwise pos wraps around. */
            inline void getOne () 
            { 
                --pos; 
            }
            /** only for debugging, handle with care
             * @return number of slots currently in use */
            inline uint getPos () const
            { 
                return pos; 
            }
        private:
            /** Size of the interval */
            uint size;
            /** Position in the interval */
            uint pos;
    };
    View Code

    批注:类内的函数定义为inline。对内联函数的几点说明:

    • 内联函数避免函数调用的开销。将函数指定为内联函数,(通常)就是将它在程序的每个调用点上“内联地”展开,消除调用函数带来的额外开销(调用前先保存寄存器,并在返回时恢复)。内联说明(在函数返回值前加inline)对编译器来说只是一个建议,编译器可以选择忽略。一般内联函数适用于优化小的、只有几行、经常被调用的函数。大多数编译器不支持递归函数的内联。
    • 把内联函数放在头文件。以便编译器能够在调用点展开同一个函数(保证编译器可见、所有的定义相同)。
    • 编译器隐式地将在类内定义的成员函数当作为内联函数.

    二、 对于canGetUrl(&testPriority)

    函数定义(位置larbin-2.6.3/src/fetch/sequencer.cc)

    /* Get the next url
     * here is defined how priorities are handled
     按优先级从各个URL队列
     (比如URLsDisk,URLsDiskWait或URLsPriority,URLsPriorityWait)
     获取url保存到某个NameSite(通过url的hash值)
    
    at "global.cc"
    // FIFOs
    URLsDisk         = new PersistentFifo(reload, fifoFile);
    URLsDiskWait     = new PersistentFifo(reload, fifoFileWait);
    URLsPriority     = new SyncFifo<url>;
    URLsPriorityWait = new SyncFifo<url>;
    
     */
    static bool canGetUrl (bool *testPriority) 
    {
        url *u;
        if (global::readPriorityWait != 0)  // 在global.cc声明定义: uint global::readPriorityWait=0;
        {
            global::readPriorityWait--;
            u = global::URLsPriorityWait->get();
            global::namedSiteList[u->hostHashCode()].putPriorityUrlWait(u);
            return true;
        } 
        else if (*testPriority && (u=global::URLsPriority->tryGet()) != NULL) 
        {
            // We've got one url (priority)
            global::namedSiteList[u->hostHashCode()].putPriorityUrl(u);
            return true;
        } 
        else 
        {
            *testPriority = false;
            // Try to get an ordinary url
            if (global::readWait) 
            {
              global::readWait--;
              u = global::URLsDiskWait->get();
              global::namedSiteList[u->hostHashCode()].putUrlWait(u);
              return true;
            } 
            else 
            {
                u = global::URLsDisk->tryGet();
                if (u != NULL) 
                {
                    global::namedSiteList[u->hostHashCode()].putUrl(u);
                    return true;
                }
                else 
                {
                    return false;
                }
            }
        }
    }

    1. 为什么disk和priority的队列都是成对出现的:可以认为每个site在namedSiteList当中都有一个小的队列来保存它的url,这个队列中url的个数是有限制的,当超过这个限制的时候就不能再把该site下的url放入,但也不能丢弃,而是放入wait队列。Larbin会控制一段时间在disk队列中取url,一段时间在diskWait当中取url。disk和priority的区别只是优先级的区别。namedSiteList的作用是实现了DNS缓存。

              

    2. global::readPriorityWait 的值由main.cc的cron()函数中变化得知

    // see if we should read again urls in fifowait
    if ((global::now % 300) == 0) {
        global::readPriorityWait = global::URLsPriorityWait->getLength();
        global::readWait = global::URLsDiskWait->getLength();
    }
    if ((global::now % 300) == 150) {
        global::readPriorityWait = 0;
        global::readWait = 0;
    }

    这里global::now%300是判断这次是对wait里的url进行处理,还是对不是wait里的进行处理,这里的%300等于0或150的概率都是1/300,所以大约300次换一次。readPriorityWait是URLsPriorityWait中的长度(也就是url的数量);readWait是URLsDiskWait中url的个数。

    3. 在canGetUrl中,在对于每个站点,将相应的url放进去。putPriorityUrlWait, putPriorityUrl, putUrlWait, putUrl在site.h的定义如下

    /** Put an url in the fifo
     * If there are too much, put it back in UrlsInternal
     * Never fill totally the fifo => call at least with 1
     * `limit` is the head-room kept free: putGenericUrl diverts the url to a
     * wait queue when nburls > maxUrlsBySite - limit, so a smaller limit lets
     * the site accept more urls. `prio` selects URLsPriorityWait (true) or
     * URLsDiskWait (false) as that wait queue. */
    void putGenericUrl(url *u, int limit, bool prio);
    /* ordinary url coming from URLsDisk: largest head-room (15) */
    inline void putUrl(url *u) {
        putGenericUrl(u, 15, false);
    }
    /* ordinary url re-read from URLsDiskWait: head-room 10 */
    inline void putUrlWait(url *u) {
        putGenericUrl(u, 10, false);
    }
    /* priority url coming from URLsPriority: head-room 5 */
    inline void putPriorityUrl(url *u) {
        putGenericUrl(u, 5, true);
    }
    /* priority url re-read from URLsPriorityWait: smallest head-room (1) */
    inline void putPriorityUrlWait(url *u) {
        putGenericUrl(u, 1, true);
    }

     可以发现,每次都是调用函数putGenericUrl,其定义如下

    /* Put an url in the fifo if their are not too many */
    void NamedSite::putGenericUrl(url *u, int limit, bool prio) 
    {
        if (nburls > maxUrlsBySite - limit)
        {
            // Already enough Urls in memory for this Site
            // first check if it can already be forgotten
            if (!strcmp(name, u->getHost()))
            {
                if (dnsState == errorDns)
                {
                    nburls++;
                    forgetUrl(u, noDNS);
                    return;
                }
                if (dnsState == noConnDns)
                {
                    nburls++;
                    forgetUrl(u, noConnection);
                    return;
                }
                if (u->getPort() == port && dnsState == doneDns && !testRobots(u->getFile()))
                {
                    nburls++;
                    forgetUrl(u, forbiddenRobots);
                    return;
                }
            }
            // else put it back in URLsDisk
            refUrl();
            global::inter->getOne();
            if (prio)
            {
                global::URLsPriorityWait->put(u);
            }
            else
            {
                global::URLsDiskWait->put(u);
            }
        }

    如果已经有足够多的url在内存里,执行这里if中的代码。strcmp(name,u->getHost())是判断这个主机是不是已经进行过dns方面的判断,也就是说对于一个站点,只做一次dns解析的判断,以后就按这个结果进行处理:dnsState为errorDns(noDNS)或noConnDns,还有robots.txt不允许的情况,都直接调用forgetUrl丢弃;如果没有问题,就把它放回wait队列(prio为真时是URLsPriorityWait,否则是URLsDiskWait)。

    else {
        nburls++;
        if (dnsState == waitDns || strcmp(name, u->getHost()) || port
               != u->getPort() || global::now > dnsTimeout) {
           // dns not done or other site
           putInFifo(u);
           addNamedUrl();
           // Put Site in fifo if not yet in
           if (!isInFifo) {
               isInFifo = true;
               global::dnsSites->put(this);
           }
        } else
           switch (dnsState) {
           case doneDns:
               transfer(u);
               break;
           case errorDns:
               forgetUrl(u, noDNS);
               break;
           default: // noConnDns
               forgetUrl(u, noConnection);
           }
    }

     如果需要判断dns能不能解析,就将它放到dnsSites里,这个会在fetchDns中判断。或是如果还能放到内存里,并且又是doneDns,表示可以解析,就调用transfer

    /* Hand an url of this site over to the IPSite selected by ipHash.
     * Urls rejected by testRobots() are forgotten with forbiddenRobots.
     * Without a proxy the site's resolved address is copied into the url
     * first. */
    void NamedSite::transfer(url *u) {
        if (!testRobots(u->getFile())) {
            forgetUrl(u, forbiddenRobots);
            return;
        }
        const bool direct = (global::proxyAddr == NULL);
        if (direct) {
            memcpy(&u->addr, &addr, sizeof(struct in_addr));
        }
        global::IPSiteList[ipHash].putUrl(u);
    }

    这里是将url放入到IPSiteList的相应ipHash中。

    附类的定义

    类url定义(larbin-2.6.3/src/utils/url.h、larbin-2.6.3/src/utils/url.cc)

    // Larbin
    // Sebastien Ailleret
    // 15-11-99 -> 14-03-02
    
    /* This class describes an URL */
    
    #ifndef URL_H
    #define URL_H
    
    #include <netinet/in.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <stdlib.h>
    
    #include "types.h"
    
    bool fileNormalize (char *file);
    
    /* An http url split into host, port and file (path), plus its crawl
     * depth. host and file are heap strings released by the destructor. */
    class url {
     private:
      /* host name; NULL when the url could not be parsed */
      char *host;
      /* path part; NULL when the url could not be parsed */
      char *file;
      uint16_t port; // the order of variables is important for physical size
      int8_t depth;
      /* parse the url (the leading "http://" has already been removed) */
      void parse (char *s);
      /** parse a file with base: resolve u against base's host, port, file */
      void parseWithBase (char *u, url *base);
      /* normalize file name in place; false if it must be rejected */
      bool normalize (char *file);
      /* Does this url starts with a protocol name */
      bool isProtocol (char *s);
      /* constructor used by giveBase; host and file are adopted, not copied */
      url (char *host, uint port, char *file);
    
     public:
      /* Constructor : Parses an url (u is deleted)
       * NOTE(review): the constructor body does not free u itself - confirm
       * who releases it. base may be NULL; relative urls are then ignored. */
      url (char *u, int8_t depth, url *base);
    
      /* constructor used by input; only "http://" urls are accepted */
      url (char *line, int8_t depth);
    
      /* Constructor : read the url from a file (cf serialize)
       * line is modified in place while it is parsed */
      url (char *line);
    
      /* Destructor */
      ~url ();
    
      /* inet addr (once calculated) */
      struct in_addr addr;
    
      /* Is it a valid url ? (host and file set, sizes within the limits) */
      bool isValid ();
    
      /* print an URL */
      void print ();
    
      /* return the host */
      inline char *getHost () { return host; }
    
      /* return the port */
      inline uint getPort () { return port; }
    
      /* return the file */
      inline char *getFile () { return file; }
    
      /** Depth in the Site */
      inline int8_t getDepth () { return depth; }
    
      /* Set depth to max if we are at an entry point in the site
       * try to find the ip addr
       * answer false if forbidden by robots.txt, true otherwise
       * (on false, errno holds the reason) */
      bool initOK (url *from);
    
      /** return the base of the url
       * give means that you have to delete the string yourself
       */
      url *giveBase ();
    
      /** return a char * representation of the url
       * give means that you have to delete the string yourself
       */
      char *giveUrl ();
    
      /** write the url in a buffer
       * buf must be at least of size maxUrlSize
       * returns the size of what has been written (not including '\0')
       */
      int writeUrl (char *buf);
    
      /* serialize the url for the Persistent Fifo
       * (returns a static buffer, overwritten by the next call) */
      char *serialize ();
    
      /* very thread unsafe serialisation in a static buffer */
      char *getUrl();
    
      /* return a hashcode for the host of this url */
      uint hostHashCode ();
    
      /* return a hashcode for this url */
      uint hashCode ();
    
    #ifdef URL_TAGS
      /* tag associated to this url */
      uint tag;
    #endif // URL_TAGS
    
    #ifdef COOKIES
      /* cookies associated with this page */
      char *cookie;
      /* append the cookie of a "set-cookie: " header line, if any */
      void addCookie(char *header);
    #else // COOKIES
      /* cookies disabled: headers are ignored */
      inline void addCookie(char *header) {}
    #endif // COOKIES
    };
    
    #endif // URL_H
    View Code
    // Larbin
    // Sebastien Ailleret
    // 15-11-99 -> 16-03-02
    
    /* This class describes an URL */
    
    #include <assert.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    
    #include "options.h"
    
    #include "types.h"
    #include "global.h"
    #include "utils/url.h"
    #include "utils/text.h"
    #include "utils/connexion.h"
    #include "utils/debug.h"
    
    #ifdef COOKIES
    #define initCookie() cookie=NULL
    #else // COOKIES
    #define initCookie() ((void) 0)
    #endif // COOKIES
    
    /* small functions used later */
    /* Hash of a host name: base-37 polynomial over its characters, reduced
     * to an index in [0, namedSiteListSize) */
    static uint siteHashCode (char *host) {
      uint h=0;
      for (const char *p = host; *p != 0; ++p) {
        h = 37*h + *p;
      }
      return h % namedSiteListSize;
    }
    
    /* return the int with correspond to a char
     * -1 if not an hexa char */
    static int int_of_hexa (char c) {
      if (c >= '0' && c <= '9') return c - '0';
      if (c >= 'a' && c <= 'f') return c - 'a' + 10;
      if (c >= 'A' && c <= 'F') return c - 'A' + 10;
      return -1;
    }
    
    /* Move the NUL-terminated tail starting at src down to dst
     * (same buffer, dst < src), terminator included */
    static void pullTail (char *dst, const char *src) {
      memmove(dst, src, strlen(src) + 1);
    }
    
    /* normalize a file name : also called by robots.txt parser
     * return true if it is ok, false otherwise (cgi-bin)
     *
     * The string is rewritten in place (it can only get shorter):
     *  - "/./" and "//" collapse to "/", "/x/../" collapses to "/"
     *  - a trailing "/." or "/x/.." is cut back to the directory
     *  - everything from '#' on is dropped
     *  - "%XX" is decoded when it denotes a printable character, except
     *    "%20" and "%2F" which stay encoded
     * false is answered when ".." climbs above the root, when a '%' is not
     * followed by two hexa digits, or when "%XX" denotes a character that
     * is neither printable nor a space.
     */
    bool fileNormalize (char *file) {
      int i=0;
      while (file[i] != 0 && file[i] != '#') {
        if (file[i] == '/') {
          if (file[i+1] == '.' && file[i+2] == '/') {
            // suppress /./
            pullTail(file+i+1, file+i+3);
          } else if (file[i+1] == '/') {
            // replace // by /
            pullTail(file+i+1, file+i+2);
          } else if (file[i+1] == '.' && file[i+2] == '.' && file[i+3] == '/') {
            // suppress /../ together with the path segment before it
            if (i == 0) {
              // the file name starts with /../ : error
              return false;
            }
            const int tail = i+4;
            i--;
            while (i >= 0 && file[i] != '/') { i--; }
            if (i < 0) {
              // no '/' before the segment (name without leading '/') : error
              return false;
            }
            pullTail(file+i+1, file+tail);
          } else if (file[i+1] == '.' && file[i+2] == 0) {
            // suppress /.
            file[i+1] = 0;
            return true;
          } else if (file[i+1] == '.' && file[i+2] == '.' && file[i+3] == 0) {
            // suppress /..
            if (i == 0) {
              // the file name starts with /.. : error
              return false;
            }
            i--;
            while (i >= 0 && file[i] != '/') { i--; }
            if (i < 0) {
              // no '/' before the segment : error
              return false;
            }
            file[i+1] = 0;
            return true;
          } else { // nothing special, go forward
            i++;
          }
        } else if (file[i] == '%') {
          // Check the first digit before touching the second one: with a
          // trailing '%' file[i+2] lies beyond the terminator.
          const int v1 = int_of_hexa(file[i+1]);
          if (v1 < 0) return false;
          const int v2 = int_of_hexa(file[i+2]);
          if (v2 < 0) return false;
          const char c = 16 * v1 + v2;
          if (c == ' ' || c == '/') { // keep it with the % notation
            // tested first: '/' is a graphic char and would be decoded below
            i += 3;
          } else if (isgraph((unsigned char) c)) {
            // <ctype.h> functions need an unsigned char value
            file[i] = c;
            pullTail(file+i+1, file+i+3);
            i++;
          } else { // bad url
            return false;
          }
        } else { // nothing special, go forward
          i++;
        }
      }
      file[i] = 0;
      return true;
    }
    
    /**************************************/
    /* definition of methods of class url */
    /**************************************/
    
    /* Constructor : Parses an url
     * u is either an absolute "http://" url or, when base is given, an url
     * relative to base. Anything else (unknown protocol, relative url
     * without base) leaves host and file NULL. */
    url::url (char *u, int8_t depth, url *base) {
      newUrl();
      this->depth = depth;
      host = NULL;
      port = 80;
      file = NULL;
      initCookie();
    #ifdef URL_TAGS
      tag = 0;
    #endif // URL_TAGS
      if (startWith("http://", u)) {
        // absolute url
        parse (u + 7);
        // normalize file name; a rejected name invalidates the whole url
        if (file != NULL && !normalize(file)) {
          delete [] file;
          file = NULL;
          delete [] host;
          host = NULL;
        }
        return;
      }
      if (base == NULL) {
        // relative url but nothing to resolve it against
        return;
      }
      if (startWith("http:", u)) {
        // "http:" followed by a relative part
        parseWithBase(u+5, base);
        return;
      }
      // Unknown protocols (mailto, ftp, news, file, gopher...) are ignored
      if (!isProtocol(u)) {
        parseWithBase(u, base);
      }
    }
    
    /* constructor used by input
     * line holds an absolute "http://" url, preceded by a decimal tag and
     * one separator character when URL_TAGS is defined. Any other content
     * leaves host and file NULL. */
    url::url (char *line,  int8_t depth) {
      newUrl();
      this->depth = depth;
      host = NULL;
      port = 80;
      file = NULL;
      initCookie();
      char *cursor = line;
    #ifdef URL_TAGS
      tag = 0;
      for (; *cursor >= '0' && *cursor <= '9'; ++cursor) {
        tag = 10*tag + *cursor - '0';
      }
      ++cursor; // skip the separator that follows the tag
    #endif // URL_TAGS
      if (!startWith("http://", cursor)) {
        return;
      }
      parse(cursor+7);
      // normalize file name; a rejected name invalidates the whole url
      if (file != NULL && !normalize(file)) {
        delete [] file;
        file = NULL;
        delete [] host;
        host = NULL;
      }
    }
    
    /* Constructor : read the url from a file (cf serialize)
     * Expected layout, as written by serialize():
     *   <depth> [<tag> ]<host>:<port><file>[ <cookie>]
     * line is modified in place (separators are overwritten with 0).
     * No validation is done: the host scan assumes a ':' is present.
     */
    url::url (char *line) {
      newUrl();
      int i=0;
      // Read depth
      depth = 0;
      while (line[i] >= '0' && line[i] <= '9') {
        depth = 10*depth + line[i] - '0';
        i++;
      }
    #ifdef URL_TAGS
      // read tag (i++ skips the blank after the depth)
      tag = 0; i++;
      while (line[i] >= '0' && line[i] <= '9') {
        tag = 10*tag + line[i] - '0';
        i++;
      }
    #endif // URL_TAGS
      // skip the blank before the host and remember where the host starts
      int deb = ++i;
      // Read host
      while (line[i] != ':') {
        i++;
      }
      // cut the line at ':' so that line+deb is exactly the host name
      line[i] = 0;
      host = newString(line+deb);
      i++;
      // Read port
      port = 0;
      while (line[i] >= '0' && line[i] <= '9') {
        port = 10*port + line[i] - '0';
        i++;
      }
    #ifndef COOKIES
      // Read file name
      file = newString(line+i);
    #else // COOKIES
      // the cookie, if any, follows the file name after a blank
      char *cpos = strchr(line+i, ' ');
      if (cpos == NULL) {
        cookie = NULL;
      } else {
        // cut the line so that the file name ends before the cookie
        *cpos = 0;
        // read cookies
        // NOTE(review): unbounded strcpy into maxCookieSize bytes - relies on
        // the stored cookie never exceeding that size; confirm
        cookie = new char[maxCookieSize];
        strcpy(cookie, cpos+1);
      }
      // Read file name
      file = newString(line+i);
    #endif // COOKIES
    }
    
    /* constructor used by giveBase
     * host and file are adopted as they are (no copy): they must be
     * new[]-allocated strings, the destructor releases them with delete []. */
    url::url (char *host, uint port, char *file) {
      newUrl();
      initCookie();
      // The other constructors all set these members; give them a defined
      // value here too so getDepth() and serialize() never read
      // uninitialised memory.
      depth = 0;
    #ifdef URL_TAGS
      tag = 0;
    #endif // URL_TAGS
      this->host = host;
      this->port = port;
      this->file = file;
    }
    
    /* Destructor : releases the strings owned by the url */
    url::~url () {
      delUrl();
    #ifdef COOKIES
      delete [] cookie;
    #endif // COOKIES
      delete [] file;
      delete [] host;
    }
    
    /* Is it a valid url ?
     * Yes when host and file are both set, the host is shorter than
     * maxSiteSize and host + file + 18 extra characters stay below
     * maxUrlSize. */
    bool url::isValid () {
      if (host == NULL || file == NULL) {
        return false;
      }
      int hostLen = strlen(host);
      if (!(hostLen < maxSiteSize)) {
        return false;
      }
      return hostLen + strlen(file) + 18 < maxUrlSize;
    }
    
    /* print an URL */
    void url::print () {
      printf("http://%s:%u%s
    ", host, port, file);
    }
    
    /* Set depth to max if necessary
     * try to find the ip addr
     * answer false if forbidden by robots.txt, true otherwise
     * from is the url of the page this url was found in.
     * On false, errno tells why: tooDeep, fastNoDns, fastNoConn or
     * fastRobots. */
    bool url::initOK (url *from) {
    #if defined(DEPTHBYSITE) || defined(COOKIES)
      if (strcmp(from->getHost(), host)) { // different site
    #ifdef DEPTHBYSITE
        // entering another site: restart from the configured depth
        depth = global::depthInSite;
    #endif // DEPTHBYSITE
      } else { // same site
    #ifdef COOKIES
        // same site: inherit the cookie of the referring page
        if (from->cookie != NULL) {
          cookie = new char[maxCookieSize];
          strcpy(cookie, from->cookie);
        }
    #endif // COOKIES
      }
    #endif // defined(DEPTHBYSITE) || defined(COOKIES)
      if (depth < 0) {
        errno = tooDeep;
        return false;
      }
      // Use what is already known about the site stored in this hash slot,
      // provided the slot currently describes the same host and port
      NamedSite *ns = global::namedSiteList + (hostHashCode());
      if (!strcmp(ns->name, host) && ns->port == port) {
        switch (ns->dnsState) {
        case errorDns:
          errno = fastNoDns;
          return false;
        case noConnDns:
          errno = fastNoConn;
          return false;
        case doneDns:
          if (!ns->testRobots(file)) {
            errno = fastRobots;
            return false;
          }
        // any other state: nothing known yet, accept the url
        }
      }
      return true;
    }
    
    /* return the base of the url
     * The base has the same host and port; its file is this file cut after
     * its last '/'. The answer is a new url owning new strings: the caller
     * has to delete it. */
    url *url::giveBase () {
      assert (file[0] == '/');
      // a '/' always exists: file starts with one
      const char *lastSlash = strrchr(file, '/');
      const int dirLen = (lastSlash - file) + 1;
      char *dir = new char[dirLen + 1];
      memcpy(dir, file, dirLen);
      dir[dirLen] = 0;
      return new url(newString(host), port, dir);
    }
    
    /** return a char * representation of the url
     * give means that you have to delete the string yourself
     * The port is only written when it is not 80.
     */
    char *url::giveUrl () {
      const int fileLen = strlen(file);
      const int hostLen = strlen(host);
    
      // 7 + hostLen + 1 + 9 + fileLen + 1
      // http://(host):(port)(file)
      char *out = new char[18+fileLen+hostLen];
      memcpy(out, "http://", 7);
      strcpy(out+7, host);
      int pos = 7 + hostLen;
      if (port != 80) {
        pos += sprintf(out + pos, ":%u", port);
      }
      // Copy file name, terminator included
      memcpy(out + pos, file, fileLen + 1);
      return out;
    }
    
    /** write the url in a buffer
     * buf must be at least of size maxUrlSize
     * returns the size of what has been written (not including '\0')
     * The port is only written when it is not 80.
     */
    int url::writeUrl (char *buf) {
      int written;
      if (port != 80) {
        written = sprintf(buf, "http://%s:%u%s", host, port, file);
      } else {
        written = sprintf(buf, "http://%s%s", host, file);
      }
      return written;
    }
    
    /* serialize the url for the Persistent Fifo
     * Layout: "<depth> [<tag> ]<host>:<port><file>[ <cookie>]\n"
     * (tag only with URL_TAGS, cookie only with COOKIES and when set).
     * The answer is a static buffer: it is overwritten by the next call. */
    char *url::serialize () {
      // this buffer is protected by the lock of PersFifo
      static char statstr[maxUrlSize+40+maxCookieSize];
      int pos = sprintf(statstr, "%u ", depth);
    #ifdef URL_TAGS
      pos += sprintf(statstr+pos, "%u ", tag);
    #endif // URL_TAGS
      pos += sprintf(statstr+pos, "%s:%u%s", host, port, file);
    #ifdef COOKIES
      if (cookie != NULL) {
        pos += sprintf(statstr+pos, " %s", cookie);
      }
    #endif // COOKIES
      // end of record; the '\n' literal had been split over two lines
      statstr[pos] = '\n';
      statstr[pos+1] = 0;
      return statstr;
    }
    
    /* very thread unsafe serialisation in a static buffer */
    char *url::getUrl() {
      static char statstr[maxUrlSize+40];
      sprintf(statstr, "http://%s:%u%s", host, port, file);
      return statstr;
    }
    
    /* return a hashcode for the host of this url
     * (siteHashCode of host, i.e. an index below namedSiteListSize) */
    uint url::hostHashCode () {
      return siteHashCode (host);
    }
    
    /* return a hashcode for this url
     * Base-31 polynomial seeded with the port and fed with the characters
     * of host, then of file; the answer is below hashSize. */
    uint url::hashCode () {
      unsigned int h=port;
      for (const char *p = host; *p != 0; ++p) {
        h = 31*h + *p;
      }
      for (const char *p = file; *p != 0; ++p) {
        h = 31*h + *p;
      }
      return h % hashSize;
    }
    
    /* parses a url : 
     * at the end, arg must have its initial state, 
     * http:// has allready been suppressed
     * Fills host (lower-cased), port (only when ":<digits>" is present) and
     * file ("/" when the url has no path). An empty host leaves everything
     * untouched.
     */
    void url::parse (char *arg) {
      // Find the end of host name
      int hostLen = 0;
      while (arg[hostLen] != '/' && arg[hostLen] != ':' && arg[hostLen] != 0) {
        hostLen++;
      }
      if (hostLen == 0) return;
    
      // get host name (put it into lowerCase)
      host = new char[hostLen+1];
      for (int k=0; k<hostLen; k++) {
        host[k] = lowerCase(arg[k]);
      }
      host[hostLen] = 0;
    
      // get port number
      char *rest = arg + hostLen;
      if (*rest == ':') {
        port = 0;
        for (++rest; *rest >= '0' && *rest <= '9'; ++rest) {
          port = port*10 + *rest-'0';
        }
      }
    
      // get file name
      if (*rest == '/') {
        file = newString(rest);
      } else {
        // www.inria.fr => add the final /
        file = newString("/");
      }
    }
    
    /** parse a file with base
     * u is taken as it is when it starts with '/', otherwise it is appended
     * to the file of base. When the resulting name cannot be normalized,
     * file is reset to NULL and host stays unset; otherwise host and port
     * are copied from base.
     */
    void url::parseWithBase (char *u, url *base) {
      if (u[0] != '/') {
        // cat filebase and file
        uint lenb = strlen(base->file);
        const size_t lenu = strlen(u);
        char *joined = new char[lenb + lenu + 1];
        memcpy(joined, base->file, lenb);
        memcpy(joined + lenb, u, lenu + 1); // terminator included
        file = joined;
      } else {
        file = newString(u);
      }
      if (normalize(file)) {
        host = newString(base->host);
        port = base->port;
      } else {
        delete [] file;
        file = NULL;
      }
    }
    
    /** normalize file name
     * return true if it is ok, false otherwise (cgi-bin)
     * Plain forwarder to fileNormalize(), which rewrites file in place.
     */
    bool url::normalize (char *file) {
      return fileNormalize(file);
    }
    
    /* Does this url starts with a protocol name
     * i.e. a (possibly empty) run of letters and digits followed by ':' */
    bool url::isProtocol (char *s) {
      uint i = 0;
      // <ctype.h> functions need an unsigned char value: a plain char above
      // 0x7f can be negative, which is undefined for isalnum
      while (isalnum((unsigned char) s[i])) {
        i++;
      }
      return s[i] == ':';
    }
    
    #ifdef COOKIES
    /* Append s at the end of the cookie buffer, never writing beyond
     * maxCookieSize and always leaving the buffer terminated.
     * Needs an `int len` in scope. The three statements form ONE macro: the
     * line continuations below are required. */
    #define addToCookie(s) len = strlen(cookie); \
        strncpy(cookie+len, s, maxCookieSize-len); \
        cookie[maxCookieSize-1] = 0;
    
    /* see if a header contain a new cookie
     * Only "set-cookie: " headers containing a ';' are used: the text
     * between the header name and the first ';' is appended to the cookie,
     * separated from the previous content by "; ". header is left unchanged
     * (the ';' is restored). */
    void url::addCookie(char *header) {
      if (startWithIgnoreCase("set-cookie: ", header)) {
        char *pos = strchr(header+12, ';');
        if (pos != NULL) {
          int len;
          if (cookie == NULL) {
            // first cookie: start with an empty buffer
            cookie = new char[maxCookieSize];
            cookie[0] = 0;
          } else {
            addToCookie("; ");
          }
          // temporarily cut the header at the ';'
          *pos = 0;
          addToCookie(header+12);
          *pos = ';';
        }
      }
    }
    #endif // COOKIES
    View Code

    global::namedSiteList

    NamedSite *global::namedSiteList;
    namedSiteList = new NamedSite[namedSiteListSize];
    /* One slot of global::namedSiteList: what is known about a host
     * (name, port, dns state, robots.txt rules) plus a small fifo of urls
     * waiting for it. Slots are selected with url::hostHashCode(). */
    class NamedSite 
    {
        private:
            /* string used for following CNAME chains (just one jump) */
            char *cname;
            /** we've got a good dns answer
            * get the robots.txt */
            void dnsOK ();
            /** Cannot get the inet addr
            * dnsState must have been set properly before the call */
            void dnsErr ();
            /** Delete the old identity of the site */
            void newId ();
            /** put this url in its IPSite */
            void transfer (url *u);
            /** forget this url for this reason */
            void forgetUrl (url *u, FetchError reason);
        public:
            /** Constructor */
            NamedSite ();
            /** Destructor : never used */
            ~NamedSite ();
            /* name of the site */
            char name[maxSiteSize];
            /* port of the site */
            uint16_t port;
            /* numbers of urls in ram for this site */
            uint16_t nburls;
            /* fifo of urls waiting to be fetched
             * circular buffer: inFifo is the next slot to write, outFifo the
             * next slot to read, inFifo == outFifo means empty */
            url *fifo[maxUrlsBySite];
            uint8_t inFifo;
            uint8_t outFifo;
            void putInFifo(url *u);
            url *getInFifo();
            short fifoLength();
            /** Is this Site in a dnsSites */
            bool isInFifo;
            /** internet addr of this server */
            char dnsState;
            struct in_addr addr;
            uint ipHash;
            /* Date of expiration of dns call and robots.txt fetch */
            time_t dnsTimeout;
            /** test if a file can be fetched thanks to the robots.txt */
            bool testRobots(char *file);
            /* forbidden paths : given by robots.txt */
            Vector<char> forbidden;
            /** Put an url in the fifo
            * If there are too much, put it back in UrlsInternal
            * Never fill totally the fifo => call at least with 1
            * limit is the head-room kept free in the site, prio selects the
            * priority wait queue instead of the disk wait queue */
            void putGenericUrl(url *u, int limit, bool prio);
            inline void putUrl (url *u) { putGenericUrl(u, 15, false); }
            inline void putUrlWait (url *u) { putGenericUrl(u, 10, false); }
            inline void putPriorityUrl (url *u) { putGenericUrl(u, 5, true); }
            inline void putPriorityUrlWait (url *u) { putGenericUrl(u, 1, true); }
            /** Init a new dns query */
            void newQuery ();
            /** The dns query ended with success */
            void dnsAns (adns_answer *ans);
            /** we got the robots.txt, transfer what must be in IPSites */
            void robotsResult (FetchError res);
    };
    View Code
    ///////////////////////////////////////////////////////////
    // class NamedSite
    ///////////////////////////////////////////////////////////
    
    /** Constructor : initiate fields used by the program
     * The site starts anonymous (empty name), without urls, outside of
     * dnsSites and waiting for a dns resolution.
     */
    NamedSite::NamedSite () 
    {
      cname = NULL;
      dnsState = waitDns;
      isInFifo = false;
      outFifo = 0;
      inFifo = 0;
      nburls = 0;
      name[0] = 0;
    }
    
    /** Destructor : This one is never used
     * Sites are not meant to be destroyed: the assert flags any attempt.
     */
    NamedSite::~NamedSite () {
      assert(false);
    }
    
    /* Management of the Fifo */
    /* Append an url to the circular fifo. The fifo must never get totally
     * full: the write index may not catch up with the read index. */
    void NamedSite::putInFifo(url *u) {
      const uint8_t slot = inFifo;
      inFifo = (slot + 1) % maxUrlsBySite;
      fifo[slot] = u;
      assert(inFifo!=outFifo);
    }
    
    /* Remove and return the oldest url of the fifo (which must not be empty) */
    url *NamedSite::getInFifo() {
      assert (inFifo != outFifo);
      const uint8_t slot = outFifo;
      outFifo = (slot + 1) % maxUrlsBySite;
      return fifo[slot];
    }
    
    /* Number of urls currently stored in the circular fifo */
    short NamedSite::fifoLength() {
      // adding maxUrlsBySite keeps the difference positive after a wrap
      short stored = (inFifo + maxUrlsBySite - outFifo) % maxUrlsBySite;
      return stored;
    }
    
    /* Put an url in the fifo if their are not too many
     * u     : url to store in this site
     * limit : head-room to keep free; the url is refused when
     *         nburls > maxUrlsBySite - limit
     * prio  : refused urls go to URLsPriorityWait when true,
     *         to URLsDiskWait otherwise
     */
    void NamedSite::putGenericUrl(url *u, int limit, bool prio) {
      if (nburls > maxUrlsBySite-limit) {
        // Already enough Urls in memory for this Site
        // first check if it can already be forgotten
        // (only when this slot currently describes the host of u)
        if (!strcmp(name, u->getHost())) {
          // NOTE(review): nburls is incremented before each forgetUrl() -
          // presumably forgetUrl() decrements it again; confirm
          if (dnsState == errorDns) {
            nburls++;
            forgetUrl(u, noDNS);
            return;
          }
          if (dnsState == noConnDns) {
            nburls++;
            forgetUrl(u, noConnection);
            return;
          }
          if (u->getPort() == port
              && dnsState == doneDns && !testRobots(u->getFile())) {
            nburls++;
            forgetUrl(u, forbiddenRobots);
            return;
          }
        }
        // else put it back in URLsDisk
        // (in fact in the wait queue matching prio; the slot taken in
        // global::inter is given back)
        refUrl();
        global::inter->getOne();
        if (prio) {
          global::URLsPriorityWait->put(u);
        } else {
          global::URLsDiskWait->put(u);
        }
      } else {
        nburls++;
        if (dnsState == waitDns
            || strcmp(name, u->getHost())
            || port != u->getPort()
            || global::now > dnsTimeout) {
          // dns not done or other site
          // (or dns information expired): queue the url in the site
          putInFifo(u);
          addNamedUrl();
          // Put Site in fifo if not yet in
          if (!isInFifo) {
            isInFifo = true;
            global::dnsSites->put(this);
          }
        } else switch (dnsState) {
          // same host and port, dns state known and still valid
        case doneDns:
          transfer(u);
          break;
        case errorDns:
          forgetUrl(u, noDNS);
          break;
        default: // noConnDns
          forgetUrl(u, noConnection);
        }
      }
    }
    
    /** Start the resolution of the site found at the head of the fifo.
     * Three cases: a proxy is configured, the name starts with a digit
     * (tried as a dotted address), or an adns query is submitted.
     */
    void NamedSite::newQuery ()
    {
        // Take the identity (name, port) of the url at the head of the fifo
        newId();

        if (global::proxyAddr != NULL)
        {
            // Going through a proxy: no sockaddr needed, go on directly
            siteSeen();
            siteDNS();
            // Get the robots.txt
            dnsOK();
            return;
        }

        if (!isdigit(name[0]))
        {
            // Regular host name: submit an adns query
            global::nbDnsCalls++;
            adns_query quer = NULL;
            adns_submit(global::ads, name,
                        (adns_rrtype) adns_r_addr,
                        (adns_queryflags) 0,
                        this, &quer);
            return;
        }

        // The name may already be in numbers-and-dots notation
        siteSeen();
        if (!inet_aton(name, &addr))
        {
            // It is not a valid address: this site is a non sense
            dnsState = errorDns;
            dnsErr();
            return;
        }
        siteDNS();
        // Get the robots.txt
        dnsOK();
    }
    
    /** Handle the answer of an adns query.
     * A prohibited-cname status triggers one more query on the cname; a second
     * one in a row is treated as a dns error. Any other status either stores
     * the address and goes on with dnsOK(), or reports the error with dnsErr().
     */
    void NamedSite::dnsAns (adns_answer *ans)
    {
        if (ans->status != adns_s_prohibitedcname)
        {
            siteSeen();
            // the cname copy is no longer needed
            if (cname != NULL)
            {
                delete [] cname;
                cname = NULL;
            }
            if (ans->status == adns_s_ok)
            {
                siteDNS();
                // remember the address that was found
                memcpy (&addr,
                        &ans->rrs.addr->addr.inet.sin_addr,
                        sizeof (struct in_addr));
                // Get the robots.txt
                dnsOK();
            }
            else
            {
                // No inet address
                dnsState = errorDns;
                dnsErr();
            }
            return;
        }

        if (cname != NULL)
        {
            // dns chain too long => dns error
            // cf nslookup or host for more information
            siteSeen();
            delete [] cname;
            cname = NULL;
            dnsState = errorDns;
            dnsErr();
            return;
        }

        // first cname met: try to find the ip of the cname
        cname = newString(ans->cname);
        global::nbDnsCalls++;
        adns_query quer = NULL;
        adns_submit(global::ads, cname,
                    (adns_rrtype) adns_r_addr,
                    (adns_queryflags) 0,
                    this, &quer);
    }
    
    /** The address of the site is known: request its robots.txt.
     * Takes a connection from global::freeConns, so one must be available.
     * If no socket can be obtained the connection is given back, dnsState
     * becomes noConnDns and dnsErr() is called.
     */
    void NamedSite::dnsOK () {
      Connexion *conn = global::freeConns->get();
      char res = getFds(conn, &addr, port);
      if (res != emptyC) {
        conn->timeout = timeoutPage;
        if (global::proxyAddr != NULL) {
          // use a proxy: the request line carries the absolute url
          conn->request.addString("GET http://");
          conn->request.addString(name);
          char tmp[15];
          sprintf(tmp, ":%u", port);
          conn->request.addString(tmp);
          // request line ends with CRLF, then the Host header starts
          conn->request.addString("/robots.txt HTTP/1.0\r\nHost: ");
        } else {
          // direct connection
          conn->request.addString("GET /robots.txt HTTP/1.0\r\nHost: ");
        }
        // value of the Host header, followed by the remaining headers
        conn->request.addString(name);
        conn->request.addString(global::headersRobots);
        conn->parser = new robots(this, conn);
        conn->pos = 0;
        conn->err = success;
        conn->state = res;
      } else {
        // Unable to get a socket
        global::freeConns->put(conn);
        dnsState = noConnDns;
        dnsErr();
      }
    }
    
    /** The address could not be obtained (or no connection was available).
     * dnsState must have been set before the call: errorDns maps to noDNS,
     * anything else to noConnection. Urls of this host are forgotten, urls of
     * other hosts stay in the fifo.
     */
    void NamedSite::dnsErr ()
    {
      FetchError theErr = noConnection;
      if (dnsState == errorDns) {
        theErr = noDNS;
      }
      // visit every url that is in the fifo right now, exactly once
      for (int left = fifoLength(); left > 0; left--) {
        url *u = getInFifo();
        if (strcmp(name, u->getHost())) {
          // different name: keep it for a later query
          putInFifo(u);
        } else {
          delNamedUrl();
          forgetUrl(u, theErr);
        }
      }
      // where should this site lie now
      if (inFifo != outFifo) {
        global::dnsSites->put(this);
      } else {
        isInFifo = false;
      }
    }
    
    /** test if a file can be fetched thanks to the robots.txt */
    bool NamedSite::testRobots(char *file) {
      uint pos = forbidden.getLength();
      for (uint i=0; i<pos; i++) {
        if (robotsMatch(forbidden[i], file))
          return false;
      }
      return true;
    }
    
    /** Delete the old identity of the site */
    void NamedSite::newId () {
      // ip expires or new name or just new port
      // Change the identity of this site
    #ifndef NDEBUG
      if (name[0] == 0) {
        addsite();
      }
    #endif // NDEBUG
      url *u = fifo[outFifo];
      strcpy(name, u->getHost());
      port = u->getPort();
      dnsTimeout = global::now + dnsValidTime;
      dnsState = waitDns;
    }
    
    /** The robots.txt fetch is over.
     * Unless res is noConnection, compute ipHash and move to doneDns; then
     * scan the fifo once: urls of this host are transferred (same port), kept
     * (other port) or forgotten (no connection); urls of other hosts are kept.
     */
    void NamedSite::robotsResult (FetchError res)
    {
      const bool ok = (res != noConnection);
      if (!ok) {
        dnsState = noConnDns;
      } else {
        dnsState = doneDns;
        if (global::proxyAddr != NULL) {
          // no ip and need to avoid rapidFire => use the index of this site
          ipHash = this - global::namedSiteList;
        } else {
          // hash the raw bytes of the address
          ipHash = 0;
          char *bytes = (char *) &addr;
          uint idx = 0;
          while (idx < sizeof(struct in_addr)) {
            ipHash = ipHash*31 + bytes[idx];
            idx++;
          }
        }
        ipHash %= IPSiteListSize;
      }

      // visit every url that is in the fifo right now, exactly once
      for (int left = fifoLength(); left > 0; left--) {
        url *u = getInFifo();
        if (strcmp(name, u->getHost())) {
          // other host: keep it
          putInFifo(u);
          continue;
        }
        delNamedUrl();
        if (!ok) {
          forgetUrl(u, noConnection);
          continue;
        }
        if (port == u->getPort()) {
          transfer(u);
        } else {
          putInFifo(u);
        }
      }

      // where should this site lie now
      if (inFifo != outFifo) {
        global::dnsSites->put(this);
      } else {
        isInFifo = false;
      }
    }
    
    /* Pass u to the IPSite selected by ipHash, unless robots.txt forbids it. */
    void NamedSite::transfer (url *u)
    {
      if (!testRobots(u->getFile())) {
        forgetUrl(u, forbiddenRobots);
        return;
      }
      if (global::proxyAddr == NULL) {
        // direct connection: the url carries the address of the site
        memcpy (&u->addr, &addr, sizeof (struct in_addr));
      }
      global::IPSiteList[ipHash].putUrl(u);
    }
    
    /* Give up on u for the given reason: report the failure, uncount the
     * url, delete it and call global::inter->getOne(). */
    void NamedSite::forgetUrl (url *u, FetchError reason)
    {
      urls();
      fetchFail(u, reason);
      answers(reason);
      --nburls;
      delete u;
      global::inter->getOne();
    }
    View Code

    其中两个类的定义

    larbin-2.6.3/src/utils/PersistentFifo.h、larbin-2.6.3/src/utils/PersistentFifo.cc

    // Larbin
    // Sebastien Ailleret
    // 06-01-00 -> 12-06-01
    
    /* this fifo is stored on disk */
    
    #ifndef PERSFIFO_H
    #define PERSFIFO_H
    
    #include <dirent.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <errno.h>
    #include <string.h>
    
    #include "types.h"
    #include "utils/url.h"
    #include "utils/text.h"
    #include "utils/connexion.h"
    #include "utils/mypthread.h"
    
    /* Fifo of urls whose content is stored in numbered files on disk.
     * NOTE(review): lock is only declared under THREAD_OUTPUT; the
     * mypthread_* wrappers are presumably no-ops otherwise - confirm.
     */
    class PersistentFifo 
    {
        protected:
            // counters of urls written (in) and read (out); empty when equal
            uint in, out;
            #ifdef THREAD_OUTPUT
            // taken by tryGet, get and put
            pthread_mutex_t lock;
            #endif
            // index in fileName of the last digit of the file number
            uint fileNameLength;
            // numbers of the files used for writing (fin) and for reading (fout)
            int fin, fout;
            // base name followed by the digits of a file number
            char *fileName;
    
        protected:
            // Write the decimal digits of nb at the end of fileName
            void makeName(uint nb);
            // Read the number made of the last 6 characters of file
            int getNumber(char *file);
            // Switch to the next file for reading when out is a multiple of urlByFile
            void updateRead ();
            // Switch to the next file for writing when in is a multiple of urlByFile
            void updateWrite ();
    
        protected:
            // buffer filled by writeUrl and written out by flushOut
            char outbuf[BUF_SIZE];
            // number of char used in this buffer
            uint outbufPos;
            // buffer used for readLine
            char buf[BUF_SIZE];
            // start of the unread data and end of the valid data in buf
            uint bufPos, bufEnd;
            // file descriptors for reading and writing
            int rfds, wfds;
        protected:
            // read a line on rfds
            char *readLine ();
            // write an url in the out file (buffered write)
            void writeUrl (char *s);
            // Flush the out Buffer in the outFile
            void flushOut ();
    
        public:
            // reload: resume from the files found in the current directory
            // instead of deleting them; baseName: prefix of the file names
            PersistentFifo (bool reload, char *baseName);
            ~PersistentFifo ();
    
            /* get the first object (non totally blocking)
            * return NULL if there is none
            */
            url *tryGet ();
    
            /* get the first object (non totally blocking)
            * probably crash if there is none
            */
            url *get ();
    
            /* add an object in the fifo; the object is deleted by the call */
            void put (url *obj);
    
            /* how many items are there inside ? */
            int getLength ();
    };
    
    #endif // PERSFIFO_H
    View Code
    // Larbin
    // Sebastien Ailleret
    // 27-05-01 -> 04-01-02
    
    #include <string.h>
    #include <assert.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>
    #include <iostream.h>
    
    #include "types.h"
    #include "global.h"
    #include "utils/mypthread.h"
    #include "utils/PersistentFifo.h"
    
    /** Build a fifo backed by files of the current directory whose names are
     * baseName followed by a 6-digit number.
     *
     * reload   true: resume from the files left by a previous run (the highest
     *          number becomes the write file, the lowest the read file);
     *          false: delete every file starting with baseName and start empty
     * baseName prefix of the file names
     *
     * Exits the process if the directory cannot be opened or if the previous
     * state cannot be reloaded.
     */
    PersistentFifo::PersistentFifo (bool reload, char *baseName) 
    {
      fileNameLength = strlen(baseName)+5;
      fileName = new char[fileNameLength+2];
      strcpy(fileName, baseName);
      fileName[fileNameLength+1] = 0;
      outbufPos = 0;
      bufPos = 0;
      bufEnd = 0;
      mypthread_mutex_init(&lock, NULL);

      DIR *dir = opendir(".");
      if (dir == NULL) 
      {
        // readdir on a NULL stream would crash: report and stop instead
        perror("opendir");
        exit(1);
      }
      struct dirent *name;

      if (reload) 
      {
        // look for the lowest (fout) and highest (fin) file numbers
        fin = -1;
        fout = -1;
        while ((name = readdir(dir)) != NULL) 
        {
          if (startWith(fileName, name->d_name)) 
          {
            int tmp = getNumber(name->d_name);
            if (fin == -1) 
            {
              fin = tmp;
              fout = tmp;
            } 
            else 
            {
              if (tmp > fin)  { fin = tmp; }
              if (tmp < fout) { fout = tmp; }
            }
          }
        }
        closedir(dir);
        if (fin == -1) 
        {
          // no file found: behave like an empty fifo
          fin = 0;
          fout = 0;
        }
        if (fin == fout && fin != 0) 
        {
          cerr << "previous crawl was too little, cannot reload state\n"
               << "please restart larbin with -scratch option\n";
          exit(1);
        }
        // every file between fout and fin is counted as full
        in = (fin - fout) * urlByFile;
        out = 0;
        makeName(fin);
        wfds = creat (fileName, S_IRUSR | S_IWUSR);
        makeName(fout);
        rfds = open (fileName, O_RDONLY);
      } 
      else 
      {
        // Delete old fifos
        while ((name = readdir(dir)) != NULL) 
        {
          if (startWith(fileName, name->d_name)) 
          {
            unlink(name->d_name);
          }
        }
        closedir(dir);

        fin = 0;
        fout = 0;
        in = 0;
        out = 0;
        // file number 0 is used for both writing and reading
        makeName(0);
        wfds = creat (fileName, S_IRUSR | S_IWUSR);
        rfds = open (fileName, O_RDONLY);
      }
    }
    
    /* Release what the constructor acquired: the mutex, both file
     * descriptors and the file name buffer. */
    PersistentFifo::~PersistentFifo () 
    {
      mypthread_mutex_destroy (&lock);
      close(rfds);
      close(wfds);
      // allocated with new char[] in the constructor
      delete [] fileName;
    }
    
    /* Return the first url of the fifo, or NULL when in == out (empty). */
    url *PersistentFifo::tryGet ()
    {
      url *head = NULL;
      mypthread_mutex_lock(&lock);
      if (in != out)
      {
        // not empty: build the url from the next stored line
        head = new url(readLine());
        ++out;
        updateRead();
      }
      mypthread_mutex_unlock(&lock);
      return head;
    }
    
    /* Return the first url of the fifo; emptiness is not checked here. */
    url *PersistentFifo::get ()
    {
      mypthread_mutex_lock(&lock);
      url *head = new url(readLine());
      ++out;
      updateRead();
      mypthread_mutex_unlock(&lock);
      return head;
    }
    
    /** Append obj to the fifo.
     * Its serialized form is written out, then obj itself is deleted.
     */
    void PersistentFifo::put (url *obj)
    {
      mypthread_mutex_lock(&lock);
      // serialize() returns a statically allocated string
      writeUrl(obj->serialize());
      ++in;
      updateWrite();
      mypthread_mutex_unlock(&lock);
      delete obj;
    }
    int PersistentFifo::getLength () 
    {
      return in - out;
    }
    
    /* Write nb as 6 decimal digits (zero padded, least significant digit at
     * index fileNameLength) into the end of fileName.
     * The digits are counted instead of comparing an unsigned index against
     * fileNameLength-5: that comparison is always true when the bound is 0,
     * which made the index wrap around.
     */
    void PersistentFifo::makeName (uint nb) 
    {
      for (uint k=0; k<6; k++) 
      {
        fileName[fileNameLength - k] = (nb % 10) + '0';
        nb /= 10;
      }
    }
    
    int PersistentFifo::getNumber (char *file) 
    {
      uint len = strlen(file);
      int res = 0;
      for (uint i=len-6; i<=len-1; i++) 
      {
        res = (res * 10) + file[i] - '0';
      }
      return res;
    }
    
    /* When out is a multiple of urlByFile, the file being read is
     * exhausted: remove it, open the next one and rebase the counters. */
    void PersistentFifo::updateRead ()
    {
      if ((out % urlByFile) != 0)
      {
        return;
      }
      close(rfds);
      // delete the file that was just read completely
      makeName(fout);
      unlink(fileName);
      // go on with the following file number
      fout++;
      makeName(fout);
      rfds = open(fileName, O_RDONLY);
      // count from the start of the new file
      in -= out;
      out = 0;
      assert(bufPos == bufEnd);
    }
    
    /* When in is a multiple of urlByFile, the file being written is
     * full: flush it, close it and create the next one. */
    void PersistentFifo::updateWrite ()
    {
      if ((in % urlByFile) != 0)
      {
        return;
      }
      flushOut();
      close(wfds);
      fin++;
      makeName(fin);
      wfds = creat(fileName, S_IRUSR | S_IWUSR);
    #ifdef RELOAD
      // also save the state needed for reloading
      global::seen->save();
    #ifdef NO_DUP
      global::hDuplicate->save();
    #endif
    #endif
    }
    
    /* read a line from the file
     * uses a buffer
     */
    char *PersistentFifo::readLine () {
      if (bufPos == bufEnd) {
        bufPos = 0; bufEnd = 0; buf[0] = 0;
      }
      char *posn = strchr(buf + bufPos, '
    ');
      while (posn == NULL) {
        if (!(bufEnd - bufPos < maxUrlSize + 40 + maxCookieSize)) {
          printf(fileName);
          printf(buf+bufPos);
        }
        if (bufPos*2 > BUF_SIZE) {
          bufEnd -= bufPos;
          memmove(buf, buf+bufPos, bufEnd);
          bufPos = 0;
        }
        int postmp = bufEnd;
        bool noRead = true;
        while (noRead) {
          int rd = read(rfds, buf+bufEnd, BUF_SIZE-1-bufEnd);
          switch (rd) {
          case 0 :
            // We need to flush the output in order to read it
            flushOut();
            break;
          case -1 :
            // We have a trouble here
            if (errno != EINTR) {
              cerr << "Big Problem while reading (persistentFifo.h)
    ";
              perror("reason");
              assert(false);
            } else {
              perror("Warning in PersistentFifo: ");
            }
            break;
          default:
            noRead = false;
            bufEnd += rd;
            buf[bufEnd] = 0;
            break;
          }
        }
        posn = strchr(buf + postmp, '
    ');
      }
      *posn = 0;
      char *res = buf + bufPos;
      bufPos = posn + 1 - buf;
      return res;
    }
    
    // Append the string s to the output buffer, flushing the buffer to the
    // out file first when s would not fit
    void PersistentFifo::writeUrl (char *s)
    {
      size_t len = strlen(s);
      assert(len < maxUrlSize + 40 + maxCookieSize);
      if (outbufPos + len >= BUF_SIZE) {
        // The buffer is full; flushOut leaves outbufPos at 0
        flushOut ();
      }
      memcpy(outbuf + outbufPos, s, len);
      outbufPos += len;
    }
    
    // Write the outbufPos buffered chars to wfds and empty the buffer
    void PersistentFifo::flushOut ()
    {
      ecrireBuff (wfds, outbuf, outbufPos);
      // the buffer is empty again
      outbufPos = 0;
    }
    View Code

    Larbin-2.6.3/src/utils/syncFifo.h

    // Larbin
    // Sebastien Ailleret
    // 09-11-99 -> 07-12-01
    
    /* fifo in RAM with synchronisations */
    
    #ifndef SYNCFIFO_H
    #define SYNCFIFO_H
    
    #define std_size 100
    
    #include "utils/mypthread.h"
    
    /* Fifo of T pointers kept in a circular array in RAM.
     * Accesses go through the mypthread_* wrappers on lock / nonEmpty.
     */
    template <class T>
    class SyncFifo {
     protected:
      // write index and read index in tab; the fifo is empty when they are equal
      uint in, out;
      // number of slots of tab
      uint size;
      // circular array of stored pointers
      T **tab;
    #ifdef THREAD_OUTPUT
      // protects the fields above
      pthread_mutex_t lock;
      // condition used by get, broadcast by put when the fifo was empty
      pthread_cond_t nonEmpty;
    #endif
    
     public:
      /* Specific constructor: size is the initial number of slots */
      SyncFifo (uint size = std_size);
    
      /* Destructor */
      ~SyncFifo ();
    
      /* get the first object */
      T *get ();
    
      /* get the first object (non totally blocking)
       * return NULL if there is none
       */
      T *tryGet ();
    
      /* add an object in the Fifo */
      void put (T *obj);
    
      /* how many items are there inside ? */
      int getLength ();
    };
    
    /* Create an empty fifo with room for size pointers. */
    template <class T>
    SyncFifo<T>::SyncFifo (uint size)
    {
      // both indices equal: the fifo starts empty
      in = 0;
      out = 0;
      this->size = size;
      tab = new T*[size];
      mypthread_mutex_init (&lock, NULL);
      mypthread_cond_init (&nonEmpty, NULL);
    }
    
    /* Free the array of pointers (not the pointed objects) and the
     * synchronisation objects. */
    template <class T>
    SyncFifo<T>::~SyncFifo ()
    {
      delete [] tab;
      mypthread_mutex_destroy (&lock);
      mypthread_cond_destroy (&nonEmpty);
    }
    
    /* Remove and return the first object, going through
     * mypthread_cond_wait on the "in == out" (empty) condition first. */
    template <class T>
    T *SyncFifo<T>::get ()
    {
      mypthread_mutex_lock(&lock);
      mypthread_cond_wait(in == out, &nonEmpty, &lock);
      T *head = tab[out];
      out++;
      out %= size;
      mypthread_mutex_unlock(&lock);
      return head;
    }
    
    /* Remove and return the first object, or return NULL when the fifo
     * is empty. */
    template <class T>
    T *SyncFifo<T>::tryGet ()
    {
      T *head = NULL;
      mypthread_mutex_lock(&lock);
      if (out != in) {
        // something is stored: take it and advance the read index
        head = tab[out];
        out++;
        out %= size;
      }
      mypthread_mutex_unlock(&lock);
      return head;
    }
    
    /* Add obj at the end of the fifo; the array is doubled when the
     * insertion fills it. */
    template <class T>
    void SyncFifo<T>::put (T *obj)
    {
      mypthread_mutex_lock(&lock);
      tab[in] = obj;
      if (in == out) {
        // the fifo was empty before this insertion
        mypthread_cond_broadcast(&nonEmpty);
      }
      in++;
      in %= size;
      if (in == out) {
        // the array is full: move the content to one twice as big
        T **bigger = new T*[2*size];
        // entries from out to the end keep their index
        uint idx = out;
        while (idx < size) {
          bigger[idx] = tab[idx];
          idx++;
        }
        // entries that had wrapped around follow them
        idx = 0;
        while (idx < in) {
          bigger[idx+size] = tab[idx];
          idx++;
        }
        in += size;
        size *= 2;
        delete [] tab;
        tab = bigger;
      }
      mypthread_mutex_unlock(&lock);
    }
    
    /* Number of objects currently stored. */
    template <class T>
    int SyncFifo<T>::getLength ()
    {
      mypthread_mutex_lock(&lock);
      // adding size keeps the difference non-negative when in wrapped around
      const int count = (in + size - out) % size;
      mypthread_mutex_unlock(&lock);
      return count;
    }
    
    #endif // SYNCFIFO_H
    View Code
  • 相关阅读:
    linux安装jenkins
    如何将接口进行限流
    java线程池思想
    一次缓存评估过程
    docker
    linux基本操作
    【安卓】App自动化环境搭建
    sheill之文本处理工具
    Liunx vim编辑器
    Liunx 远程
  • 原文地址:https://www.cnblogs.com/kaituorensheng/p/3789621.html
Copyright © 2011-2022 走看看