zoukankan      html  css  js  c++  java
  • Linux企业级项目实践之网络爬虫(21)——扩展为多任务爬虫

    高效的网络爬虫是搜索引擎的重要基础。采用多任务并发执行,实现类似于CPU流水线(pipeline)的运行方式,可以极大地提高网络和计算资源的利用率,从而提升爬虫的整体性能。


    #include "threads.h"

    #include <errno.h>

    #include "spider.h"
    #include "confparser.h"
     
    /*
     * Number of worker threads currently counted as running.
     * end_thread() decrements it while holding gctn_lock.
     */
    int g_cur_thread_num = 0;
    
    /* Mutex that serializes updates to g_cur_thread_num. */
    pthread_mutex_t gctn_lock = PTHREAD_MUTEX_INITIALIZER;
    
    /*
     * Start a new thread that runs start_func(arg).
     *
     * start_func: thread entry point; must not be NULL.
     * arg:        opaque argument handed to start_func.
     * pid:        receives the new thread id; may be NULL when the caller
     *             does not need it.
     * pattr:      thread attributes; when NULL a default set is used
     *             (1 MiB stack, detached).
     *
     * Returns 0 on success or an errno-style error number: EINVAL for a NULL
     * start_func, otherwise whatever the failing pthread_* call returned.
     *
     * Once argument validation has passed, the attribute object in use is
     * destroyed before returning, including a caller-supplied pattr, which
     * must therefore not be reused or destroyed again by the caller.
     * On EINVAL a caller-supplied pattr is left untouched.
     * NOTE(review): consuming the caller's attribute object is unusual;
     * confirm that existing callers rely on it before changing it.
     */
    int create_thread(void *(*start_func)(void *), void * arg, pthread_t *pid, pthread_attr_t * pattr)
    {
        const size_t default_stack_size = 1024 * 1024;
        pthread_attr_t attr;
        pthread_t pt;
        int rv;

        /* A NULL entry point would make the new thread jump to address 0. */
        if (start_func == NULL)
            return EINVAL;

        if (pattr == NULL) {
            /* Never use (or destroy) an attribute object that failed to init. */
            rv = pthread_attr_init(&attr);
            if (rv != 0)
                return rv;
            pattr = &attr;

            rv = pthread_attr_setstacksize(pattr, default_stack_size);
            if (rv == 0)
                rv = pthread_attr_setdetachstate(pattr, PTHREAD_CREATE_DETACHED);
            if (rv != 0) {
                pthread_attr_destroy(pattr);
                return rv;
            }
        }

        /* pthread_create() needs somewhere to store the id even if unused. */
        if (pid == NULL)
            pid = &pt;

        rv = pthread_create(pid, pattr, start_func, arg);
        pthread_attr_destroy(pattr);
        return rv;
    }
    
    /*
     * Log, at debug level, that the calling thread has started.
     *
     * pthread_t is an opaque type, so the id is cast to unsigned long to
     * match the "%lu" conversion. This assumes SPIDER_LOG forwards its
     * arguments printf-style and that pthread_t is an arithmetic type
     * (as on Linux/glibc) -- TODO confirm for other targets.
     */
    void begin_thread(void)
    {
        SPIDER_LOG(SPIDER_LEVEL_DEBUG, "Begin Thread %lu", (unsigned long)pthread_self());
    }
    
    /*
     * Bookkeeping for a finishing worker thread.
     *
     * Under gctn_lock: decrements g_cur_thread_num, computes how many job
     * slots are free relative to g_conf->max_job_num, calls
     * attach_epoll_task() once per free slot (at most twice), and logs the
     * new thread count at debug level.
     *
     * NOTE(review): attach_epoll_task() runs while gctn_lock is held; confirm
     * it never tries to take this lock itself.
     */
    void end_thread(void)
    {
        pthread_mutex_lock(&gctn_lock);

        /* Free job slots once this thread is no longer counted. */
        int left = g_conf->max_job_num - (--g_cur_thread_num);

        /*
         * One free slot: attach one task. More than one: attach two.
         * With no free slot (left <= 0) nothing is attached.
         */
        if (left >= 1)
            attach_epoll_task();
        if (left >= 2)
            attach_epoll_task();

        /*
         * pthread_t is opaque; cast so the argument matches "%lu"
         * (assumes an arithmetic pthread_t, as on Linux/glibc).
         */
        SPIDER_LOG(SPIDER_LEVEL_DEBUG, "End Thread %lu, cur_thread_num=%d",
                   (unsigned long)pthread_self(), g_cur_thread_num);
        pthread_mutex_unlock(&gctn_lock);
    }
    


  • 相关阅读:
    免费馅饼(HDU 1176 DP)
    搬寝室(HDU 1421 DP)
    FatMouse's Speed(HDU LIS)
    Bone Collector II(HDU 2639 DP)
    Palindrome(POJ 1159 DP)
    Proud Merchants(POJ 3466 01背包+排序)
    树的最大独立集
    Roads in the North(POJ 2631 DFS)
    Starship Troopers(HDU 1011 树形DP)
    Strategic game(POJ 1463 树形DP)
  • 原文地址:https://www.cnblogs.com/new0801/p/6176990.html
Copyright © 2011-2022 走看看