zoukankan      html  css  js  c++  java
  • Python threading

    eg: Error

    self._target(*self._args, **self._kwargs) TypeError: get_zonghe_answers() takes 1 positional argument but 36 were given

    Instead, you should provide args a tuple:

      t = threading.Thread(target=startSuggestworker, args = (start_keyword,))

    http://blog.gusibi.com/post/python-thread-note/

    eg: Download data 

         

     import *  # import whatever modules your script actually needs here
    
    all_threads_ret = []  # collects the data scraped by each worker thread
    def get_zonghe_answers(url):
        # Thread worker: fetch the data behind *url* and append it to the
        # shared all_threads_ret list. The "...." below is the blog post's
        # placeholder for elided code, not valid Python.
        ....
        data = get_data_from_url(url)
        # list.append is atomic under the GIL, so no explicit lock is used here
        all_threads_ret.append(data)
    def mutiprocess_thread():
        # Spawn one thread per URL read from zonghe.csv, wait for all of
        # them, then dump the collected results to zonghe_answer222.csv.
        # NOTE(review): this creates an unbounded number of threads (one per
        # row); for large files a thread pool would be safer.
        import threading
        df = pd.read_csv('zonghe.csv', header=None)
        # Flatten to a 1-D array of URLs (assumes a single-column CSV -- TODO confirm)
        df_data = np.array(df).flatten()
        all_threads = []
        #init thread
        for url in df_data:
            init_th = threading.Thread(target=get_zonghe_answers,args=(url,))
            all_threads.append(init_th)
        # Start every thread, then join every thread so all results are in
        # all_threads_ret before the CSV is written.
        for sth in all_threads:
            sth.start()
        for sth in all_threads:
            sth.join()
        # header=0 / index=0 are falsy: write neither a header row nor an index column
        pd.DataFrame(data=all_threads_ret).to_csv('zonghe_answer222.csv', header=0, index= 0)
        print(len(all_threads))
    
    if  __name__ == "__main__":
        mutiprocess_thread()  

    eg: Queue, Producer, Consumer, 使用Threading模块创建

       使用Threading模块创建线程,继承threading.Thread,然后重写__init__方法和run方法:这种方式是推荐的方式.

       

    #encoding: utf-8
     
    import urllib
    import threading
    from bs4 import BeautifulSoup
    import requests
    import os
    import time
     
    # URLs of individual face images, filled in by the producer threads.
    FACE_URL_LIST = []
    # Listing-page URLs still waiting to be scraped.
    PAGE_URL_LIST = []
    # Pre-build the 869 listing-page URLs (pages 1..869).
    BASE_PAGE_URL = 'https://www.doutula.com/photo/list/?page='
    for page_no in range(1, 870):
        PAGE_URL_LIST.append(BASE_PAGE_URL + str(page_no))

    # Single lock guarding both shared lists above.
    gLock = threading.Lock()
     
    # 生产者,负责从每个页面中提取表情的url
    class Producer(threading.Thread):
        """Producer: pops listing-page URLs and extracts face-image URLs."""
        def run(self):
            while True:
                # BUG FIX: the original tested len(PAGE_URL_LIST) BEFORE
                # acquiring the lock, so another producer could empty the
                # list between the check and pop() -> IndexError. The
                # check-and-pop must happen under one lock acquisition.
                gLock.acquire()
                if not PAGE_URL_LIST:
                    gLock.release()
                    break
                page_url = PAGE_URL_LIST.pop()
                # Release promptly so other threads can use the list.
                gLock.release()
                response = requests.get(page_url)
                soup = BeautifulSoup(response.content, 'lxml')
                img_list = soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'})
                # Publish the extracted URLs under the lock as well.
                gLock.acquire()
                for img in img_list:
                    src = img['data-original']
                    if not src.startswith('http'):
                        src = 'http:'+ src
                    FACE_URL_LIST.append(src)
                gLock.release()
                # Be polite to the remote server.
                time.sleep(0.5)
     
    # 消费者,负责从FACE_URL_LIST提取表情链接,然后下载
    class Consumer(threading.Thread):
        """Consumer: pops face URLs from FACE_URL_LIST and downloads them."""
        def run(self):
            # BUG FIX: current_thread is a function and must be CALLED;
            # the original printed the function object itself.
            print('%s is running' % threading.current_thread().name)
            while True:
                # Lock before inspecting the shared list.
                gLock.acquire()
                if len(FACE_URL_LIST) == 0:
                    # Always release the lock, whatever the outcome.
                    gLock.release()
                    # Sleep briefly instead of busy-spinning at 100% CPU
                    # while waiting for producers to publish more URLs.
                    time.sleep(0.1)
                    continue
                else:
                    face_url = FACE_URL_LIST.pop()
                    gLock.release()
                    filename = face_url.split('/')[-1]
                    path = os.path.join('images', filename)
                    # Python 2 API; on Python 3 this is urllib.request.urlretrieve.
                    urllib.urlretrieve(face_url, filename=path)
     
    if __name__ == '__main__':
        # Start 2 producers (scrape face URLs from the listing pages)
        # and 5 consumers (download the faces as they appear).
        for _ in range(2):
            Producer().start()

        for _ in range(5):
            Consumer().start()
    #! /usr/bin/env python
    #encoding=utf-8
     
    import threading
    import time
    from Queue import Queue
     
    def readFile(path='/opt/dev/python/list.dat'):
        """Read *path* line by line and put each line on the shared queue.

        The new *path* parameter defaults to the original hard-coded
        location, so existing callers are unaffected.
        """
        global queue
        # BUG FIX: the original never closed the file handle; 'with'
        # guarantees it is closed even if an exception is raised mid-read.
        with open(path) as file_object:
            for line in file_object:
                queue.put(line)
     
    class Consumer(threading.Thread):
        """Consumer thread: drains lines from the shared queue until empty."""
        def run(self):
            # Imported locally so this block is self-contained;
            # on Python 3 this is `from queue import Empty`.
            from Queue import Empty
            global queue
            while True:
                try:
                    # BUG FIX: the original checked qsize() > 0 and then
                    # called a BLOCKING get(); with several consumers, the
                    # queue can empty between the two calls and the thread
                    # hangs forever. A non-blocking get closes the race.
                    line = queue.get(block=False)
                except Empty:
                    break
                print(self.name + '消费了 ' + line)
                time.sleep(0.01)
     
    # Shared queue holding the lines read from the data file.
    queue = Queue()

    def main():
        """Fill the queue from the file, then start five consumer threads."""
        readFile()
        for _ in range(5):
            Consumer().start()

    if __name__ == '__main__':
        main()
  • 相关阅读:
    Express中间件简介
    Express中间件概念
    浏览器cookie插件
    node=day7
    cookie可视化操作工具---EditThisCookie
    node之cookie和session对比
    node通过session保存登录状态
    浅谈表单同步提交和异步提交
    node.js服务端存储用户密码md5加密
    jQuery的ajax里dataType预期服务器返回数据类型
  • 原文地址:https://www.cnblogs.com/cbugs/p/9846304.html
Copyright © 2011-2022 走看看