zoukankan      html  css  js  c++  java
  • python 并发 进程 数据共享 锁 进程池

    面向对象补充:对象中设置值

    class Foo(object):
        """Object that keeps every assigned attribute inside one ``info`` dict.

        ``obj.name = value`` is stored as ``info['name'] = value`` and
        ``obj.name`` is read back from that dict.
        """

        def __init__(self):
            # Go through object.__setattr__ directly: our own __setattr__
            # writes into self.info, which does not exist yet at this point.
            object.__setattr__(self, 'info', {})

        def __setattr__(self, key, value):
            """Record ``key -> value`` in the ``info`` dict."""
            self.info[key] = value

        def __getattr__(self, item):
            """Resolve attributes that normal lookup did not find.

            Prints the requested name, then returns ``info[item]``.

            Raises:
                AttributeError: if ``item`` was never assigned. The attribute
                    protocol (``hasattr``, ``getattr`` with a default) relies
                    on this exception type; a raw ``KeyError`` would leak out.
            """
            if item == 'info':
                # ``info`` itself is missing (instance built without __init__);
                # touching self.info here would recurse without end.
                raise AttributeError(item)
            print(item)
            try:
                return self.info[item]
            except KeyError:
                raise AttributeError(item) from None
    
    # Store an attribute through Foo.__setattr__ and read it back.
    obj = Foo()
    setattr(obj, 'name', 'alex')
    print(getattr(obj, 'name'))

    # Build the list of the integers 0..9999 and print it.
    v = list(range(10000))
    print(v)

    1. 进程

      进程间数据不共享

    # Module-level list; the surrounding text uses it to show that each
    # process works on its own copy.
    data_list = []


    def task(arg):
        """Add ``arg`` to the end of ``data_list`` and print the whole list."""
        data_list.extend([arg])
        print(data_list)
    
    
    def run():
        """Start ten child processes; child number ``n`` calls ``task(n)``."""
        for index in range(10):
            # Alternative for comparison: threading.Thread(target=task, args=(index,))
            worker = multiprocessing.Process(target=task, args=(index,))
            worker.start()


    if __name__ == '__main__':
        run()

    常用功能:

    import time
    def task(arg):
        """Pause for two seconds, then print ``arg``."""
        delay_seconds = 2
        time.sleep(delay_seconds)
        print(arg)
    
    
    def run():
        """Start two named child processes, printing a marker around each start."""
        print('111111111')
        # (process name, argument for task, marker printed after start)
        launches = (
            ('pp1', 1, '222222222'),
            ('pp2', 2, '333333333'),
        )
        for proc_name, value, marker in launches:
            child = multiprocessing.Process(target=task, args=(value,), name=proc_name)
            child.start()
            print(marker)


    if __name__ == '__main__':
        run()

    类继承方式创建进程:

    class MyProcess(multiprocessing.Process):
        """Process subclass whose work is defined by overriding ``run``."""

        def run(self):
            """Print the process object this code is executing in."""
            me = multiprocessing.current_process()
            print('当前进程', me)
    
    
    def run():
        """Create and start two ``MyProcess`` children, one after the other."""
        for _ in range(2):
            child = MyProcess()
            child.start()


    if __name__ == '__main__':
        run()

    2.进程间数据共享:   (multiprocessing.Queue    ,      Manager)

    import multiprocessing
    import threading      #第一种
    import queue
    import time
     # Queue used to hand values from the child processes back to the parent.
     q = multiprocessing.Queue()


     def task(arg, q):
         """Put ``arg`` on the queue ``q``."""
         q.put(arg)


     def run():
         """Start ten producer processes and print the ten values they send."""
         workers = []
         for i in range(10):
             p = multiprocessing.Process(target=task, args=(i, q))
             p.start()
             workers.append(p)

         # Read exactly one item per producer. An unbounded ``while True``
         # would block forever in q.get() once the ten items are consumed.
         for _ in workers:
             v = q.get()
             print(v)

         # Reap the children only after the queue has been drained.
         for p in workers:
             p.join()


     if __name__ == '__main__':
         run()
    
    
    
    
    def task(arg, dic):
        """Wait two seconds, then record ``arg -> 100`` in the mapping ``dic``."""
        time.sleep(2)
        dic[arg] = 100


    if __name__ == '__main__':
        # Second approach: share data through a Manager.
        m = multiprocessing.Manager()
        # The dict must come from the manager. A plain {} is copied into each
        # child, so the parent would always print an empty dict.
        dic = m.dict()
        process_list = []
        for i in range(10):
            p = multiprocessing.Process(target=task, args=(i, dic))
            p.start()
            process_list.append(p)

        # join() blocks until the child has exited; this replaces the
        # busy loop that polled is_alive() on every process.
        for p in process_list:
            p.join()
        print(dic)

    进程间的数据其他电脑:

    lock = multiprocessing.RLock()


    def task(arg,):
        """Print a banner, then print ``arg`` while holding ``lock`` for two seconds."""
        print("鬼子扛枪")
        # ``with`` releases the lock even if the body raises, which a bare
        # acquire()/release() pair does not.
        with lock:
            time.sleep(2)
            print(arg)
    if __name__ == '__main__':
        while True:
        ........

    3.进程锁:

    import time
    import threading
    import multiprocessing
    
    
    lock = multiprocessing.RLock()


    def task(arg, lock=lock):
        """Print a banner, then print ``arg`` while holding ``lock`` for two seconds.

        ``lock`` defaults to the module-level RLock, so ``task(arg)`` still
        works. The parent passes the lock explicitly because a child started
        with the "spawn" method re-imports this module and would otherwise
        build its own, unrelated RLock.
        """
        print('鬼子来了')
        # ``with`` releases the lock even if the body raises.
        with lock:
            time.sleep(2)
            print(arg)


    if __name__ == '__main__':
        p1 = multiprocessing.Process(target=task, args=(1, lock))
        p1.start()

        p2 = multiprocessing.Process(target=task, args=(2, lock))
        p2.start()

    4.进程池

    import time
    from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
    
    def task(arg):
        """Pause for two seconds, then print ``arg``."""
        delay_seconds = 2
        time.sleep(delay_seconds)
        print(arg)
    
    if __name__ == '__main__':
        # Five worker processes. Leaving the with-block shuts the pool down
        # after waiting for all ten submitted tasks to finish.
        with ProcessPoolExecutor(5) as pool:
            for i in range(10):
                pool.submit(task, i)

    5. 初识爬虫

      安装 :  pip3  install  requests

          pip3  install  beautifulsoup4

      问题 : 找不到内部指令?

            方式一 : C:\Users\SF\AppData\Local\Programs\Python\Python36\Scripts\pip3 install  requests

          方式二 :  将 C:\Users\SF\AppData\Local\Programs\Python\Python36\Scripts 加入环境变量 PATH,然后执行:

          pip3  install  requests

    简单爬虫示例:

    import requests
    from bs4 import BeautifulSoup
    from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
    
    
    # 模拟浏览器发送请求
    # 内部创建 sk = socket.socket()
    # 和抽屉进行socket连接 sk.connect(...)
    # sk.sendall('...')
    # sk.recv(...)
    
    def task(url):
        """Print ``url`` and download it with a browser-like User-Agent header.

        The response is kept in ``r1`` but not processed; the commented
        lines at the end sketch how it could be parsed with BeautifulSoup.
        """
        print(url)
        r1 = requests.get(
            url=url,
            headers={
                'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
            },
            # Without a timeout requests waits indefinitely on a stalled
            # server, which would tie up a pool worker forever.
            timeout=10,
        )

        # Inspect the downloaded text:
        # soup = BeautifulSoup(r1.text,'html.parser')
        # print(soup.text)
        # content_list = soup.find('div',attrs={'id':'content-list'})
        # for item in content_list.find_all('div',attrs={'class':'item'}):
        #     title = item.find('a').text.strip()
        #     target_url = item.find('a').get('href')
        #     print(title,target_url)
    
    def run():
        """Fetch pages 1 through 49 of the hot list with five worker threads."""
        # Leaving the with-block shuts the pool down after waiting for
        # every submitted download to finish.
        with ThreadPoolExecutor(5) as pool:
            for i in range(1,50):
                pool.submit(task,'https://dig.chouti.com/all/hot/recent/%s' %i)


    if __name__ == '__main__':
        run()

    进程和线程哪个好?

    回答是:  对爬虫这类 IO 密集型任务,线程好(线程更轻量,等待网络时会释放 GIL);计算密集型任务才需要多进程

    进程池/  线程池的应用   与爬虫有关

      

      

  • 相关阅读:
    项目
    关于我
    【转载】罗胖精选|什么样的自控方法才有效?
    知识管理——得到CEO脱不花女士的一次分享
    注意由双大括号匿名类引起的serialVersionUID编译告警
    持续集成、持续交付和持续部署
    Google Cayley图数据库使用方法
    任务的属性
    团队博客地址
    个人总结
  • 原文地址:https://www.cnblogs.com/SUIFAN/p/9637968.html
Copyright © 2011-2022 走看看