进程:系统分配资源的最小单位,一般可以理解为"正在运行中的程序"
1.通过multiprocessing模块开启进程
from multiprocessing import Process
import os
import time
import random


def func(name, age):
    """Simulate sending one e-mail; runs inside a child process."""
    print(f'发送一封邮件给{age}岁的{name}')
    # Random pause (< 1s) to mimic network latency — a blocking call.
    time.sleep(random.random())
    print('发送完毕')


if __name__ == '__main__':
    print(f'当前进程{os.getpid()}, 父进程{os.getppid()}')
    args_lst = [('aelx', 84), ('wusir', 60), ('eva', 36)]
    # One child process per recipient.
    workers = [Process(target=func, args=pair) for pair in args_lst]
    for worker in workers:
        worker.start()
    # join() is a synchronous block: wait for every child to finish.
    for worker in workers:
        worker.join()
    print('所有邮件发送完毕')
2.开启进程的其他方法
import os
from multiprocessing import Process


class MyProcess(Process):
    """Alternative way to start a process: subclass Process and override run()."""

    def __init__(self, a, b, c):
        # Let Process initialise its own internals before storing our payload.
        super().__init__()
        self.a = a
        self.b = b
        self.c = c

    def run(self):
        """Invoked automatically in the child process once start() is called."""
        print(os.getppid(), os.getpid(), self.a, self.b, self.c)


if __name__ == '__main__':
    print(os.getpid())
    worker = MyProcess(1, 2, 3)
    worker.start()
3.Process类的其他属性和方法
import os
import time
from multiprocessing import Process


class MyProcess(Process):
    """Process subclass used to demonstrate the Process inspection API."""

    def __init__(self, a, b, c):
        super().__init__()
        self.a = a
        self.b = b
        self.c = c

    def run(self):
        """Runs in the child; sleeps first so the parent can observe it alive."""
        time.sleep(1)
        print(os.getppid(), os.getpid(), self.a, self.b, self.c)


if __name__ == '__main__':
    print(os.getpid())
    child = MyProcess(1, 2, 3)
    child.start()
    print(child.pid, child.ident)  # two aliases for the child's process id
    print(child.name)
    print(child.is_alive())
    child.terminate()  # force-kill the child: asynchronous, non-blocking
    time.sleep(0.01)   # brief pause so the OS has time to reap the child
    print(child.is_alive())
4.守护进程
from multiprocessing import Process
import time


def son1():
    """Endless worker — only its daemon status will ever stop it."""
    while True:
        print('in son1')
        time.sleep(1)


def son2():
    """Finite worker: prints ten times, one second apart."""
    for _ in range(10):
        print('in son2')
        time.sleep(1)


if __name__ == '__main__':
    guard = Process(target=son1)
    guard.daemon = True  # must be set before start(): marks it a daemon
    guard.start()
    worker = Process(target=son2)
    worker.start()
    # time.sleep(3)  # would grant the daemon roughly 3 seconds to run
    worker.join()  # keeps the main code alive until son2 ends, which in
    # turn keeps the daemon alive that long.
    # The parent waits for its children so it can reclaim their resources.
    # A daemon ends when the parent's *code* finishes, not when the whole
    # parent process exits — the daemon is itself just a child process, so
    # the parent must still be around afterwards to reap it.
5.进程的队列
""" 进程之间数据隔离 进程之间通信:Inter Process communication --> IPC 基于文件 同一台机器上的多个进程之间通信 基于socket的文件级别的通信来完成数据传递 基于网络 同一台机器或者多台机器上的多进程通信 第三方工具(消息中间件): memcache redis rabbitmq kafka """ from multiprocessing import Queue, Process def son(q): q.put('hello') if __name__ == '__main__': q = Queue() Process(target=son, args=(q, )).start() print(q.get()) # 获取在子进程中往队列中添加的值
6.生产者消费者模型
""" 生产者消费者模型: 爬虫的时候: 分布式操作:celery分布式框架 本质: 让生产数据和消费数据的效率都达到平衡并且最大化的效率 """ from multiprocessing import Queue, Process import random import time def consumer(q): # 消费者 while True: if q.get(): print(q.get()) else: break def producer(q, name, food): # 生产者 for i in range(1, 11): foodi = f"{food}->{i}" print(f"{name}生产了{i}个{food}") time.sleep(random.random()) q.put(foodi) if __name__ == '__main__': q = Queue() c1 = Process(target=consumer, args=(q,)) p1 = Process(target=producer, args=(q, 'alex', '冰淇淋')) p2 = Process(target=producer, args=(q, 'wusir', '芋圆')) c1.start() p1.start() p2.start() p1.join() p2.join() q.put(None) # p1,p2生产完后,给消费者发送一个None消息(如果有多个消费者,就要发送多个None)
7.异步阻塞与基于生产者消费者模型的爬虫例子
import requests
from multiprocessing import Process, Queue

"""
异步阻塞:
    不确定子进程的执行顺序,且不知道谁的执行结果先返回
"""

# Build the five listing-page URLs; page 1 has no index suffix.
base = 'https://www.dygod.net/html/gndy/dyzz/'
url_ls = [base if i == 1 else f'{base}index_{i}.html' for i in range(1, 6)]


def producer(i, url, q):
    """Fetch one page and hand (page number, decoded html) to the queue."""
    resp = requests.get(url)
    # The site serves gbk bytes mislabelled as latin1, hence the re-decode.
    q.put((i, resp.text.encode('latin1').decode('gbk')))


def consumer(q):
    """Write each fetched page to disk until the None sentinel arrives."""
    while True:
        item = q.get()
        if item is None:
            break
        page_no, html = item
        with open(f"电影天堂第{page_no}页.html", encoding='gbk', mode='w') as f:
            f.write(html)


if __name__ == '__main__':
    q = Queue()
    fetchers = []
    for index, url in enumerate(url_ls, 1):
        job = Process(target=producer, args=(index, url, q))
        job.start()
        fetchers.append(job)
    Process(target=consumer, args=(q,)).start()
    for job in fetchers:
        job.join()
    q.put(None)  # all producers finished -> tell the consumer to stop
8.进程之间通过Manage类实现数据共享
from multiprocessing import Process, Manager, Lock


def change_dic(dic, lock):
    """Atomically decrement the shared counter by one."""
    with lock:
        dic['count'] -= 1


if __name__ == '__main__':
    # Processes are data-isolated; a Manager hands out proxy objects
    # (here a dict) that many processes can safely share.
    manager = Manager()
    dic = manager.dict({'count': 100})
    lock = Lock()
    workers = [
        Process(target=change_dic, args=(dic, lock)) for _ in range(100)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print(dic)