一、进程
1.进程间数据不共享

import multiprocessing

# Module-level list; the demo shows that each child process appends to its
# own copy rather than to one shared list.
data_list = []


def task(arg):
    """Append ``arg`` to the module-level ``data_list`` and print the list."""
    data_list.append(arg)
    print(data_list)


def run():
    """Start ten child processes running ``task`` and wait for all of them."""
    workers = []
    for i in range(10):
        m = multiprocessing.Process(target=task, args=(i,))
        m.start()
        workers.append(m)
    # Join every child so run() returns only after all output has been printed.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    run()
    # Each process prints its own separate one-element list:
    # [0]
    # [1]
    # [2]
    # [3]
    # [4]
    # [5]
    # [6]
    # [7]
    # [8]
    # [9]
常用功能:
join、daemon、name、multiprocessing.current_process()、multiprocessing.current_process().ident/pid

import time
import multiprocessing


def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


def run():
    """Launch two named worker processes, printing a marker around each start."""
    print(1111111)
    first = multiprocessing.Process(target=task, args=(1,), name="pp1")
    first.start()

    print(2222222)
    second = multiprocessing.Process(target=task, args=(2,), name="pp2")
    second.start()

    print(33333333)


if __name__ == "__main__":
    run()
通过继承方式创建进程

class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print the process object that is executing this method."""
        # Report the executing process itself, not the Process class object.
        print("当前进度", multiprocessing.current_process())


def run():
    """Create and start two ``MyProcess`` instances."""
    p1 = MyProcess()
    p1.start()
    p2 = MyProcess()
    p2.start()


if __name__ == '__main__':
    run()
2.进程间数据共享
Queue
linux:

# Module-level queue used by run(); it is also passed explicitly to each
# worker process.
q = multiprocessing.Queue()


def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


def run():
    """Start ten workers, print the value each one queues, then join them."""
    workers = []
    for i in range(1, 11):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
        workers.append(p)

    # Every worker puts exactly one item, so read one item per worker instead
    # of looping forever on a queue that will never receive more data.
    for _ in workers:
        v = q.get()
        print(v)

    for p in workers:
        p.join()


if __name__ == '__main__':
    run()
windows:

def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


if __name__ == '__main__':
    # The queue is created under the __main__ guard and handed to each worker
    # as an argument.
    q = multiprocessing.Queue()
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
        workers.append(p)

    # Every worker puts exactly one item, so read one item per worker instead
    # of blocking forever once the queue has been drained.
    for _ in workers:
        v = q.get()
        print(v)

    for p in workers:
        p.join()
Manager
linux:

# Manager and its proxy dict are created at module level; task() reaches the
# dict through this global rather than through an argument.
m = multiprocessing.Manager()
dic = m.dict()


def task(arg):
    """Store ``100`` under key ``arg`` in the shared manager dict."""
    dic[arg] = 100


def run():
    """Start ten workers, wait for them, then print the dict's values."""
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
        workers.append(p)

    # Join the workers instead of pausing on user input, so the values are
    # guaranteed to be complete before they are printed.
    for p in workers:
        p.join()

    print(dic.values())


if __name__ == '__main__':
    run()
windows:

def task(arg, dic):
    """Store ``100`` under key ``arg`` in the mapping ``dic``."""
    dic[arg] = 100


def run():
    """Fill a manager dict from ten worker processes and print the result."""
    m = multiprocessing.Manager()
    dic = m.dict()

    lis = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        lis.append(p)

    # join() blocks until each worker exits; this replaces a busy-wait loop
    # that repeatedly polled is_alive() on every process.
    for p in lis:
        p.join()

    print(dic)


if __name__ == '__main__':
    run()
3.进程锁:
和线程锁种类用法一致
4.进程池:

import time
from concurrent.futures import ProcessPoolExecutor


def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


if __name__ == '__main__':
    # The context manager shuts the pool down on exit, waiting for all
    # submitted tasks to finish and releasing the worker processes.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
requests模块的简单爬虫
安装:(cmd)
pip3 install requests
pip3 install beautifulsoup4

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


def task(url):
    """Fetch ``url`` and print the title and link of every listed item.

    Pages without a ``div#content-list`` container, and items without an
    ``<a>`` tag, are skipped.
    """
    r1 = requests.get(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
        },
        # Without a timeout a stalled connection would block a pool thread
        # indefinitely.
        timeout=10,
    )
    soup = BeautifulSoup(r1.text, "html.parser")

    content_list = soup.find('div', attrs={'id': 'content-list'})
    if content_list is None:
        # find() returns None when the container is absent; nothing to print.
        return

    for item in content_list.find_all('div', attrs={'class': 'item'}):
        link = item.find('a')
        if link is None:
            continue
        title = link.text.strip()
        target_url = link.get('href')
        print(title, target_url)


def run():
    """Crawl pages 1 through 49 using a pool of five threads."""
    # The context manager waits for all submitted tasks and then releases
    # the worker threads.
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


if __name__ == '__main__':
    run()
以上示例属于 IO 密集型任务（主要时间花在等待网络响应），因此用多线程更合适
requests模块模拟浏览器发送请求
requests.get():
线程和线程池