一.锁机制
1.普通锁
import threading
import random
import time

# Shared balance that producers add to and consumers draw from.
gMoney = 1000
# Total number of deposits all producers together are allowed to make.
gTotalTimes = 10
# Number of deposits made so far.
gTtimes = 0
# Protects every read/modify of gMoney and gTtimes.
gLock = threading.Lock()


class Producer(threading.Thread):
    """Thread that deposits random amounts until the shared quota is used up."""

    def run(self):
        """Deposit 100-1000 per round, at most ``gTotalTimes`` times overall.

        The lock is taken with ``with`` so that it is released on every exit
        path (normal iteration, ``break``, or an unexpected exception).
        """
        global gMoney
        global gTtimes
        while True:
            money = random.randint(100, 1000)
            with gLock:
                if gTtimes >= gTotalTimes:
                    # Quota exhausted: stop this producer.
                    break
                gMoney += money
                print("%s挣了%d元钱,剩余%d元钱" % (threading.current_thread(), money, gMoney))
                gTtimes += 1
            # Sleep outside the lock so other threads can run meanwhile.
            time.sleep(0.5)


class Consumer(threading.Thread):
    """Thread that withdraws random amounts from the shared balance."""

    def run(self):
        """Withdraw 100-1000 per round while the balance allows it.

        When the balance is too low the consumer either reports it and tries
        again later, or, once the producers have used up their quota, stops.
        """
        global gMoney
        while True:
            money = random.randint(100, 1000)
            with gLock:
                if gMoney >= money:
                    gMoney -= money
                    print("%s消费了%d元钱,剩余%d元钱" % (threading.current_thread(), money, gMoney))
                elif gTtimes >= gTotalTimes:
                    # Not enough money and nobody will produce any more.
                    break
                else:
                    print("%s准备消费%d元钱,但是余额不足" % (threading.current_thread(), money))
            time.sleep(0.5)


def main():
    """Start three consumer threads followed by five producer threads."""
    for x in range(3):
        t = Consumer(name="消费者线程%d" % x)
        t.start()

    for x in range(5):
        t = Producer(name="生产者线程%d" % x)
        t.start()


if __name__ == "__main__":
    main()
2.Condition
import threading
import random
from time import sleep

# Condition variable guarding the two shared counters below.
ct = threading.Condition()
all_money = 1000  # starting balance
count = 10  # number of productions still allowed across all producers


class producers(threading.Thread):
    '''Producer thread: adds random amounts while production quota remains.'''

    def run(self):
        """Produce 200-1000 per round until ``count`` reaches zero.

        Every successful production wakes all consumers waiting on ``ct``.
        ``with ct`` guarantees the underlying lock is released on every exit
        path, including the ``break`` and any unexpected exception.
        """
        global all_money
        global count
        while True:
            with ct:
                if count <= 0:
                    # Quota used up: stop producing.
                    break
                money = random.randint(200, 1000)
                all_money += money
                count -= 1
                print('生产者%s生产了%d元,剩余金钱%d元' % (threading.current_thread(), money, all_money))
                # Wake every waiting consumer so it can re-check the balance.
                ct.notify_all()
            sleep(0.5)


class comsumer(threading.Thread):
    '''Consumer thread: spends random amounts, waiting when funds are short.'''

    def run(self):
        """Spend 200-1000 per round, blocking on ``ct`` while funds are short.

        The consumer returns once it cannot afford its amount and the
        producers have no quota left.
        """
        global all_money
        while True:
            with ct:
                money = random.randint(200, 1000)
                # Must be a loop, not an ``if``: after being woken the balance
                # may already have been spent by another consumer, so the
                # condition has to be re-checked before consuming.
                while money > all_money:
                    if count == 0:
                        # No more money will ever arrive; give up.
                        return
                    print('消费者%s需要消费%d元,剩余金钱%d元,不足' % (threading.current_thread(), money, all_money))
                    ct.wait()
                all_money -= money
                print('消费者%s消费了%d元,剩余金钱%d元' % (threading.current_thread(), money, all_money))
            sleep(0.5)


if __name__ == '__main__':
    for i in range(3):
        th = comsumer(name='线程%d' % i)
        th.start()
    for i in range(5):
        th = producers(name='线程%d' % i)
        th.start()
3.Queue
import threading
from queue import Queue
import time


def set_value(qu):
    """Feed an ever-increasing counter (0, 1, 2, ...) into ``qu``.

    After each ``put`` the function sleeps for two seconds and prints how long
    that pause lasted. It loops forever and never returns.
    """
    value = 0
    while True:
        qu.put(value)
        value += 1
        began = time.time()
        time.sleep(2)  # pause two seconds before putting the next value
        finished = time.time()
        print('阻塞时间为:', finished - began, '秒threading.Thread')


def get_value(qu):
    """Print every item taken from ``qu``, blocking while the queue is empty.

    Loops forever and never returns.
    """
    while True:
        item = qu.get()  # blocks until something is available
        print('数据:', item)


if __name__ == '__main__':
    qu = Queue(4)
    writer = threading.Thread(target=set_value, args=[qu])
    reader = threading.Thread(target=get_value, args=[qu])
    writer.start()
    reader.start()
二.多线程下载表情包
1.不用多线程
import os
import re
from urllib import request

import requests
from lxml import etree


def get_page(url):
    """Fetch one listing page and download every matching image into ``imgs/``.

    Images are the ``<img>`` elements selected by the XPath below (class not
    equal to ``gif``). Each file is named after the image's ``alt`` text, with
    a few punctuation characters removed, plus the extension of its URL.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    response = requests.get(url, headers=header)
    text = response.text
    html = etree.HTML(text)
    imgs = html.xpath('//div[@class="page-content text-center"]//a[@class="col-xs-6 col-sm-3"]//img[@class!="gif"]')

    # urlretrieve does not create missing directories.
    os.makedirs("imgs", exist_ok=True)

    for img in imgs:
        img_url = img.get("data-original")  # image URL
        alt = img.get("alt")  # image title, used as the file name
        if not img_url or alt is None:
            # Element lacks one of the attributes we need; skip it instead
            # of crashing on None.
            continue
        suffix = img_url.split(".")[-1]  # file extension, e.g. jpg or gif
        alt = re.sub(r'[?.*,!!?。]', "", alt)  # strip special characters from the name
        filename = alt + "." + suffix
        request.urlretrieve(img_url, "imgs/" + filename)


def main():
    """Download the images of ten listing pages, one page after another."""
    # NOTE(review): page numbers here run 0-9, while the threaded version in
    # this file starts at 1 - confirm which the site expects.
    for x in range(10):
        url = "http://www.doutula.com/photo/list/?page=%s" % (x)
        get_page(url)


if __name__ == "__main__":
    main()
2.利用多线程
main()
- 定义两个队列,并创建多个线程
- page_queue():存放每一页的url
- img_queue():存放每一页里面所有的表情的url
Producer()
- 从page_queue()队列中取出每一页的url,直到队列为空则break
- 用xpath提取出每一页的所有图片的url
- 把每个图片的url和名字存放到img_queue()队列里面
Consumer()
- 从img_queue()队列中取出图片的url和名字
- 下载保存
- 直到page_queue()和img_queue()两个队列都为空则break
import os
import re
import threading
from queue import Empty, Queue
from urllib import request

import requests
from lxml import etree

# Directory that downloaded images are written to.
IMAGE_DIR = 'C:/Users/Administrator/Desktop/images/'


class Producer(threading.Thread):
    """Worker that turns listing-page URLs into (image URL, file name) jobs."""

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
        'Referer': 'https://movie.douban.com/'
    }

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the page-URL source queue and the image-job target queue."""
        super(Producer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Parse pages until ``page_queue`` has nothing left."""
        while True:
            # get_nowait() instead of empty()+get(): another producer could
            # take the last URL between the two calls and leave this thread
            # blocked in get() forever.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            self.parse_page(url)

    def parse_page(self, url):
        """Fetch ``url`` and queue one ``(img_url, filename)`` job per image."""
        response = requests.get(url, headers=self.headers)
        text = response.text
        html = etree.HTML(text)
        imgs = html.xpath("//div[@class='page-content text-center']//img[@class!='gif']")
        for img in imgs:
            img_url = img.get('data-original')  # image address
            alt = img.get('alt')  # image title
            if not img_url or alt is None:
                # Skip elements missing an attribute rather than crash on None.
                continue
            # Remove special characters from the title.
            alt = re.sub(r'[??.,。!!*]', '', alt)
            # Extension of the image, including the dot (.gif, .jpg, ...).
            suffix = os.path.splitext(img_url)[1]
            # Full file name used when saving.
            filename = alt + suffix
            self.img_queue.put((img_url, filename))


class Consumer(threading.Thread):
    """Worker that downloads the images described by jobs in ``img_queue``."""

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store both queues; ``page_queue`` is kept for interface compatibility."""
        super(Consumer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Download jobs until a ``None`` sentinel is received.

        Checking "both queues empty" is not a reliable stop condition: the
        page queue is empty while producers are still parsing their last
        pages, so consumers could quit early or block forever on ``get()``.
        ``main`` therefore sends one ``None`` per consumer once all producers
        have finished.
        """
        while True:
            job = self.img_queue.get()
            if job is None:
                break
            img_url, filename = job
            request.urlretrieve(img_url, IMAGE_DIR + filename)
            print("已下载完一张图片")


def main():
    """Queue all listing pages, run 10 producers and 10 consumers to completion."""
    # Unbounded: every URL is queued before any worker starts, so a bounded
    # queue smaller than the page count would block here forever.
    page_queue = Queue()
    img_queue = Queue(10000)

    # urlretrieve does not create missing directories.
    os.makedirs(IMAGE_DIR, exist_ok=True)

    for x in range(1, 1758):
        url = 'http://www.doutula.com/photo/list/?page=%d' % x
        page_queue.put(url)

    producers = [Producer(page_queue, img_queue) for _ in range(10)]
    consumers = [Consumer(page_queue, img_queue) for _ in range(10)]
    for t in producers:
        t.start()
    for t in consumers:
        t.start()

    # Once every producer is done no more jobs can appear, so it is safe to
    # tell each consumer to stop after the remaining jobs are drained.
    for t in producers:
        t.join()
    for _ in consumers:
        img_queue.put(None)
    for t in consumers:
        t.join()


if __name__ == '__main__':
    main()