0、检测IP是否可用
# -*- coding: UTF-8 -*-
"""Check whether a proxy IP is usable.

Fetches a "what is my IP" page through the configured proxy and prints the
returned HTML. If the request cannot be completed, the script exits with an
error message instead of a traceback.
"""
from urllib import request

# Page fetched through the proxy.
# Alternative target: 'http://2017.ip138.com/ic.asp'
URL = 'http://www.whatismyip.com.tw'

# Proxy under test, as "host:port".
PROXY_ADDRESS = '218.26.217.77:3128'

# Browser-like User-Agent sent with the request.
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36')

# Seconds to wait before giving up on an unresponsive proxy.
TIMEOUT = 10


def build_proxy_map(address):
    """Return a urllib proxy mapping that routes http and https via *address*.

    urllib picks the proxy by the scheme of the requested URL, so both
    schemes are listed; a mapping with only 'https' is silently ignored for
    an 'http://' URL and the request would bypass the proxy.
    """
    return {'http': address, 'https': address}


def fetch_via_proxy(url, address, timeout=TIMEOUT):
    """Fetch *url* through the proxy at *address* and return the decoded body.

    Args:
        url: Page to request.
        address: Proxy as "host:port".
        timeout: Seconds before the request is abandoned.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        OSError: If the proxy or target cannot be reached (urllib's URLError
            and socket timeouts are OSError subclasses).
    """
    proxy_support = request.ProxyHandler(build_proxy_map(address))
    opener = request.build_opener(proxy_support)
    # No explicit Host header: urllib derives it from the requested URL.
    opener.addheaders = [('User-Agent', USER_AGENT)]
    # Use the opener directly instead of installing it process-wide, and
    # close the response deterministically.
    with opener.open(url, timeout=timeout) as response:
        return response.read().decode("utf-8")


if __name__ == "__main__":
    try:
        html = fetch_via_proxy(URL, PROXY_ADDRESS)
    except OSError as exc:
        raise SystemExit("Proxy %s is not usable: %s" % (PROXY_ADDRESS, exc))
    print(html)
1、在 middlewares.py 中添加如下代码,找到可用的代理 IP 后填入 IPPOOL 列表【找不到可以去淘宝买】
# -*- coding: utf-8 -*-
"""Downloader middlewares that pick a random proxy / User-Agent per request."""
import logging
import random

# Base class for the proxy-pool middleware
from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
# Base class for the User-Agent middleware
from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware

logger = logging.getLogger(__name__)

# Proxy pool: each entry is a dict with an "ipaddr" key holding "host:port".
IPPOOL = [
    {"ipaddr": "182.117.102.10:8118"},
    {"ipaddr": "121.31.102.215:8123"},
    # NOTE(review): "1222" is not a valid IPv4 octet, so this entry can never
    # work as written; replace it with a real proxy address.
    {"ipaddr": "1222.94.128.49:8118"},
]

# User-Agent pool: plain header strings.
UPPOOL = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
]


class HTTPPROXY(HttpProxyMiddleware):
    """Assign a randomly chosen proxy from IPPOOL to every request."""

    # Initialisation; note the parameter must keep the default ip=''
    def __init__(self, ip=''):
        self.ip = ip

    def process_request(self, request, spider):
        """Set ``request.meta["proxy"]`` to a random pool entry.

        The request is left untouched when the pool is empty or the chosen
        entry is malformed. Always returns None.
        """
        if not IPPOOL:
            # random.choice raises IndexError on an empty sequence.
            logger.warning("IPPOOL is empty; request sent without a proxy")
            return None
        entry = random.choice(IPPOOL)
        try:
            proxy_url = "http://" + entry["ipaddr"]
        except (KeyError, TypeError) as exc:
            # Best effort: a bad entry must not abort the request.
            logger.warning("Skipping malformed proxy entry %r: %s", entry, exc)
            return None
        logger.info("当前的IP是:%s", entry["ipaddr"])
        request.meta["proxy"] = proxy_url
        return None


class USERAGENT(UserAgentMiddleware):
    """Assign a randomly chosen User-Agent from UPPOOL to every request."""

    # Initialisation; note the parameter must keep the default user_agent=''
    def __init__(self, user_agent=''):
        self.user_agent = user_agent

    def process_request(self, request, spider):
        """Set the User-Agent header unless the request already carries one.

        Falls back to ``self.user_agent`` (when non-empty) if the pool is
        empty. Always returns None.
        """
        if not UPPOOL:
            # random.choice raises IndexError on an empty sequence.
            if self.user_agent:
                request.headers.setdefault('User-Agent', self.user_agent)
            return None
        user_agent = random.choice(UPPOOL)
        logger.info("当前的User-Agent是:%s", user_agent)
        # setdefault keeps a User-Agent already set on the request.
        request.headers.setdefault('User-Agent', user_agent)
        return None
2、在 settings.py 中添加以下代码(注意根据项目名修改指向,如这里的工程名是“demo”)
# Downloader middleware configuration: maps each middleware's import path to
# its order value, or to None to disable it.
DOWNLOADER_MIDDLEWARES = {
    # To rotate proxies as well, uncomment the two entries below:
    # 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 123,
    # 'demo.middlewares.HTTPPROXY': 125,
    # Disable the built-in User-Agent middleware; the custom one replaces it.
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'demo.middlewares.USERAGENT': 1,  # "demo" is the project name
}