引入:
- 相关的门户网站在进行登录的时候,如果用户连续登录失败的次数超过3次或者5次,就会在登录页中动态生成验证码,通过验证码达到分流和反爬的效果。
今日概要:
- 使用云打码平台识别验证码
云打码官网地址: http://www.yundama.com/
云打码使用流程:
注册:普通用户和开发者用户(两个都要注册)
登陆:
— 普通用户:查询余额
— 开发者用户:
- 创建一个软件:我的软件 --> 添加一个软件
- 下载示例代码:点击开发文档 -> 调用示例及最新的DLL -> 点击PythonHttp示例下载,即可下载
开发者用户下载PythonHttp示例使用:
1. 解压下载好的PythonHttp调用示例,其中包含三个文件:验证码示例图片、YDMHTTPDemo2.x.py(Python 2 版本)和 YDMHTTPDemo3.x.py(Python 3 版本)
2. 使用时,将验证码图片和 YDMHTTPDemo3.x.py 中的内容导入到项目即可使用
import http.client
import json
import mimetypes
import time
import urllib

import requests

######################################################################


class YDMHttp:
    """Minimal HTTP client for the Yundama captcha-recognition API.

    Every API method posts the account credentials plus a ``method`` field
    to ``apiurl`` and decodes the JSON reply.  Failures are reported as
    negative integer codes instead of exceptions: the server's own ``ret``
    value when it is negative, or ``-9001`` when the decoded reply is empty.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the account credentials; ``appid`` is kept as a string."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _fields(self, method, **extra):
        """Build the form fields shared by every API call, plus *extra*."""
        fields = {
            'method': method,
            'username': self.username,
            'password': self.password,
            'appid': self.appid,
            'appkey': self.appkey,
        }
        fields.update(extra)
        return fields

    def _call(self, method, key, files=None, **extra):
        """Run *method* and return ``response[key]`` or a negative error code."""
        response = self.request(self._fields(method, **extra), files)
        if not response:
            return -9001
        ret = response['ret']
        if ret and ret < 0:
            return ret
        return response[key]

    def request(self, fields, files=None):
        """Post *fields* (and optional *files*) and return the decoded JSON."""
        raw = self.post_url(self.apiurl, fields, {} if files is None else files)
        return json.loads(raw)

    def balance(self):
        """Return the account balance, or a negative error code."""
        return self._call('balance', 'balance')

    def login(self):
        """Return the account uid, or a negative error code."""
        return self._call('login', 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload the image at *filename*; return its cid or a negative code."""
        return self._call(
            'upload',
            'cid',
            files={'file': filename},
            codetype=str(codetype),
            timeout=str(timeout),
        )

    def result(self, cid):
        """Return the recognised text for *cid*, or '' if none is available."""
        response = self.request(self._fields('result', cid=str(cid)))
        if not response:
            return ''
        return response['text'] or ''

    def decode(self, filename, codetype, timeout):
        """Upload an image and poll once per second for its text.

        Returns ``(cid, text)`` on success, ``(error_code, '')`` when the
        upload failed, or ``(-3003, '')`` when no text arrived after
        *timeout* polls.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''
        for _ in range(timeout):
            text = self.result(cid)
            if text != '':
                return cid, text
            time.sleep(1)
        return -3003, ''

    def report(self, cid):
        """Report *cid* with ``flag=0``; return the server's ``ret`` code."""
        response = self.request(self._fields('report', cid=str(cid), flag='0'))
        return response['ret'] if response else -9001

    def post_url(self, url, fields, files=None):
        """POST *fields* to *url*, attaching the files named in *files*.

        *files* maps form-field names to file paths.  The paths are opened
        here and closed again once the request has completed; the caller's
        mapping is left untouched.
        """
        from contextlib import ExitStack

        with ExitStack() as stack:
            opened = {
                name: stack.enter_context(open(path, 'rb'))
                for name, path in (files or {}).items()
            }
            res = requests.post(url, files=opened, data=fields)
            return res.text


######################################################################

# User name (the regular user's account, not the developer account)
username = 'username'

# Password
password = 'password'

# Software ID, required for developer revenue sharing; shown under
# "My software" in the developer console.
appid = 1

# Software key, required for developer revenue sharing; shown under
# "My software" in the developer console.
appkey = '22cc5376925e9387a23cf797cb9ba745'

# Image file
filename = 'getimage.jpg'

# Captcha type, e.g. 1004 means 4 alphanumeric characters.  Types are priced
# differently; an inaccurate type lowers the recognition rate.  All types are
# listed at http://www.yundama.com/price.html
codetype = 1004

# Timeout in seconds
timeout = 60

# Refuse to run with the placeholder account
if (username == 'username'):
    print('请设置好相关参数再测试')
else:
    # Initialise the client
    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama
    uid = yundama.login()
    print('uid: %s' % uid)

    # Query the balance
    balance = yundama.balance()
    print('balance: %s' % balance)

    # Recognise: image path, captcha type id, timeout (seconds)
    cid, result = yundama.decode(filename, codetype, timeout)
    print('cid: %s, result: %s' % (cid, result))

######################################################################
云打码平台处理验证码的实现流程:
代码展示:
爬取人人网登录后的页面,需要处理验证码
import http.client, mimetypes, urllib, json, time, requests
######################################################################
class YDMHttp:
    """Minimal HTTP client for the Yundama captcha-recognition API.

    Every API method posts the account credentials plus a ``method`` field
    to ``apiurl`` and decodes the JSON reply.  Failures are reported as
    negative integer codes instead of exceptions: the server's own ``ret``
    value when it is negative, or ``-9001`` when the decoded reply is empty.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the account credentials; ``appid`` is kept as a string."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _fields(self, method, **extra):
        """Build the form fields shared by every API call, plus *extra*."""
        fields = {
            'method': method,
            'username': self.username,
            'password': self.password,
            'appid': self.appid,
            'appkey': self.appkey,
        }
        fields.update(extra)
        return fields

    def _call(self, method, key, files=None, **extra):
        """Run *method* and return ``response[key]`` or a negative error code."""
        response = self.request(self._fields(method, **extra), files)
        if not response:
            return -9001
        ret = response['ret']
        # A zero/absent-valued ``ret`` means success; only negatives are errors.
        if ret and ret < 0:
            return ret
        return response[key]

    def request(self, fields, files=None):
        """Post *fields* (and optional *files*) and return the decoded JSON."""
        raw = self.post_url(self.apiurl, fields, {} if files is None else files)
        return json.loads(raw)

    def balance(self):
        """Return the account balance, or a negative error code."""
        return self._call('balance', 'balance')

    def login(self):
        """Return the account uid, or a negative error code."""
        return self._call('login', 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload the image at *filename*; return its cid or a negative code."""
        return self._call(
            'upload',
            'cid',
            files={'file': filename},
            codetype=str(codetype),
            timeout=str(timeout),
        )

    def result(self, cid):
        """Return the recognised text for *cid*, or '' if none is available."""
        response = self.request(self._fields('result', cid=str(cid)))
        if not response:
            return ''
        return response['text'] or ''

    def decode(self, filename, codetype, timeout):
        """Upload an image and poll once per second for its text.

        Returns ``(cid, text)`` on success, ``(error_code, '')`` when the
        upload failed, or ``(-3003, '')`` when no text arrived after
        *timeout* polls.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''
        for _ in range(timeout):
            text = self.result(cid)
            if text != '':
                return cid, text
            time.sleep(1)
        return -3003, ''

    def report(self, cid):
        """Report *cid* with ``flag=0``; return the server's ``ret`` code."""
        response = self.request(self._fields('report', cid=str(cid), flag='0'))
        return response['ret'] if response else -9001

    def post_url(self, url, fields, files=None):
        """POST *fields* to *url*, attaching the files named in *files*.

        *files* maps form-field names to file paths.  The paths are opened
        here and closed again once the request has completed; the caller's
        mapping is left untouched.
        """
        from contextlib import ExitStack

        with ExitStack() as stack:
            opened = {
                name: stack.enter_context(open(path, 'rb'))
                for name, path in (files or {}).items()
            }
            res = requests.post(url, files=opened, data=fields)
            return res.text
def get_code_text(codeType, imgPath, timeout=20):
    """Recognise the captcha image at *imgPath* through the Yundama service.

    Logs in, prints the account uid and balance, submits the image for
    recognition, prints the outcome and returns the recognised text.

    The account settings can be overridden without editing this file via
    the environment variables ``YDM_USERNAME``, ``YDM_PASSWORD``,
    ``YDM_APPID`` and ``YDM_APPKEY``; the literals below are only fallbacks.

    Args:
        codeType: Yundama captcha type id (e.g. 1004 means 4 alphanumeric
            characters).  Types are priced differently and a wrong type
            lowers the recognition rate; all types are listed at
            http://www.yundama.com/price.html
        imgPath: Path of the captcha image file to upload.
        timeout: Seconds to wait for a recognition result.

    Returns:
        The recognised text, or '' when nothing was recognised or the
        account is still the 'username' placeholder.
    """
    import os

    # SECURITY NOTE: the fallback values are real-looking credentials
    # committed to source; prefer supplying them through the environment
    # and removing the literals.
    # User name and password of the regular (non-developer) account.
    username = os.environ.get('YDM_USERNAME', 'mwhshare')
    password = os.environ.get('YDM_PASSWORD', 'mwh@4598105')
    # Software id and key from "My software" in the developer console;
    # required for developer revenue sharing.
    appid = os.environ.get('YDM_APPID', 6596)
    appkey = os.environ.get('YDM_APPKEY', '515bcabfb89e3a824619a1d1c8b25f36')

    # Refuse to run with the placeholder account; return '' so callers
    # always get a string back instead of hitting an unbound local.
    if username == 'username':
        print('请设置好相关参数再测试')
        return ''

    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama
    uid = yundama.login()
    print('uid: %s' % uid)

    # Query the balance
    balance = yundama.balance()
    print('balance: %s' % balance)

    # Recognise: image path, captcha type id, timeout (seconds)
    cid, result = yundama.decode(imgPath, codeType, timeout)
    print('cid: %s, result: %s' % (cid, result))
    return result
import requests
from lxml import etree
from urllib import request
# Create one session and use it for EVERY request below.  A session stores
# the cookies it receives and sends them back on later requests, so the
# captcha that is downloaded belongs to the same server-side session as the
# login request that submits its text.
session = requests.session()

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

# 1. Load the renren.com login page and locate the captcha image.
url = 'http://www.renren.com/'
page_text = session.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
code_img_srcs = tree.xpath('//*[@id="verifyPic_login"]/@src')

if code_img_srcs:
    # Download the captcha through the session (not urlretrieve, which would
    # start a separate cookie-less session), save it next to this script and
    # have Yundama recognise it.
    code_img_src = code_img_srcs[0]
    with open("./code.jpg", "wb") as fp:
        fp.write(session.get(url=code_img_src, headers=headers).content)
    code = get_code_text(2004, './code.jpg')
else:
    # The page did not contain a captcha image, so there is nothing to solve.
    code = ''
# print(code)

# 2. Simulate the login.
# NOTE(review): the account e-mail, password hash and rkey below are
# hard-coded in source; move them to configuration before sharing this file.
login_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2019031446506'
data = {
    "email": "1696755793@qq.com",
    "icod": code,
    "origURL": "http://www.renren.com/home",
    "domain": "renren.com",
    "key_id": "1",
    "captcha_type": "web_login",
    "password": "4f1d552d7dc5ba646e93e653da9b06e5a24dceda905323a830e19f6352ae8bc0",
    "rkey": "9e75e8dc3457b14c55a74627fa64fb43",
    "f": ""
}
# On a successful login the cookies returned by the server are stored in the
# session.
response = session.post(url=login_url, headers=headers, data=data)

# 3. Request the logged-in user's profile page; because the request goes
# through the session it carries the login cookies.
detail_url = 'http://www.renren.com/969393866/profile'
page_text = session.get(url=detail_url, headers=headers).text
with open("./renren.html", "w", encoding="utf-8") as fp:
    fp.write(page_text)
print("下载完毕")