zoukankan      html  css  js  c++  java
  • 爬虫之打码平台(超级鹰)破解验证码等相关内容-136

    爬拉勾网职位信息



    #https://www.lagou.com/jobs/positionAjax.json?city=%E4%B8%8A%E6%B5%B7&needAddtionalResult=false
    import requests
    # URL that is actually scraped (the Ajax endpoint behind the search page).
    url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'

    # Form fields posted with the search request.
    payload = {
        'first': 'true',
        'pn': '1',
        'kd': 'python',
    }

    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
    }

    # Ordinary search page; it is requested first only to collect cookies.
    urls = 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='

    # The session keeps the cookies set by the search page and sends them
    # automatically with the following POST, so they are not passed by hand.
    # The `with` block releases the session's pooled connections at the end.
    with requests.Session() as s:
        s.get(urls, headers=header, timeout=3)
        # Cookies collected by the request above (kept for inspection).
        cookie = s.cookies
        # Body of the Ajax response as text.
        response = s.post(url, data=payload, headers=header, timeout=5).text
    print(response)

    爬取红楼梦小说

    2 爬红楼梦小说

    #http://www.shicimingju.com/book/hongloumeng.html

    import requests

    from bs4 import BeautifulSoup
    # Fetch the table-of-contents page. The timeout keeps a stalled server from
    # blocking the script forever, and HTTP errors are raised right away instead
    # of surfacing later as a failed element lookup.
    ret = requests.get('https://www.shicimingju.com/book/hongloumeng.html', timeout=10)
    ret.raise_for_status()

    soup = BeautifulSoup(ret.text, 'lxml')
    # Every <li> below the "book-mulu" element carries the link to one chapter.
    li_list = soup.find(class_='book-mulu').find('ul').find_all('li')
    with open('hlm.txt', 'w', encoding='utf-8') as f:
        for li in li_list:
            link = li.find('a')  # looked up once, used for both title and href
            content = link.text
            url = 'https://www.shicimingju.com' + link.get('href')

            # Chapter title followed by a separator.
            f.write(content)
            f.write(' ')

            # Download the chapter page and append the text of its
            # "chapter_content" element.
            res_content = requests.get(url, timeout=10)
            res_content.raise_for_status()
            soup2 = BeautifulSoup(res_content.text, 'lxml')
            content_detail = soup2.find(class_='chapter_content').text
            f.write(content_detail)
            f.write(' ')
            print(content, '写入了')

    爬取肯德基门店

    3 爬肯德基门店

    # http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword
    import requests

    # Browser-like User-Agent sent with the request.
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    }
    # Form fields of the store search.
    data = dict(
        cname='',
        pid=20,
        keyword='浦东',
        pageIndex=1,
        pageSize=10,
    )
    store_api = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    ret = requests.post(store_api, headers=header, data=data)
    # Decode the JSON body and show it.
    stores = ret.json()
    print(stores)

    爬取糗事百科

    4 爬糗事百科段子

    #https://www.qiushibaike.com/text/page/2/
    import requests
    from bs4 import BeautifulSoup
    # Download one listing page and parse it with the built-in HTML parser.
    ret = requests.get('https://www.qiushibaike.com/text/page/2/')
    soup = BeautifulSoup(ret.text, 'html.parser')

    # Print the text of the "content" element found inside every element with
    # class "article", each followed by a separator line.
    article_list = soup.find_all(class_='article')
    for post in article_list:
        body = post.find(class_='content')
        print(body.text, '-------', sep='\n')
       print('-------')

    1 打码平台使用

    1 不花钱破解验证码:数字、字母组合的验证码 ---》 识别率一般
    2 花钱 ---》 复杂的验证码 ---》 打码平台
       - 云打码
       - 超级鹰(以它为例,破解12306的验证码)
       

    2 selenium登录获得cookie,使用requests发送请求



    # from selenium import webdriver
    # import time
    import json
    # bro=webdriver.Chrome(executable_path='chromedriver.exe')
    #
    # bro.get('https://dig.chouti.com/')
    # bro.maximize_window() # 最大化
    # bro.implicitly_wait(10)
    # login_btn=bro.find_element_by_link_text('登录')
    # login_btn.click()
    #
    # name_input=bro.find_element_by_name('phone')
    # password_input=bro.find_element_by_name('password')
    # name_input.send_keys('18953675221')
    # password_input.send_keys('lqz123')
    #
    # login_real_btn=bro.find_element_by_css_selector('button.login-btn')
    # login_real_btn.click()
    #
    # # 可能有验证码
    # time.sleep(10)
    #
    # cookie=bro.get_cookies()
    #
    # print(cookie)
    # with open('cookie.json','w') as f:
    #     json.dump(cookie,f)
    #
    #
    # print('cookie写入到文件中了')






    # 自动点赞功能
    import requests
    from requests.cookies import RequestsCookieJar
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
        'Referer': 'https://dig.chouti.com/',
        # A 'Cookie' header copied from a browser could be set here instead of
        # using the cookie jar built below.
    }

    # Fetch the hot list and decode its JSON body a single time.
    res = requests.get('https://dig.chouti.com/link/hot', headers=header)
    hot = res.json()
    print(hot)

    # Fill a cookie jar from cookie.json, which is read as a list of dicts
    # having 'name' and 'value' keys.
    jar = RequestsCookieJar()
    with open('cookie.json', 'r') as f:
        for cookie in json.load(f):
            jar.set(cookie['name'], cookie['value'])

    # Send one vote request per entry of the hot list. The cookies are sent
    # along because the vote request does not work without them.
    for item in hot['data']:
        link_id = item['id']  # not called `id`, which would shadow the builtin
        print(link_id)
        # A separate name keeps the hot-list response `res` intact.
        vote_res = requests.post(
            'https://dig.chouti.com/link/vote',
            headers=header,
            cookies=jar,
            data={'linkId': link_id},
        )
        print(vote_res.text)

    3 自动登录12306

    from selenium import webdriver
    import time
    # import json
    # bro=webdriver.Chrome(executable_path='./chromedriver')
    # bro.get('https://dig.chouti.com/')
    # # time.sleep(20)
    # cookie=bro.get_cookies()
    # print(cookie)
    # with open('cookie.json','w') as f:
    #     json.dump(cookie,f)




    import requests
    import json
    from requests.cookies import RequestsCookieJar
    # The cookies are handled through a cookie-jar object: read the saved
    # entries first, then copy each name/value pair into the jar.
    with open("cookie.json", "r") as fp:
        cookies = json.load(fp)
    jar = RequestsCookieJar()
    for cookie in cookies:
        jar.set(cookie['name'], cookie['value'])

    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36',
        'Referer': 'http://dig.chouti.cc/',
    }
    # Form field naming the link that receives the vote.
    data = dict(linkId=30268402)
    vote_url = 'http://dig.chouti.cc/link/vote'
    res = requests.post(vote_url, data=data, headers=header, cookies=jar)
    print(res.text)

    from selenium import webdriver
    import time
    #pillow
    from PIL import Image

    # 引入超级鹰

    from chaojiying import Chaojiying_Client


    from selenium.webdriver import ActionChains
    bro = webdriver.Chrome(executable_path='./chromedriver.exe')
    bro.implicitly_wait(10)
    try:
        bro.get('https://kyfw.12306.cn/otn/resources/login.html')
        bro.maximize_window()  # maximise the window
        # Click the link inside the ".login-hd-account" element.
        button_z = bro.find_element_by_css_selector('.login-hd-account a')
        button_z.click()
        time.sleep(2)
        # Screenshot of the whole page.
        bro.save_screenshot('./main.png')
        # Position and size of the captcha image.
        img_t = bro.find_element_by_id('J-loginImg')
        print(img_t.size)
        print(img_t.location)

        size = img_t.size
        location = img_t.location

        # Crop box (left, upper, right, lower) of the captcha in the screenshot.
        img_tu = (
            int(location['x']),
            int(location['y']),
            int(location['x'] + size['width']),
            int(location['y'] + size['height']),
        )
        # Cut the captcha out of the screenshot; the `with` block closes the
        # screenshot file once the cropped image has been saved.
        with Image.open('./main.png') as img:
            fram = img.crop(img_tu)
            fram.save('code.png')

        # Let Chaojiying solve the captcha.
        # NOTE(review): account data is hard-coded here and below; consider
        # loading it from the environment instead.
        chaojiying = Chaojiying_Client('306334678', 'lqz12345', '903641')  # third value: software ID from the user centre
        # Read the cropped captcha and close the file right away.
        with open('code.png', 'rb') as code_file:
            im = code_file.read()

        # 9004 is presumably the captcha type code - confirm in the Chaojiying docs.
        res = chaojiying.PostPic(im, 9004)
        print(res)
        result = res['pic_str']

        # The answer holds one or more "x,y" points joined by '|', e.g.
        # "260,133|123,233" -> [[260, 133], [123, 233]]. Splitting on '|' also
        # covers the single-point case, so no separate branch is needed.
        all_list = []
        for point in result.split('|'):
            x, y = point.split(',')[:2]
            all_list.append([int(x), int(y)])
        print(all_list)

        # Click every point, given as an offset relative to the captcha image.
        for x, y in all_list:
            ActionChains(bro).move_to_element_with_offset(img_t, x, y).click().perform()
            time.sleep(1)

        username = bro.find_element_by_id('J-userName')
        username.send_keys('306334678')
        password = bro.find_element_by_id('J-password')
        password.send_keys('lqz12345')
        time.sleep(3)
        submit_login = bro.find_element_by_id('J-login')
        submit_login.click()
        time.sleep(3)

        print(bro.get_cookies())
        time.sleep(10)
        bro.get('https://www.12306.cn/index/')
        time.sleep(5)

    except Exception as e:
        print(e)
    finally:
        # quit() ends the whole driver session; close() would only close the
        # current window and leave the driver running.
        bro.quit()

     

  • 相关阅读:
    scp命令
    遇到的错误解决方法
    阿里云挂载数据盘
    正则表达式
    python例子三
    Linux shell快捷键
    《超级产品的本质:汽车大王亨利福特自传》书评
    学习嵌入式的一点建议【转】
    win7使用USB转串口连接mini2440方法
    吐血原创:mini2440和win7笔记本利用无路由功能的交换机共享上网(使用x-router软路由)
  • 原文地址:https://www.cnblogs.com/usherwang/p/14470878.html
Copyright © 2011-2022 走看看