zoukankan      html  css  js  c++  java
  • 豆瓣网post 爬取带验证码

    # -*- coding: utf-8 -*-
    import scrapy
    import requests
    from ..bao.jiema import get_number
    
    # Base payload for the Douban login form POST. The spider adds the
    # captcha fields on top of these when the login page shows a captcha.
    # NOTE(review): the e-mail and password values are the Chinese words for
    # "account" and "password" -- presumably placeholders to be replaced with
    # real credentials before running.
    fromdata = {
        "source": "movie",
        "redir": "https://movie.douban.com/",
        "form_email": "账号",
        "form_password": "密码",
        "login": "登录",
    }
    
    class BanSpider(scrapy.Spider):
        """Spider that submits the Douban login form, including captcha fields.

        The login page is fetched first; if it contains a captcha image, the
        image is saved to ``a.jpg``, ``get_number()`` supplies the solution,
        and both the captcha id and solution are added to the POSTed form.
        """

        name = 'ban'
        # allowed_domains = ['ban']
        start_urls = ['https://www.douban.com/accounts/login?source=movie']

        def parse(self, response):
            """Build and submit the login form for the fetched login page.

            Args:
                response: The Scrapy response for the login page.

            Yields:
                A ``scrapy.FormRequest`` POSTing the login form back to the
                same URL, with ``after_login`` as its callback.

            Raises:
                requests.RequestException: If the captcha image cannot be
                    downloaded (network error, timeout, or HTTP error status).
            """
            print(response.url)
            captcha_src = response.xpath('//*[@id="captcha_image"]/@src').extract_first()
            # Positional XPath copied from the page layout; it breaks silently
            # (yields None) if the form structure changes.
            captcha_id = response.xpath('//*[@id="lzform"]/div[5]/div/div/input[2]/@value').extract_first()

            # Work on a per-response copy so captcha fields from one attempt
            # never leak into the shared module-level dict.
            login_form = dict(fromdata)

            if captcha_src:
                # Resolve relative / protocol-relative image URLs against the
                # page URL, and download *before* touching a.jpg so a failed
                # request does not leave an empty or stale-looking file behind.
                captcha = requests.get(url=response.urljoin(captcha_src), timeout=10)
                captcha.raise_for_status()
                with open('a.jpg', 'wb') as f:
                    f.write(captcha.content)

                login_form['captcha-id'] = captcha_id
                # NOTE(review): get_number() takes no arguments; presumably it
                # reads a.jpg written above and returns the solved text --
                # confirm against bao/jiema.
                login_form["captcha-solution"] = get_number()

            # NOTE(review): this prints the full form, password included.
            print(login_form)
            yield scrapy.FormRequest(response.url, formdata=login_form, callback=self.after_login)

        def after_login(self, response):
            """Print the text extracted from the global navigation bar.

            Args:
                response: The Scrapy response returned by the login POST.
            """
            all_title = response.xpath('//*[@id="db-global-nav"]/div/div[1]/ul/li[2]/a/span[1]/text()').extract()
            print(all_title)
  • 相关阅读:
    js 几个特殊情况
    Oracle 常用操作
    SqlServer性能检测和优化工具使用详细
    Fiddler
    JMeter性能测试,完整入门篇
    asp.net缓存
    Quartz.net开源作业调度框架使用详解
    SQL 存储过程
    .net 分布式锁
    全国省市区数据库
  • 原文地址:https://www.cnblogs.com/gaosai/p/9825285.html
Copyright © 2011-2022 走看看