zoukankan      html  css  js  c++  java
  • scrapy 模拟登录

    import scrapy
    import urllib.request
    from scrapy.http import Request,FormRequest
    
    class LoginspdSpider(scrapy.Spider):
        """Spider that logs in to douban.com and prints the title of the landing page.

        Flow: fetch the login page, download the captcha image for the operator
        to solve when one is present, submit the login form, then print the
        ``<title>`` of the page the site redirects to.
        """

        name = "loginspd"
        allowed_domains = ["douban.com"]
        start_urls = ['http://douban.com/']
        # Browser-like User-Agent sent with every request this spider issues.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36 QIHU 360EE'}
        # Where the captcha image is saved so the operator can look at it.
        # Override on a subclass/instance when not running on Windows.
        captcha_path = 'D:/Captcha.png'

        def start_requests(self):
            """Request the login page inside cookie jar 1 instead of crawling start_urls."""
            return [Request('https://www.douban.com/accounts/login',
                            headers=self.headers,
                            meta={'cookiejar': 1},
                            callback=self.parse)]

        def parse(self, response):
            """Build and submit the login form found in ``response``.

            When the page contains a captcha image, the image is downloaded to
            ``captcha_path`` and the solution is read from stdin (this blocks
            until the operator answers).

            Returns a one-element list holding the login ``FormRequest``.
            """
            # Look up the captcha image URL.
            captcha = response.xpath('//img[@id="captcha_image"]/@src').extract()
            if captcha:
                print('有验证码')
                # urljoin leaves absolute URLs untouched and resolves relative ones
                # against the page URL, so urlretrieve always gets a full URL.
                captcha_url = response.urljoin(captcha[0])
                urllib.request.urlretrieve(captcha_url, filename=self.captcha_path)
                print('请查看本地验证码并输入:')
                captcha_value = input()
                data = {
                    'form_email':'83*****81@qq.com',
                    'form_password':'*****1',
                    'captcha-solution':captcha_value,
                    'redir':'https://www.douban.com/people/161282530/',
                }
            else:
                print('没有验证码')
                data = {
                    'form_email': '834****81@qq.com',
                    'form_password': '*****',
                    'redir': 'https://www.douban.com/people/161282530/',
                }
            print('登录中。。。。')

            # The 'cookiejar' meta key is not carried over automatically; it must be
            # passed on every request so the login POST reuses the cookies that were
            # set while fetching the login page (and its captcha).
            return [FormRequest.from_response(response,
                                              meta={'cookiejar': response.meta['cookiejar']},
                                              headers=self.headers,
                                              formdata=data,
                                              callback=self.next)]

        def next(self, response):
            """Print the ``<title>`` text of the page reached after submitting the login form."""
            print('此时已经登录完成并爬取个人中心数据')
            title = response.xpath('/html/head/title/text()').extract()
            print(title)
  • 相关阅读:
    返回到上一页的html代码的几种写法
    记一次网站服务器内存占用过多问题
    rpm命令数据库修复日志
    Linux vmstat命令实战详解
    innodb的innodb_buffer_pool_size和MyISAM的key_buffer_size
    mysql
    如何查看linux系统下的各种日志文件 linux 系统日志的分析大全
    /var/lock/subsys作用
    CentOS目录结构详解
    MySQL体系结构
  • 原文地址:https://www.cnblogs.com/Erick-L/p/6841417.html
Copyright © 2011-2022 走看看