zoukankan      html  css  js  c++  java
  • scrapy项目renrencookie

    # -*- coding: utf-8 -*-
    import scrapy
    import re
    
    class RenrenSpider(scrapy.Spider):
        """Fetch Renren pages that are only served to a logged-in session.

        The first request carries a cookie string copied from a logged-in
        browser session; the follow-up requests are issued without an explicit
        ``cookies`` argument and print whether expected markers occur in the
        returned pages.
        """

        name = 'renren'
        allowed_domains = ['renren.com']
        # Personal Renren profile page; requesting it directly (without a
        # logged-in session) does not succeed.
        start_urls = ['http://www.renren.com/969574735/profile']

        @staticmethod
        def _cookie_header_to_dict(header):
            """Convert a ``"k1=v1; k2=v2"`` cookie string into a dict.

            Each pair is split on the first ``=`` only, so values that contain
            ``=`` themselves are kept intact. Fragments that are empty or have
            no ``=`` are skipped instead of raising ``IndexError``.
            """
            jar = {}
            for fragment in header.split(";"):
                key, sep, value = fragment.strip().partition("=")
                if key and sep:
                    jar[key] = value
            return jar

        def start_requests(self):
            """Yield the initial profile request with the session cookies attached."""
            # Cookie string taken from the logged-in home page, converted to a dict below.
            # NOTE(review): this is a hard-coded session credential; it will
            # expire and should not live in source control -- consider loading
            # it from a setting or environment variable.
            cookies = "anonymid=jrg1i50g-7eexp6; depovince=GW; _r01_=1; JSESSIONID=abcj8H8oNBjFffnOJDtIw; ick_login=28035622-0567-4fce-84e7-d2288596f877; ick=e87f9e28-7f1c-4695-a9d5-fc686c74327c; t=11361863ad1785a9ff8498bf8234feee5; societyguester=11361863ad1785a9ff8498bf8234feee5; id=969574735; xnsid=6c889a8c; XNESSESSIONID=7c09723b7d13; jebecookies=0ddc7c08-3ad5-4317-ac36-836ee3c863d4|||||; ver=7.0; loginfrom=null; jebe_key=bf038be3-3e55-4c02-8325-1e6a7938dc70%7Cbb223f6876cd2eb89db3a027b645f7ee%7C1548662439565%7C1%7C1548662439242; wp_fold=0"
            cookies = self._cookie_header_to_dict(cookies)
            yield scrapy.Request(
                self.start_urls[0],
                callback=self.parse,
                cookies=cookies
            )

        def parse(self, response):
            """Print a login marker for the profile page and request two more pages."""
            print("===========")
            print(re.findall("陈陈", response.body.decode()))
            # No cookies are passed explicitly on the follow-up requests.
            # NOTE(review): presumably Scrapy's cookie middleware re-sends the
            # session cookies when COOKIES_ENABLED is True -- confirm.
            yield scrapy.Request(
                "http://www.renren.com/969574735/profile?v=info_timeline",  # personal info page
                callback=self.parse_detail
            )
            yield scrapy.Request(
                "http://friend.renren.com/managefriends",  # friends page
                callback=self.parse_detail
            )

        def parse_detail(self, response):
            """Print which of the expected markers occur in a follow-up page."""
            # Decode once and reuse the text for both searches.
            page = response.body.decode()
            print(re.findall("陈陈", page))
            print("++++++++")
            print(re.findall("公司全称", page))

    settings.py 中的相关配置:

    # Show the cookies being passed along in the terminal output; this
    # requires COOKIES_ENABLED = True.
    COOKIES_DEBUG = True
    
    COOKIES_ENABLED = True

     项目地址:https://github.com/CH-chen/renrencookie

  • 相关阅读:
    jQuery学习(三)
    HTML基础
    对于跨域问题的解决
    Spring boot 默认静态资源路径与手动配置访问路径
    json:java中前台向后台传对象数据
    javascrit常用互动方法
    java IO流
    HTML中data* 属性
    java中一些对象(po,vo,dao,pojo)等的解释
    使用mybatis generator插件,自动生成dao、dto、mapper等文件
  • 原文地址:https://www.cnblogs.com/chvv/p/10332466.html
Copyright © 2011-2022 走看看