zoukankan      html  css  js  c++  java
  • scrapy 命令行传参 以及发送post请求payload参数


    class SciencedirectspiderSpider(scrapy.Spider):
        """Spider that searches ScienceDirect using command-line arguments.

        The search term and the year are supplied through Scrapy's ``-a``
        option, for example ``-a "search=kidney stone" -a "year=2019"``.
        """

        name = 'sciencedirectspider'
        allowed_domains = ['sciencedirect.com']
        # start_requests() is overridden below and does not read this list.
        start_urls = ['https://www.sciencedirect.com/search?qs=kidney%20stone']

        def __init__(self, year='', search='', **kwargs):
            """Store the spider arguments and build the search URL.

            Args:
                year: Value placed in the ``years`` query parameter.
                search: Value placed in the ``qs`` query parameter.
                **kwargs: Any further spider arguments; forwarded to
                    ``scrapy.Spider.__init__`` instead of being dropped.
            """
            # Forward the remaining keyword arguments so that additional
            # ``-a key=value`` options still reach the base class.
            super().__init__(**kwargs)
            self.year = year
            self.search = search
            self.urls = (
                'https://www.sciencedirect.com/search?qs=' + search
                + '&years=' + year + '&sortBy=date'
            )
            # NOTE(review): `chorme_options` (sic) is defined outside this
            # block; the spelling must match that definition.
            self.browser = webdriver.Chrome(chrome_options=chorme_options)

        def start_requests(self):
            """Yield the single search request built from the spider arguments.

            The request is handled by ``self.page`` (not defined in this
            block) and carries the search URL in ``meta['url']``.
            """
            # XPath kept from the author's notes:
            # //*[@id="srp-pagination"]/li[1]/text()[4]
            yield scrapy.Request(self.urls, callback=self.page, meta={'url': self.urls})

    执行命令:scrapy crawl sciencedirectspider --nolog -a "search=kidney stone" -a "year=2019"

    ** 注意:每个参数都要单独使用一个 -a 选项(一个 -a 只能传一个参数)

    main执行语句:

    from scrapy.cmdline import execute
    # Spider arguments: every key=value pair gets its own '-a' flag.
    spider_args = ['-a', 'search=kidney stone', '-a', 'year=2019']
    # '--nolog' suppresses log output; leave out spider_args to run the
    # spider without any arguments.
    execute(['scrapy', 'crawl', 'sciencedirectspider', '--nolog'] + spider_args)

     发送 POST 请求并携带 payload(JSON 请求体)参数

    class IeeexplorespiderSpider(scrapy.Spider):
        """Spider that POSTs a JSON search payload to the IEEE Xplore REST endpoint."""

        name = 'ieeexplorespider'
        allowed_domains = ['ieeexplore.ieee.org']
        start_urls = ['http://ieeexplore.ieee.org/']
        # Headers sent with the POST request; the body is JSON, hence the
        # explicit Content-Type.
        headers = {
            "Content-Type": "application/json",
            "Host": "ieeexplore.ieee.org",
            "Origin": "https://ieeexplore.ieee.org",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
        }

        def start_requests(self):
            """Yield one POST request whose body is the JSON-encoded search payload."""
            # Search page URL kept from the author's notes, for reference:
            # https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText=Security%20Analytics
            search_endpoint = "https://ieeexplore.ieee.org/rest/search"
            payload = {
                "highlight": True,
                "matchPubs": True,
                "newsearch": True,
                "pageNumber": "1",
                "queryText": "Security Analytics",
                "returnFacets": ["ALL"],
                "returnType": "SEARCH"
            }

            # The payload is serialized by hand and passed as the raw request body.
            yield scrapy.Request(
                url=search_endpoint,
                body=json.dumps(payload),
                method='POST',
                callback=self.parse,
                headers=self.headers,
            )

        def parse(self, response):
            """Print a marker followed by the response body text."""
            print(123)
            print(response.text)
  • 相关阅读:
    1.23学习总结:文件流
    vue-router重写push方法,解决相同路径跳转报错,解决点击菜单栏打开外部链接
    手把手教Electron+vue,打包vue项目,打包成桌面程序。
    后台获取的map集合封装json
    VUE同级组件之间方法调用
    字节跳动今日头条-抖音小程序html富文本显示解决办法
    别总写代码,这130个网站比涨工资都重要
    vue 组件之间的自定义方法互相调用
    swiper轮播图出现疯狂抖动(小程序)
    vue通过地址下载文件
  • 原文地址:https://www.cnblogs.com/wukai66/p/13306954.html
Copyright © 2011-2022 走看看