zoukankan      html  css  js  c++  java
  • python Scrapy google trends

    # -*- coding: utf-8 -*-
    import scrapy,json
    from urllib import parse
    
    class GoogleTrendsSpider(scrapy.Spider):
        """Scrape Google Trends interest-over-time data as CSV.

        Two-step flow mirroring the Trends web UI (adapted from pytrends):
          1. hit the ``explore`` endpoint to obtain per-widget tokens;
          2. use the TIMESERIES widget's token to download the CSV.
        """
        name = 'google_trends'
        allowed_domains = ['google.com']
        # Endpoint that returns the widget tokens.
        GENERAL_URL = 'https://trends.google.com/trends/api/explore?{}'
        # Endpoint that returns the interest-over-time CSV for a keyword.
        INTEREST_OVER_TIME_URL = 'https://trends.google.com/trends/api/widgetdata/multiline/csv?{}'
        # Enable the random user-agent and random proxy middlewares.
        custom_settings = {
            'DOWNLOADER_MIDDLEWARES': {
                'blockchain.middlewares.RandomUserAgent': 390,
                'blockchain.middlewares.RandomProxy': 544,
            },
            # 'COOKIES_ENABLED' : False
            'DOWNLOAD_DELAY': 1,
        }

        def start_requests(self):
            """Issue the initial ``explore`` request that yields widget tokens.

            CSV download parameters observed on the Trends page:
                'keyword': the search term,
                'time': 'now 7-d' (last 7 days),
                'geo': '' (worldwide).
            """
            req = {
                'comparisonItem': [{'keyword': '关键字', 'time': 'now 7-d', 'geo': ''}],
                'category': 0,
            }
            # Plain str values produce the same urlencode() output as the
            # original mixed bytes/str payload, but are easier to read.
            token_payload = {
                'hl': 'en-US',
                'tz': '-480',
                'req': json.dumps(req),
                'property': '',
            }
            url = self.GENERAL_URL.format(parse.urlencode(token_payload))
            # BUG FIX: the original appended to an undefined list `reqs` and
            # read `row.id` from an undefined `row` (leftover of a stripped
            # DB-row loop), crashing with NameError before any request was
            # sent.  Yield the request directly instead.
            # TODO(review): wire a real coin id here once the DB-row source
            # is restored; None is a placeholder.
            yield scrapy.Request(url=url, callback=self.parse_token,
                                 meta={'item': {'coin_id': None}})

        def parse_token(self, response):
            """Extract the TIMESERIES widget token and request its CSV."""
            # The API prepends a ")]}'" anti-JSON-hijacking prefix; the
            # first 4 characters are stripped before parsing (same trim as
            # pytrends uses for this endpoint).
            body_obj = json.loads(response.body.decode('utf-8')[4:])
            for widget in body_obj['widgets']:
                if widget['id'] == 'TIMESERIES':
                    params = {
                        'tz': '-480',
                        'req': json.dumps(widget['request']),
                        'token': widget['token'],
                    }
                    url = self.INTEREST_OVER_TIME_URL.format(parse.urlencode(params))
                    # Propagate the item collected so far to the CSV callback.
                    yield scrapy.Request(url=url, callback=self.parse_row,
                                         meta={'item': response.meta['item']})

        def parse_row(self, response):
            """Print the downloaded interest-over-time CSV body."""
            bodytext = response.body.decode('utf-8')
            print(bodytext)
    

  Adapted from: pytrends

  • 相关阅读:
    awk应用
    字符串应用,expect预期交互,数组,正则表达式
    for,while循环,case分支,shell函数
    数值运算,if结构
    shell基础应用,变量的扩展应用
    rsync基本用法与配置,split分离解析
    PXE自动装机
    配置DNS服务器
    进程查看,终止
    应用技巧,vim用法,编译安装软件包
  • 原文地址:https://www.cnblogs.com/qy-brother/p/9003844.html
Copyright © 2011-2022 走看看