zoukankan      html  css  js  c++  java
  • python Scrapy google trends

    # -*- coding: utf-8 -*-
    import scrapy,json
    from urllib import parse
    
    class GoogleTrendsSpider(scrapy.Spider):
        """Spider that fetches Google Trends "interest over time" CSV data.

        Flow:
            1. ``start_requests`` queries the explore endpoint for each search
               term to obtain the widget descriptions (which carry tokens).
            2. ``parse_token`` picks the ``TIMESERIES`` widget and requests the
               CSV endpoint with that widget's token and request payload.
            3. ``parse_row`` receives the CSV text.
        """

        name = 'google_trends'
        allowed_domains = ['google.com']
        # Endpoint used to obtain the widget tokens.
        GENERAL_URL = 'https://trends.google.com/trends/api/explore?{}'
        # Endpoint used to download the keyword CSV.
        INTEREST_OVER_TIME_URL = 'https://trends.google.com/trends/api/widgetdata/multiline/csv?{}'
        # Enable the user-agent and proxy middlewares.
        custom_settings = {
            'DOWNLOADER_MIDDLEWARES': {
                'blockchain.middlewares.RandomUserAgent': 390,
                'blockchain.middlewares.RandomProxy': 544,
            },
            # 'COOKIES_ENABLED' : False
            'DOWNLOAD_DELAY': 1,
        }
        # Terms to query, as (coin_id, keyword) pairs. The coin_id is carried
        # through the request chain in meta['item']. Override this attribute
        # (e.g. in a subclass) to supply real identifiers and keywords; the
        # default is the single placeholder keyword.
        SEARCH_TERMS = [(None, '关键字')]

        def start_requests(self):
            """Build one explore (token) request per entry in ``SEARCH_TERMS``.

            The explore payload mirrors the parameters of the CSV download URL
            taken from the Trends page: 'keyword', 'time' ('now 7-d') and
            'geo' ('').

            Returns:
                list: ``scrapy.Request`` objects handled by ``parse_token``,
                each carrying ``meta['item'] == {'coin_id': <coin_id>}``.
            """
            requests = []
            for coin_id, keyword in self.SEARCH_TERMS:
                explore_request = {
                    'comparisonItem': [
                        {'keyword': keyword, 'time': 'now 7-d', 'geo': ''},
                    ],
                    'category': 0,
                }
                token_payload = {
                    'hl': 'en-US',
                    'tz': '-480',
                    # json.dumps escapes non-ASCII characters by default, so the
                    # encoded query string is plain ASCII.
                    'req': json.dumps(explore_request),
                    'property': '',
                }
                url = self.GENERAL_URL.format(parse.urlencode(token_payload))
                requests.append(
                    scrapy.Request(
                        url=url,
                        callback=self.parse_token,
                        meta={'item': {'coin_id': coin_id}},
                    )
                )
            return requests

        def parse_token(self, response):
            """Parse the explore response and request the time-series CSV.

            Args:
                response: Response of the explore endpoint. Its body is decoded
                    as UTF-8 and the first 4 characters are skipped before JSON
                    parsing (presumably a non-JSON prefix added by the
                    service — confirm against a live response).

            Yields:
                scrapy.Request: one CSV request per widget whose id is
                'TIMESERIES', forwarding ``meta['item']`` unchanged.
            """
            explore_data = json.loads(response.body.decode('utf-8')[4:])
            for widget in explore_data['widgets']:
                if widget['id'] != 'TIMESERIES':
                    continue
                params = {
                    'tz': '-480',
                    'req': json.dumps(widget['request']),
                    'token': widget['token'],
                }
                url = self.INTEREST_OVER_TIME_URL.format(parse.urlencode(params))
                yield scrapy.Request(
                    url=url,
                    callback=self.parse_row,
                    meta={'item': response.meta['item']},
                )

        def parse_row(self, response):
            """Handle the CSV response: decode it as UTF-8 and print the text."""
            bodytext = response.body.decode('utf-8')
            print(bodytext)
    

      抄自:pytrends

  • 相关阅读:
    【JZOJ3188】找数【数论,数学】
    【JZOJ3187】的士【模拟】
    【JZOJ3187】的士【模拟】
    【洛谷P1641】生成字符串【数论,数学】
    【洛谷P1896】互不侵犯【状压dp】
    聚集索引与非聚集索引
    哈希索引
    索引能提高检索速度,降低维护速度
    MySQL索引基本知识
    注解
  • 原文地址:https://www.cnblogs.com/qy-brother/p/9003844.html
Copyright © 2011-2022 走看看