zoukankan      html  css  js  c++  java
  • python爬虫 -掘金

    import json
    from time import sleep
    
    import requests
    
    # Endpoint that every feed request is POSTed to.
    url = "https://web-api.juejin.im/query"

    # Headers sent with each request; the body is JSON, hence the Content-Type.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/79.0.3945.88 Safari/537.36"
        ),
        "Referer": "https://juejin.im/",
        "X-Agent": "Juejin/Web",
        "Content-Type": "application/json",
    }
    
    
    def get_content(after='', timeout=10):
        """Fetch one page of the article feed.

        Args:
            after: Pagination cursor sent as ``variables.after``. The empty
                string (the default) is what the script uses for the first
                page; later calls pass the previous page's ``endCursor``.
            timeout: Seconds to wait for the server before giving up, passed
                straight to ``requests.post``.

        Returns:
            A ``(edges, pageInfo)`` tuple taken from
            ``data.articleFeed.items`` of the JSON response.

        Raises:
            requests.exceptions.RequestException: On connection problems,
                timeouts, or a non-2xx HTTP status.
            KeyError: If the response JSON lacks the expected structure.
        """
        # Request 20 items per page ordered by "POPULAR". The id under
        # "extensions" presumably selects a server-side stored query, since
        # the "query" field itself is sent empty -- TODO confirm.
        info = {"operationName": "", "query": "", "variables": {"first": 20, "after": after, "order": "POPULAR"},
                "extensions": {"query": {"id": "21207e9ddb1de777adeaca7a2fb38030"}}}
        # Without a timeout a stalled connection would block the pagination
        # loop forever.
        resp = requests.post(url, headers=headers, data=json.dumps(info), timeout=timeout)
        # Surface HTTP errors here instead of as a confusing KeyError or
        # JSON decoding failure further down.
        resp.raise_for_status()
        content = json.loads(resp.content.decode('utf-8'))

        items = content['data']['articleFeed']['items']
        return items['edges'], items['pageInfo']
    
    
    def getList(edges):
        """Return a list of ``{'title': ...}`` dicts, one per feed edge.

        Each element of ``edges`` must be a mapping with a ``'node'`` entry
        that in turn has a ``'title'`` entry. Only the title is collected;
        the original author left disabled lines hinting that
        ``node['originalUrl']`` and ``node['content']`` could be collected
        as ``'links'`` and ``'content'`` in the same way.
        """
        return [{'title': entry['node']['title']} for entry in edges]
    
    
    # Placeholder for accumulated results; the accumulation itself was left
    # disabled by the original author, so each page is only printed.
    data = []

    # First page: no cursor yet.
    edges, pageInfo = get_content()
    print(getList(edges))

    # Follow the cursor until the server reports that no further page exists.
    while pageInfo['hasNextPage']:
        edges, pageInfo = get_content(pageInfo['endCursor'])
        print(getList(edges))
        # Pause between requests to avoid hammering the API.
        sleep(2)

  • 相关阅读:
    HTML5小游戏-绵羊快跑
    JavaScript模拟下拉菜单代码
    作业6
    作业5
    作业4
    作业三
    作业2(第二遍)
    sap jco3安装
    jmeter脚本文件(jmx)关联
    正则替换:删除空行、格式化Json/xml
  • 原文地址:https://www.cnblogs.com/brady-wang/p/12491967.html
Copyright © 2011-2022 走看看