zoukankan      html  css  js  c++  java
  • 一个可以获取知乎timeline的爬虫

    # -*- coding: utf-8 -*-
    import requests
    import lxml
    import os,time
    from bs4 import BeautifulSoup as sb
    try:
        import cookielib
    
    except:
        import http.cookiejar as cookielib
    import json
    
    # HTTP headers passed along with the feed request in get_data().
    # The User-Agent string identifies as mobile Chrome on an Android Nexus 5.
    # NOTE(review): the "authorization" bearer token is hard-coded here and is
    # presumably account-specific / short-lived -- confirm before reuse.
    headers = {
        "Host": "www.zhihu.com",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "accept": "application/json, text/plain, */*",
        "Referer": "https://www.zhihu.com/",
        "Connection": "keep-alive",
        "User-Agent": (
            "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/56.0.2924.87 Mobile Safari/537.36"
        ),
        "authorization": "Bearer Mi4xUXJGd0FBQUFBQUFBa0VKNTBfbnVDeGNBQUFCaEFsVk5OQmZMV1FCVnQ3aEhfeUVsUElGN1Zrd3RSSWpMdHI0ZG5B|1503889972|a235d0e24d646c5df6b1f667abc005381c273870",
    }
    
    def get_session():
        """Create a requests session whose cookies are backed by the file "cookies".

        Loading the saved cookies is best-effort: if the file is missing,
        unreadable or malformed, a message is printed and the session is
        returned with an empty cookie jar.

        Returns:
            The requests session with an ``LWPCookieJar`` installed as its
            cookie store.
        """
        session = requests.session()
        session.cookies = cookielib.LWPCookieJar(filename="cookies")
        try:
            session.cookies.load()
        except (IOError, OSError):
            # LoadError derives from IOError/OSError, and a missing file raises
            # the same family, so this covers every expected load failure
            # without also swallowing KeyboardInterrupt or SystemExit.
            print("cookie 无法加载...")
        else:
            print("cookie 加载成功!")
        return session
    
    # Module-level session used by get_data(); created once when the script starts.
    session = get_session()
    
    # Request parameters for the feed endpoint. "after_id" is overwritten by the
    # paging loop before each call to get_data().
    # The literal used to list "action" twice ("True", then "down"); only the
    # last value ever took effect, so the dead first entry has been removed.
    data = {
        "action": "down",
        "limit": "10",
        "session_token": "c9c3581148b6d633275ba5d4412d3bd8",
        "after_id": "0",
        "desktop": "true",
    }
    
    def get_data():
        """Fetch one page of the top-story feed and print every entry in it.

        Uses the module-level ``session``, ``data`` and ``headers``. For each
        entry the question title and the content are printed; when either
        cannot be read from the entry, a fallback message followed by the raw
        entry is printed instead. The module-level ``count`` is incremented
        once per entry.
        """
        global count
        # A GET request carries its arguments in the query string, so they go
        # in ``params=``; ``data=`` would place them in the request body.
        res = session.get("https://www.zhihu.com/api/v3/feed/topstory", params=data, headers=headers)
        # Named ``payload`` so the imported ``json`` module is not shadowed.
        payload = res.json()
        for item in payload['data']:
            try:
                print(item['target']['question']['title'])
            except Exception:
                # Entry has no readable question title; dump the raw entry.
                print('没有问题了'+str(item))
            try:
                print(item['target']['content'])
            except Exception:
                # Entry has no readable content; dump the raw entry.
                print('找不到答案了'+str(item))
            count += 1
            print()
    # Running total of feed entries printed so far; get_data() increments it.
    count = 0
    # Request five pages, moving the "after_id" offset forward by 10 each time
    # and pausing 3 seconds after every request.
    for after_id in range(0, 50, 10):
        data["after_id"] = after_id
        get_data()
        time.sleep(3)


    # Report how many entries were processed in total.
    print(count)
  • 相关阅读:
    日本最大的汽车品牌:丰田【仅供自己参考】
    读书笔记1
    读书笔记1
    计算机网络笔记1
    ZY凉凉经
    HK凉凉经
    访问一个网站,发生了什么?
    正向代理VS反向代理
    mac下打开hosts文件
    国际手机区号
  • 原文地址:https://www.cnblogs.com/peter1994/p/7449751.html
Copyright © 2011-2022 走看看