zoukankan      html  css  js  c++  java
  • 豆瓣电影爬虫

    import requests
    
    class DoubanSpider(object):
        """Fetch and print entries from Douban's movie ranking JSON endpoint.

        The spider issues a GET request against ``top_list`` with
        browser-like headers and prints the title and score of every film in
        the JSON response.
        """

        def __init__(self, timeout=10):
            """Set up the endpoint URL, the request headers and the timeout.

            Args:
                timeout: Seconds to wait for the server before giving up.
                    ``requests`` never times out on its own, so a value is
                    always passed explicitly.
            """
            self.url = 'https://movie.douban.com/j/chart/top_list?'
            self.timeout = timeout
            self.headers = {
                "Accept": '*/*',
                # 'br' is deliberately not advertised: requests can only
                # decode brotli when an optional brotli package is installed,
                # and without it .json() would fail on the compressed body.
                "Accept-Encoding": 'gzip, deflate',
                "Accept-Language": 'zh-CN,zh;q=0.9',
                "Connection": 'keep-alive',
                # NOTE(review): hard-coded browser session cookie; it looks
                # like a captured 2019 session and is probably stale -
                # confirm whether the endpoint needs it at all.
                "Cookie": 'bid=8-nCfhrghnU; __yadk_uid=YRJL4YjOgUBe7yEo3opPsDvJqnarI2oM; trc_cookie_storage=taboola%2520global%253Auser-id%3D8fc3589f-2abb-45b3-b21f-dabdd8ad9733-tuct3e78214; ll="108309"; ap_v=0,6.0; __utma=30149280.2076664567.1559562682.1559562682.1563870087.2; __utmb=30149280.0.10.1563870087; __utmc=30149280; __utmz=30149280.1563870087.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utma=223695111.1070403850.1559562682.1559562682.1563870087.2; __utmb=223695111.0.10.1563870087; __utmc=223695111; __utmz=223695111.1563870087.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1563870089%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D12rRCXsSrd1oLSL7dm_tBhgHaysfIxjCzkJqy6_F4kEL-HNdTUAh3Z6A-WLXShEt%26wd%3D%26eqid%3Dae785c100002a33c000000025d36c37d%22%5D; _pk_ses.100001.4cf6=*; _pk_id.100001.4cf6=1ee68b08630bb7f8.1559562681.2.1563872988.1559562681.',
                "Host": 'movie.douban.com',
                "Referer": 'https://movie.douban.com/typerank?type_name=%E7%88%B1%E6%83%85&type=13&interval_id=100:90&action=',
                "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
                "X-Requested-With": 'XMLHttpRequest',
            }

        def get_film_info(self, params):
            """Request the ranking and print the title and score of each film.

            Args:
                params: Mapping of query-string parameters sent with the
                    request (``main`` passes ``type``, ``interval_id``,
                    ``action``, ``start`` and ``limit``).

            Returns:
                A list of ``(title, score)`` tuples in response order.

            Raises:
                requests.RequestException: If the connection fails, the
                    request times out, or the server answers with a 4xx/5xx
                    status.
                KeyError: If an entry lacks ``title`` or ``score``.
            """
            response = requests.get(
                url=self.url,
                headers=self.headers,
                params=params,
                timeout=self.timeout,
            )
            # Fail loudly on an HTTP error instead of trying to parse an
            # error page as JSON.
            response.raise_for_status()

            films = []
            for film in response.json():
                # Title
                name = film['title']
                # Score
                score = film['score']
                print(name, score)
                films.append((name, score))
            return films

        def main(self):
            """Prompt for a type id and a film count, then fetch and print."""
            # input() already returns str; strip stray whitespace so it does
            # not end up in the query string.
            tp = input('请输入类型').strip()
            num = input('请输入电影数量').strip()
            params = {
                'type': tp,
                'interval_id': '100:90',
                'action': '',
                'start': '0',
                'limit': num,
            }

            self.get_film_info(params)
    
    if __name__ == '__main__':
        # Start the interactive crawler only when run as a script.
        DoubanSpider().main()
  • 相关阅读:
    PHP实现无限极分类
    html2canvas生成并下载图片
    一次线上问题引发的过程回顾和思考,以更换两台服务器结束
    Intellij IDEA启动项目报Command line is too long. Shorten command line for XXXApplication or also for
    mq 消费消息 与发送消息传参问题
    idea 创建不了 java 文件
    Java switch 中如何使用枚举?
    Collections排序
    在idea 设置 git 的用户名
    mongodb添加字段和创建自增主键
  • 原文地址:https://www.cnblogs.com/cxiaolong/p/11234848.html
Copyright © 2011-2022 走看看