zoukankan      html  css  js  c++  java
  • 6-豆瓣剧情排行爬虫

    爬取豆瓣排行

    #_*_ coding: utf-8 _*_
    
    '''
    Created on 2018年7月13日
    @author: sss
    功能:爬取豆瓣剧情排行榜的json数据
    
    '''
    
    import urllib.request
    import urllib.response
    import urllib.parse
    import random
    
    # Douban chart JSON endpoint queried by this script (type=11, interval 100:90).
    url = "https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90"

    # Pool of User-Agent strings; one is picked at random each time the script runs.
    ua_list = [
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0)like Gecko",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X ",
        "Mozilla/5.0 (Macintosh; Intel Mac OS ",
    ]

    # NOTE: the variable keeps its original (misspelled) name so that any code
    # referring to it keeps working.
    user_agnet = random.choice(ua_list)

    # The header name must be spelled exactly "User-Agent"; the previous key
    # "User_Agnet" was sent as an unrelated header, so the random user agent
    # was never actually applied to the request.
    headers = {
        "User-Agent": user_agnet,
    }

    # Paging parameters submitted as the form body of the request.
    formdata = {
        "action": "",
        "start": "0",  # index of the first movie to return
        "limit": "3",  # number of movies to return, counted from `start`
    }
     
    # Form-encode the paging parameters. Supplying `data` makes urllib send
    # the request as a POST with this payload as the body.
    data = urllib.parse.urlencode(formdata).encode(encoding='utf_8')

    request = urllib.request.Request(url, headers=headers, data=data)

    # Use the response as a context manager so the connection is always
    # closed, and set a timeout so a stalled server cannot hang the script.
    with urllib.request.urlopen(request, timeout=10) as response:
        html = response.read()  # raw response body as bytes

    print(html)

    # Write-only binary mode is sufficient; the file is never read back here.
    with open('douban.html', 'wb') as f:
        f.write(html)

    print('finish!')
    

      

  • 相关阅读:
    作为管理者的基本职责
    websocket接口自动化的封装
    locust性能测试的使用
    git的协作提交流程
    关于接口自动化的实施步骤
    K8S的组件梳理
    jenkins pipeline中,失败后获取异常不中断业务
    pipline在执行的docker镜像中添加hosts
    sonar搭建
    django
  • 原文地址:https://www.cnblogs.com/zhumengdexiaobai/p/9303795.html
Copyright © 2011-2022 走看看