zoukankan      html  css  js  c++  java
  • 爬取豆瓣电影top250

    import requests
    from bs4 import BeautifulSoup
    
    
    # Module-level accumulator: get_movies() appends every scraped title here
    # and also returns this same list object.
    movie_list = []
    
    
    def get_movies(pages=10, page_size=25):
        """Scrape movie titles from the Douban Top 250 listing pages.

        Requests ``pages`` consecutive listing pages, takes the first
        ``<span>`` inside the link of every ``<div class="hd">`` and appends
        its stripped text to the module-level ``movie_list``.

        Args:
            pages: Number of listing pages to request (default 10).
            page_size: Step between the ``start`` offsets of consecutive
                pages (default 25).

        Returns:
            The module-level ``movie_list`` (the same object), which also
            keeps whatever it contained before the call.

        Raises:
            requests.RequestException: Not caught here; connection errors
                and timeouts raised by ``requests`` propagate to the caller.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 '
                          'SE 2.X MetaSr 1.0',
            'Host': 'movie.douban.com',
        }

        # A single session lets all page requests share one connection
        # instead of opening a new one per page.
        with requests.Session() as session:
            for i in range(pages):
                link = 'https://movie.douban.com/top250?start=' + str(i * page_size)
                r = session.get(link, headers=headers, timeout=10)
                # The status is only reported; a non-200 page is still parsed
                # and simply contributes no titles if the markup is absent.
                print(str(i+1), "页响应码:", r.status_code)
                soup = BeautifulSoup(r.text, "lxml")
                for each in soup.find_all('div', class_='hd'):
                    # Skip entries lacking the expected <a><span> structure
                    # rather than failing with AttributeError on None.
                    anchor = each.a
                    title = anchor.span if anchor is not None else None
                    if title is None:
                        continue
                    movie_list.append(title.text.strip())
        return movie_list
    
    
    if __name__ == "__main__":
        # Run the scrape only when executed as a script, so that importing
        # this module does not trigger network requests.
        get_movies()
        print(movie_list)
    

     

  • 相关阅读:
    个人博客
    个人博客
    个人博客
    个人博客
    个人博客
    个人博客
    个人博客
    5.14
    5.13
    5.12
  • 原文地址:https://www.cnblogs.com/ahacker15/p/12597967.html
Copyright © 2011-2022 走看看