zoukankan      html  css  js  c++  java
  • 爬取豆瓣电影TOP250

    import requests
    from bs4 import BeautifulSoup
    def get_movies():
        """Scrape the movie titles from the Douban Top 250 list.

        Requests ten list pages (``start=0, 25, ..., 225``) from
        ``https://movie.douban.com/top250`` and collects the text of the first
        ``<span>`` inside the ``<a>`` of every ``<div class="hd">`` element.
        The HTTP status code of each page is printed as progress output.

        Returns:
            list[str]: The stripped title strings, in the order encountered.

        Note:
            Exceptions raised by ``requests.get`` (e.g. on timeout) are not
            caught here and propagate to the caller.
        """
        # Browser-like headers; the original author sends these with every
        # request (presumably to avoid being rejected as a bot -- unverified).
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
            'Host': 'movie.douban.com',
        }
        movie_list = []
        for i in range(10):
            # Each page is addressed by the offset of its first entry.
            link = 'https://movie.douban.com/top250?start=' + str(i * 25)
            r = requests.get(link, headers=headers, timeout=10)
            print(str(i + 1), "page status_code:", r.status_code)
            soup = BeautifulSoup(r.text, 'lxml')
            for each in soup.find_all('div', class_='hd'):
                # Skip malformed entries instead of crashing with
                # AttributeError when the <a> or <span> child is missing.
                anchor = each.a
                title_span = anchor.span if anchor is not None else None
                if title_span is None:
                    continue
                movie_list.append(title_span.text.strip())
        return movie_list
    # Fetch all titles, then append them one per line to the output file.
    movies = get_movies()
    # Explicit UTF-8 keeps the Chinese titles writable regardless of the
    # platform's default locale encoding; the context manager guarantees the
    # file is closed even if a write fails.
    with open('豆瓣电影TOP250.txt', 'a', encoding='utf-8') as f:
        for title in movies:
            f.write(title + '\n')
  • 相关阅读:
    ExtJs-学习篇(1)
    js中的Ajax经典示例
    软件工程开发流程
    Maven和Ajax
    搭建SSH框架
    拦截器
    OGNL
    Struts 2配置详解
    Struts 2入门
    HQL连接查询和注解
  • 原文地址:https://www.cnblogs.com/menxin/p/10705036.html
Copyright © 2011-2022 走看看