zoukankan html css js c++ java

Python 爬取豆瓣

...

import urllib.request
import time
from bs4 import BeautifulSoup

def url_open(url):
    """Open *url* with urllib and return the response object, unread."""
    return urllib.request.urlopen(url)
def parse_html(response):
    """Print the movies listed on one Top 250 page and locate the next page.

    Reads the whole body from *response*, parses it with BeautifulSoup and
    walks every ``<li>`` element. An ``<li>`` counts as a movie entry when it
    contains a ``<span class="title">``; for each such entry the rank (text
    of the ``<em>`` tag), the rating (``<span class="rating_num">``) and the
    first title are printed.

    Args:
        response: A file-like object whose ``read()`` returns the page HTML.

    Returns:
        The URL of the next page once an entry whose rank is a multiple of
        25 has been printed, or ``None`` when rank 250 has been reached or
        the page contains no entry that ends a page.
    """
    html_content = response.read()
    html_soup = BeautifulSoup(html_content, 'html.parser', from_encoding='utf-8')
    for li in html_soup.find_all('li'):
        title = li.find('span', class_='title')
        if title is None:
            # Not a movie entry (navigation items, footers, ...).
            continue
        rank = li.find('em').get_text()
        rating = li.find('span', class_='rating_num')
        print("排名:" + rank + "------评分:" + rating.get_text() + "-------" + title.get_text())
        # The rank is scraped as text; compare it as a number. Comparing the
        # string against the int 250 is always False, which used to cause one
        # extra request for an empty page after the last movie.
        rank_number = int(rank)
        if rank_number >= 250:
            return None
        if rank_number % 25 == 0:
            # The rank of the last entry on a page is the offset of the next page.
            return "https://movie.douban.com/top250?start=" + rank + "&filter="
    return None

# First page of the Douban Top 250 list; rebound to each following page
# while the script crawls.
url = "https://movie.douban.com/top250?start=0&filter="
if __name__ == '__main__':
    response = url_open(url)
    start_time = time.time()
    print("开始：" + str(start_time))
    while True:
        # Close every response once it has been parsed so that connections
        # are not leaked while crawling the remaining pages.
        try:
            url = parse_html(response)
        finally:
            response.close()
        if url is None:
            # parse_html found no further page to fetch.
            break
        response = url_open(url)
    end_time = time.time()
    print("结束:" + str(end_time))
    print("一共用了：" + str(end_time - start_time) + "秒")

查看全文

相关阅读:
leftpad填充函数;
overflow的解析
 append与after
数组扁平化的N种解法；
关于webapi调用wcf并发假死的分析
 C#金额数字转换中文繁体
 关于ios的IDFA
Windows下为MySQL做定时备份
 [System.OutOfMemoryException] {函数求值已禁用，因为出现内存不足异常。
mvc 捕获404和500 等

原文地址：https://www.cnblogs.com/mysterious-killer/p/10156985.html

Python 爬取 豆瓣

Python 爬取豆瓣