# 失败原因在于：HTTPS 证书信任问题，以及未伪装成浏览器（缺少 User-Agent 请求头）
"""Scrape the first page of the Douban Movie Top 250 chart.

Prints one line per film: title, rating, vote count and detail-page URL.
Two gotchas this script works around:
  * Douban rejects requests without a browser-like User-Agent header.
  * Some environments fail HTTPS certificate verification, so verification
    is disabled process-wide (acceptable for a throwaway scraper only).

Note: this fetches a single page, i.e. the first 25 entries of the chart.
"""
import ssl
import urllib.request

from bs4 import BeautifulSoup

# Trust all HTTPS certificates by swapping in the non-verifying context.
# NOTE(security): relies on private ssl APIs and removes MITM protection —
# fine for a one-off scraper, never for production code.
ssl._create_default_https_context = ssl._create_stdlib_context

URL = "http://movie.douban.com/top250?format=text"
# Browser-like header; without it Douban refuses the request.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/51.0.2704.63 Safari/537.36'}


def main():
    """Fetch the chart page and print title/score/votes/link per film."""
    request = urllib.request.Request(url=URL, headers=HEADERS)
    # Context manager closes the socket (the original leaked the response).
    with urllib.request.urlopen(request) as response:
        contents = response.read()

    soup = BeautifulSoup(contents, "html.parser")
    print("豆瓣电影TOP250" + " " + " 影片名 评分 评价人数 链接 ")
    for tag in soup.find_all('div', class_='info'):
        # The first <span class="title"> inside the info div is the title.
        m_name = tag.find('span', class_='title').get_text()
        m_rating_score = float(tag.find('span', class_='rating_num').get_text())
        star_div = tag.find('div', class_="star")
        star_spans = star_div.findAll('span')
        # The 4th <span> in the star block carries the vote-count text.
        m_peoplecount = star_spans[3].contents[0]
        # First <a> in the info div links to the film's detail page.
        m_url = tag.find('a').get('href')
        print(m_name + " " + str(m_rating_score) + " " + m_peoplecount + " " + m_url)


if __name__ == "__main__":
    main()