爬取CBO中国票房网所有电影片名和演员名字
# -*- coding: utf-8 -*-
# Scrape all movie titles and all actor names from the CBO China box-office
# site (www.cbooo.cn). Movie titles are appended to moviename.txt and actor
# names to moviestar.txt.
import json
import time

import requests

# Paged listing endpoints; the page number is appended to each URL.
MOVIE_LIST_URL = 'http://www.cbooo.cn/Mdata/getMdata_movie?area=50&type=0&year=0&initial=%E5%85%A8%E9%83%A8&pIndex='
STAR_LIST_URL = 'http://www.cbooo.cn/Mdata/getMdate_pList?area=50&type=0&year=0&initial=%E5%85%A8%E9%83%A8&pIndex='

# Seconds to pause before every request.
REQUEST_DELAY = 2
# Seconds after which a stalled request is abandoned, so that one hung
# connection cannot block the whole run.
REQUEST_TIMEOUT = 30


def _extract_names(payload, name_key):
    """Return the string values stored under ``name_key`` in a listing page.

    Args:
        payload: The decoded JSON document of one listing page.
        name_key: Key of the name field inside each record of ``pData``.

    Returns:
        A list with the name of every record that has a string under
        ``name_key``; records without one are skipped.

    Raises:
        ValueError: If ``payload`` is not a dict with a list under ``pData``.
    """
    records = payload.get('pData') if isinstance(payload, dict) else None
    if not isinstance(records, list):
        raise ValueError("response has no 'pData' list")
    names = []
    for record in records:
        name = record.get(name_key) if isinstance(record, dict) else None
        # A missing (None) name would break the string concatenation on
        # write and lose the rest of the page, so such records are skipped.
        if isinstance(name, str):
            names.append(name)
    return names


def _scrape_names(base_url, name_key, out_path, first_page, stop_page):
    """Append the names from pages ``first_page``..``stop_page - 1`` to a file.

    Args:
        base_url: Listing URL to which the page number is appended.
        name_key: Key of the name field inside each record of ``pData``.
        out_path: File opened in append mode; each name is written followed
            by a single space.
        first_page: First page number to request.
        stop_page: Page number at which to stop (exclusive).

    A page that cannot be fetched or decoded is reported on stdout and
    skipped; the remaining pages are still processed.
    """
    # Explicit UTF-8 so the Chinese names are written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(out_path, 'a', encoding='utf-8') as fh:
        for pn in range(first_page, stop_page):
            url = base_url + str(pn)
            print(url)
            time.sleep(REQUEST_DELAY)
            try:
                result = requests.get(url, timeout=REQUEST_TIMEOUT).text
                names = _extract_names(json.loads(result), name_key)
            except (requests.RequestException, ValueError) as err:
                # Network errors and malformed/unexpected JSON only; anything
                # else (e.g. Ctrl-C) is allowed to propagate.
                print('第' + str(pn) + '失败!', err)
                continue
            for name in names:
                print(name)
                fh.write(name + " ")


# Movie titles.
_scrape_names(MOVIE_LIST_URL, 'MovieName', "moviename.txt", 1, 320)
# Actor names.
_scrape_names(STAR_LIST_URL, 'cnName', "moviestar.txt", 1, 2665)
爬取电视剧名称
# -*- coding: utf-8 -*-
# Scrape all TV series titles and append them to tvplay.txt.
# Source: Qilu Movie (www.qilumovie.com)
# ``urllib.request`` is imported explicitly: a bare ``import urllib`` does not
# load the ``request`` submodule, so ``urllib.request.urlopen`` could fail
# with AttributeError.
import urllib.request

from bs4 import BeautifulSoup

url = "http://www.qilumovie.com/filmclass-txt/9.html"
# The context manager closes the HTTP response once the body has been read;
# the timeout keeps a stalled connection from hanging the script.
with urllib.request.urlopen(url, timeout=30) as response:
    html = response.read()
# Important: the raw bytes are decoded as GBK here, not UTF-8.
htmldecode = html.decode("gbk")
soup = BeautifulSoup(htmldecode, "lxml")
body = soup.body
maplist = body.find_all("li")
# Explicit UTF-8 so the titles are written the same way on every platform
# instead of depending on the locale's default encoding.
with open("tvplay.txt", 'a', encoding="utf-8") as fh:
    for tvl in maplist:
        # A list item without a link has no title to extract; skip it
        # instead of crashing on ``None.text``.
        if tvl.a is None:
            continue
        tv = tvl.a.text
        print(tv)
        fh.write(tv + ' ')
如有错误，还请大侠指教一二！