code
# Crawl the paginated movie listing on zimiyy.com with a Chrome WebDriver.
# For every movie found, print its detail-page URL, title, poster URL and
# description HTML, then visit each play page and print a mapping of
# play-link label -> text read from the player's info panel.
import os
import shutil
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

BASE_URL = "http://zimiyy.com"
LISTING_URL_TEMPLATE = BASE_URL + "/mov/0/0/all/{}.html"
# Number of listing pages to walk (the original author's note: 719 pages).
LAST_PAGE = 719


def asleep(driver):
    """Set a 3.5 s implicit wait on *driver*, then sleep for 2 s."""
    driver.implicitly_wait(3.5)
    time.sleep(2)


def _inner_html(driver, by, value):
    """Return the innerHTML of the first element matching the locator."""
    return driver.find_element(by, value).get_attribute("innerHTML")


def _collect_stream_urls(driver, detail_html, page_url):
    """Visit each play page linked from *detail_html*.

    Args:
        driver: WebDriver used to open the play pages.
        detail_html: innerHTML of the detail page's play-list container.
        page_url: URL of the detail page; relative hrefs are resolved
            against it.

    Returns:
        Dict mapping each play link's text to the text of the player's
        info-panel element, or to None when that element cannot be read.
    """
    stream_urls = {}
    for item in BeautifulSoup(detail_html, "html.parser").find_all("li"):
        link = item.find("a")
        # Skip list items that carry no usable link instead of crashing.
        if link is None or not link.get("href"):
            continue

        # Open the play page; urljoin leaves absolute hrefs untouched and
        # turns site-relative ones into something driver.get() accepts.
        driver.get(urljoin(page_url, link.get("href")))

        # Read the video link shown in the player's info panel.
        try:
            stream_url = driver.find_element(
                By.XPATH, "//span[@class='dplayer-info-panel-item-data']"
            ).text
        except WebDriverException as exc:
            # Best effort: report the failure and record the entry as missing.
            print(exc)
            stream_url = None

        stream_urls[link.string] = stream_url
    return stream_urls


def _scrape_movie(driver, anchor, listing_url):
    """Print the details of the movie that *anchor* links to.

    Args:
        driver: WebDriver used to open the detail and play pages.
        anchor: BeautifulSoup ``<a>`` tag taken from the listing page.
        listing_url: URL of the listing page; the anchor's href is
            resolved against it.
    """
    href = anchor.get("href")
    if not href:
        return

    movie_page_url = urljoin(listing_url, href)
    print(movie_page_url)
    print(anchor.get("title"))
    poster = anchor.find("img")
    print(poster.get("src") if poster is not None else None)
    time.sleep(2)

    # Open the detail page and read the description and play-list HTML.
    driver.get(movie_page_url)
    description_html = _inner_html(driver, By.CLASS_NAME, "info")
    detail_html = _inner_html(driver, By.ID, "stab_1_71")
    print(description_html)

    print(_collect_stream_urls(driver, detail_html, movie_page_url))
    print("*" * 17)
    time.sleep(3)


def _scrape_listing_page(driver, page_number):
    """Open listing page *page_number* and scrape every movie linked on it."""
    listing_url = LISTING_URL_TEMPLATE.format(page_number)
    driver.get(listing_url)
    listing_html = _inner_html(
        driver, By.XPATH, "//div[@class='index-tj mb clearfix']/ul"
    )
    # The anchors are parsed out of a static HTML snapshot, so navigating
    # away from the listing page inside the loop does not invalidate them.
    for anchor in BeautifulSoup(listing_html, "html.parser").find_all("a"):
        _scrape_movie(driver, anchor, listing_url)


driver = webdriver.Chrome()
asleep(driver)
try:
    for page_number in range(1, LAST_PAGE + 1):
        _scrape_listing_page(driver, page_number)
finally:
    # Always shut the browser down, even when the crawl aborts midway.
    driver.quit()