import time
import random
from selenium import webdriver
def _comment_body(text, sep=':'):
    """Return the part of *text* that follows the first *sep*.

    The scraped text has the form ``<user><sep><comment>``. Only the first
    separator is treated as the delimiter, so separators inside the comment
    itself are kept. If *text* contains no separator it is returned unchanged
    rather than raising.
    """
    _, found, body = text.partition(sep)
    return body if found else text


def crawl(url='https://music.163.com/#/song?id=574919767', page=1000,
          path='comment.txt'):
    """Scrape song comments with headless Chrome and write them to a file.

    Args:
        url: Song page to open. Defaults to the NetEase Cloud Music song
            used by the original script.
        page: Maximum number of comment pages to visit.
        path: Output file; it is overwritten and receives one comment per
            write, each followed by a newline, encoded as UTF-8.

    The browser is always shut down and the file always closed, even when a
    Selenium call fails part-way through. The crawl stops early if no
    "next page" link can be found.
    """
    # Imported locally so this function carries its own Selenium helpers.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    # Configure Chrome to run without a visible window and without the GPU.
    opt = webdriver.ChromeOptions()
    opt.add_argument('--headless')
    opt.add_argument('--disable-gpu')
    # `options=` replaces the deprecated `chrome_options=` keyword.
    web = webdriver.Chrome(options=opt)
    try:
        web.get(url)
        # Element lookups below poll for up to 5 seconds before failing.
        web.implicitly_wait(5)
        # The comment data lives inside this iframe, so switch into it first
        # (passing the frame's name, 'contentFrame', would work as well).
        iframe = web.find_element(By.ID, 'g_iframe')
        web.switch_to.frame(iframe)
        # Scroll to the bottom of the page so the pager is in view.
        web.execute_script('scrollTo(0,document.body.scrollHeight)')

        count = 0  # running total of comments written
        with open(path, 'w', encoding='utf-8') as f:
            for _ in range(page):
                # Each element with class 'itm' is one comment on this page.
                for div in web.find_elements(By.CLASS_NAME, 'itm'):
                    text = div.find_element(By.XPATH, './div[2]/div/div').text
                    f.write(_comment_body(text) + '\n')
                    count += 1
                print(count)

                # Move on to the next page; stop if there is no such link.
                try:
                    next_page = web.find_element(
                        By.XPATH, '//a[contains(text(),"下一页")]')
                except NoSuchElementException:
                    break
                # Click through JavaScript rather than WebElement.click().
                web.execute_script("arguments[0].click();", next_page)
                # Pause 0.2-1 s to give the next page's content time to load.
                time.sleep(random.uniform(0.2, 1))
    finally:
        # Always release the browser process, whatever happened above.
        web.quit()
if __name__ == '__main__':
    # Run the crawl and report the elapsed wall-clock time in seconds.
    started_at = time.time()
    crawl()
    elapsed = time.time() - started_at
    print('用时{:.2f}s'.format(elapsed))