学了几天 Python,写了个简单的爬虫程序(爬取酷我音乐排行榜):
# -*- coding: utf-8 -*-
"""Scrape the Kuwo Music chart page and print each entry it lists.

The script downloads the chart page, selects every ``<a>`` element nested
under an element with class ``name``, and prints the inner HTML of each one,
followed by a completion message.
"""

import requests
import pyquery
from pyquery import PyQuery as pq

# Browser-like User-Agent header, sent to get past the site's anti-crawler check.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'}
url = "http://www.kuwo.cn/bang/index"  # Address of the chart page to scrape.

# Without a timeout, requests waits forever on a stalled connection.
REQUEST_TIMEOUT = 10  # seconds


def fetch_chart_html(page_url=url, request_headers=headers,
                     timeout=REQUEST_TIMEOUT):
    """Download the chart page and return its body as text.

    Args:
        page_url: Address of the page to download.
        request_headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
    """
    response = requests.get(page_url, headers=request_headers, timeout=timeout)
    # Fail loudly on an error page instead of parsing it and reporting success.
    response.raise_for_status()
    return response.text


def extract_song_names(html_text):
    """Return the inner HTML of every ``<a>`` found under ``.name`` elements.

    Args:
        html_text: Markup of the chart page.

    Returns:
        A list with one entry per matching link, in document order.
    """
    document = pq(html_text)
    links = document('.name').find('a')
    return [link.html() for link in links.items()]


def main():
    """Fetch the chart page, print each extracted entry, then report completion."""
    for name in extract_song_names(fetch_chart_html()):
        print(name)
    print("爬取完成!")


if __name__ == "__main__":
    main()