原文地址:http://bbs.csdn.net/topics/390361293
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
# -*- coding: utf-8 -*-
# Scrape the lesson download links of a NetEase Open Course page and
# download each lesson as an .mp4 file into the current directory.
# By  : hnicypb@126.com
# Ver : 1.0
# Time: 2013-12-30
# Python 2.7 + BeautifulSoup 3.03 (the newer 4.1 release produced garbled
# text that could not be sorted out, so this falls back to 3.03)
# eg: python <this_script>.py http://v.163.com/special/opencourse/paradigms.html

import os
import re
import sys
import urllib

# Path separators and characters that common file systems reject in file
# names. Lesson titles come from the remote page, so they are scrubbed of
# these before being used as a local path.
_UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def rpb(blocknum, blocksize, totalsize):
    """Print download progress; meant to be passed as urlretrieve's reporthook.

    blocknum  -- number of blocks transferred so far
    blocksize -- size of one block in bytes
    totalsize -- total size of the file in bytes; zero or negative when the
                 size is not known

    Prints a percentage capped at 100.00% when the total size is known,
    otherwise the number of bytes transferred so far.
    """
    if totalsize <= 0:
        # No usable total: a percentage would be negative or divide by zero.
        print("%d bytes" % (blocknum * blocksize))
        return
    # blocknum * blocksize can overshoot the real size on the last block.
    percent = min(100.0, 100.0 * blocknum * blocksize / totalsize)
    print("%.2f%%" % percent)


def _clean(text):
    """Return text without surrounding whitespace or trailing commas.

    None (e.g. a tag whose .string is not a single text node) becomes ''.
    """
    if text is None:
        return ""
    return text.strip().rstrip(',')


def downlaod(url):
    """Fetch the course page at url, print its details and download lessons.

    For every table row of class "u-even" that has both a title cell
    ("u-ctitle") and a download cell ("u-cdown") with a link, the lesson name
    and link are printed and the link is saved as "<lesson name>.mp4" in the
    current directory, unless that file already exists.

    NOTE(review): only rows with class "u-even" are scanned, as in the
    original script; confirm whether the page also has rows of another class
    that should be included.

    Errors from the network or the file system are not caught here.
    """
    # Imported here so the module can be imported (e.g. to reuse rpb) even
    # when the third-party parser is not installed.
    from BeautifulSoup import BeautifulSoup

    # Fetch the page and make sure the connection is released.
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()

    soup = BeautifulSoup(html)

    # Course information: name, description, etc.
    title = soup.find('div', {"class": "m-cdes"})
    if title is not None:
        if title.h2 is not None:
            print(title.h2.string)
        # At most the first three paragraphs; fewer is not an error.
        for paragraph in title.findAll('p')[:3]:
            print(paragraph.string)

    # Detailed lesson list.
    for row in soup.findAll('tr', {"class": "u-even"}):
        name = row.find('td', {"class": "u-ctitle"})
        downInfo = row.find('td', {"class": "u-cdown"})

        # Skip rows that are not complete lesson rows instead of crashing
        # the whole run on the first one.
        if name is None or name.a is None or not name.contents:
            continue
        if downInfo is None or downInfo.a is None:
            continue
        downLink = downInfo.a.get('href')
        if not downLink:
            continue

        # Lesson name: the text before the link plus the link text.
        fileName = _clean(name.contents[0]) + _clean(name.a.string)

        print(fileName)
        print(downLink)

        # The existence check must look at the same path that is written.
        target = _UNSAFE_NAME_CHARS.sub('_', fileName) + ".mp4"
        if os.path.exists(target):
            continue

        # Download under a temporary name and rename on success, so that an
        # interrupted download is not mistaken for a finished file next run.
        partial = target + ".part"
        urllib.urlretrieve(downLink, partial, rpb)
        os.rename(partial, target)


def main(argv):
    """Run the downloader for the course page URL given in argv[1].

    argv -- command-line argument list including the program name.
    Prints a usage line to stderr when no URL is supplied.
    """
    if len(argv) >= 2:
        downlaod(argv[1])
    else:
        prog = argv[0] if argv else "script.py"
        sys.stderr.write("usage: python %s <course page URL>\n" % prog)


if __name__ == "__main__":
    main(sys.argv)