zoukankan
html css js c++ java
[转]Python下载百度新歌100的代码
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>
import
httplib
import
re
import
urllib
import
os
import
locale
def getdownurl(url):
    """Collect candidate MP3 download URLs from a page on mp3.baidu.com.

    Sends ``GET url`` to host ``mp3.baidu.com``, finds every fragment of the
    response matching ``http://220.181.27.54/m(.*)</a>`` and, for every
    second fragment, pulls the first ``http...`` token out of its
    ``title=...onclick`` section.

    Args:
        url: request path (including query string) on mp3.baidu.com.

    Returns:
        A list of URLs in page order. Each entry is a ``unicode`` string when
        it could be decoded as GBK, otherwise the undecoded value as found.
        The list is empty when nothing matched.

    Raises:
        Whatever ``httplib`` raises when the request or the read fails; the
        connection is closed before the exception propagates.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        html = conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    listSentence = re.findall('http://220.181.27.54/m(.*)</a>', html)

    urllist = []
    # Only every second match is inspected (the original loop stepped by 2);
    # presumably each link occurs twice in the page -- TODO confirm.
    for sentence in listSentence[::2]:
        title = re.search('title=(.*)onclick', sentence)
        if not title:
            continue
        link = re.search(r'http(\S*)', title.group(0))
        if not link:
            continue
        mp3url = link.group(0)
        try:
            mp3url = mp3url.decode('gbk')
        except UnicodeError:
            # Best effort: keep the undecoded value when it is not valid GBK.
            pass
        urllist.append(mp3url)
    return urllist
def
downmp3(url,author,name,filelist):
filename
=
author
+
"
-
"
+
name;
for
i
in
filelist:
name
=
unicode(i,locale.getpreferredencoding())
if
name.find(filename)
==
0:
print
u
"
文件已经下载,忽略。
"
return
1
urllists
=
getdownurl(url)
for
i
in
urllists:
print
u
"
正在连接
"
,i
ext
=
i[
-
4
:]
try
:
urlopen
=
urllib.URLopener()
fp
=
urlopen.open(i)
data
=
fp.read()
fp.close()
filename
=
filename
+
ext;
file
=
open(filename,
'
w+b
'
)
file.write(data)
file.close()
print
u
"
下载成功!
"
return
1
except
:
continue
return
0
if
__name__
==
"
__main__
"
:
conn
=
httplib.HTTPConnection(
'
list.mp3.baidu.com
'
)
conn.request(
"
GET
"
,
'
/list/newhits.html?id=1
'
)
response
=
conn.getresponse()
html
=
response.read().decode(
'
gbk
'
)
conn.close()
expression
=
'
<a href="http://mp3.baidu.com/m(.*)</a>
'
listSentence
=
re.findall(expression, html)
lineno
=
0
while
lineno
<
len(listSentence):
url
=
re.search(
'
(.*)target
'
,listSentence[lineno])
url
=
'
/m
'
+
url.group(0)[:
-
8
]
name
=
re.search(
'
blank>(.*)
'
,listSentence[lineno])
name
=
name.group(0)[
6
:]
author
=
re.search(
'
blank>(.*)
'
,listSentence[lineno
+
1
])
author
=
author.group(0)[
6
:]
print
u
"
开始下载
"
,author,name
filelist
=
os.listdir(
'
.
'
);
if
downmp3(url,author,name,filelist)
==
0:
print
u
"
下载
"
,author,name,u
'
失败!
'
lineno
+=
2
查看全文
相关阅读:
python爬虫学习(7) —— 爬取你的AC代码
python爬虫学习(6) —— 神器 Requests
python爬虫学习(5) —— 扒一下codeforces题面
python爬虫学习(4) —— 手刃「URP教务系统」
听说你叫爬虫(3) —— 模拟登陆
python爬虫学习(2) —— 爬一下ZOL壁纸
python爬虫学习(1) —— 从urllib说起
数据结构/ 串的模式匹配法 / kmp算法与next数组的构造
ADWORLD web/PHP2
ADWORLD web/upload1
原文地址:https://www.cnblogs.com/maplye/p/450118.html
最新文章
SQL Server
Devexpress-comboBoxEdit
Devexpress-ribbonControl
Devexpress-xtraTabControl1
Devexpress-GridControl
oracle配置备份
解决devexpress新建的Form窗体没有继承默认样式的问题
使用MVCPager做AJAX分页所走的弯路
Oracle 11g导出空表、少表的解决办法
oracle备份还原bat脚本
热门文章
ASP.NET 获取area_controller_Action_Parameters
GetContentType
Errors occurred during the build完美解决之道
MyEclipse连接Mysql报错Illegal connection port value '3306>]解决方法
Servlet 一初体验
Myeclipse开发安卓-安装ADT,SDK
java引用第三方库
WinForm 中 comboBox控件之数据绑定
python爬虫学习(9) —— 一些工具和语法
python爬虫学习(8) —— 关于4399的一个小Demo
Copyright © 2011-2022 走看看