zoukankan      html  css  js  c++  java
  • [转]Python下载百度新歌100的代码

    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    # Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
    # License: GPLv2
    # Author: oneleaf <oneleaf AT gmail.com>

    import httplib 
    import re 
    import urllib 
    import os 
    import locale 

    def getdownurl(url):
        """Return the candidate MP3 URLs found on a mp3.baidu.com page.

        Issues ``GET url`` against host ``mp3.baidu.com`` (Python 2
        ``httplib``), collects every ``http://220.181.27.54/m...</a>``
        fragment in the response body, and from every second fragment
        extracts the ``http...`` token that sits between ``title=`` and
        ``onclick``.

        url -- request path (with query string) on mp3.baidu.com.

        Returns a list of URLs. Each one is decoded from GBK when possible;
        a URL that cannot be decoded is kept as the raw byte string.

        NOTE(review): the starting index of the scan was lost when this
        listing was extracted from the web page; 0 is assumed here.
        """
        conn = httplib.HTTPConnection('mp3.baidu.com')
        try:
            conn.request("GET", url)
            html = conn.getresponse().read()
        finally:
            # Release the socket even when the request or the read fails.
            conn.close()

        urllist = []
        listSentence = re.findall('http://220.181.27.54/m(.*)</a>', html)
        # Only every second match is inspected (the original stepped by 2).
        for sentence in listSentence[::2]:
            title = re.search('title=(.*)onclick', sentence)
            if not title:
                continue
            link = re.search(r'http(\S*)', title.group(0))
            if not link:
                continue
            mp3url = link.group(0)
            try:
                mp3url = mp3url.decode('gbk')
            except UnicodeError:
                # Best effort: fall back to the undecoded bytes.
                pass
            urllist.append(mp3url)
        return urllist

    def downmp3(url,author,name,filelist):
        """Download one song into the current directory unless already present.

        url      -- page path passed to getdownurl() to obtain candidate
                    download URLs.
        author   -- artist name; first half of the local file name.
        name     -- song title; second half of the local file name.
        filelist -- names of files that already exist (the caller passes
                    os.listdir('.')); byte strings are decoded with the
                    locale's preferred encoding before comparison.

        The local file is named ``author + "-" + name`` plus the last four
        characters of the download URL (assumed to be the extension).

        Returns 1 when a file whose name starts with ``author-name`` already
        exists or when a download succeeds, and 0 when every candidate URL
        fails.

        NOTE(review): author and name come from scraped HTML and are used
        in a file name without sanitising path separators -- confirm this
        is acceptable.
        """
        filename = author + "-" + name

        encoding = locale.getpreferredencoding()
        for entry in filelist:
            if not isinstance(entry, unicode):
                # 'replace' keeps one undecodable file name from aborting
                # the whole duplicate check.
                entry = unicode(entry, encoding, 'replace')
            if entry.startswith(filename):
                print(u"文件已经下载,忽略。")
                return 1

        for i in getdownurl(url):
            # getdownurl() may hand back raw bytes; decode a display copy so
            # the progress message cannot fail on them.
            shown = i
            if not isinstance(shown, unicode):
                shown = shown.decode('gbk', 'replace')
            # A single-argument print(...) is valid as a Python 2 print
            # statement and prints the same text as "print a, b".
            print(u" ".join((u"正在连接", shown)))

            # Build the target name per attempt; the original appended the
            # extension to `filename` itself, so every failed attempt
            # stacked another extension onto later ones.
            target = filename + i[-4:]
            try:
                fp = urllib.URLopener().open(i)
                try:
                    data = fp.read()
                finally:
                    fp.close()
                out = open(target, 'w+b')
                try:
                    out.write(data)
                finally:
                    out.close()
            except Exception:
                # Best effort: any failure just moves on to the next URL,
                # but Ctrl-C / SystemExit are no longer swallowed.
                continue
            print(u"下载成功!")
            return 1
        return 0

    if __name__ == "__main__":
        # Script entry point: fetch the Baidu "new hits" list page, pull the
        # (song, artist) link pairs out of it and download each song with
        # downmp3().
        conn = httplib.HTTPConnection('list.mp3.baidu.com')
        try:
            conn.request("GET", '/list/newhits.html?id=1')
            # 'replace' keeps a single malformed byte in the page from
            # aborting the whole run.
            html = conn.getresponse().read().decode('gbk', 'replace')
        finally:
            # Release the socket even when the request or the read fails.
            conn.close()

        listSentence = re.findall('<a href="http://mp3.baidu.com/m(.*)</a>', html)
        # Matches are consumed in pairs: entry N supplies the song link and
        # title, entry N+1 the artist. Stopping at len - 1 skips a trailing
        # unpaired match instead of raising IndexError.
        # NOTE(review): the starting index was lost when this listing was
        # extracted from the web page; 0 is assumed here.
        for lineno in range(0, len(listSentence) - 1, 2):
            link = re.search('(.*)target', listSentence[lineno])
            title = re.search('blank>(.*)', listSentence[lineno])
            artist = re.search('blank>(.*)', listSentence[lineno + 1])
            if not (link and title and artist):
                # Markup did not have the expected shape; skip this pair
                # rather than crash on a None match.
                continue
            # Drop the last 8 characters of the match, which ends in
            # "target" (presumably the closing quote, a space and the word
            # itself -- confirm against the page markup).
            url = '/m' + link.group(0)[:-8]
            name = title.group(1)
            author = artist.group(1)
            # A single-argument print(...) is valid as a Python 2 print
            # statement and prints the same text as "print a, b, c".
            print(u" ".join((u"开始下载", author, name)))
            filelist = os.listdir('.')
            if downmp3(url, author, name, filelist) == 0:
                print(u" ".join((u"下载", author, name, u'失败!')))
  • 相关阅读:
    最小二乘法求回归直线方程的推导过程
    最小二乘法求回归直线方程的推导过程
    Redis过期键的删除策略
    Redis过期键的删除策略
    最小二乘法求回归直线方程的推导过程
    最小二乘法求回归直线方程的推导过程
    不用第三方实现内网穿透
    不用第三方实现内网穿透
    X Redo丢失的4种情况及处理方法
    Problem D: 逆置链式链表(线性表)
  • 原文地址:https://www.cnblogs.com/maplye/p/450118.html
Copyright © 2011-2022 走看看