zoukankan      html  css  js  c++  java
  • python 爬取媒体文件(无防火墙)

    # -*- coding: utf-8 -*-
    import requests
    import pandas as pd 
    import os,time
    
    # Base directory; create_file() appends the customer id to this prefix to
    # build the per-customer folder path.
    root_path = './根目录/'
    # Excel workbook read by Auto_down.read_excel().
    input_file = '码表.xlsx'
    # URL template; download_voice() substitutes the voice id for %s.
    # NOTE(review): the spaces around "id = %s" end up in the query string as
    # written -- confirm against the real endpoint whether "?id=%s" is intended.
    url = 'http://api.map.baidu.com/geocoder/v2/?id = %s&local=1'
    # File to which download_voice() appends the ids it could not download.
    fail_file = root_path +'fail.csv'
    
    class Auto_down:
        """Download one media file per spreadsheet row into per-customer folders.

        Relies on the module-level settings ``root_path``, ``input_file``,
        ``url`` and ``fail_file``.
        """

        def __init__(self):
            print("--start--")

        def read_excel(self):
            """Read the id columns from the workbook and process every row.

            The ``cust_id`` and ``void_id`` columns of the configured sheet are
            read as strings so that leading zeros are preserved, then
            ``create_file`` is called once per row.
            """
            # converters={...: str} makes pandas read the column as text, so a
            # leading 0 in an id is not dropped.  ``sheet_name`` is the current
            # keyword; the old ``sheetname`` spelling was removed from pandas.
            sheet = pd.read_excel(
                input_file,
                converters={u'列名': str, 'cust_id': str, 'void_id': str},
                sheet_name='子表名',
            )

            for custid, voiceid in zip(sheet['cust_id'], sheet['void_id']):
                self.create_file(custid, voiceid)

        def _record_failure(self, voiceid):
            """Append ``voiceid`` as one line to the failure log."""
            with open(fail_file, 'a+') as ff:
                ff.write(str(voiceid) + '\n')

        def download_voice(self, custid_filename, voiceid):
            """Fetch the media for ``voiceid`` and save it as an .mp3 file.

            Args:
                custid_filename: Existing directory the file is written into.
                voiceid: Id substituted into the URL template; also used as the
                    file name (``<voiceid>.mp3``).

            A non-200 response, a request error or a file-system error does not
            raise; the id is appended to the failure log instead.
            """
            print(voiceid)
            # Ids may arrive as numbers when the column was not read as text.
            voiceid = str(voiceid)
            try:
                # Without a timeout a stalled server would block the run forever.
                r = requests.get(url % voiceid, timeout=30)
                downloaded = r.status_code == 200
                if downloaded:
                    voice_filename = '%s/%s.mp3' % (custid_filename, voiceid)
                    with open(voice_filename, 'wb') as fd:
                        fd.write(r.content)
            except (requests.RequestException, OSError):
                # Only network and file errors are treated as a failed download;
                # KeyboardInterrupt and programming errors still propagate.
                print('request url is fail!!')
                downloaded = False

            if not downloaded:
                self._record_failure(voiceid)

        def create_file(self, custid, voiceid):
            """Ensure the customer's folder exists, then download ``voiceid``.

            Args:
                custid: Customer id; names the folder under ``root_path``.
                voiceid: Id of the media file to download into that folder.
            """
            custid_filename = root_path + str(custid)
            # makedirs also creates a missing root directory and tolerates an
            # existing folder, so the download runs for every row -- including
            # the first row of each customer, which used to be skipped.
            os.makedirs(custid_filename, exist_ok=True)
            self.download_voice(custid_filename, voiceid)
    
    if __name__ == '__main__':
        # Time the whole run.  time.clock() was removed in Python 3.8;
        # time.perf_counter() is the documented replacement for measuring
        # elapsed intervals.
        tStart = time.perf_counter()

        AD = Auto_down()
        AD.read_excel()

        tEnd = time.perf_counter()

        # Elapsed time in seconds.
        print("%s s"%(tEnd - tStart))
    # -*- coding: utf-8 -*-
    import requests
    
    # Stand-alone variant: request one URL three times and save the response
    # body, logging the id to the failure file whenever an attempt fails.

    # Directory that receives both the downloaded file and the failure log.
    root_path = "./下载/"

    # URL to download; left empty here and must be filled in before running.
    url = ""
    # File to which the id is appended after each failed attempt.
    fail_file = root_path + 'fail.csv'
    # Id written to the failure log.
    voiceid = '11111'
    # NOTE(review): all three attempts run even after a success, rewriting the
    # same file -- confirm whether the loop should stop at the first success.
    for _ in range(3):
        try:
            # Without a timeout a stalled server would block the script forever.
            r = requests.get(url, timeout=30)
            if r.status_code == 200:
                voice_filename = root_path + 'dada.fdf'
                with open(voice_filename, 'wb') as fd:
                    fd.write(r.content)
            else:
                with open(fail_file, 'a+') as ff:
                    ff.write(voiceid + '\n')
        except (requests.RequestException, OSError):
            # Only network and file errors count as a failed attempt; the
            # original bare ``except`` also hid KeyboardInterrupt, and its
            # ``prin`` typo raised NameError from inside the handler.
            print("fail")
            with open(fail_file, 'a+') as ff:
                ff.write(voiceid + '\n')

    r = requests.get(url)
    r.status_code 获取响应状态码
    r.text 获取响应内容
    r.headers 获取响应头
    r.encoding 获取响应编码
    r.content 获取二进制响应内容
    r.json() 获取JSON响应内容

  • 相关阅读:
    kaggle之员工离职分析
    Titanic幸存预测分析(Kaggle)
    学习python,第五篇
    VLAN入门知识
    复习下VLAN的知识
    复习下网络七层协议
    学习python,第四篇:Python 3中bytes/string的区别
    学习python,第三篇:.pyc是个什么鬼?
    学习python,第二篇
    学习python,第一篇
  • 原文地址:https://www.cnblogs.com/smuxiaolei/p/10847369.html
Copyright © 2011-2022 走看看