zoukankan      html  css  js  c++  java
  • python爬取珞珈1号卫星数据

    首先登录珞珈一号数据系统查询想要的数据

    利用浏览器审查元素获取包含下载信息的源码

    将最右侧的table相关的网页源码copy到剪切板备用

    利用python下载数据

    # -*- coding: utf-8 -*-
    
    
    
    import requests
    import os
    # import urllib.request
    from bs4 import BeautifulSoup
    from tqdm import tqdm
    import pandas as pd 
    
    
    def saveFile(url, fileName, chunkSize=64 * 1024, timeout=60):
        """Stream ``url`` into ``data/<fileName>`` while showing a byte progress bar.

        Args:
            url: Address of the file to download.
            fileName: Name of the file to create inside the ``data`` directory.
            chunkSize: Number of bytes requested per read of the response body.
            timeout: Seconds to wait for the server to connect or send data
                before giving up.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.RequestException: On connection problems or timeouts.
        """
        # Create the output directory up front so open() cannot fail on a
        # machine where it does not exist yet.
        os.makedirs('data', exist_ok=True)

        # Using the response as a context manager releases the connection even
        # when the download is interrupted.
        with requests.get(url, stream=True, timeout=timeout) as r:
            # Fail loudly instead of saving an HTML error page as an archive.
            r.raise_for_status()

            # Content-Length is optional (e.g. chunked transfer encoding);
            # tqdm accepts total=None and then shows a plain byte counter.
            length = r.headers.get('Content-Length')
            total = int(length) if length is not None else None

            with open(os.path.join('data', fileName), 'wb') as f, \
                    tqdm(unit="B", total=total, desc="downloading..." + fileName) as pbar:
                for chunk in r.iter_content(chunk_size=chunkSize):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        pbar.update(len(chunk))
    
    
    # Replace the placeholder text below with the HTML source of the result
    # table copied from the browser's element inspector.
    html = '''将table的源码粘贴到这里'''

    ## Collect the descriptive fields and the download url of every table row.

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so results could differ
    # between machines. "html.parser" ships with Python itself.
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find_all('tbody')[0]

    data = []
    for tr in tbody.find_all("tr"):
        # Only the last four cells of a row are used: three text fields
        # followed by a cell that holds the links.
        tds = tr.find_all("td")[-4:]

        # Text of the three descriptive cells (named weixing / chuanganqi /
        # time in the DataFrame below).
        temp = [td.text for td in tds[:-1]]

        # The last <a> in the final cell is taken as the download link.
        a = tds[-1].find_all("a")[-1]

        ## download url: the href is appended to the data server's address
        href = "http://59.175.109.173:8888" + a["href"]
        temp.append(href)

        data.append(temp)

    dataSet = pd.DataFrame(data, columns=["weixing", "chuanganqi", "time", "url"])

    ### file name: the three text fields joined together, plus the row index
    ### so that rows with identical fields still get distinct names.
    dataSet.loc[:, "fileName"] = (
        dataSet.loc[:, "weixing"]
        + dataSet.loc[:, "chuanganqi"]
        + dataSet.loc[:, "time"]
        + "-"
        + dataSet.index.map(str)
        + ".tar.gz"
    )
    
    
    
    
    #### download
    
    
    # Fetch every row of dataSet in table order; the outer progress bar counts
    # files, while saveFile draws its own per-file byte bar.
    # To resume an interrupted run, slice `pending` (e.g. pending[start:]).
    pending = list(zip(dataSet["url"], dataSet["fileName"]))
    for url, fileName in tqdm(pending):
        saveFile(url, fileName)

     

  • 相关阅读:
    读书笔记—CLR via C#线程25-26章节
    算法回顾--N皇后问题简单回顾
    编程拾趣--集合子集问题
    读书笔记—CLR via C#异常和状态管理
    读书笔记—CLR via C#字符串及文本
    设计模式---抽象工厂
    读书笔记—CLR via C#反射
    读书笔记—CLR via C#委托和attribute
    C#编程实践—EventBroker简单实现
    Linux平台屏幕录像工具RecordMyDesktop
  • 原文地址:https://www.cnblogs.com/wybert/p/10613873.html
Copyright © 2011-2022 走看看