首先登录珞珈一号数据系统查询想要的数据
利用浏览器审查元素获取包含下载信息的源码
将最右侧 table 对应的网页源码复制到剪贴板备用
利用python下载数据
# -*- coding: utf-8 -*-
"""Batch-download the files listed in a Luojia-1 query-result table.

Usage: paste the HTML of the result table (copied from the browser's element
inspector) into ``html`` below, then run the script. The last link of every
table row is resolved against ``BASE_URL`` and streamed into ``data/``.
"""
import os
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Host that serves the download links found in the table.
BASE_URL = "http://59.175.109.173:8888"
# Directory (relative to the working directory) that receives the archives.
DOWNLOAD_DIR = "data"
# Bytes requested per read; large enough to avoid per-chunk overhead.
CHUNK_SIZE = 64 * 1024
# (connect, read) timeouts in seconds so a stalled server cannot hang the run.
REQUEST_TIMEOUT = (10, 60)


def saveFile(url, fileName):
    """Stream ``url`` into ``data/<fileName>`` while showing a progress bar.

    The payload is written to ``<fileName>.part`` first and renamed only after
    the transfer completes, so an interrupted download never leaves a
    truncated archive under the final name.

    Args:
        url: Address of the file to fetch.
        fileName: Name of the output file inside ``data/``.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an HTTP
            error status.
        OSError: If the output file cannot be written.
    """
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    target = os.path.join(DOWNLOAD_DIR, fileName)
    partial = target + ".part"
    try:
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
            # Fail loudly instead of saving an HTML error page as an archive.
            r.raise_for_status()
            # Content-Length is absent for chunked responses; tqdm accepts
            # total=None and then shows a counter without a percentage.
            length = r.headers.get("Content-Length")
            total = int(length) if length is not None else None
            with open(partial, "wb") as f:
                with tqdm(unit="B", unit_scale=True, total=total,
                          desc="downloading..." + fileName) as pbar:
                    for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
                            pbar.update(len(chunk))
        os.replace(partial, target)
    finally:
        # After a successful rename the partial file no longer exists; after a
        # failure this removes the incomplete data.
        if os.path.exists(partial):
            os.remove(partial)


def _buildDownloadTable(tableHtml):
    """Parse the pasted table HTML into a DataFrame of download jobs.

    For every ``<tr>`` inside the first ``<tbody>`` the last four ``<td>``
    cells are used: the first three become the text columns and the last
    ``<a href>`` of the fourth cell becomes the download URL. Rows with fewer
    than four cells or without a link are skipped.

    Args:
        tableHtml: HTML source containing the result table.

    Returns:
        DataFrame with the columns ``weixing``, ``chuanganqi``, ``time``,
        ``url`` and ``fileName`` (the three text cells concatenated, followed
        by ``-<row index>.tar.gz``).

    Raises:
        ValueError: If the HTML contains no ``<tbody>`` element.
    """
    # An explicit parser keeps results identical across machines.
    soup = BeautifulSoup(tableHtml, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        raise ValueError(
            "no <tbody> found - paste the result table's HTML into `html`")

    records = []
    for tr in tbody.find_all("tr"):
        tds = tr.find_all("td")[-4:]
        if len(tds) < 4:
            continue
        links = tds[-1].find_all("a", href=True)
        if not links:
            continue
        cells = [td.text.strip() for td in tds[:-1]]
        # urljoin equals plain concatenation for "/path" hrefs and also copes
        # with absolute or slash-less hrefs.
        href = urljoin(BASE_URL, links[-1]["href"])
        # The running index keeps names unique when rows share the same text.
        name = "".join(cells) + "-" + str(len(records)) + ".tar.gz"
        records.append(cells + [href, name])

    # Column names are pinyin: weixing = satellite, chuanganqi = sensor.
    return pd.DataFrame(
        records,
        columns=["weixing", "chuanganqi", "time", "url", "fileName"])


# Placeholder: replace this string with the copied HTML source of the table.
html = '''将table的源码粘贴到这里'''

## get download url and file name
dataSet = _buildDownloadTable(html)

#### download
failed = []
# To resume an interrupted run, skip the rows that are already on disk.
for row in tqdm(dataSet.itertuples(index=False), total=len(dataSet)):
    try:
        saveFile(row.url, row.fileName)
    except requests.RequestException as err:
        # One bad link should not abort the whole batch; report it and go on.
        tqdm.write("failed: %s (%s)" % (row.fileName, err))
        failed.append(row.fileName)

if failed:
    raise SystemExit("%d of %d downloads failed: %s"
                     % (len(failed), len(dataSet), ", ".join(failed)))