import urllib.request
import os
def url_open(url):
    '''Open *url* and return the raw body of the response.

    The request is sent with a fixed, browser-like ``User-Agent`` header.

    Args:
        url: Address to request; passed to ``urllib.request.Request``.

    Returns:
        bytes: The complete, undecoded response body.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``urllib.error.HTTPError`` for HTTP error statuses).
    '''
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3325.181 Safari/537.36',
    )
    # The response wraps an open connection/file handle; the context manager
    # guarantees it is closed even if read() raises.
    with urllib.request.urlopen(req) as response:
        return response.read()
def save_file(file_url):
    '''Download *file_url* and save it in the current working directory.

    The local file name is the last ``/``-separated component of the URL.

    Args:
        file_url: Address of the file to fetch (handed to ``url_open``).

    Raises:
        Any exception raised by ``url_open`` when the request fails, or
        ``OSError`` if the local file cannot be written.
    '''
    filename = file_url.split('/')[-1]
    # Fetch first: opening the destination before the request succeeds would
    # leave a zero-byte file behind whenever the download fails.
    content = url_open(file_url)
    with open(filename, 'wb') as f:
        f.write(content)
def download_file(folder="files"):
    '''Download the ``.hea`` files for records 100-234 from PhysioNet.

    Files are requested from the ``mitdb`` directory on physionet.org, one
    per record number. Record numbers whose download fails are skipped.
    Afterwards, any empty file found under the download folder is removed.

    Note: this changes the process's current working directory to *folder*
    and leaves it there.

    Args:
        folder: Directory to download into; created if it does not exist.

    Raises:
        OSError: If *folder* cannot be created or entered, or if an empty
            file cannot be inspected or removed during cleanup.
    '''
    os.makedirs(folder, exist_ok=True)
    # save_file() writes relative to the working directory, so move into it.
    os.chdir(folder)

    url = "https://physionet.org/physiobank/database/mitdb/"
    for record in range(100, 235):
        file_url = url + str(record) + '.hea'
        try:
            save_file(file_url)
        except Exception:
            # Best effort: a record number that cannot be fetched is skipped.
            # ``Exception`` (not a bare ``except``) keeps Ctrl-C and
            # SystemExit able to stop the loop.
            continue

    # Remove empty files left behind by failed downloads. The working
    # directory is now the download folder; join with ``root`` so files in
    # nested directories resolve correctly on any platform.
    for root, dirs, files in os.walk(os.curdir):
        for name in files:
            path = os.path.join(root, name)
            if os.path.getsize(path) <= 0:
                os.remove(path)
# Script entry point: start the download only when this file is run
# directly, not when it is imported.
if __name__ == '__main__':
    download_file()