以下是从163源下载openstack-ocata版软件包的脚本:
from html.parser import HTMLParser
from urllib import request
import urllib
import os,sys
import socket
class myparser(HTMLParser):
    """Collect the ``href`` value of every ``<a>`` tag fed to the parser.

    After ``feed()`` the ``links`` attribute holds the href strings in
    document order.
    """

    def __init__(self):
        super().__init__()
        # href values gathered by handle_starttag, in document order.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the href of an anchor tag; every other tag is ignored.

        Args:
            tag: Tag name as passed in by HTMLParser.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        if tag != 'a':
            return
        for name, value in attrs:
            # HTMLParser reports a valueless attribute (``<a href>``) with
            # value None; storing it would break callers that treat every
            # entry of ``links`` as a string.
            if name == 'href' and value is not None:
                self.links.append(value)
def callbackfunc(blocknum, blocksize, totalsize):
    """Progress hook: redraw a one-line progress bar on stdout.

    Intended to be passed as the report hook of ``urlretrieve``.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of one block in bytes.
        totalsize: Size of the remote file in bytes. Values at or below the
            505528-byte threshold (including 0 or a negative "unknown" size)
            are shown as 100% straight away.
    """
    if totalsize > 505528:
        # blocknum * blocksize can overshoot the real size on the last block,
        # so cap the bar at 100%.
        percent = min(100, int(100.0 * blocknum * blocksize / totalsize))
    else:
        # Small (or unknown-size) files: no division is performed at all,
        # which also avoids dividing by a zero totalsize.
        percent = 100
    # The label is the module-level ``file_name`` global; fall back to an
    # empty label when it has not been set yet instead of raising NameError.
    name = globals().get('file_name', '')
    # NOTE(review): the original literal was split over two source lines; a
    # carriage return (redraw on the same line) is assumed here.
    sys.stdout.write('\r')
    sys.stdout.write(name + percent * '>' + str(percent) + '%')
    sys.stdout.flush()
def create_dir(root_tree, catalog):
    """Create the local directory ``catalog`` underneath ``root_tree``.

    Mirrors one directory of the remote URL layout on the local disk.

    Args:
        root_tree: Local directory the new directory is created under.
        catalog: Directory path relative to ``root_tree`` (a trailing ``/``
            is fine). An absolute ``catalog`` is used as-is.

    Raises:
        FileExistsError: If the target path exists but is not a directory.
    """
    # Build the full path instead of os.chdir()-ing into root_tree: changing
    # the working directory leaks out of this function, breaks on the second
    # call when root_tree is relative, and fails if root_tree is missing.
    target = os.path.join(root_tree, catalog)
    # An already existing directory is the normal case on re-runs.
    os.makedirs(target, exist_ok=True)
def download_file(url, down_path):
    """Download ``url`` to ``down_path``, remembering failures for a retry.

    Side effects on module globals:
        file_name: Set to the last path segment of ``url``; the progress
            hook uses it as the label of the progress line.
        error_download: Dict mapping failed URL -> intended local path.
            Entries are only ever added here, never removed.

    Args:
        url: Remote file URL.
        down_path: Local path the file is written to.
    """
    global file_name
    global error_download
    file_name = url.split('/')[-1]
    # Create the failure registry once and keep it across calls. Resetting it
    # on every call would discard every failure except the most recent one.
    error_download = globals().setdefault('error_download', {})
    socket.setdefaulttimeout(30)
    try:
        request.urlretrieve(url, down_path, callbackfunc)
    except socket.gaierror:
        error_download[url] = down_path
        print('socket.gaierror' , url)
    except urllib.error.URLError:
        error_download[url] = down_path
        print('urllib.error.URLError',url)
    except socket.timeout:
        # A timeout while reading the body is not wrapped in URLError; record
        # it for the retry pass instead of aborting the whole crawl.
        error_download[url] = down_path
        print('socket.timeout', url)
    # NOTE(review): the original literal was split over two source lines; a
    # newline (terminating the progress line) is assumed here.
    sys.stdout.write('\n')
def get_url_tree(url_tree):
    """Process one level of directory listings and mirror it locally.

    For every listing page, each link containing ``/`` is treated as a
    subdirectory (created locally and queued for the next level) and every
    other link is treated as a file and downloaded.

    Args:
        url_tree: Dict mapping listing URL -> local directory path. Names are
            appended by plain string concatenation, so both sides are
            expected to end with ``/``.

    Returns:
        A ``(url_tree_dict, level)`` tuple: the dict for the next level
        (subdirectory URL -> local path) and the number of subdirectory
        links found. ``level == 0`` means there is nothing left to descend
        into.
    """
    url_tree_dict = {}
    level = 0
    for url, local_dir in url_tree.items():
        # Close the HTTP response as soon as the page has been read.
        with request.urlopen(url) as response:
            page = response.read().decode('utf-8')
        hp = myparser()
        hp.feed(page)
        hp.close()
        for link in hp.links:
            # Skip the parent-directory entry (however often it appears) and
            # empty/valueless hrefs that cannot name a file or directory.
            if not link or link == "../":
                continue
            if '/' in link:
                create_dir(local_dir, link)
                url_tree_dict[url + link] = local_dir + link
                # Count each queued subdirectory directly rather than
                # inspecting the loop variable after the fact.
                level += 1
            else:
                download_file(url + link, local_dir + link)
    return url_tree_dict, level
# Starting point of the crawl: remote listing URL -> local mirror directory.
url_tree = {"http://mirrors.163.com/centos/7/cloud/x86_64/openstack-ocata/":'/centos/7/cloud/x86_64/openstack-ocata/'}

# Make sure every local root exists before anything is downloaded into it;
# taking the path from url_tree keeps it defined in a single place.
for local_root in url_tree.values():
    os.makedirs(local_root, exist_ok=True)

# Descend one directory level per iteration until a level yields no
# subdirectories.
while True:
    url_tree,level = get_url_tree(url_tree)
    if level == 0:
        break
    print(url_tree,level)

# Retry every recorded failure once. The registry only exists if
# download_file ran at least once, and download_file may rebind or update it
# while we loop, so iterate over a snapshot.
for failed_url, failed_path in list(globals().get('error_download', {}).items()):
    download_file(failed_url, failed_path)