import requests
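# Source: https://www.cnblogs.com/denglinzhe/p/12673766.html
# 1. Read the HTML file saved locally
# 2. Extract the image links from the page
# 3. Print the extracted links to the screen
# 4. Save the extracted links to a file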
def DuQuWenJian(inputfile):
    """Read a UTF-8 text file and return its lines.

    Args:
        inputfile: Path of the file to read.

    Returns:
        A list with one string per line; line terminators are kept.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the handle even if readlines() raises.
    with open(inputfile, 'r', encoding='utf-8') as f:
        return f.readlines()
def TiQuImgLianJei(filetxt):
    """Extract image URLs from lines of HTML text.

    For every line containing the substring 'img', the first double-quoted
    value following the last 'src=' on that line is taken as the candidate
    URL. Only candidates containing 'http' are kept.

    Args:
        filetxt: Iterable of text lines.

    Returns:
        A list of the extracted URLs, in input order.
    """
    urls = []
    for line in filetxt:
        if 'img' not in line:
            continue
        pieces = line.split('src=')[-1].split('"')
        # A line can mention "img" without any quoted value (e.g. plain
        # text); indexing [1] unconditionally would raise IndexError.
        if len(pieces) < 2:
            continue
        url = pieces[1]
        if 'http' in url:
            urls.append(url)
    return urls
def XianShiLianJei(TiQuDaoDeLianJei):
    """Print every extracted URL with a 1-based running number.

    Args:
        TiQuDaoDeLianJei: Iterable of URL strings to display.
    """
    # enumerate(..., 1) replaces the hand-maintained counter.
    for count, url in enumerate(TiQuDaoDeLianJei, 1):
        print('第{}张图片URL:{}'.format(count, url))
def ShuChuDaoWenJian(outputfile, TiQuDaoDeLianJei):
    """Write the extracted URLs to a UTF-8 file, one per line, and report it.

    Args:
        outputfile: Path of the file to create or overwrite.
        TiQuDaoDeLianJei: Iterable of URL strings.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    # The context manager closes (and flushes) the file even if a write fails.
    with open(outputfile, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in TiQuDaoDeLianJei)
    # Newlines are written as '\n' escapes; a quoted string literal cannot
    # span physical lines.
    print('\n-------------------------------------------')
    print("\n链接已保存至{}".format(outputfile))
def main(inputfile='html.txt', outputfile='pic.txt'):
    """Extract image links from a saved HTML file, print them and save them.

    Args:
        inputfile: Path of the saved HTML text to scan. Defaults to
            'html.txt'.
        outputfile: Path the extracted links are written to. Defaults to
            'pic.txt'.
    """
    filetxt = DuQuWenJian(inputfile)
    TiQuDaoDeLianJei = TiQuImgLianJei(filetxt)
    XianShiLianJei(TiQuDaoDeLianJei)
    ShuChuDaoWenJian(outputfile, TiQuDaoDeLianJei)
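# Step 1: collect the image links (main() reads html.txt and writes pic.txt).
# Uncomment the call below to regenerate pic.txt:
#main()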
def download_img(img_url, img_name, timeout=10):
    """Download one image and save it to disk.

    Args:
        img_url: URL of the image to fetch.
        img_name: Path of the file to write the image bytes to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the script indefinitely.

    Returns:
        True if the server answered with HTTP 200 and the file was written,
        False otherwise (nothing is written in that case).

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the destination file cannot be written.
    """
    header = {"user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"}
    # Using the response as a context manager releases the connection on
    # every path, including the non-200 early return.
    with requests.get(img_url, headers=header, stream=True, timeout=timeout) as r:
        if r.status_code != 200:
            return False
        with open(img_name, "wb") as f:
            # stream=True only helps if the body is consumed in chunks;
            # r.content would load the whole image into memory first.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return True
def download(url_file="pic.txt"):
    """Download every URL listed in a text file as '<index>.png'.

    The file is expected to hold one URL per line. Blank lines are skipped.
    Images are saved in the current working directory and named by their
    0-based position among the non-blank lines.

    Args:
        url_file: Path of the URL list. Defaults to "pic.txt".

    Raises:
        OSError: If the URL list cannot be read.
    """
    # Read as UTF-8 to match how the link list is written, and close the
    # handle deterministically.
    with open(url_file, "r", encoding="utf-8") as f:
        stripped = [line.strip() for line in f]
    # An empty string is not a valid URL and would make the request fail.
    arr = [url for url in stripped if url]
    for inx, val in enumerate(arr):
        print(inx)
        img_name = str(inx) + ".png"
        download_img(val, img_name)
# Step 2: download the images.
# NOTE(review): this is a top-level statement, so it also runs when the
# module is imported, not only when the file is executed as a script.
download()