#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import urllib
from urllib.parse import urlencode
from urllib.request import Request, urlopen
import re
import time
import os
import mysql.connector
# Module-level counter initialised to 0. Nothing in the visible code reads or
# updates it (the functions below thread their own counter through arguments).
times = 0
def saveDownedurl(downedurl):
    """Record a processed page URL in the ``downedurl`` table.

    Opens a new MySQL connection to the ``picurl`` database, inserts
    *downedurl* into the ``picurl`` column using a parameterized query,
    commits, and prints the affected row count.

    Args:
        downedurl: URL string to store.

    Raises:
        Any exception raised by ``mysql.connector`` while connecting,
        executing or committing propagates to the caller. The cursor and
        connection are closed in every case.
    """
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment instead.
    conn = mysql.connector.connect(user='root', password='694521', database='picurl')
    try:
        cursor = conn.cursor()
        try:
            # Parameterized placeholder: the driver escapes the URL, so a
            # quote character inside it cannot break the statement.
            cursor.execute("INSERT INTO downedurl (picurl) VALUES (%s)", [downedurl])
            conn.commit()
            print(cursor.rowcount, "记录插入成功。")
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the insert fails.
        conn.close()
def download_pic(pic_url, root_url, down_times, timeout=30):
    """Download one image and save it as ``<down_times>.jpg``.

    The file is written relative to the current working directory and an
    existing file with the same name is overwritten.

    Args:
        pic_url: URL of the image to fetch.
        root_url: URL sent as the ``Referer`` request header.
        down_times: Counter value used as the output file name.
        timeout: Seconds to wait on blocking network operations before
            ``urlopen`` gives up. Defaults to 30.

    Returns:
        ``down_times + 1``, i.e. the counter value for the next download.

    Raises:
        Exceptions from ``urlopen`` (for example ``urllib.error.URLError``)
        and ``OSError`` from writing the file propagate to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0',
        'Referer': root_url,
    }
    down_path = str(down_times) + '.jpg'
    print(down_path)
    request = Request(pic_url, headers=headers)
    # Context manager closes the response (and its socket) once read.
    with urlopen(request, timeout=timeout) as response:
        data = response.read()
    # The file is only created after the download succeeded, so a failed
    # request does not leave an empty .jpg behind.
    with open(down_path, 'wb') as f:
        f.write(data)
    return down_times + 1
def jiexi_rootPic_url(next_rootUrl, down_times):
    """Fetch a page and download every image referenced by ``<img src="...">``.

    The page is decoded as UTF-8 and scanned with a case-insensitive regular
    expression. Each ``src`` value is resolved against the page URL, so
    relative and protocol-relative links work, and is then passed to
    ``download_pic`` with the page URL as referer. The function sleeps two
    seconds after each download.

    Args:
        next_rootUrl: URL of the page to scan.
        down_times: Counter value used to name the first downloaded file.

    Returns:
        The counter value after all images on the page have been downloaded.

    Raises:
        Exceptions from ``urlopen``, from UTF-8 decoding, and from
        ``download_pic`` propagate to the caller.
    """
    # Function-scope import keeps this block self-contained; the file's
    # top-level imports do not bring in urljoin.
    from urllib.parse import urljoin

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0'
    }
    request = Request(next_rootUrl, headers=headers)
    # Context manager closes the response once the body has been read.
    with urlopen(request) as response:
        html = response.read().decode("utf-8")

    pattern = re.compile(r'<img src="(.*?)"', re.IGNORECASE)
    downtime = down_times
    for src in pattern.findall(html):
        # Request() rejects URLs without a scheme, so make the link absolute.
        pic_url = urljoin(next_rootUrl, src)
        print('download_prepare')
        downtime = download_pic(pic_url, next_rootUrl, downtime)
        print(pic_url)
        # Pause between downloads to avoid hammering the server.
        time.sleep(2)
    return downtime
def jiexi_url(root_url, down_times):
    """Crawl an index page and process every ``/rnyy*.html`` page it links to.

    The index page is decoded as UTF-8 and scanned for ``/rnyy<suffix>.html``
    links (case-insensitive). For each distinct suffix, in first-seen order,
    the absolute URL is rebuilt on ``http://mmff30.com``, printed, stored via
    ``saveDownedurl`` and passed to ``jiexi_rootPic_url``.

    Args:
        root_url: URL of the index page.
        down_times: Counter value used to name the first downloaded file.

    Returns:
        The counter value after all linked pages have been processed.

    Raises:
        Exceptions from ``urlopen``, from UTF-8 decoding, and from the
        helper functions propagate to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0'
    }
    request = Request(root_url, headers=headers)
    # Context manager closes the response once the body has been read.
    with urlopen(request) as response:
        html = response.read().decode("utf-8")

    # The dot is escaped so only a literal ".html" ends a link; unescaped it
    # matches any character and can cut the captured suffix short.
    link_pattern = re.compile(r'/rnyy(.*?)\.html', re.IGNORECASE)
    # dict.fromkeys drops repeated links while keeping first-seen order, so
    # a page linked several times is stored and downloaded only once.
    suffixes = dict.fromkeys(link_pattern.findall(html))

    downtime = down_times
    for suffix in suffixes:
        path = 'http://mmff30.com/rnyy' + suffix + '.html'
        print(path)
        saveDownedurl(path)
        downtime = jiexi_rootPic_url(path, downtime)
    return downtime
# Script entry point: crawl the hard-coded index page, passing 4000 as the
# starting counter used to name downloaded files.
# NOTE(review): this call is not behind an `if __name__ == "__main__":` guard,
# so it also runs whenever this file is imported — confirm that is intended.
jiexi_url('http://mmff30.com/rwmy_9_3.html',4000)