python实现自动重启本程序的方法
http://www.jb51.net/article/69174.htm
# Probe every URL from the input list and record the ones that answer HTTP 404.
#
# Input : DISTINCT_url.404.notailaba.txt      one entry per line; 'http://' is prepended
# Output: DISTINCT_url.404.notailaba.RES.txt  entries that returned 404, one per line
# State : DISTINCT_url.break.log              1-based counter of the URL in progress,
#                                             so an interrupted run resumes at that URL
import random
import time

import requests

URL_LIST_PATH = 'DISTINCT_url.404.notailaba.txt'
CHECKPOINT_PATH = 'DISTINCT_url.break.log'
RESULT_PATH = 'DISTINCT_url.404.notailaba.RES.txt'
SCHEME = 'http://'
# Upper bound in seconds for connecting and for each read. requests applies no
# timeout by default, so one unresponsive server would stall the whole scan.
REQUEST_TIMEOUT = 10


def _read_checkpoint():
    """Return the counter saved by a previous run, or 0 if there is none."""
    try:
        with open(CHECKPOINT_PATH, 'r', encoding='utf-8') as fc:
            first_line = fc.readline()
    except FileNotFoundError:
        return 0
    try:
        return int(first_line)
    except ValueError:
        # Empty or unreadable checkpoint: start from the beginning.
        return 0


url_l = []
with open(URL_LIST_PATH, 'r', encoding='utf-8') as fo:
    for line in fo:
        # Remove every whitespace character, including the trailing newline,
        # so that it does not become part of the requested URL.
        url_l.append(SCHEME + ''.join(line.split()))
le = len(url_l)

# Read once up front: during the scan the checkpoint is only ever set to the
# counter of the URL being processed, so it can never cause a later skip.
resume_from = _read_checkpoint()

# The counter is derived from the list position, so a checkpoint value always
# refers to the same URL, and the scan ends after a single pass over the list.
for c, url in enumerate(url_l, 1):
    # URLs on iask.sina are deliberately not requested.
    if 'iask.sina' in url:
        continue
    # Already handled by an earlier run.
    if c < resume_from:
        continue
    # Clock-driven pause: whenever the wall-clock second is a multiple of 10,
    # print it and wait one second. The pause happens in place; restarting the
    # scan from the top here would replay URLs that were already handled.
    if int(time.time()) % 10 == 0:
        print(int(time.time()))
        time.sleep(1)
    # Extra randomized pause every 50 URLs.
    if c % 50 == 0:
        time.sleep(random.randint(0, 3))
    # Persist progress before the request so a crash resumes at this URL.
    with open(CHECKPOINT_PATH, 'w', encoding='utf-8') as flog:
        flog.write(str(c))
    print(c, '/', le, '---', url)
    try:
        req = requests.get(url, timeout=REQUEST_TIMEOUT)
    except (requests.RequestException, ValueError) as exc:
        # Best effort: report the failure and move on to the next URL.
        # Ctrl-C (KeyboardInterrupt) is intentionally not caught.
        print(c, 'request failed:', exc)
        continue
    status = req.status_code
    req.close()
    if status != 404:
        continue
    # Strip only the leading scheme that was added above.
    entry = url[len(SCHEME):]
    print(entry)
    with open(RESULT_PATH, 'a', encoding='utf-8') as fr:
        fr.write(entry + '\n')
卡顿的原因是连续的同域名url的请求,目标服务器做了限制。
# Probe every URL from the input list and record the ones that answer HTTP 404,
# pausing a random 1-3 seconds every 10 URLs to keep the request rate low.
#
# Input : DISTINCT_url.404.notailaba.txt      one entry per line; 'http://' is prepended
# Output: DISTINCT_url.404.notailaba.RES.txt  entries that returned 404, one per line
# State : DISTINCT_url.break.log              1-based counter of the URL in progress,
#                                             so an interrupted run resumes at that URL
import random
import time

import requests

URL_LIST_PATH = 'DISTINCT_url.404.notailaba.txt'
CHECKPOINT_PATH = 'DISTINCT_url.break.log'
RESULT_PATH = 'DISTINCT_url.404.notailaba.RES.txt'
SCHEME = 'http://'
# Upper bound in seconds for connecting and for each read. requests applies no
# timeout by default, so one unresponsive server would stall the whole scan.
REQUEST_TIMEOUT = 10


def _read_checkpoint():
    """Return the counter saved by a previous run, or 0 if there is none."""
    try:
        with open(CHECKPOINT_PATH, 'r', encoding='utf-8') as fc:
            first_line = fc.readline()
    except FileNotFoundError:
        return 0
    try:
        return int(first_line)
    except ValueError:
        # Empty or unreadable checkpoint: start from the beginning.
        return 0


url_l = []
with open(URL_LIST_PATH, 'r', encoding='utf-8') as fo:
    for line in fo:
        # Remove every whitespace character, including the trailing newline,
        # so that it does not become part of the requested URL.
        url_l.append(SCHEME + ''.join(line.split()))
le = len(url_l)

# Read once up front: during the scan the checkpoint is only ever set to the
# counter of the URL being processed, so it can never cause a later skip.
resume_from = _read_checkpoint()

# A single pass over the list. Looping forever would request every URL again
# once the list is exhausted and append duplicate entries to the result file.
# There is no clock-based pause in this variant; pacing relies only on the
# randomized sleep below.
for c, url in enumerate(url_l, 1):
    # URLs on iask.sina are deliberately not requested.
    if 'iask.sina' in url:
        continue
    # Already handled by an earlier run.
    if c < resume_from:
        continue
    # Randomized pause every 10 URLs.
    if c % 10 == 0:
        time.sleep(random.randint(1, 3))
    # Persist progress before the request so a crash resumes at this URL.
    with open(CHECKPOINT_PATH, 'w', encoding='utf-8') as flog:
        flog.write(str(c))
    print(c, '/', le, '---', url)
    try:
        req = requests.get(url, timeout=REQUEST_TIMEOUT)
    except (requests.RequestException, ValueError) as exc:
        # Best effort: report the failure and move on to the next URL.
        # Ctrl-C (KeyboardInterrupt) is intentionally not caught.
        print(c, 'request failed:', exc)
        continue
    status = req.status_code
    req.close()
    if status != 404:
        continue
    # Strip only the leading scheme that was added above.
    entry = url[len(SCHEME):]
    print(entry)
    with open(RESULT_PATH, 'a', encoding='utf-8') as fr:
        fr.write(entry + '\n')
改进办法:在请求之间适当休息(sleep),降低对同一域名的请求频率。
优化,统计url
结合实际的url列表清单,目前对观察数据的实验表明:此处理保证了qps在可承受范围内。
首先从业务的角度分析出现的问题,然后借助技术工具去解决它!而不是不考虑业务场景,仅仅把它剥离为一个技术问题,进而陷入技术的漩涡。