做 UI 的朋友说花瓣网访问不了,但个人采集的内容还是能获取到的。
赶紧下载下来备份吧。
提取码: muy1
顺便一提,接口可能会变动;2019年1月29日验证有效。
python3+requests
# -*- encoding:utf-8 -*-
'''
author:thewindkee
'''
import requests
import urllib
import json
import re
import time
import random
# import queue
import os
import sys
# Base URLs used by the script.
INDEX_URL = 'http://login.meiwu.co/xx'
USER_PIN_URL_FORMAT = 'http://login.meiwu.co/%s/pins/'
DOWNLOAD_URL = 'http://img.hb.aicdn.com/'

# Headers sent with every request. Header names must not contain the
# trailing ':' separator; it is added on the wire by the HTTP library.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
    "Accept": "application/json",
}

# Every collected pin: str(pin_id) -> file key appended to DOWNLOAD_URL.
PIN_MAP = {}

# URLs whose download failed, and the file they are reported in.
FAILED_TO_DOWN = []
FAILED_TXT = "fail.txt"

# Cookies captured after a successful login.
LOGIN_COOKIES = {}

# Alphabet that randomStr() samples from.
seed = "1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

# File names already present in the download directory.
EXISTED_PIC = []

PIC_POSTFIX = '.jpg'
DOWANLOAD_DIR = 'huaban'
def login(email, password):
    """Log in and return the URL of the logged-in user's pins page.

    Posts the credentials to the auth endpoint, copies the response cookies
    into ``LOGIN_COOKIES`` and extracts the account's ``urlname`` from the
    response body with ``get_urlname``.

    Args:
        email: Account e-mail address.
        password: Account password.

    Returns:
        ``USER_PIN_URL_FORMAT`` filled in with the account's urlname.

    Raises:
        SystemExit: With status 1 when the response is not OK or no urlname
            can be found in it.
    """
    LOGIN_URL = "http://login.meiwu.co/auth/"
    r = requests.post(
        LOGIN_URL,
        headers=HEADERS,
        data={'email': email, 'password': password, '_ref': 'loginPage'},
        timeout=10,
    )
    if r.status_code == requests.codes.ok:
        LOGIN_COOKIES.update(r.cookies.items())
        urlname = get_urlname(r.text)
        if urlname:
            return USER_PIN_URL_FORMAT % urlname
    print("登陆失败")
    # A failed login is an error, so report a non-zero exit status.
    sys.exit(1)
def build_url_for_test():
    """Return a pins-page URL for manual testing without logging in.

    Stores a placeholder ``sid`` cookie in ``LOGIN_COOKIES`` and formats
    ``USER_PIN_URL_FORMAT`` with a placeholder urlname.
    """
    LOGIN_COOKIES.update(sid='xx')
    return USER_PIN_URL_FORMAT % 'xx'
def get_pin_max(content):
    """Return the first ``"pin_id"`` value found in *content*.

    Args:
        content: Text to search, e.g. a response body.

    Returns:
        The digits of the first ``"pin_id": <number>`` occurrence as a
        string, or ``None`` when there is none.
    """
    # Raw string so that \d reaches the regex engine as "digit".
    match = re.search(r'"pin_id": *(\d+)', content)
    return match.group(1) if match else None
def get_urlname(content):
    """Return the first ``"urlname"`` string value found in *content*.

    Args:
        content: Text to search, e.g. a response body.

    Returns:
        The value of the first ``"urlname": "<value>"`` occurrence, or
        ``None`` when there is none.
    """
    # Stop at the first closing quote so that compact JSON such as
    # '"urlname":"abc","x":"y"' yields 'abc' rather than running on.
    match = re.search(r'"urlname": *"([^"\s]+)"', content)
    return match.group(1) if match else None
def randomStr(len):
    """Return *len* characters sampled without repetition from ``seed``."""
    picked = random.sample(seed, len)
    return ''.join(picked)
def build_headers_for_pin():
    """Add the XHR/JSON request markers to ``HEADERS`` and return it.

    The shared ``HEADERS`` dict is modified in place, so every later
    request that uses it carries the extra headers too.
    """
    HEADERS.update({
        'X-Requested-With': 'XMLHttpRequest',
        'X-Request': 'JSON',
    })
    return HEADERS
def get_page_pins(user_pin_url, max):
    """Fetch one page of pins and record them in ``PIN_MAP``.

    Args:
        user_pin_url: URL of the user's pins page.
        max: Value sent as the ``max`` query parameter (presumably an
            upper bound on pin ids for this page; verify against the API).

    Returns:
        The ``pin_id`` of the last pin on the page, to be passed as *max*
        for the next page, or ``None`` when the page has no pins, the
        response is not OK, or any error occurs (the error is printed).
    """
    try:
        # The random token is an extra, valueless query item (presumably a
        # cache buster).
        user_pin_url = '%s?%s&max=%s&limit=100&wfl=1' % (user_pin_url, randomStr(8), max)
        print('收集下载信息 url:%s' % user_pin_url)
        r = requests.get(user_pin_url, headers=HEADERS, cookies=LOGIN_COOKIES, timeout=30)
        r.encoding = 'UTF-8'
        if r.status_code != requests.codes.ok:
            return None
        # json.loads() no longer accepts an ``encoding`` keyword (removed in
        # Python 3.9); r.text is already decoded text.
        d = json.loads(r.text)
        pins = d['user']['pins']
        if not pins:
            return None
        last_pin_id = max
        for pinItem in pins:
            last_pin_id = pinItem['pin_id']
            PIN_MAP[str(last_pin_id)] = pinItem['file']['key']
        return last_pin_id
    except Exception as e:
        print(str(e))
        return None
def save_pin_map(data):
    """Write ``str(data)`` to ``all.txt`` in the current directory.

    An existing file is overwritten.
    """
    text = str(data)
    with open("all.txt", 'w') as out:
        out.write(text)
def download(url, name):
    """Download *url* and save the response body to the file *name*.

    Any failure (network error, timeout, HTTP error status, file error) is
    printed and the URL is appended to ``FAILED_TO_DOWN``; nothing is
    raised to the caller.

    Args:
        url: Address of the image to fetch.
        name: Path of the file to write, opened in binary mode.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(url, headers=HEADERS, timeout=30)
        # Treat 4xx/5xx as failures instead of saving the error page as an
        # image; checking before open() also avoids leaving an empty file.
        r.raise_for_status()
        with open(name, 'wb') as f:
            f.write(r.content)
    except Exception as e:
        print(str(e))
        FAILED_TO_DOWN.append(url)
def download_all(total):
    """Download every pin in ``PIN_MAP`` that is not already on disk.

    Args:
        total: Number of pins, used only for the progress message.
    """
    for index, (pin_id, key) in enumerate(PIN_MAP.items(), start=1):
        url = DOWNLOAD_URL + key
        print('%d/%d 下载 %s' % (index, total, url))
        if downloaded(pin_id):
            continue
        download(url, pin_id + PIC_POSTFIX)
        # Short pause after each real download.
        time.sleep(0.5)
def downloaded(pin_id):
    """Return whether the picture for *pin_id* is listed in ``EXISTED_PIC``.

    Prints a notice when it is.
    """
    file_name = pin_id + PIC_POSTFIX
    already_there = file_name in EXISTED_PIC
    if already_there:
        print(' %s已经存在' % file_name)
    return already_there
def save_failed_to_down_url():
    """Write the failed URLs, comma-separated, to ``FAILED_TXT``.

    The file is overwritten; it ends up empty when nothing failed.
    """
    with open(FAILED_TXT, 'w') as out:
        report = ",".join(FAILED_TO_DOWN)
        out.write(report)
def prepare_pic_dir(DOWANLOAD_PATH):
    """Create the download directory if needed and make it the cwd.

    Args:
        DOWANLOAD_PATH: Directory that will receive the pictures.
    """
    missing = not os.path.exists(DOWANLOAD_PATH)
    if missing:
        print("创建下载目录:%s" % DOWANLOAD_PATH)
        os.makedirs(DOWANLOAD_PATH)
    # Later file writes use bare file names, so they land in this directory.
    os.chdir(DOWANLOAD_PATH)
def get_first_max(user_pin_url_index):
    """Return the starting ``max`` value for paging through the pins.

    Fetches the pins page and takes the first pin id found in it.

    Args:
        user_pin_url_index: URL of the user's pins page.

    Returns:
        The first pin id on the page plus one, as an int.

    Raises:
        ValueError: When no pin id can be found in the page.
    """
    r = requests.get(user_pin_url_index, headers=HEADERS, cookies=LOGIN_COOKIES, timeout=30)
    pin_id = get_pin_max(r.text)
    if pin_id is None:
        # Fail with a readable message instead of int(None)'s TypeError.
        raise ValueError('未能在页面中找到pin_id,可能登陆已失效或接口已变动')
    # +1 so that the pin with this id is itself included in the first page.
    return int(pin_id) + 1
def main():
    """Run the interactive backup: log in, collect all pins, download them.

    Steps: prompt for credentials, log in, page through the user's pins
    until a page yields nothing, switch to the download directory, save
    the pin map, download every picture not already present, then report
    and save the failures. Any exception is printed, not propagated.
    """
    try:
        # urlname=build_url_for_test()
        DOWANLOAD_PATH = (os.getcwd() + os.sep + DOWANLOAD_DIR).strip()
        print("下载花瓣采集到文件夹:%s " % DOWANLOAD_PATH)
        # The prompts end with an escaped newline; a literal line break
        # inside a single-quoted string is a syntax error.
        EMAIL = input('请输入账号\n')
        PASSWORD = input('请输入密码\n')
        USER_PIN_URL = login(EMAIL, PASSWORD)
        MAX = get_first_max(USER_PIN_URL)
        print(MAX)
        build_headers_for_pin()
        # Each page returns the id to continue from; a falsy value ends paging.
        while True:
            MAX = get_page_pins(USER_PIN_URL, MAX)
            time.sleep(0.5)
            if not MAX:
                break
        # prepare_pic_dir() changes the cwd, so all.txt, the pictures and
        # fail.txt are all written inside the download directory.
        prepare_pic_dir(DOWANLOAD_PATH)
        build_existed_pic(DOWANLOAD_PATH)
        save_pin_map(PIN_MAP)
        total = len(PIN_MAP)
        print('总共:%d张,开始下载!' % total)
        download_all(total)
        if FAILED_TO_DOWN:
            print("%s张下载失败,查看%s文件" % (len(FAILED_TO_DOWN), FAILED_TXT))
        else:
            print("下载完成!")
        print('图片下载目录:%s' % DOWANLOAD_PATH)
        save_failed_to_down_url()
    except Exception as e:
        print(str(e))
def build_existed_pic(DOWANLOAD_PATH):
    """Record the pictures already present in *DOWANLOAD_PATH*.

    Appends to ``EXISTED_PIC`` the name of every regular file in the
    directory whose name ends with ``PIC_POSTFIX``.

    Args:
        DOWANLOAD_PATH: Directory to scan.
    """
    for file in os.listdir(DOWANLOAD_PATH):
        # listdir() yields bare names; join them with the directory so the
        # check does not depend on the current working directory.
        full_path = os.path.join(DOWANLOAD_PATH, file)
        if os.path.isfile(full_path) and file.endswith(PIC_POSTFIX):
            EXISTED_PIC.append(file)
# Script entry point.
if __name__ == '__main__':
    try:
        main()
    finally:
        # Wait for Enter before exiting, whether or not main() succeeded.
        input("任意键退出")
效果如图: