# 爬取首页轮播图的链接和名称,并将图片下载到本地:
# -*- coding: utf-8 -*-
"""Scrape the home-page carousel ads: collect names and image URLs, then
download every image to a local directory."""
import json
import os
import ssl  # unused by this script; kept so the original import list is unchanged
import urllib.request

# JSONP endpoint; the server wraps the JSON body in ``shopAds(...)``.
CAROUSEL_URL = (
    'http://pcapi.vip.com/ads/index.php?callback=shopAds'
    '&type=ADSEC56K%2CADSIR7IX%2CADSX7W3G%2CADSNNLS7%2CADS7JI3F'
    '%2CADS2B669%2CADSITG64%2CADS45AV4%2CADS44T33'
    '&warehouse=VIP_NH&areaid=104104&preview=0'
    '&date_from=&time_from=&user_class=&channelId=0'
)
JSONP_CALLBACK = 'shopAds'
# NOTE(review): the request asks for type "ADSEC56K" while the lookup key
# carries an extra "AD" prefix -- kept exactly as the original script had it;
# confirm against a live response.
CAROUSEL_KEY = 'ADADSEC56K'
# Raw string: in a normal literal "\t" would be read as a TAB character.
DOWNLOAD_DIR = r'D:\test'


def strip_jsonp(payload, callback=JSONP_CALLBACK):
    """Return the JSON text enclosed by a ``callback(...)`` JSONP wrapper.

    Only the outer wrapper (and an optional trailing ``;``) is removed, so
    parentheses inside JSON string values are left intact.  Text that is not
    wrapped by *callback* is returned stripped of surrounding whitespace.
    """
    text = payload.strip()
    if text.endswith(';'):
        text = text[:-1].rstrip()
    prefix = callback + '('
    if text.startswith(prefix) and text.endswith(')'):
        return text[len(prefix):-1]
    return text


def extract_carousel(ads, key=CAROUSEL_KEY):
    """Return ``(names, image_urls)`` for the entries in ``ads[key]['items']``.

    Raises:
        KeyError: if *key*, ``'items'``, ``'name'`` or ``'img'`` is missing.
    """
    items = ads[key]['items']
    names = [item['name'] for item in items]
    images = [item['img'] for item in items]
    return names, images


def image_path(index, directory=DOWNLOAD_DIR):
    """Return the local file path ``<directory>/<index>.jpg``."""
    return os.path.join(directory, '%s.jpg' % index)


def download_carousel():
    """Fetch the carousel feed, print what was found and save the images."""
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(CAROUSEL_URL) as response:
        raw = response.read().decode('utf-8')

    ads = json.loads(strip_jsonp(raw))
    print(ads)

    list_name, list_img = extract_carousel(ads)
    print(list_img)
    print(list_name)

    # urlretrieve does not create missing directories.
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    for index, img_url in enumerate(list_img):
        urllib.request.urlretrieve(img_url, image_path(index))


if __name__ == '__main__':
    download_carousel()
# 获取“精选”栏目中的名称、链接和图片的源代码:
# -*- coding: utf-8 -*-
"""Fetch the featured-brand listing and print each brand's name and mobile
image URL."""
import json
import ssl  # unused by this script; kept so the original import list is unchanged
import urllib.request

# Plain-JSON endpoint (no JSONP wrapper to strip).
FEATURED_URL = (
    'http://www.vip.com/index-ajax.php?act=getSellingBrandListV5'
    '&warehouse=VIP_NH&areaCode=104104&channelId=0&pagecode=b&sortType=1'
    '&province_name=%E5%B9%BF%E4%B8%9C&city_name=%E5%B9%BF%E5%B7%9E%E5%B8%82'
    '&preview=&sell_time_from=&time_from=&ids=1'
)
# Key of the entry read from the response's "floors" mapping.
FEATURED_FLOOR = '1'


def extract_featured(listing, floor=FEATURED_FLOOR):
    """Return ``(names, image_urls)`` from a decoded listing response.

    Items are read from ``listing['data']['floors'][floor]['items']``; each
    item contributes its ``'name'`` and ``'mobile_image_one'`` values.

    Raises:
        KeyError: if any of the keys along that path is missing.
    """
    items = listing['data']['floors'][floor]['items']
    names = [item['name'] for item in items]
    images = [item['mobile_image_one'] for item in items]
    return names, images


def list_featured_brands():
    """Download the listing and print the raw text, the decoded JSON, the
    item counts, and the collected names and image URLs."""
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(FEATURED_URL) as response:
        raw = response.read().decode('utf-8')
    print(raw)

    listing = json.loads(raw)
    print(listing)

    list_name, list_img = extract_featured(listing)
    print(len(list_img))
    print(len(list_name))
    print(list_name)
    print(list_img)


if __name__ == '__main__':
    list_featured_brands()