zoukankan      html  css  js  c++  java
  • 获取图片并下载到本地,同时获取名称和链接

    # -*- coding: utf-8 -*-
    import urllib.request
    import ssl
    import json
    import xlwt
    # SSL context with certificate verification turned off, built through a
    # private helper of the ssl module.
    # NOTE(review): no urlopen() call in the visible code receives this
    # context - confirm whether it is still needed.
    context = ssl._create_unverified_context()
    
    # Category names (women's wear, shoes & bags, men's, sports, accessories,
    # beauty, mother & baby, home, international, lifestyle). spider.url_name
    # uses each entry as a worksheet name and requests id = position + 1.
    title=['女装','鞋包','男士','运动','饰品','美妆','母婴','居家','国际','生活']
    # NOTE(review): both spider methods create their own local workbook, so
    # this module-level one is not written to by the visible code.
    wb = xlwt.Workbook()
    class spider:
        """Scrape vip.com brand/ad listings into .xls workbooks and download their images.

        Relies on the module-level ``title`` list (category names) and on the
        ``xlwt``, ``json`` and ``urllib.request`` modules imported by the file.
        All output goes below ``_OUT_DIR``; the target directories must already
        exist, because neither ``urlretrieve`` nor ``Workbook.save`` creates them.
        """

        # Root of every output path. The original script kept commented-out
        # variants of each path that pointed at a sibling "monning_1" directory;
        # change this single value to switch runs.
        _OUT_DIR = r'D:\weipinhui\evening'

        # Brand list endpoint; the numeric category id is appended to ``ids=``.
        _BRAND_LIST_URL = (
            'http://www.vip.com/index-ajax.php?act=getSellingBrandListV5'
            '&warehouse=VIP_NH&areaCode=104104&channelId=0&pagecode=b&sortType=1'
            '&province_name=%E5%B9%BF%E4%B8%9C&city_name=%E5%B9%BF%E5%B7%9E%E5%B8%82'
            '&preview=&sell_time_from=&time_from=&ids='
        )

        # Home-page ads endpoint; answers with JSONP wrapped in ``shopAds(...)``.
        _ADS_URL = (
            'http://pcapi.vip.com/ads/index.php?callback=shopAds'
            '&type=ADSEC56K%2CADSIR7IX%2CADSX7W3G%2CADSNNLS7%2CADS7JI3F'
            '%2CADS2B669%2CADSITG64%2CADS45AV4%2CADS44T33'
            '&warehouse=VIP_NH&areaid=104104&preview=0'
            '&date_from=&time_from=&user_class=&channelId=0'
        )

        @staticmethod
        def _fetch_text(url):
            """Return the body of *url* decoded as UTF-8, closing the connection afterwards."""
            with urllib.request.urlopen(url) as response:
                return response.read().decode("utf-8")

        @staticmethod
        def _unwrap_jsonp(text, callback='shopAds'):
            """Return the JSON payload inside a ``callback(...)`` JSONP wrapper.

            Only the wrapper's own parentheses are removed: everything between
            ``callback(`` and the LAST ``)`` is kept, so parentheses that occur
            inside the payload (for example in an item name) survive. Text
            without the wrapper is returned stripped of surrounding whitespace.
            """
            prefix = callback + '('
            start = text.find(prefix)
            if start == -1:
                return text.strip()
            start += len(prefix)
            end = text.rfind(')')
            if end < start:
                # Opening wrapper without a closing parenthesis: keep the rest.
                return text[start:]
            return text[start:end]

        @staticmethod
        def _clean_discount(raw):
            """Strip the ``salebg2`` span markup from *raw* and drop the first 23 characters.

            NOTE(review): the fixed offset of 23 presumably skips a constant
            prefix in the API's discount markup - confirm against a live response.
            """
            text = raw.replace('<span class="salebg2">', '').replace('</span>', '')
            return text[23:]

        @staticmethod
        def _download_all(image_urls, path_template):
            """Download each URL to ``path_template % n`` where n counts up from 0."""
            for number, image_url in enumerate(image_urls):
                urllib.request.urlretrieve(image_url, path_template % number)

        def url_name(self):
            """Export every category's brands to one workbook and download their images.

            For category number ``n`` (1-based position in ``title``) this fetches
            the brand list, writes name / image URL / discount into columns 0-2 of
            a sheet named after the category, and saves the images to
            ``<_OUT_DIR>\\jingxuan_pic\\<n>\\<index>.jpg``.
            """
            wb = xlwt.Workbook()
            for floor_id, sheet_title in enumerate(title, start=1):
                print(floor_id)
                ws = wb.add_sheet(sheet_title)
                url_data = self._fetch_text(self._BRAND_LIST_URL + str(floor_id))
                print(url_data)
                jsDict = json.loads(url_data)
                print(jsDict)
                # The response nests the items under data -> floors -> "<id>".
                jsitems = jsDict['data']['floors'][str(floor_id)]['items']

                list_name = [each['name'] for each in jsitems]
                list_img = [each['mobile_image_one'] for each in jsitems]
                discount = [self._clean_discount(each['discount']) for each in jsitems]
                print(len(list_img))
                print(len(list_name))
                print(list_name)
                print(list_img)

                for row, (name, img, dis) in enumerate(zip(list_name, list_img, discount)):
                    ws.write(row, 0, name)
                    ws.write(row, 1, img)
                    ws.write(row, 2, dis)

                self._download_all(
                    list_img,
                    self._OUT_DIR + '\\jingxuan_pic\\' + str(floor_id) + '\\%s.jpg',
                )
                # Saved inside the loop: the file is rewritten after every
                # category, so a failure on a later category leaves the sheets
                # finished so far on disk.
                wb.save(self._OUT_DIR + r'\jingxuan_name_url\jingxuan_name_url.xls')

        def shouye(self):
            """Export the home-page ad items to a workbook and download their images.

            Writes name / image URL into columns 0-1 of a sheet called '首页' and
            saves the images to ``<_OUT_DIR>\\shouye_pic\\<index>.jpg``.
            """
            wb = xlwt.Workbook()
            ws = wb.add_sheet('首页')
            url_data = self._unwrap_jsonp(self._fetch_text(self._ADS_URL))
            jsDict = json.loads(url_data)
            print(jsDict)
            jsdatas = jsDict['ADADSEC56K']['items']

            list_name_sy = [each['name'] for each in jsdatas]
            list_img_sy = [each['img'] for each in jsdatas]
            print(list_img_sy)
            print(list_name_sy)

            for row, (name, img) in enumerate(zip(list_name_sy, list_img_sy)):
                ws.write(row, 0, name)
                ws.write(row, 1, img)

            self._download_all(list_img_sy, self._OUT_DIR + r'\shouye_pic\%s.jpg')
            wb.save(self._OUT_DIR + r'\shouye_name_url\shouye_name_url.xls')
    # Script entry: build the scraper and run the home-page ad export.
    content=spider()
    # The per-category export is switched off; uncomment to run it as well.
    #content.url_name()
    content.shouye()

     屏幕截图:

    # -*- coding: utf-8 -*-
    from selenium import webdriver
    import time
    class web_jietu:
        """Take screenshots of a web page with a Selenium-driven Chrome browser.

        Each method starts its own browser and always shuts it down again, even
        when loading the page or saving the screenshot fails. Screenshots are
        written to ``D:\\weipinhui\\jie-pic``.
        """

        def shouye(self,url):
            """Open *url* maximized, wait 5 seconds, and save ``shouye.png``."""
            driver = webdriver.Chrome()
            try:
                driver.maximize_window()  # maximize the browser window
                driver.get(url)
                time.sleep(5)
                driver.save_screenshot(r'D:\weipinhui\jie-pic\shouye.png')  # capture the current page
            finally:
                # quit() ends the whole WebDriver session so no browser/driver
                # process is left running after the screenshot.
                driver.quit()

        def jingxuan(self,url):
            """Open *url*, scroll through the whole page, and save ``jingxuan.png``.

            The injected script scrolls down in 100px steps every 100ms, returns
            to the top, and then appends "scroll-done" to the document title.
            This method polls the title for up to 300 seconds and takes the
            screenshot afterwards, whether or not the marker appeared.
            """
            browser = webdriver.Chrome()
            try:
                browser.maximize_window()
                browser.get(url)
                # Scroll to the bottom, then back to the top, before capturing.
                # NOTE(review): presumably done so lazily loaded content is
                # present in the screenshot - confirm.
                browser.execute_script("""
                    (function () {
                        var y = 0;
                        var step = 100;
                        window.scroll(0, 0);
                        function f() {
                            if (y < document.body.scrollHeight) {
                                y += step;
                                window.scroll(0, y);
                                setTimeout(f, 100);
                            } else {
                                window.scroll(0, 0);
                                document.title += "scroll-done";
                            }
                        }
                        setTimeout(f, 1000);
                    })();
                """)
                # Same 300 second budget as 30 polls of 10 seconds, but checked
                # every second so the capture happens soon after scrolling ends.
                for _ in range(300):
                    if "scroll-done" in browser.title:
                        break
                    time.sleep(1)
                browser.save_screenshot(r'D:\weipinhui\jie-pic\jingxuan.png')
            finally:
                browser.quit()
    # Script entry: capture both screenshots of the vip.com home page.
    url = 'http://www.vip.com/'
    pic=web_jietu()
    # Plain screenshot after a fixed wait.
    pic.shouye(url)
    # Screenshot taken after the page has been scrolled through.
    pic.jingxuan(url)
  • 相关阅读:
    小程序ArrayBuffer转JSON
    梅林路由修改hosts
    小程序半屏弹窗(Half Screen Dialog)插槽(Slot)无效的解决方法
    [小程序]存在将未绑定在 WXML 的变量传入 setData 的解决方法!
    小程序scroll-view指定高度
    修改小程序mp-halfScreenDialog组件高度
    小程序图片懒加载组件 mina-lazy-image
    OpenCOLLADA v1.6.68 MAYA MAX 全文件
    位运算相关知识
    全排列 next_permutation() 函数
  • 原文地址:https://www.cnblogs.com/caicaihong/p/5925666.html
Copyright © 2011-2022 走看看