zoukankan      html  css  js  c++  java
  • python批量下载淘宝图片3

    import urllib.request
    import os
    def url_open(url):
        """Fetch ``url`` and return the raw response body as bytes.

        The request carries a desktop-Chrome ``User-Agent`` header instead of
        urllib's default one.

        Args:
            url: Address to fetch, as a string.

        Returns:
            The undecoded response body (``bytes``).

        Raises:
            urllib.error.URLError: If the request cannot be completed.
        """
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36')
        # Open the Request object, not the bare URL: otherwise the header set
        # above is silently dropped. The ``with`` block closes the connection.
        with urllib.request.urlopen(req) as response:
            return response.read()
    def get_page(url):
        """Return the text that follows the 'current-comment-page' marker.

        The page is fetched with ``url_open`` and decoded as GBK. The returned
        slice starts 23 characters after the beginning of the marker and stops
        just before the next ``]``.

        NOTE(review): presumably this is a page number embedded in the HTML;
        confirm against the target page. A missing marker is not detected.
        """
        marker = 'current-comment-page'
        text = url_open(url).decode('gbk')
        begin = text.find(marker) + 23
        end = text.find(']', begin)
        return text[begin:end]
    def find_imgs(url):
        """Collect ``.jpg`` image addresses from the page at ``url``.

        The page is fetched with ``url_open`` and decoded as GBK. Only images
        written as ``img src=`` are picked up. For each occurrence, a ``.jpg``
        suffix is searched for within 255 characters of the tag start.

        Returns:
            A list of strings, each ``'http://'`` followed by the extracted
            address with its first 47 characters removed.
        """
        tag = 'img src='
        page = url_open(url).decode('gbk')
        found = []
        pos = page.find(tag)  # only images in the "img src=" form are handled
        while pos != -1:
            end = page.find('.jpg', pos, pos + 255)
            if end == -1:
                # No .jpg close enough: resume the scan just past this tag.
                resume = pos + 9
            else:
                # Skip 'img src=' plus one more character (presumably the
                # opening quote) and keep everything through '.jpg'.
                raw = page[pos + 9:end + 4]
                # Parse the image address: drop the first 47 characters
                # (presumably a site-specific prefix; verify against the page).
                found.append('http://' + raw[47:])
                resume = end
            pos = page.find(tag, resume)
        return found
    
    def save_imgs(folder,img_addrs):
        """Download every address in ``img_addrs`` into the current directory.

        Each file is named after the last ``/``-separated segment of its
        address. ``folder`` is accepted but not used here; files land in
        whatever the current working directory is.
        """
        for address in img_addrs:
            # Last path segment of the address becomes the local file name.
            target = address.rsplit("/", 1)[-1]
            urllib.request.urlretrieve(address, target, None)
                    
    def download_picture(folder='WWWW'):
        """Download the images found on the hard-coded shop page into ``folder``.

        The folder is created if it does not exist, then made the current
        working directory, because ``save_imgs`` writes into the current
        directory.

        Args:
            folder: Directory that receives the downloaded files.
        """
        # Create the target on first use; exist_ok keeps re-runs from failing.
        os.makedirs(folder, exist_ok=True)
        os.chdir(folder)
        url = "https://gboy.taobao.com/index.htm?ali_trackid=17_cfb4dbeb80eb264e50f77d137e3a83d0&spm=a21bo.7724922.8410.1.HwGQ44"
        img_addrs = find_imgs(url)
        save_imgs(folder,img_addrs)
    # Start the download only when this file is run as a script, not on import.
    if __name__=='__main__':
        download_picture()
  • 相关阅读:
    装饰模式
    You can't specify target table 'a' for update in FROM clause
    Spring事务知识点
    JAVA中的volatile关键字
    验证HashSet和HashMap不是线程安全
    ZYNQ7000 通过FPGA Manager加载比特流
    verilog中可综合的task使用
    verilog条件编译
    Vivado debug异常现象
    Matlab相关函数使用
  • 原文地址:https://www.cnblogs.com/chenyang920/p/4906284.html
Copyright © 2011-2022 走看看