zoukankan      html  css  js  c++  java
  • python批量下载淘宝图片3

    import urllib.request
    import os
    def url_open(url):
        """Fetch *url* and return the raw response body as bytes.

        The request carries a desktop-Chrome ``User-Agent`` header instead of
        urllib's default one.

        Args:
            url: Address to fetch, as a string.

        Returns:
            The undecoded response body (``bytes``).

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` when the request
                fails.
        """
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36')
        # Open the prepared Request (not the bare URL); otherwise the header
        # added above is silently discarded. The ``with`` block closes the
        # connection as soon as the body has been read.
        with urllib.request.urlopen(req) as response:
            return response.read()
    def get_page(url):
        """Return the text that follows the 'current-comment-page' marker.

        The page at *url* is downloaded and decoded as GBK. The result is the
        slice that starts 23 characters after the beginning of the first
        'current-comment-page' occurrence and stops before the next ']'.

        No check is made for a missing marker or a missing ']': ``str.find``
        returns -1 in those cases and the slice is taken with the resulting
        offsets anyway.
        """
        marker = 'current-comment-page'
        page_text = url_open(url).decode('gbk')
        start = page_text.find(marker) + 23
        end = page_text.find(']', start)
        return page_text[start:end]
    def find_imgs(url):
        """Collect .jpg image addresses from the page at *url*.

        The page is downloaded, decoded as GBK and scanned for every
        ``img src=`` occurrence. Only tags of that exact form are considered.
        For each occurrence a '.jpg' is looked for within the next 255
        characters; when one is found, the text between the attribute's opening
        quote and the end of '.jpg' is taken, its first 47 characters are
        dropped, and 'http://' is prepended.

        Returns:
            A list of the rebuilt address strings, in page order.
        """
        tag = 'img src='
        page = url_open(url).decode('gbk')
        found = []
        start = page.find(tag)
        while start >= 0:
            ext_at = page.find('.jpg', start, start + 255)
            if ext_at == -1:
                # No .jpg close enough to this tag: skip past it.
                resume = start + 9
            else:
                raw = page[start + 9:ext_at + 4]
                # Strip the fixed-length prefix to parse out the image address.
                # NOTE(review): the 47-character offset is presumably specific
                # to the markup of the target shop page; confirm before reuse.
                found.append('http://' + raw[47:])
                resume = ext_at
            start = page.find(tag, resume)
        return found
    
    def save_imgs(folder,img_addrs):
        """Download every address in *img_addrs* into the current directory.

        Each file is named after the last '/'-separated component of its
        address. *folder* is accepted but not used here; files are written
        relative to the process's working directory.
        """
        for address in img_addrs:
            target_name = address.rpartition("/")[2]
            urllib.request.urlretrieve(address, target_name, None)
              
                    
    def download_picture(folder='WWWW'):
        """Download the .jpg images of the hard-coded shop page into *folder*.

        The process's working directory is changed to *folder* and stays
        changed after the call, because ``save_imgs`` writes files relative to
        the current directory.

        Args:
            folder: Directory that receives the images; created when missing.
        """
        # Create the directory only if needed. A plain mkdir fails on every
        # run after the first, and no mkdir at all makes chdir fail on the
        # first run.
        os.makedirs(folder, exist_ok=True)
        os.chdir(folder)
        url = "https://gboy.taobao.com/index.htm?ali_trackid=17_cfb4dbeb80eb264e50f77d137e3a83d0&spm=a21bo.7724922.8410.1.HwGQ44"
        img_addrs = find_imgs(url)
        save_imgs(folder,img_addrs)
    # Script entry point: run the downloader with its default folder.
    if __name__=='__main__':
        download_picture()
  • 相关阅读:
    node.js fs,http
    node.js global object,util and so on
    node.js second day
    node.js
    mysql 多个and的简写
    mysql 返回结果按照指定的id顺序返回
    php file_get_contents fopen 连接远程文件
    软考例题1
    Skyline中使用AxTE3DWindowEx打开新的一个球体
    使用AE进行点的坐标投影变换
  • 原文地址:https://www.cnblogs.com/chenyang920/p/4906284.html
Copyright © 2011-2022 走看看