zoukankan      html  css  js  c++  java
  • python批量下载图片3

    import urllib.request
    import os
    
    def url_open(url):
        """Fetch *url* and return the raw response body as bytes.

        The request is sent with a desktop-browser User-Agent header.

        Args:
            url: Absolute URL to fetch.

        Returns:
            The undecoded response body (bytes).

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` when the
                request cannot be completed.
        """
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36')
        # Open the prepared Request, not the bare URL; otherwise the
        # User-Agent header set above is never sent.
        with urllib.request.urlopen(req) as response:
            return response.read()
    
    def get_page(url):
        """Return the text that follows the 'current-comment-page' marker.

        The page at *url* is downloaded and decoded as UTF-8. The result is
        the substring that starts 23 characters after the beginning of the
        first 'current-comment-page' occurrence and ends just before the
        next ']'.

        Args:
            url: Address of the page to inspect.

        Returns:
            The extracted substring (a str, not converted to int).

        Raises:
            ValueError: If the marker or the closing ']' is not present.
                Without this check the unchecked ``find`` results would
                yield an unrelated slice of the page.
        """
        html = url_open(url).decode('utf-8')
        marker = html.find('current-comment-page')
        if marker == -1:
            raise ValueError("'current-comment-page' marker not found in page")
        start = marker + 23
        end = html.find(']', start)
        if end == -1:
            raise ValueError("closing ']' not found after 'current-comment-page' marker")
        return html[start:end]
    
    
    def find_imgs(url):
        """Collect .jpg addresses that follow 'img src=' in the page at *url*.

        The page is downloaded and decoded as UTF-8. For every 'img src='
        occurrence, a '.jpg' is looked for within the next 255 characters;
        when present, the text from 9 characters past the start of the
        occurrence (the 8-character 'img src=' plus one more character,
        presumably the opening quote) through the end of '.jpg' is recorded.

        Args:
            url: Address of the page to scan.

        Returns:
            List of extracted address strings, in page order.
        """
        page = url_open(url).decode('utf-8')
        tag = 'img src='
        found = []
        pos = page.find(tag)
        while pos >= 0:
            ext = page.find('.jpg', pos, pos + 255)
            if ext == -1:
                # No .jpg close enough: step past this tag and keep looking.
                resume = pos + 9
            else:
                found.append(page[pos + 9:ext + 4])
                resume = ext
            pos = page.find(tag, resume)
        return found
    
    def save_imgs(folder,img_addrs):
        """Download every address in *img_addrs* into the current directory.

        Args:
            folder: Accepted for the caller's convenience; not used here.
                Files are written relative to the current working directory.
            img_addrs: Iterable of image URL strings.
        """
        for address in img_addrs:
            # The last '/'-separated segment of the URL is the local file name.
            _, _, local_name = address.rpartition('/')
            urllib.request.urlretrieve(address, filename=local_name, reporthook=None)
    
    def download_mm(folder='OOXX',pages=10):
        """Download the .jpg images from consecutive konachan.com listing pages.

        The target directory is created when missing and becomes the
        process's current working directory; save_imgs writes files relative
        to it. Listing pages 2, 3, ..., pages + 1 are fetched in order.

        Args:
            folder: Directory to create (if needed) and change into.
            pages: Number of consecutive listing pages to process.
        """
        # exist_ok lets both the first run and later re-runs succeed.
        os.makedirs(folder, exist_ok=True)
        os.chdir(folder)
        first_page = 2
        for i in range(pages):
            # Derive the page from the start page each time; accumulating
            # with "+= i" would jump 2, 3, 5, 8, ... and skip pages.
            page_num = first_page + i
            print(i)
            page_url = 'http://konachan.com/post?page=' + str(page_num) + '&tags=rating%3Asafe'
            print(page_url)
            img_addrs = find_imgs(page_url)
            save_imgs(folder,img_addrs)
    # Script entry point: run the downloader with its default folder and
    # page count only when this file is executed directly.
    if __name__=='__main__':
        download_mm()

    import urllib.request
    import os
    
    def url_open(url):
        """Fetch *url* and return the raw response body as bytes.

        The request is sent with a desktop-browser User-Agent header.

        Args:
            url: Absolute URL to fetch.

        Returns:
            The undecoded response body (bytes).

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` when the
                request cannot be completed.
        """
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36')
        # Open the prepared Request, not the bare URL; otherwise the
        # User-Agent header set above is never sent.
        with urllib.request.urlopen(req) as response:
            return response.read()
    
    def get_page(url):
        """Return the text that follows the 'current-comment-page' marker.

        The page at *url* is downloaded and decoded as UTF-8. The result is
        the substring that starts 23 characters after the beginning of the
        first 'current-comment-page' occurrence and ends just before the
        next ']'.

        Args:
            url: Address of the page to inspect.

        Returns:
            The extracted substring (a str, not converted to int).

        Raises:
            ValueError: If the marker or the closing ']' is not present.
                Without this check the unchecked ``find`` results would
                yield an unrelated slice of the page.
        """
        html = url_open(url).decode('utf-8')
        marker = html.find('current-comment-page')
        if marker == -1:
            raise ValueError("'current-comment-page' marker not found in page")
        start = marker + 23
        end = html.find(']', start)
        if end == -1:
            raise ValueError("closing ']' not found after 'current-comment-page' marker")
        return html[start:end]
    
    
    def find_imgs(url):
        """Collect .jpg addresses that follow 'img src=' in the page at *url*.

        The page is downloaded and decoded as UTF-8. For every 'img src='
        occurrence, a '.jpg' is looked for within the next 255 characters;
        when present, the text from 9 characters past the start of the
        occurrence (the 8-character 'img src=' plus one more character,
        presumably the opening quote) through the end of '.jpg' is recorded.

        Args:
            url: Address of the page to scan.

        Returns:
            List of extracted address strings, in page order.
        """
        page = url_open(url).decode('utf-8')
        tag = 'img src='
        found = []
        pos = page.find(tag)
        while pos >= 0:
            ext = page.find('.jpg', pos, pos + 255)
            if ext == -1:
                # No .jpg close enough: step past this tag and keep looking.
                resume = pos + 9
            else:
                found.append(page[pos + 9:ext + 4])
                resume = ext
            pos = page.find(tag, resume)
        return found
    
    def save_imgs(folder,img_addrs):
        """Download every address in *img_addrs* into the current directory.

        Each image is fetched with url_open and written in binary mode to a
        file named after the last '/'-separated segment of its URL.

        Args:
            folder: Accepted for the caller's convenience; not used here.
                Files are written relative to the current working directory.
            img_addrs: Iterable of image URL strings.
        """
        for each in img_addrs:
            filename = each.split('/')[-1]
            # Download before opening the file, so a failed request does not
            # leave an empty file behind.
            img = url_open(each)
            # The with-block closes the file on exit; no explicit close()
            # call is needed afterwards.
            with open(filename, 'wb') as f:
                f.write(img)
    def download_mm(folder='OOXX',pages=10):
        """Download the .jpg images from consecutive konachan.com listing pages.

        The target directory is created when missing and becomes the
        process's current working directory; save_imgs writes files relative
        to it. Listing pages 2, 3, ..., pages + 1 are fetched in order.

        Args:
            folder: Directory to create (if needed) and change into.
            pages: Number of consecutive listing pages to process.
        """
        # exist_ok avoids the FileExistsError a plain mkdir raises on re-runs.
        os.makedirs(folder, exist_ok=True)
        os.chdir(folder)
        first_page = 2
        for i in range(pages):
            # Derive the page from the start page each time; accumulating
            # with "+= i" would jump 2, 3, 5, 8, ... and skip pages.
            page_num = first_page + i
            print(i)
            page_url = 'http://konachan.com/post?page=' + str(page_num) + '&tags=rating%3Asafe'
            print(page_url)
            img_addrs = find_imgs(page_url)
            save_imgs(folder,img_addrs)


    # Script entry point: run the downloader with its defaults only when
    # this file is executed directly.
    if __name__=='__main__':
        download_mm()
  • 相关阅读:
    迭代器和生成器
    案例:复制大文件
    案例:使用seek倒查获取日志文件的最后一行
    Leetcode165. Compare Version Numbers比较版本号
    Leetcode137. Single Number II只出现一次的数字2
    Leetcode129. Sum Root to Leaf Numbers求根到叶子节点数字之和
    Leetcode116. Populating Next Right Pointers in Each Node填充同一层的兄弟节点
    Leetcode114. Flatten Binary Tree to Linked List二叉树展开为链表
    Leetcode113. Path Sum II路径总和2
    C++stl中vector的几种常用构造方法
  • 原文地址:https://www.cnblogs.com/chenyang920/p/4906128.html
Copyright © 2011-2022 走看看