python小练——下载指定url中的图片

zoukankan html css js c++ java

python小练——下载指定url中的图片

#coding=gbk
#download pictures of the url
#usage: python downpicture.py www.baidu.com

import os
import sys
from html.parser import HTMLParser
from urllib.request import urlopen
from urllib.parse import urlparse

def getpicname(path):
    '''
    Return the file-name component of a URL.

    path -- the URL (or URL-like string) to inspect.

    Returns None when os.path.splitext() finds no extension on the raw
    string; otherwise returns the last segment of the URL's path, with any
    query string or fragment dropped.
    '''
    _, extension = os.path.splitext(path)
    if extension == '':
        return None
    parsed = urlparse(path)
    # Rebuild from host + path only, so the query/fragment cannot end up in
    # the file name.
    rebuilt = 'http://' + parsed[1] + parsed[2]
    return os.path.basename(rebuilt)

def saveimgto(path, urls):
    '''
    Download each URL in urls and store it inside the directory path.

    path -- an existing local directory; if it is not a directory the
            function prints 'path is invalid' and exits the program.
    urls -- iterable of absolute image URLs.

    The local file name comes from getpicname(url). URLs for which no file
    name can be derived are skipped. Errors raised by urlopen() or by
    writing the file propagate to the caller.
    '''
    if not os.path.isdir(path):
        print('path is invalid')
        sys.exit()

    for url in urls:
        name = getpicname(url)
        if not name:
            # getpicname() gives None (or an empty name) for URLs that do not
            # name a file; os.path.join() cannot build a target from that.
            continue
        # Fetch before creating the file so a failed request does not leave
        # an empty file behind; 'with' closes both handles even on error.
        with urlopen(url) as response:
            data = response.read()
        with open(os.path.join(path, name), 'wb') as of:
            of.write(data)

class myhtmlparser(HTMLParser):
    '''Collect the src attribute values of the <img> start tags it is fed.'''

    def __init__(self):
        super().__init__()
        # src values of the <img> tags seen so far, in document order
        self.urls = []
        # number of <img> start tags seen so far (with or without a src)
        self.num = 0

    def handle_starttag(self, tag, attr):
        '''Record src values and bump the counter for each <img> tag.'''
        if tag.lower() != 'img':
            return
        for name, value in attr:
            if name.lower() == 'src':
                self.urls.append(value)
        self.num += 1

if __name__ == '__main__':
    # Script entry point: fetch the page named on the command line, collect
    # the src of every <img> tag and download the images into a directory.
    #
    # usage: python downpicture.py <url> [save_dir]

    # Imported here so this block brings its own dependency into scope.
    from urllib.parse import urljoin

    if len(sys.argv) < 2:
        print('usage: python downpicture.py <url> [save_dir]')
        sys.exit(1)

    url = sys.argv[1]
    # Only bare host names get a default scheme; https:// URLs are kept as-is.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    # The optional second argument overrides the original hard-coded target.
    savedir = sys.argv[2] if len(sys.argv) > 2 else r'D:\python\song'

    with urlopen(url) as q:
        content = q.read().decode('utf-8', 'ignore')

    myparser = myhtmlparser()
    myparser.feed(content)

    # urljoin() resolves '//host/x', '/x' and 'dir/x' alike against the page
    # URL and leaves absolute URLs untouched. Empty or value-less src
    # attributes are dropped because they do not name an image.
    imgurls = [urljoin(url, u.strip()) for u in myparser.urls if u and u.strip()]

    saveimgto(savedir, imgurls)
    print('num of download pictures is {}'.format(myparser.num))

　　result：

　　num of download pictures is 19

大龄程序猿，分享互联网开发相关知识！前端、后端，架构等内容，欢迎关注公众号 chengxuyuangangzi

查看全文

相关阅读:
在IIS中设置默认网页
 vim 查找匹配字符串次数
 resource about NLP
Mongodb基本知识和常用语法
 fudanNLP keyword Extraction
nltk support chinese by sinica
Gmail小技巧：只显示未读邮件
 java get line number and file name
java classpath import package 机制
 linux 调用 fudanNLP

原文地址：https://www.cnblogs.com/lyroge/p/2145978.html