zoukankan      html  css  js  c++  java
  • python 批量下载 spring 的 xsd

    #coding=utf-8
    
    import os
    import urllib
    import urllib2
    import re
    from bs4 import BeautifulSoup
    # 利用 urllib.urlretrieve() 函数进行下载。非常方便
    
    import socket
    # Default timeout, in seconds, applied to sockets created after this call.
    socket.setdefaulttimeout(5)
    
    # Download target directory; the author's note asks for an empty folder.
    # .decode('utf-8') turns the Python 2 byte string into a unicode path.
    # NOTE(review): "E:spring" has no separator after the drive letter -- a
    # backslash may have been lost when this was pasted; confirm the intended path.
    basedir=r"E:spring".decode('utf-8')
    # Make it the working directory: every relative folder/file path used later
    # in the script resolves against it.
    os.chdir(basedir)
    # Site root. NOTE(review): not referenced anywhere else in the visible script.
    host="http://www.springframework.org"
    
    def getFolderName(url):
        """Return the text that follows the last '/' in ``url``.

        The result is an empty string when ``url`` ends with '/', and None
        when the pattern does not match (for example, no '/' at all).
        """
        # The leading greedy ".*" consumes up to the final slash, so group 1
        # captures only the trailing path segment.
        found = re.match(r'.*/(.*)', url)
        if found is None:
            return None
        return found.group(1)
    
    
    # Hard-coded collection of Spring schema namespace URLs.
    # NOTE(review): this name shadows the builtin `list`, and nothing in the
    # visible script reads these URLs -- the crawl further down discovers the
    # namespaces from the schema index page instead.
    list = ['http://www.springframework.org/schema/beans',
            'http://www.springframework.org/schema/aop',
            'http://www.springframework.org/schema/mvc',
            'http://www.springframework.org/schema/p',
            'http://www.springframework.org/schema/context',
            'http://www.springframework.org/schema/tx'
            ]
    
    # Collect the file/directory names linked from an index page.
    def getFilesByUrl(url):
        """Return the relative link targets found on the page at ``url``.

        Only hrefs that do not start with '/' and that end in 'xsd' (schema
        files) or '/' (sub-directories) are kept, in document order.

        Fetching is best-effort: if the page cannot be downloaded, the error
        is printed and an empty list is returned, so callers can keep crawling.
        """
        try:
            response = urllib2.urlopen(url)
            try:
                # Read inside the try so a timeout mid-transfer is handled too.
                html = response.read()
            finally:
                response.close()
        except Exception as err:
            # Without this early return, `html` would be unbound below and the
            # function would die with a NameError instead of the real error.
            print(err)
            return []
        soup = BeautifulSoup(html, "lxml")
        names = []
        for link in soup.find_all('a'):
            # .get() tolerates anchors without an href (e.g. <a name="...">).
            fileName = link.get('href')
            if not fileName or fileName.startswith('/'):
                continue
            if fileName.endswith(('xsd', '/')):
                names.append(fileName)
        return names
    
    # Crawl the schema index, mirror each namespace directory locally (relative
    # to the current working directory) and download every .xsd it lists.
    entries = getFilesByUrl("http://www.springframework.org/schema")
    print(entries)
    for entry in entries:
        # getFilesByUrl also returns '*xsd' links; only directory links
        # (trailing '/') are namespaces that can be listed and mirrored.
        if not entry.endswith('/'):
            continue
        name = entry.rstrip('/')
        # Skip self/parent links such as './' and '../'.
        if not name or name.startswith('.'):
            continue
        print(name)
        url = "http://www.springframework.org/schema/" + name
        folder = getFolderName(url)
        print(folder)
        if not os.path.exists(folder):
            os.mkdir(folder)
        files = getFilesByUrl(url)
        print(files)
        for fileName in files:
            # Nested directory links are not downloadable files.
            if not fileName.endswith('xsd'):
                continue
            try:
                print(url + "/" + fileName)
                urllib.urlretrieve(url + "/" + fileName, folder + "/" + fileName)
            except Exception as err:
                # Best-effort: report the failure and move on to the next file.
                print(err)
  • 相关阅读:
    软件工程—附加作业
    软件工程最终总结
    电梯调度(两人结对)
    VS单元测试
    第二周作业(2,3题)
    VS的安装
    补救
    漂亮男孩不说谎
    博客带我成长
    Java后缀数组-求sa数组
  • 原文地址:https://www.cnblogs.com/whm-blog/p/7168224.html
Copyright © 2011-2022 走看看