zoukankan      html  css  js  c++  java
  • 实现抓图的工具

    实现抓图的工具

    #encoding:UTF-8

    import urllib
    import urllib2
    import re
    import os
    from BeautifulSoup import BeautifulSoup

    def GetUrlContent(url,path):
        """Download the images of every page linked from a listing page.

        Fetches ``url``, collects the anchors that have ``target="_blank"``
        and no ``title`` attribute, prints each anchor's ``href`` and hands it
        to ``GetUrl`` together with ``path``.

        Args:
            url: Address of the listing page to scan.
            path: Local directory, passed through unchanged to ``GetUrl``.

        Raises:
            Whatever ``urllib2.urlopen`` / ``read`` raise for the listing page
            itself; this function does not catch them.
        """
        response = urllib2.urlopen(url)
        try:
            content = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

        soup = BeautifulSoup(content)
        # Find all links that open in a new window and have no title attribute.
        alinks = soup.findAll('a', attrs={"target": "_blank"}, title=None)
        for a in alinks:
            surl = a.get('href')
            if not surl:
                # An anchor without an href gives us nothing to follow.
                continue
            print(surl)
            GetUrl(surl, path)
            # Two blank-ish lines separate the output of consecutive pages.
            print(" ")
            print(" ")

    def createFileWithFileName(localPathParam,fileName):
        """Make sure an (initially empty) file exists and return its full path.

        Args:
            localPathParam: Directory that should contain the file. It must
                already exist; it is not created here.
            fileName: Name of the file inside that directory.

        Returns:
            The directory and file name joined with the platform's path
            separator.
        """
        # os.path.join picks the right separator for the current platform,
        # instead of a hard-coded backslash that only works on Windows.
        totalPath = os.path.join(localPathParam, fileName)
        if not os.path.exists(totalPath):
            # Append mode creates the file without ever truncating content.
            with open(totalPath, 'a+'):
                pass
        return totalPath

    def GetFileName(url):
        """Return the file name component of a URL.

        The query string (``?...``) and fragment (``#...``) are removed first
        so they do not end up in the name of the file saved to disk.

        Args:
            url: URL (or plain path) of the resource.

        Returns:
            The text after the last path separator; an empty string when the
            URL is empty or ends with a separator.
        """
        # Drop the fragment first, then the query, leaving only the path part.
        pathPart = url.split('#', 1)[0].split('?', 1)[0]
        return os.path.basename(pathPart)

    def GetUrl(myUrl,localPath):
        """Download every matching image found on one page.

        Fetches ``myUrl`` with a 5 second timeout, finds the ``<img>`` tags
        whose ``src`` matches ``uploads/allimg`` and saves each image into
        ``localPath`` under the file name taken from its URL.

        Errors are reported on stdout and never propagate to the caller. A
        failure on one image does not stop the remaining images from being
        downloaded.

        Args:
            myUrl: Address of the page to scan for images.
            localPath: Local directory that receives the downloaded files.
        """
        try:
            req = urllib2.urlopen(myUrl, None, 5)
            try:
                content = req.read()
            finally:
                # Release the connection even when reading fails.
                req.close()
            soup = BeautifulSoup(content)
            images = soup.findAll(
                "img", attrs={"src": re.compile("(.*)uploads/allimg(.*)")})
        except Exception as e:
            # Without the page there is nothing to download; say why.
            print("Error: could not read %s (%s)" % (myUrl, e))
            return

        for img in images:
            imgUrl = img["src"]
            print(imgUrl)
            try:
                fileName = GetFileName(imgUrl)
                print(fileName)
                urllib.urlretrieve(
                    imgUrl, createFileWithFileName(localPath, fileName))
            except Exception as e:
                # Keep going: one broken image must not abort the others.
                print("Error: could not save %s (%s)" % (imgUrl, e))

    if __name__=='__main__':
        # Demo entry point: show the file name derived from a sample image URL.
        sampleUrl = "http://www.2cto.com/meinv/sexmv/1810.jpg"
        print(GetFileName(sampleUrl))
  • 相关阅读:
    Go语言基础(四)
    Go语言基础(一)
    Go语言简介和开发环境搭建
    数据分析(四)数据可视化之Matplotlib
    数据分析(三)Pandas
    git简介与基本操作
    drf框架之三大认证之频率类源码分析
    数据分析(二)numpy
    换零钱问题的非递归解法 SICP 1.2.2中的一个问题
    # Java中的代理类
  • 原文地址:https://www.cnblogs.com/djcsch2001/p/5374247.html
Copyright © 2011-2022 走看看