zoukankan      html  css  js  c++  java
  • Python3 下爬取网页图片的爬虫程序

     1 import urllib.request
     2 import re
     3 #py抓取页面图片并保存到本地
     4 
     5 #获取页面信息
     def getHtml(url):
         """Fetch ``url`` and return the raw response body as bytes.

         The response is opened in a ``with`` block so it is closed as soon
         as the body has been read, instead of being left for the garbage
         collector.

         Args:
             url: Address passed straight to ``urllib.request.urlopen``.

         Returns:
             The undecoded response body (``bytes``).

         Raises:
             Any exception raised by ``urllib.request.urlopen`` or ``read``
             propagates to the caller unchanged.
         """
         with urllib.request.urlopen(url) as response:
             return response.read()
     9 
    10 #通过正则获取图片
    def getImg(html):
        """Return the ``.jpg`` image URLs found in ``html``.

        A URL is collected when it appears as ``src="<url>.jpg"`` immediately
        followed by `` pic_ext``.

        Args:
            html: Decoded page text (``str``); the pattern is a ``str``
                pattern, so ``bytes`` input is rejected by ``re``.

        Returns:
            A list of the matched URLs, in document order; empty when nothing
            matches.
        """
        # The dot before "jpg" is escaped so only a literal ".jpg" matches,
        # and [^"] keeps a match inside a single quoted attribute value.
        img_pattern = re.compile(r'src="([^"]+?\.jpg)" pic_ext')
        return img_pattern.findall(html)
    17 
    html = getHtml("http://*****")

    # Decode the downloaded bytes (UTF-8 by default) before running the regex.
    img_urls = getImg(html.decode())

    # Save every matched image to local disk, naming each file by its index.
    for x, imgurl in enumerate(img_urls):
        print(x)
        # Raw string: keeps the backslash literal without relying on the
        # invalid escape sequence '\%'. The resulting path is unchanged.
        urllib.request.urlretrieve(imgurl, r'd:\%s.jpg' % x)

    print("done")
    

    从指定网页获取图片并保存到 AWS S3

     1 import boto3
     2 import urllib.request
     3 
     4 
     def lambda_handler(request, context):
         """Download a fixed image over HTTP and upload it to an S3 bucket.

         Args:
             request: Invocation event; not read by this handler.
             context: Invocation context; not read by this handler.

         Returns:
             A dict with a single ``"url"`` key holding the S3 address built
             from the bucket name and the uploaded object's key.

         Raises:
             Errors from ``urllib.request.urlopen`` or from the boto3 upload
             propagate to the caller unchanged.
         """
         download_url = "https://gss2.bdstatic.com/-fo3dSag_xI4khGkpoWK1HF6hhy/baike/s%3D220/sign=3707d191fa03738dda4a0b20831bb073/279759ee3d6d55fb3cfdd81761224f4a20a4ddcc.jpg"
         bucket = "testforcustomerservice"

         # The last path segment of the source URL becomes the object key.
         upload_key = download_url.rsplit('/', 1)[-1]

         # Close the HTTP response once the body is read. The bytes get their
         # own name instead of overwriting the ``context`` parameter.
         with urllib.request.urlopen(url=download_url) as response:
             image_bytes = response.read()

         s3 = boto3.resource("s3")
         file_obj = s3.Bucket(bucket).put_object(Key=upload_key, Body=image_bytes)
         print(file_obj)

         return {
             "url": "https://s3.amazonaws.com/" + bucket + "/" + upload_key
         }
  • 相关阅读:
    python学习2(转载)
    python学习1
    第三方授权登录
    新浪微博开发者平台应用申请及配置说明
    新浪微博登录第三方网站实现原理是什么?
    Redis中PipeLine使用
    发送短信验证码的功能开发
    Python celery原理及运行流程
    设置django缓存+验证码图片接口
    编写注册接口
  • 原文地址:https://www.cnblogs.com/sen-c7/p/9780225.html
Copyright © 2011-2022 走看看