zoukankan      html  css  js  c++  java
  • python爬虫之一---------豆瓣妹子图

     1 #-*- coding:utf-8 -*-
     2 __author__ = "carry"
     3 import urllib
     4 import urllib2
     5 from bs4 import BeautifulSoup
     6 
     7 
     8 url = 'http://www.dbmeinv.com/?pager_offset=1'  # page address with pager_offset=1; the page loop further down rebinds this name before calling crawl()
     9 x = 1  # running image number: crawl() uses it to name each saved file, then increments it
    10 def crawl(url):
    11     headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    12     req = urllib2.Request(url,headers=headers)
    13     page = urllib2.urlopen(req,timeout=20)
    14     contents = page.read()
    15     #print (contents.decode('utf-8'))
    16     soup = BeautifulSoup(contents,'html.parser')
    17     my_girl = soup.find_all('img')
    18     #print my_girl
    19     for girl in my_girl:
    20         link = girl.get('src')
    21         print link
    22         global x
    23         urllib.urlretrieve(link,'image\%s.jpg'%x)
    24         print("正在下载第%s张"%x)
    25         x +=1
    26 
    27 for page in range(1,20):
    28     url = 'http://www.dbmeinv.com/?pager_offset=%d'%page
    29     crawl(url)
    30 print("图片下载完毕")
  • 相关阅读:
    14-定时器
    13-JS中的面向对象
    12-关于DOM操作的相关案例
    11-DOM介绍
    10-关于DOM的事件操作
    09-伪数组 arguments
    08-函数
    07-常用内置对象
    06-流程控制
    05-数据类型转换
  • 原文地址:https://www.cnblogs.com/lxs1314/p/7102099.html
Copyright © 2011-2022 走看看