zoukankan      html  css  js  c++  java
  • python爬虫之一---------豆瓣妹子图

     1 #-*- coding:utf-8 -*-
     2 __author__ = "carry"
     3 import urllib
     4 import urllib2
     5 from bs4 import BeautifulSoup
     6 
     7 
     8 url = 'http://www.dbmeinv.com/?pager_offset=1'
     9 x = 1
    10 def crawl(url):
    11     headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    12     req = urllib2.Request(url,headers=headers)
    13     page = urllib2.urlopen(req,timeout=20)
    14     contents = page.read()
    15     #print (contents.decode('utf-8'))
    16     soup = BeautifulSoup(contents,'html.parser')
    17     my_girl = soup.find_all('img')
    18     #print my_girl
    19     for girl in my_girl:
    20         link = girl.get('src')
    21         print link
    22         global x
    23         urllib.urlretrieve(link,'image\%s.jpg'%x)
    24         print("正在下载第%s张"%x)
    25         x +=1
    26 
    27 for page in range(1,20):
    28     url = 'http://www.dbmeinv.com/?pager_offset=%d'%page
    29     crawl(url)
    30 print("图片下载完毕")
  • 相关阅读:
    Linux centos7修改根目录
    gitlab的安装
    windows上svn图标不显示 绿色对号
    java中的Serializable接口
    List Map Set的线程安全
    javascript中的each遍历
    jdk 1.7新特性
    jdk 1.6 新特性
    jdk1.5 新特性
    java 运算符
  • 原文地址:https://www.cnblogs.com/lxs1314/p/7102099.html
Copyright © 2011-2022 走看看