zoukankan      html  css  js  c++  java
  • 多线程下载图片

    1. # -*- coding:utf8 -*-
    2. from bs4 import BeautifulSoup
    3. import os, sys, urllib2, urllib
    4. import thread, threading
    class downloader(threading.Thread):
        """Worker thread that downloads a single URL to a local file.

        :param url: address of the resource to fetch.
        :param name: destination file path. It is stored in ``self.name``,
            which is also ``threading.Thread``'s thread-name attribute, so
            the thread ends up named after its output path.
        """

        def __init__(self, url, name):
            threading.Thread.__init__(self)
            self.url = url
            self.name = name

        def run(self):
            """Fetch ``self.url`` and write the payload to ``self.name``."""
            print('downling from %s' % self.url)
            try:
                urllib.urlretrieve(self.url, self.name)
            except IOError as exc:
                # Report the failure and let the thread finish quietly, so one
                # broken image does not dump an uncaught traceback.
                print('download failed for %s: %s' % (self.url, exc))
    # Every downloader thread started by page_loop is recorded in this list.
    threads = []
    def page_loop(page=1):
        """Crawl gallery pages from ``page`` onwards and download their JPEGs.

        Each page is fetched and parsed, and one ``downloader`` thread is
        started for every ``<img>`` whose ``src`` contains ``'jpg'``. Files are
        written to ``dbmeizi/<n><ext>``, where ``n`` is taken from the
        module-level counter ``x`` (incremented per image). Every started
        thread is appended to the module-level ``threads`` list.

        The crawl stops at the first page holding fewer than five ``<img>``
        tags; the function then calls ``sys.exit(0)``, so it never returns
        normally.

        :param page: page number to start from (int or numeric string).
        """
        global x

        # urlretrieve does not create missing directories, so make sure the
        # output folder exists before any download thread is started.
        if not os.path.isdir('dbmeizi'):
            os.makedirs('dbmeizi')

        # Iterate instead of recursing once per page, so the call stack does
        # not grow with the number of pages crawled.
        while True:
            url = 'http://www.beautylegmm.com/Tiara/beautyleg-936.html?page=%s' % page
            content = urllib2.urlopen(url)
            try:
                soup = BeautifulSoup(content)
            finally:
                # Release the HTTP connection once the markup has been parsed.
                content.close()
            my_girl = soup.find_all('img')

            # End-of-crawl detection (crude): a page with fewer than five
            # images is treated as being past the last gallery page.
            if len(my_girl) < 5:
                print('已经全部抓取完毕')
                sys.exit(0)

            print('开始抓取')
            for girl in my_girl:
                link = girl.get('src')
                # An <img> without a src attribute yields None; skip it rather
                # than fail on the substring test.
                if not link or 'jpg' not in link:
                    continue
                flink = 'http://www.beautylegmm.com' + link
                print(flink)
                path = 'dbmeizi' + '/' + str(x) + flink[-4:]
                x = x + 1
                t = downloader(flink, path)
                threads.append(t)
                t.start()

            page = int(page) + 1
            print('开始抓取下一页')
            print('the %s page' % page)
    # Running counter that page_loop uses to number the saved image files.
    x = 1
    # Start crawling from the first page (page_loop's default argument).
    page_loop()





  • 相关阅读:
    Winsock 2 入门指南
    Winsock 2 入门指南
    [手游新项目历程]-40-linux环境实现C/C++程序崩溃退出时打印栈信息
    1月下旬解题
    poj1226,poj3080
    poj3666
    poj3067
    poj12月其他题解(未完)
    poj1823,3667
    poj2352
  • 原文地址:https://www.cnblogs.com/highroom/p/cbb0d977a78d35dac83bd56f5d08f61c.html
Copyright © 2011-2022 走看看