zoukankan      html  css  js  c++  java
  • 第一个爬虫小程序

     1 import requests,os,urllib,urllib.request
     2 from bs4 import BeautifulSoup as be
     3 
     4 
     5 path = '/users/2018/desktop/'#设置路径
     6 new_file_name = 'tylor swift'#设置文件名
     7 urls = 'https://weheartit.com/inspirations/taylorswift?page='
     8 
     9 
    10 
    11 new_path = os.path.join(path,new_file_name)
    12 if not os.path.isdir(new_path):
    13         os.makedirs(new_path)
    14 
    15 #根据页码创建路径
    16 def build_path(name):
    17     paths = os.path.join(new_path, str(name))
    18     if not os.path.isdir(paths):
    19         os.makedirs(paths)
    20     return paths
    21 
    22 #图片写入路径
    23 def img_file(url,page_num):
    24     web_data = requests.get(url)
    25     soup = be(web_data.text,'lxml')
    26     i=0
    27     for link in soup.find_all("img",class_='entry-thumbnail'):
    28         i=i+1
    29         img_addr=link.get('src')
    30         img_content = requests.get(img_addr).content
    31         img_name = str(i)+'.jpg'
    32 
    33         with open(build_path(page_num)+'/'+img_name,'wb') as write_file:
    34             write_file.write(img_content)
    35     print(i)
    36 
    37 #获取不同页码
    38 def img_files(start,end):
    39     for page_num in range(start,end):
    40         real_urls = urls+str(page_num)
    41 
    42         img_file(real_urls,page_num)
    43 
    44 
    45 
    46 img_files(1,10)

     

  • 相关阅读:
    把文本数据转化为json
    componentsSeparatedByString 的注意事项
    内存管理
    审核问题2.3.1
    H5缩放效果的问题和缓存问题
    iOS库
    DDOS 攻击防范
    连接数过多的问题
    nginx 长连接keeplive
    javascript 判断身份证的正确性
  • 原文地址:https://www.cnblogs.com/yangmingustb/p/8528067.html
Copyright © 2011-2022 走看看