zoukankan      html  css  js  c++  java
  • [深田咏美]用Python在新浪上获取深田图片

    目的:

      馋她

    效果:

    实现:

     1 from Common import Common
     2 import requests
     3 
     class Picture(Common):
         """Download the images shown on a Sina Weibo profile page.

         The page is opened in the Chrome session provided by ``Common``,
         scrolled so that lazily loaded posts appear, and every matched
         ``<img>`` is fetched with ``requests`` and written to disk.
         (Listing originally dated 2020/03/08 15:57.)
         """

         # Profile page scraped when get_imgs() is called without a url.
         DEFAULT_URL = 'https://weibo.com/232655234?profile_ftype=1&is_all=1#1583650565067'
         # Directory the images are written to when no out_dir is given.
         DEFAULT_DIR = './image/'

         def get_imgs(self, url=None, out_dir=None, timeout=30):
             """Save every image found on the profile page.

             Args:
                 url: Page to scrape; defaults to ``DEFAULT_URL``.
                 out_dir: Target directory; defaults to ``DEFAULT_DIR``.
                     It is created if it does not exist yet.
                 timeout: Seconds to wait for each image download.

             Files are named ``<profile title>[<n>].jpg`` where ``n`` is the
             1-based position of the image in the matched list. Images
             without a ``src`` or whose download fails are skipped so one
             bad image does not abort the whole run.
             """
             # Function-scope imports keep this block self-contained.
             import os
             import re

             if url is None:
                 url = self.DEFAULT_URL
             if out_dir is None:
                 out_dir = self.DEFAULT_DIR
             # open() does not create missing directories.
             os.makedirs(out_dir, exist_ok=True)

             # Load the page in the browser session.
             self.open_url(url)
             # Scroll down step by step so more posts get loaded.
             self.scroll_top(0, 800, 20)
             # The profile heading is used as the file-name prefix.
             img_title = self.get_text("xpath", '//*[@id="Pl_Official_Headerv6__1"]/div[1]/div/div[2]/div[2]/h1')
             # Replace characters that are not allowed in file names.
             safe_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', img_title).strip()
             # All <img> elements inside the feed.
             img_list = self.locateElement("xpath_s", '//*[@id="Pl_Official_MyProfileFeed__20"]/div/div[*]/div[*]/div[*]/div[*]/div/ul/li[*]/img')
             for index, item in enumerate(img_list):
                 img_name = safe_title + "[" + str(index + 1) + "].jpg"
                 item_href = item.get_attribute("src")
                 if not item_href:
                     # Nothing to download for an <img> without a src.
                     continue
                 print(img_name + "开始写入----" + item_href)
                 try:
                     res = requests.get(item_href, timeout=timeout)
                     # Do not store an HTTP error page as a .jpg file.
                     res.raise_for_status()
                 except requests.RequestException as err:
                     print(img_name + "下载失败----" + str(err))
                     continue
                 with open(os.path.join(out_dir, img_name), 'wb') as f:
                     f.write(res.content)
                 print(img_name + "" + str(index + 1) + "张图片下载完成")
    31 
    if __name__ == '__main__':
        # Script entry point: start a browser session and download the images.
        picture = Picture()
        picture.get_imgs()

    公共类(Common.py,供上面的 Picture 继承):

     1 from selenium import webdriver
     2 from time import sleep
     3 
     class Common(object):
         """Small convenience wrapper around a Selenium Chrome WebDriver.

         The driver is stored in ``self.chrome``. Elements are addressed by a
         short ``locate_type`` keyword plus a locator value; appending ``_s``
         to the keyword returns all matches instead of the first one.
         """

         # locate_type keyword -> WebDriver locator strategy string. These
         # are the values of selenium's ``By`` constants, spelled out so no
         # extra import is needed.
         _LOCATORS = {
             'css': 'css selector',
             'name': 'name',
             'class': 'class name',
             'id': 'id',
             'tag': 'tag name',
             'xpath': 'xpath',
             'text': 'link text',
             'liketext': 'partial link text',
         }

         def __init__(self):
             """Start Chrome, set a 5 s implicit wait and maximize the window."""
             self.chrome = webdriver.Chrome()
             self.chrome.implicitly_wait(5)
             self.chrome.maximize_window()

         def open_url(self, url):
             """Navigate to ``url`` and raise the implicit wait to 10 s."""
             self.chrome.get(url)
             self.chrome.implicitly_wait(10)

         def locateElement(self, locate_type, value):
             """Find one element or a list of elements.

             Args:
                 locate_type: One of ``css``, ``name``, ``class``, ``id``,
                     ``tag``, ``xpath``, ``text`` (link text) or ``liketext``
                     (partial link text). Add the suffix ``_s`` (for example
                     ``xpath_s``) to get every match as a list.
                 value: The locator expression for the chosen strategy.

             Returns:
                 Whatever the driver's ``find_element`` returns for the
                 singular keywords, or the result of ``find_elements`` for
                 the ``_s`` variants.

             Raises:
                 ValueError: If ``locate_type`` is not a supported keyword.
             """
             if not isinstance(locate_type, str):
                 raise ValueError("unsupported locate_type: %r" % (locate_type,))
             plural = locate_type.endswith('_s')
             key = locate_type[:-2] if plural else locate_type
             strategy = self._LOCATORS.get(key)
             if strategy is None:
                 raise ValueError("unsupported locate_type: %r" % (locate_type,))
             # find_element(by, value) exists in old and new Selenium alike;
             # the find_element_by_* helpers were removed in Selenium 4.
             if plural:
                 return self.chrome.find_elements(strategy, value)
             return self.chrome.find_element(strategy, value)

         def click(self, locate_type, value):
             """Click the element found by ``locate_type`` / ``value``."""
             el = self.locateElement(locate_type, value)
             el.click()

         def input_data(self, locate_type, value, date):
             """Send ``date`` as keystrokes to the located element."""
             el = self.locateElement(locate_type, value)
             el.send_keys(date)

         def get_text(self, locate_type, value):
             """Return the ``text`` of the located element."""
             el = self.locateElement(locate_type, value)
             return el.text

         def get_attr(self, locate_type, value, attrname):
             """Return attribute ``attrname`` of the located element."""
             el = self.locateElement(locate_type, value)
             return el.get_attribute(attrname)

         def scroll_top(self, x, y, num):
             """Scroll the window down in ``num`` steps of ``y`` pixels.

             Waits 5 s first, then calls ``window.scrollTo(x, k * y)`` for
             ``k = 1..num`` with a 0.5 s pause after each step. Every target
             offset is printed.
             """
             index = y
             sleep(5)
             for _ in range(num):
                 print(index)
                 js = "window.scrollTo(" + str(x) + "," + str(index) + ")"
                 self.chrome.execute_script(js)
                 index = index + y
                 sleep(0.5)

         def close_win(self):
             """Quit the browser session; calling it again is a no-op."""
             chrome = getattr(self, 'chrome', None)
             if chrome is None:
                 return
             # Drop the reference first so a failing quit() is not retried
             # later by __del__.
             self.chrome = None
             chrome.quit()

         def __del__(self):
             """Shut the browser down when the wrapper is garbage collected."""
             # Nothing to do if __init__ failed or close_win() already ran.
             if getattr(self, 'chrome', None) is None:
                 return
             sleep(1)
             self.close_win()
  • 相关阅读:
    Chelsio T520 T420安装iSER
    Chelsio T520 T420开启RDMA-NFS
    Chelsio T520 T420开启RDMA(服务器&&客户端)
    [JAR包] android引入JAR包,打包成JAR包,打包成Library项目,导入Library项目
    android EditText中inputType的属性列表
    JavaSwing标准对话框
    Android反编译与防止反编译
    spinner的使用
    Content Provider基础
    字体颜色添加阴影效果
  • 原文地址:https://www.cnblogs.com/lfotest/p/12442971.html
Copyright © 2011-2022 走看看