zoukankan      html  css  js  c++  java
  • 第三十二节 selenium爬取拉勾网

     1 from selenium import webdriver
     2 from selenium.webdriver.chrome.options import Options
     3 from selenium.webdriver.common.by import By
     4 from lxml import etree
     5 import time
     6 
     7 
     8 class lagou():
     9 
    10     def __init__(self):
    11         self.browers_path = r'C:UsersAdministratorAppDataLocal360ChromeChromeApplication360chrome.exe'
    12         self.chrome_option = Options()
    13         self.chrome_option.binary_location = self.browers_path
    14         self.driver = webdriver.Chrome(options=self.chrome_option)
    15         self.url = "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput="
    16 
    17     def run(self):
    18         all_list = []
    19         sourse = self.driver.get(self.url)
    20         elementi= self.driver.find_element_by_xpath('//div[@class="body-btn"]')
    21         elementi.click()
    22         time.sleep(5)
    23         sourse = self.driver.page_source
    24         self.parse_path(sourse)
    25         # all_list.extend(s)
    26         # return all_list
    27 
    28     def parse_path(self,sourse):
    29         position_list = []
    30         html = etree.HTML(sourse)
    31         links = html.xpath('//a[@class="position_link"]//@href')
    32         for link in links:
    33             self.xiangqingye(link)
    34         #     position_list.append(h)
    35         # return position_list
    36 
    37     def xiangqingye(self,ur):
    38         # self.driver.execute_script("window.open('https://www.lagou.com/jobs/6232081.html?show=23decdaefd344a719f63cfa436b063b8')" )
    39         self.driver.execute_script("window.open('%s')"%ur)
    40         self.driver.switch_to.window(self.driver.window_handles[1])
    41         time.sleep(2)
    42         xiang_source = self.driver.page_source
    43         html = etree.HTML(xiang_source)
    44         job_resqust_spans = html.xpath('//dd[@class="job_request"]//span')
    45         salary = job_resqust_spans[0].xpath('.//text()')[0].strip()
    46         city = job_resqust_spans[1].xpath('.//text()')[0].strip()
    47         workyears = job_resqust_spans[2].xpath('.//text()')[0].strip()
    48         position = {
    49             'salary':salary,
    50             'city':city,
    51             'workyears':workyears
    52         }
    53         print(position)
    54         time.sleep(1)
    55         self.driver.close()
    56         self.driver.switch_to.window(self.driver.window_handles[0])
    57         # print('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
    58 
    59 if __name__ == '__main__':
    60     lagou_shili = lagou()
    61     q = lagou_shili.run()
    62     print(q)
    63 
    64     # browers_driver.execute_script('window.open("https://www.douban.com")')
    65     # window_list = browers_driver.window_handles#获取窗口列表
    66     # browers_driver.switch_to.window(window_list[1])#将browers_driver的指针转移到指定的窗口
    67     # print(browers_driver.current_url)#打印browers_driver指向的窗口网址
  • 相关阅读:
    PHP快速入门
    redis命令_ZREVRANGEBYSCORE
    redis命令_ZRANGE
    redis命令_ZREM
    redis命令_ZINCRBY
    redis命令_ZADD
    redis命令_SETEX
    编译过程的一点心得
    关于c语言中的program_invocation_short_name
    关于toolchain(工具链)的一点知识
  • 原文地址:https://www.cnblogs.com/kogmaw/p/12507168.html
Copyright © 2011-2022 走看看