zoukankan      html  css  js  c++  java
  • selenium爬取拉勾网招聘信息

    from urllib import request
    import requests
    import time
    import pymysql
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.support.wait import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    # Open the MySQL connection that receives the scraped job postings.
    # Keyword arguments are used because PyMySQL 1.0+ no longer accepts
    # positional parameters in connect().
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or environment variables instead.
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='123qwe',
        database='ok',
    )
    cur = db.cursor()
    # One-time table creation, kept for reference (the inserts further down
    # rely on these six columns):
    #sql='create table lagou(name varchar(50),salary varchar(10),tiao varchar(10),company varchar(20),skill varchar(30),daiyu varchar(30))'
    #cur.execute(sql)
    url = 'https://www.lagou.com/'
    # Optional headless mode, left disabled:
    #opt = webdriver.chrome.options.Options()
    #opt.set_headless()
    broswer = webdriver.Chrome()
    broswer.get(url)
    # Click the element with id "cboxClose" (presumably the close button of
    # a popup shown on the landing page -- confirm). Wait briefly for it and
    # carry on if it never appears, instead of crashing on a missing element.
    try:
        WebDriverWait(broswer, 5).until(
            EC.element_to_be_clickable((By.ID, 'cboxClose'))
        ).click()
    except TimeoutException:
        # No popup was shown within the wait; nothing to dismiss.
        pass
    time.sleep(1)
    # Focus the search box through JavaScript, type the keyword, then submit.
    # The clicks go through execute_script rather than WebElement.click().
    ok = broswer.find_element(By.XPATH, '//input[@id="search_input"]')
    broswer.execute_script("arguments[0].click();", ok)
    ok.send_keys('python')
    op = broswer.find_element(By.ID, 'search_button')
    broswer.execute_script("arguments[0].click();", op)
    # Scrape up to 30 result pages: read the listing fields on the current
    # page, insert them into the `lagou` table, then advance to the next
    # page. The try/finally guarantees the cursor, the DB connection and the
    # browser session are released even if a page fails.
    insert_sql = 'insert into lagou(name,salary,tiao,company,skill,daiyu) values(%s,%s,%s,%s,%s,%s)'
    try:
        for k in range(1, 31):
            print(k)
            time.sleep(2)  # give the result list time to render
            name = broswer.find_elements(By.XPATH, '//a[@class="position_link"]/h3')
            classd = broswer.find_elements(By.XPATH, '//div[@class="p_bot"]/div[@class="li_b_l"]')
            comany = broswer.find_elements(By.XPATH, '//div[@class="company_name"]/a')
            skill = broswer.find_elements(By.XPATH, '//div[@class="list_item_bot"]/div[@class="li_b_l"]')
            daiyu = broswer.find_elements(By.XPATH, '//div[@class="list_item_bot"]/div[@class="li_b_r"]')

            rows = []
            # zip() stops at the shortest list, so a partially rendered page
            # cannot trigger an IndexError.
            for title_el, summary_el, company_el, skill_el, perk_el in zip(
                    name, classd, comany, skill, daiyu):
                # The summary text is split on single spaces: the first token
                # goes to `salary`, the second to `tiao`. Fall back to an
                # empty string when there is no second token.
                parts = summary_el.text.split(' ')
                salary = parts[0]
                tiao = parts[1] if len(parts) > 1 else ''
                rows.append((title_el.text, salary, tiao, company_el.text,
                             skill_el.text, perk_el.text))
            if rows:
                # One batched insert and one commit per page instead of a
                # commit per row.
                cur.executemany(insert_sql, rows)
                db.commit()

            time.sleep(1)
            js = "var q=document.documentElement.scrollTop=3000"
            broswer.execute_script(js)  # scroll down so the pager is in view
            time.sleep(1)

            # Hover the sixth pager span when it exists; find_elements returns
            # an empty list instead of raising when the pager is shorter.
            pages = broswer.find_elements(
                By.XPATH, '//*[@id="s_position_list"]/div[2]/div/span[6]')
            if pages:
                ActionChains(broswer).move_to_element(pages[0]).perform()

            try:
                next_button = WebDriverWait(broswer, 10).until(
                    EC.element_to_be_clickable((By.CLASS_NAME, 'pager_next'))
                )
            except TimeoutException:
                # The original handler called an undefined next_page() and
                # would have raised NameError. Without a clickable "next"
                # control there is nothing left to page through, so stop.
                # Original author's note (translated): clicking "next page"
                # is the critical step; a click that fails can jump from
                # page 3 straight to the last page.
                print('next-page button not clickable; stopping at page', k)
                break
            next_button.click()
    finally:
        cur.close()
        db.close()
        # quit() ends the whole WebDriver session; close() only closes the
        # current window and leaves the driver running.
        broswer.quit()

  • 相关阅读:
    C#多线程中lock的用法
    杭州交通违规处理地点大全
    J2me c/s结构,访问数据库
    谁将成为下一个Twitter?最新最酷的创业奇思妙想
    那些相见恨晚的 JavaScript 技巧
    Adobe Dreamweaver CS5试用点滴感受
    python百炼成钢实例008输出 9*9 乘法口诀表
    python百炼成钢实例006斐波那契数列输出最后一个和整个数列,迭代
    python百炼成钢实例007复制,浅拷贝和深拷贝
    python百炼成钢实例080猴子分桃
  • 原文地址:https://www.cnblogs.com/persistence-ok/p/11701327.html
Copyright © 2011-2022 走看看