1、环境:Python 2.7 + selenium + PhantomJS(软件和库的安装网上都有教程,这里跳过,so easy)
2、原理
首页登录需要验证码,所以绕过首页,直接进入搜索页,输入要搜索的职位和地区,搜索出职位列表;然后点击全选,选中该页的所有职位并提交申请。目前没有做筛选,培训类的职位也会被一并选中,亲们可以自行加上筛选逻辑。第一次提交不需要验证码,从第二次提交开始就需要验证码了:验证码需要手动输入,程序用 raw_input 等待输入,输入之后继续运行,如此往复操作。总的来说并不智能,需要修改的地方很多,就当作让大家了解一下 selenium+PhantomJS 的功能。
3、贴上代码
#!/usr/bin/Python
# -*- coding: utf-8 -*-
"""Apply to every job on a zhaopin.com (Zhilian) search result, page by page.

Written for Python 2.7 with selenium and PhantomJS. The script opens the
search page directly, searches for a job keyword and a city typed at the
console, ticks "select all" on each result page and submits the application.
Login credentials and captcha codes are typed by hand at the console; a
screenshot of the page is saved and opened so the captcha can be read.

NOTE: every ``print(...)`` call takes exactly one argument, so it behaves the
same as the Python 2 ``print`` statement.
"""
import os
import sys
from selenium import webdriver
import urllib2
import time
from os import path
import image

d = path.dirname(__file__)

# Where page screenshots are written (next to this script).
SCREENSHOT_PATH = path.join(d, '1.png')

# Raw string: in a normal literal the "\b" of "\bin" would become a backspace.
PHANTOMJS_PATH = r'E:\p_python\Scripts\phantomjs\bin\phantomjs.exe'

SEARCH_URL = 'http://sou.zhaopin.com/jobs/searchresult.ashx'

# Text of the page-header element when nobody is logged in.
LOGGED_OUT_TEXT = u'登录注册'

HEADER_XPATH = '/html/body/div[1]/div[1]/div/div/div[1]'
VALIDATE_XPATH = '//*[@id="validate"]'
NEXT_PAGE_XPATH = ('/html/body/div[3]/div[3]/div[3]/form/div[1]/div[1]'
                   '/div[3]/ul/li[7]/a')


def _show_screenshot(browser):
    """Save a screenshot of the current page and open it in the default viewer.

    The file that is opened is the file that was just written. ``start`` is a
    Windows shell command; the empty ``""`` is its window-title argument, so
    the quoted path is not mistaken for a title.
    """
    browser.get_screenshot_as_file(SCREENSHOT_PATH)
    os.system('start "" "%s"' % path.abspath(SCREENSHOT_PATH))


class Zhilian:
    """Drives one logged-in (or to-be-logged-in) browser through the results."""

    def __init__(self):
        self.arr = []
        print(1)

    def main(self, browser):
        """Process the current result page and every following page.

        For each page: print the listed jobs, select all of them and submit,
        log in if the header says nobody is logged in, submit a captcha if
        the page asks for one, then follow the "next page" link. Stops when
        that link can no longer be found or clicked.

        :param browser: a selenium WebDriver already showing a result page.
        """
        # A loop instead of self-recursion, so a long result set cannot hit
        # the interpreter's recursion limit.
        while True:
            self._print_jobs(browser)
            time.sleep(2)

            is_logined = self._read_login_state(browser)
            self._select_all_and_apply(browser)
            time.sleep(2)

            # The first submission requires a login.
            print(is_logined)
            test(browser)
            if is_logined == 0:
                self._login(browser)
            time.sleep(3)

            # From the second submission on, a captcha has to be filled in.
            self._submit_code(browser)

            print('We can select next page!!')
            if not self._goto_next_page(browser):
                print('this is the last page!!')
                break

    def _print_jobs(self, browser):
        """Print "<job name>-<salary>" for every listing on the page."""
        try:
            for row in browser.find_elements_by_class_name('newlist'):
                job_name = row.find_element_by_class_name('zwmc').text
                price = row.find_element_by_class_name('zwyx').text
                print(job_name + '-' + price)
        except Exception:
            print('select error!!')

    def _read_login_state(self, browser):
        """Return 0 if the header shows the logged-out text, else 1.

        If the header cannot be read, 0 is returned so the caller still
        attempts a login instead of failing on an undefined value.
        """
        try:
            user_name = browser.find_element_by_xpath(HEADER_XPATH).text
        except Exception:
            print('head error!!')
            return 0
        if user_name == LOGGED_OUT_TEXT:
            return 0
        return 1

    def _select_all_and_apply(self, browser):
        """Tick the select-all checkbox and click the first apply link."""
        try:
            browser.find_element_by_xpath('//*[@id="checkbox4al2"]').click()
            browser.find_element_by_xpath(
                '//*[@id="newlist_list_div"]/p[1]/a[1]').click()
        except Exception:
            print('error')

    def _login(self, browser):
        """Ask for user name and password at the console and submit them."""
        try:
            time.sleep(3)
            name = raw_input('please input your zhilian user_name: ')
            pwd = raw_input('Please input your zhilian password: ')
            name_box = browser.find_element_by_xpath(
                '//*[@id="simplaceholder"]')
            name_box.clear()
            name_box.send_keys(name)
            pwd_box = browser.find_element_by_xpath(
                '//*[@id="loginBlock"]/form/ul/li[3]/label/input')
            pwd_box.clear()
            pwd_box.send_keys(pwd)
            browser.find_element_by_xpath(
                '//*[@id="submitBlock"]/div[1]/a').click()
            print('login success!!')
        except Exception:
            print("login false or or this account is online!!")

    def _submit_code(self, browser):
        """If the page shows a captcha field, ask for the code and submit it.

        ``find_elements_by_xpath`` returns an empty list when the field is
        absent, so a page without a captcha is skipped silently instead of
        being reported as an error.
        """
        try:
            if not browser.find_elements_by_xpath(VALIDATE_XPATH):
                return
            # Show the page so the captcha can be read, then ask for it.
            _show_screenshot(browser)
            code = raw_input('please input the code: ')
            # Look the field up again: the page may have been re-rendered
            # while waiting for the user.
            field = browser.find_element_by_xpath(VALIDATE_XPATH)
            field.clear()
            field.send_keys(code)
            browser.find_element_by_xpath(
                '//*[@id="applynowbutton"]').click()
            print('apply success!!')
        except Exception:
            print('code error!!')

    def _goto_next_page(self, browser):
        """Click the "next page" link; return False if that is not possible."""
        try:
            browser.find_element_by_xpath(NEXT_PAGE_XPATH).click()
        except Exception:
            return False
        return True


def check():
    """Ask for a job name and a city until the user confirms with "yes".

    :return: ``[city, job]`` exactly as typed (undecoded byte strings).
    """
    # Loop until confirmed. A recursive retry would have to return its
    # result explicitly, otherwise the confirmed values are lost.
    while True:
        job = raw_input('please input the job name which you want to select: ')
        city = raw_input('please input the city name which you want to select: ')
        answer = raw_input('Do you sure the city name is "' + city + '" and the job name is "' + job + '", please input yes or no to check: ')
        if answer == 'yes':
            return [city, job]


def test(browser):
    """Debug helper: screenshot the current page and open the image."""
    _show_screenshot(browser)


if __name__ == '__main__':
    zhilian = Zhilian()
    # Search keywords.
    keywords = check()
    # Decode with the console's own encoding; fall back to gbk when stdin
    # does not report one (e.g. when input is piped).
    console_encoding = sys.stdin.encoding or 'gbk'
    city = keywords[0].decode(console_encoding)
    job = keywords[1].decode(console_encoding)

    browser = webdriver.PhantomJS(PHANTOMJS_PATH)
    try:
        browser.get(SEARCH_URL)
        time.sleep(3)
        # Fill in the search form.
        try:
            keyword_box = browser.find_element_by_xpath('//*[@id="KeyWord_kw2"]')
            keyword_box.clear()
            keyword_box.send_keys(job)
            location_box = browser.find_element_by_xpath('//*[@id="JobLocation"]')
            location_box.clear()
            location_box.send_keys(city)
            browser.find_element_by_xpath(
                '//*[@id="searchForm"]/form/div[6]/button').click()
        except Exception:
            print('input select_info error!!')
        time.sleep(2)
        zhilian.main(browser)
    finally:
        # Always shut the PhantomJS process down, even after an error.
        browser.quit()