import re
from pprint import pprint
from urllib.parse import quote_plus, urlencode

import requests

# ---------------------------------------------------------------------------
# Lagou: log in, search for positions and submit a resume to every result.
#
# The script runs top to bottom when the file is executed. Fill in USERNAME
# and PASSWORD before running it.
# ---------------------------------------------------------------------------

USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36')
LOGIN_PAGE = 'https://passport.lagou.com/login/login.html'
USERNAME = 'xxxxxxxxxxx'
PASSWORD = 'xxxxxxxxxxxxxxxxxxx'  # the already-encrypted password, as sent by the browser
KEYWORD = 'java高级开发'
# Seconds to wait on each request; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10


def _anti_forge_pair(html):
    """Return ``(token, code)`` scraped from the inline script of *html*.

    Args:
        html: Text of a page that assigns ``X_Anti_Forge_Token`` and
            ``X_Anti_Forge_Code`` to single-quoted string literals.

    Returns:
        A ``(token, code)`` tuple of strings.

    Raises:
        ValueError: If either assignment is absent from *html*.
    """
    token = re.search(r"X_Anti_Forge_Token = '(.*?)'", html, re.S)
    code = re.search(r"X_Anti_Forge_Code = '(.*?)'", html, re.S)
    if token is None or code is None:
        raise ValueError('X_Anti_Forge_Token / X_Anti_Forge_Code not found in page')
    return token.group(1), code.group(1)


session = requests.session()

# Step 1: GET the login page and read the anti-forgery token/code from it.
r1 = session.get(
    LOGIN_PAGE,
    headers={'User-Agent': USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)
X_Anti_Forge_Token, X_Anti_Forge_Code = _anti_forge_pair(r1.text)

# Step 2: POST the credentials.
# The "X-Anit-Forge-*" header names are spelled exactly as in the captured
# browser request; do not "correct" them.
# NOTE(review): Python booleans are form-encoded as 'True'/'False', while the
# captured browser request shows lowercase 'true' -- confirm the server
# accepts both.
r2 = session.post(
    'https://passport.lagou.com/login/login.json',
    headers={
        'User-Agent': USER_AGENT,
        'Referer': LOGIN_PAGE,
        'X-Anit-Forge-Code': X_Anti_Forge_Code,
        'X-Anit-Forge-Token': X_Anti_Forge_Token,
        'X-Requested-With': 'XMLHttpRequest',
    },
    data={
        'isValidate': True,
        'username': USERNAME,
        'password': PASSWORD,
        'request_form_verifyCode': '',
        'submit': '',
    },
    timeout=REQUEST_TIMEOUT,
)

# Step 3: GET the service-ticket grant page (authorisation).
r3 = session.get(
    'https://passport.lagou.com/grantServiceTicket/grant.html',
    headers={
        'User-Agent': USER_AGENT,
        'Referer': LOGIN_PAGE,
    },
    timeout=REQUEST_TIMEOUT,
)

# Step 4: fetch the resume page. Whether the login worked can be checked by
# looking for the account name in r4.text.
r4 = session.get(
    'https://www.lagou.com/resume/myresume.html',
    headers={'User-Agent': USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)

# Step 5: build the URL of the position list page for KEYWORD. The page is
# not fetched; its URL is only sent as the Referer of the search request.
url = 'https://www.lagou.com/jobs/list_' + quote_plus(KEYWORD)

# Step 6: POST the search. The filters go in the query string, the keyword
# and page number in the form body.
# NOTE(review): the captured request used pn=1 together with first=true;
# this sends pn=2 -- confirm that is intended.
r6 = session.post(
    'https://www.lagou.com/jobs/positionAjax.json',
    headers={
        'Referer': url,
        'User-Agent': USER_AGENT,
    },
    data={
        'first': True,
        'pn': 2,
        'kd': KEYWORD,
    },
    params={
        'gj': '3年及以下',
        'px': 'default',
        'yx': '25k-50k',
        'city': '北京',
        'needAddtionalResult': False,
        'isSchoolJob': 0,
    },
    timeout=REQUEST_TIMEOUT,
)

positions = r6.json()['content']['positionResult']['result']
for position in positions:
    position_id = position['positionId']
    company_link = 'https://www.lagou.com/jobs/{pos_id}.html'.format(pos_id=position_id)
    company_name = position['companyShortName']
    position_name = position['positionName']
    salary = position['salary']
    print('''
    详情连接:%s
    公司名:%s
    职位名:%s
    薪资:%s
    ''' % (company_link, company_name, position_name, salary))

    # Step 7: GET the detail page; it carries a fresh token/code pair that
    # the delivery request below must send.
    r7 = session.get(
        company_link,
        headers={'User-Agent': USER_AGENT},
        timeout=REQUEST_TIMEOUT,
    )
    try:
        X_Anti_Forge_Token, X_Anti_Forge_Code = _anti_forge_pair(r7.text)
    except ValueError:
        # Without the pair the delivery request cannot be built; skip this
        # position instead of aborting the whole run.
        print('%s 投递失败' % company_name)
        continue

    # Step 8: POST the resume delivery for this position.
    r8 = session.post(
        'https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json',
        headers={
            'User-Agent': USER_AGENT,
            'Referer': company_link,
            'X-Anit-Forge-Code': X_Anti_Forge_Code,
            'X-Anit-Forge-Token': X_Anti_Forge_Token,
            'X-Requested-With': 'XMLHttpRequest',
        },
        data={
            'positionId': position_id,
            'type': 1,
            'force': True,
        },
        timeout=REQUEST_TIMEOUT,
    )
    # Only the HTTP status is checked here.
    # NOTE(review): the JSON body may carry its own success flag -- confirm
    # against a real response.
    if r8.ok:
        print('%s 投递成功' %(company_name))
    else:
        print('%s 投递失败' % company_name)
# -*- coding: utf-8 -*-
import re
from urllib.parse import urlencode

import requests

# ---------------------------------------------------------------------------
# 51job: log in, search for jobs and submit a resume to every result.
#
# The script runs top to bottom when the file is executed. Replace the
# placeholder login name and password before running it.
# ---------------------------------------------------------------------------

USER_AGENT = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36")
LOGIN_URL = "https://login.51job.com/login.php"
# Seconds to wait on each request; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10

session = requests.session()

# Step 1: GET the login page so the session picks up its cookies.
r1 = session.get(
    LOGIN_URL,
    headers={"User-Agent": USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)

# Step 2: POST the login form back to the same URL.
r2 = session.post(
    LOGIN_URL,
    headers={
        "User-Agent": USER_AGENT,
        "Referer": LOGIN_URL,
    },
    data={
        "lang": "c",
        "action": "save",
        "from_domain": "i",
        "loginname": "xxxxxxxxx",
        "password": "xxxxxxxxx",
        "verifycode": "",
        "isread": "on",
    },
    timeout=REQUEST_TIMEOUT,
)

# Step 3: GET the login-trace page.
r3 = session.get(
    "http://my.51job.com/my/My_login_trace.php",
    headers={"User-Agent": USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)

# Step 4: GET the account home page. Whether the login worked can be checked
# by looking for the account holder's name in r4.text (decode it as gbk
# first).
r4 = session.get(
    "http://i.51job.com/userset/my_51job.php",
    headers={"User-Agent": USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)

# Step 5: GET the search result list. The query string must contain
# "&degreefrom=99"; an HTML-unescaped copy of this URL turns "&deg" into the
# degree sign and corrupts the parameter.
url_a = (
    "http://search.51job.com/list/010000,000000,0000,00,9,99,"
    "Python%2B%25E5%2589%258D%25E7%25AB%25AF,2,1.html"
    "?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99"
    "&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0"
    "&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00"
    "&from=&welfare="
)
r5 = session.get(
    url_a,
    headers={"User-Agent": USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)
r5.encoding = "gbk"
# Every result row carries its job id in a "delivery_jobid" form field.
res_id = re.findall('name="delivery_jobid" value="(.*?)"', r5.text, re.S)

for res in res_id:
    # Step 6: GET the detail page of this job.
    url_detal = "http://jobs.51job.com/beijing-hdq/{res_id}.html?s=01&t=0".format(res_id=res)
    r6 = session.get(
        url_detal,
        headers={"User-Agent": USER_AGENT},
        timeout=REQUEST_TIMEOUT,
    )
    r6.encoding = "gbk"
    print("============")

    # Step 7: submit the resume. Only the job id is sent in the query string.
    # NOTE(review): the captured browser request also carried rand,
    # jsoncallback, _, prd, prp, cd, cp, resumeid, cvlan, coverid, qpostset,
    # elementname, deliverytype, deliverydomain, language and imgpath --
    # confirm the endpoint accepts the request without them.
    url_pro = "http://i.51job.com/delivery/delivery.php?jobid=({uid}%3A0)".format(uid=res)
    r7 = session.get(
        url_pro,
        headers={
            # The delivery is triggered from the detail page fetched above,
            # so that exact URL is sent as the Referer.
            "Referer": url_detal,
            "User-Agent": USER_AGENT,
        },
        timeout=REQUEST_TIMEOUT,
    )
    r7.encoding = "gbk"
    # Only the HTTP status is checked here.
    # NOTE(review): the JSONP body may carry its own result code -- confirm
    # against a real response.
    if r7.ok:
        print("%s 投递成功"%url_detal)
    else:
        print("%s 投递失败" % url_detal)