"""Scrape job postings from hr.tencent.com and store them through mysqlhelper.

The script walks the paginated position listing (10 postings per page),
extracts the five columns of every posting row and inserts each row into
the ``tenxunzhaoping`` table.
"""
import sys

import requests
from lxml import etree

import mysqlhelper

myhelper = mysqlhelper.MysqlHelper()
sql = 'INSERT INTO tenxunzhaoping (title, duty, people_num, address,addtime) VALUES(%s, %s, %s, %s,%s)'

headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    # "br" is intentionally not advertised: requests only decodes brotli when
    # an optional brotli package is installed, and an undecoded body would
    # make response.text unusable.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Cookie": "pgv_pvi=5854498816; _ga=GA1.2.608623393.1534496276; pt2gguin=o1900227304; PHPSESSID=0smi013v1lr7r3ki2aqtacp493; pgv_si=s8414673920",
    "Host": "hr.tencent.com",
    "Pragma": "no-cache",
    "Referer": "https://hr.tencent.com/position.php?&start=10",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}

# Listing URL; %s is replaced by the offset of the first posting on the page.
base_url = 'https://hr.tencent.com/position.php?keywords=&tid=0&lid=2156&start=%s#a'

# Seconds to wait for the server before giving up on a page, so that a
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10

# Table rows 2..11 of the listing: row 1 is skipped, and up to ten rows
# follow it on each page.
ROW_XPATH = '//div[@id="position"]/div/table/tr[position() >= 2 and position() <= 11]'


def _first_text(node, path):
    """Return the ``.text`` of the first element matching *path*, or None.

    Returning None instead of indexing blindly keeps a row with missing
    cells from aborting the crawl with an IndexError.
    """
    matches = node.xpath(path)
    return matches[0].text if matches else None


def _fetch_page(url):
    """Download *url* and return the parsed HTML tree, or None on failure.

    Network errors and non-2xx responses are reported on stderr and turned
    into None so the caller can move on to the next page.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('skipping %s: %s' % (url, exc), file=sys.stderr)
        return None
    return etree.HTML(response.text)


def _store_rows(document):
    """Print and insert every posting row found in *document*."""
    for row in document.xpath(ROW_XPATH):
        title = _first_text(row, './td/a')
        if title is None:
            # No title link in the first-level cells: not a posting row.
            continue
        print(title)
        duty = _first_text(row, './td[2]')
        print(duty)
        people_num = _first_text(row, './td[3]')
        print(people_num)
        address = _first_text(row, './td[4]')
        print(address)
        addtime = _first_text(row, './td[5]')
        print(addtime)

        data = (title, duty, people_num, address, addtime)
        myhelper.execute_modify_sql(sql, data)


for i in range(0, 300, 10):
    document = _fetch_page(base_url % i)
    if document is None:
        # Request failed, or the parser produced no tree for this page.
        continue
    _store_rows(document)