源码:
"""Scrape rental listings from bj.lianjia.com and insert each one into MySQL.

For every result page the script locates the listing ``<li>`` elements,
pulls six text fields out of each one by XPath, prints the resulting row
and hands it to ``MysqlConnect.exec_data`` together with the INSERT
statement below.
"""
import requests
from lxml import etree
from my_mysql import MysqlConnect

# NOTE(review): connection credentials are hard-coded; consider reading
# them from the environment or a config file instead.
mc = MysqlConnect('127.0.0.1', 'root', '123456', 'homework')
sql = 'insert into lianjia(title,addr,shape,area,dire,price) values(%s,%s,%s,%s,%s,%s)'

# Seconds to wait for the server before giving up on a page; without a
# timeout ``requests.get`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# XPath of each column relative to a listing <li>, in the same order as
# the placeholders in ``sql``: title, addr, shape, area, dire, price.
FIELD_XPATHS = (
    './div[2]/h2/a',
    './div[2]/div[1]/div[1]/a/span',
    './div[2]/div[1]/div[1]/span[1]/span',
    './div[2]/div[1]/div[1]/span[2]',
    './div[2]/div[1]/div[1]/span[3]',
    './div[2]/div[2]/div[1]/span',
)


def _extract_row(li_ele):
    """Return the listing's field texts as a tuple, or None if a node is missing.

    The text of the first match of every path in ``FIELD_XPATHS`` is
    collected in order. As soon as one path matches nothing the listing
    is considered malformed and ``None`` is returned, so the caller can
    skip it instead of failing with ``IndexError``.
    """
    row = []
    for path in FIELD_XPATHS:
        matches = li_ele.xpath(path)
        if not matches:
            return None
        row.append(matches[0].text)
    return tuple(row)


# NOTE(review): paging starts at ``pg0`` exactly as before; if the site
# numbers pages from 1, ``pg0`` may duplicate ``pg1`` -- confirm against
# the live site.
for page in range(3):
    url = 'https://bj.lianjia.com/zufang/pg{}rp2rp1/'.format(page)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Do not parse an error page as if it were a listing page.
        response.raise_for_status()
    except requests.RequestException as exc:
        print('failed to fetch {}: {}'.format(url, exc))
        continue

    html = etree.HTML(response.text)
    li_list = html.xpath('//ul[@id="house-lst"]/li')
    for li_ele in li_list:
        data = _extract_row(li_ele)
        if data is None:
            # One <li> with an unexpected layout must not abort the run.
            print('skipping listing with unexpected layout on {}'.format(url))
            continue
        print(data)
        mc.exec_data(sql, data)