zoukankan      html  css  js  c++  java
  • 我爱我家数据爬取

     """Scrape rental listings from 5i5j list pages and store them in MySQL.

     For each list page the script extracts title, space, address, follow
     info and price from every ``<li>`` under ``ul.pList``, prints them, and
     inserts one row per listing through ``mysqlhelper``.
     """
     import requests
     from lxml import etree
     import mysqlhelper

     # List-page URL template; %s is replaced by the page number.
     base_url = 'https://bj.5i5j.com/zufang/huilongguan/n%s/'
     myhelper = mysqlhelper.MysqlHelper()
     sql = 'insert into woaiwojiaxinxi(title, space, address, follow, price) values(%s,%s,%s,%s,%s)'

     # Seconds to wait for the server; without a timeout requests.get can block forever.
     REQUEST_TIMEOUT = 10


     def _first_text(node, path):
         """Return the first XPath match under *node* as a string, or '' if none.

         Paths ending in ``text()`` yield strings directly; element paths yield
         nodes whose ``.text`` may be ``None``, which is normalised to ''.
         """
         matches = node.xpath(path)
         if not matches:
             return ''
         first = matches[0]
         if isinstance(first, str):
             return first
         return first.text or ''


     def parse_listing(li_ele):
         """Extract one listing row from a ``<li>`` element.

         Returns a ``(title, space, address, follow, price)`` tuple, or ``None``
         when the element has no title and should be skipped. Other missing
         fields are returned as empty strings instead of raising IndexError.
         """
         title = _first_text(li_ele, './div[2]/h3/a')
         if not title:
             return None

         space = _first_text(li_ele, './div[2]/div[1]/p[1]/text()')
         # The address is split between a link and the text that follows it.
         address = (_first_text(li_ele, './div[2]/div[1]/p[2]/a/text()')
                    + _first_text(li_ele, './div[2]/div[1]/p[2]/text()'))
         follow = _first_text(li_ele, './div[2]/div[1]/p[3]/text()')

         amount = _first_text(li_ele, './div[2]/div/div/p/strong')
         price = amount + " 元/月" if amount else ''

         return (title, space, address, follow, price)


     def main():
         """Fetch list pages 1-3, print each listing and insert it into the database."""
         for page in range(1, 4):
             response = requests.get(base_url % page, timeout=REQUEST_TIMEOUT)
             # Fail loudly on 4xx/5xx instead of parsing an error page.
             response.raise_for_status()
             html_ele = etree.HTML(response.text)

             for li_ele in html_ele.xpath('//ul[@class="pList"]/li'):
                 data = parse_listing(li_ele)
                 if data is None:
                     continue
                 for field in data:
                     print(field)
                 myhelper.execute_modify_sql(sql, data)


     if __name__ == '__main__':
         main()
  • 相关阅读:
    枚举enum
    C# 位运算符
    运算符&和&&以及|和||区别比较
    LINQ TO JSON
    LINQ 随机排序
    .NET Core LinQ
    CSharp笔记>>>多线程
    3D旋转
    CSharp 之CSkin的使用教程
    CSharp笔记>>>多语言,注册,模态对话框返回值
  • 原文地址:https://www.cnblogs.com/daihao9527/p/9503166.html
Copyright © 2011-2022 走看看