zoukankan      html  css  js  c++  java
  • 买房指南,链家最近房源

    # coding=utf-8

    """
    Author: nieliangcai
    version: 0.1

    date: 2019/7/29 11:30
    """

    import requests_html
    import xlwt
    import time
    import openpyxl
    from pprint import pprint


    # Timestamp formatted as YYYYMMDDHHMM; embedded in the default output
    # filename of write_house_data so each run gets its own workbook name.
    now_time = time.strftime("%Y%m%d%H%M")
    # Single requests_html session reused for every page request in this script.
    session = requests_html.HTMLSession()

    # Search keywords inserted into the Lianjia listing URL, one request per
    # entry; the same strings are later used as the worksheet names.
    House_List = ["徐泾北城", "泗泾", "佘山", "九亭", "宝龙广场", "洞泾", "蟠龙路", "宝山"]


    def write_house_data(table_info, title="地区房价%s.xlsx" % now_time):
        """Write scraped listings to an .xlsx workbook, one worksheet per area.

        :param table_info: sequence of ``(sheet_name, rows)`` entries. ``rows`` is
            a sequence of rows, each row a sequence of cell values that are
            written left to right starting at column 1.
        :param title: path of the workbook file to save.
        :return: None
        """
        title_list = ['Title', 'house_info', 'height', 'total_price', 'unit_price']

        workbook = openpyxl.Workbook()
        # A fresh Workbook contains a default sheet named "Sheet". Remove it only
        # when there is data to write: openpyxl cannot save a workbook that has
        # no sheets, so for empty input the default sheet is kept.
        if len(table_info) > 0:
            workbook.remove(workbook["Sheet"])

        for entry in table_info:
            address, rows = entry[0], entry[1]

            # The area keyword doubles as the worksheet name.
            sheet = workbook.create_sheet(address)

            # openpyxl rows and columns are 1-based: header goes in row 1.
            for column, heading in enumerate(title_list, start=1):
                sheet.cell(row=1, column=column, value=heading)

            # Data rows follow the header, starting at row 2.
            for row_number, row in enumerate(rows, start=2):
                for column, value in enumerate(row, start=1):
                    sheet.cell(row=row_number, column=column, value=value)

        workbook.save(title)


    # Scrape the first results page for every keyword, then export everything.
    all_house = []
    for house in House_List:
        URL = "https://sh.lianjia.com/ershoufang/rs%s/" % house
        res = session.get(URL)
        title_all = res.html.find(".title>a")  # listing titles
        house_info_all = res.html.find(".houseInfo")  # listing details
        position_Info_all = res.html.find(".positionInfo")  # location
        totalPrice = res.html.find(".totalPrice")  # total price
        unitPrice = res.html.find(".unitPrice")  # unit price

        # Combine the five parallel selector results into one row per listing.
        # zip() stops at the shortest list, so a page where one selector matched
        # fewer elements no longer raises IndexError.
        # NOTE(review): rows are paired purely by position; if the selectors can
        # match different numbers of elements, verify the columns stay aligned.
        list_house = [
            [title.text, info.text, position.text, total.text, unit.text]
            for title, info, position, total, unit in zip(
                title_all, house_info_all, position_Info_all, totalPrice, unitPrice
            )
        ]
        all_house.append(list_house)

    # Pair each keyword with its rows: [(keyword, rows), ...].
    zipped = list(zip(House_List, all_house))
    pprint(zipped)
    write_house_data(zipped)
  • 相关阅读:
    Shell中调用java时的参数
    简析echo命令在Linux系统中的使用
    设置Linux环境变量的三种方法
    nohup 后台运行,以及重定向标准输出和标准错误 &/dev/null 文件
    &命令
    linux下卸载gij的java
    在Linux下运行可执行Jar包
    jar参数运行应用时classpath的设置方法
    shell获取当前进程pid和上一个进程pid
    检查文件,如果文件不存在则创建
  • 原文地址:https://www.cnblogs.com/nieliangcai/p/11263581.html
Copyright © 2011-2022 走看看