zoukankan      html  css  js  c++  java
  • scrapy上海买房指南

    Spider:

    # -*- coding: utf-8 -*-
    import scrapy
    from scrapy_zhaopin.items import ScrapyHouseItem
    from scrapy.http import Request
    
    
    class MySpider(scrapy.Spider):
        """Collect second-hand house listings from sh.lianjia.com search pages.

        The crawl starts from one keyword search page and, from every parsed
        page, schedules the search pages for the keywords in ``address_list``.
        Each listing entry on a page is emitted as a ``ScrapyHouseItem``.
        """

        name = "spiderhouse"
        allowed_domains = ["sh.lianjia.com"]
        start_urls = ["https://sh.lianjia.com/ershoufang/rs徐泾北城/"]

        # Search keywords whose result pages are crawled in addition to
        # ``start_urls``. Kept at class level so the list is built once
        # instead of on every ``parse`` call.
        address_list = (
            "徐盈路", "徐泾镇", "华新镇", "嘉定北", "中山公园", "汇金路",
            "青浦新城", "爱博家园", "九亭", "佘山", "泗泾", "洞泾", "赵巷",
        )

        def parse(self, response):
            """Yield one item per listing, then requests for the other keywords.

            Args:
                response: the downloaded search-result page.

            Yields:
                ``ScrapyHouseItem`` objects for every listing found on the page,
                followed by a ``Request`` for each keyword in ``address_list``.
            """
            # The page title is the same for every listing, so read it once.
            # ``extract_first`` with a default avoids an IndexError (which would
            # abort the whole callback) when the page has no <title>.
            page_title = response.xpath('//title/text()').extract_first(default="")
            title = page_title.split("_")[0].replace("二手房房源", "")

            listings = response.xpath(
                '//*[contains(@log-mod,"list")]//li[contains(@class,"clear")]'
            )
            for line in listings:
                item = ScrapyHouseItem()

                item['title'] = title
                item['name'] = line.xpath('.//*[@class="title"]/a/text()').extract()
                item['address'] = line.xpath('.//*[@class="positionInfo"]/a/text()').extract()
                item['house_info'] = line.xpath('.//*[@class="houseInfo"]/text()').extract()
                item['price'] = line.xpath('.//*[@class="totalPrice"]//span/text()').extract()

                # A listing without a unit-price span yields an empty string
                # instead of raising and dropping the rest of the page.
                unit_price = line.xpath(
                    './/*[@class="unitPrice"]//span/text()'
                ).extract_first(default="")
                item['unit_price'] = unit_price.replace("单价", "").replace("元/平米", "")
                yield item

            # These requests are re-issued from every parsed page; this relies on
            # Scrapy's duplicate-request filtering so each URL is fetched once.
            for keyword in self.address_list:
                address_url = f'https://sh.lianjia.com/ershoufang/rs{keyword}/'
                yield Request(address_url, callback=self.parse)

            # TODO: pagination is not implemented -- only the first result page
            # of each keyword is crawled.
  • 相关阅读:
    悟透JavaScript(理解JS面向对象的好文章)
    ClassLoader的等级加载机制
    ClassLoader的类结构分析
    如何实现自己的ClassLoader
    Servlet的ClassLoader
    idea远程调试linux下的tomcat
    centos VM 识别U盘
    linux yum 命令
    centos 基本操作(快捷键开户终端,复制,粘贴,yum命令)
    CentOS 配置Apache+Mysql+PHP (yum)与卸载
  • 原文地址:https://www.cnblogs.com/nieliangcai/p/13322671.html
Copyright © 2011-2022 走看看