  • Basic Scrapy spider: crawling multiple pages

    # -*- coding: utf-8 -*-
    import scrapy


    class GjSpider(scrapy.Spider):
        name = 'gj'
        allowed_domains = ['ganji.com']
        start_urls = ['http://sz.ganji.com/zufang/']

        def parse(self, response):
            # Listing entries live under the f-main-list container;
            # position()>2 skips the leading non-listing divs.
            house_list = response.xpath('.//div[@class="f-main-list"]/div/div[position()>2]')
            for house in house_list:
                title = house.xpath(".//dl/dd[contains(@class,'title')]/a/@title").extract_first()
                size = house.xpath(".//dl/dd[contains(@class,'size')]/span[3]/text()").extract_first()
                # chaoxiang: the flat's orientation (朝向)
                chaoxiang = house.xpath(".//dl/dd[contains(@class,'size')]/span[5]/text()").extract_first()
                price = house.xpath(".//dl/dd[contains(@class,'info')]/div/span[1]/text()").extract_first()
                address1 = house.xpath(".//dl/dd[contains(@class,'address')]/span/a[1]/text()").extract_first()
                address2 = house.xpath(".//dl/dd[contains(@class,'address')]/span/a[2]/span/text()").extract_first()

                yield {
                    'title': title,
                    'size': size,
                    'chaoxiang': chaoxiang,
                    'price': price,
                    'address': str(address1) + '-' + str(address2),
                }

            # Follow the "next page" link, if any, with this same callback
            # so every page of the listing is collected.
            next_links = response.xpath('.//div[@class="pageBox"]//a[contains(@class,"next")]/@href').extract()
            if next_links:
                # urljoin makes the href absolute in case the site serves a relative link
                yield scrapy.Request(response.urljoin(next_links[0]), callback=self.parse)
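
  To persist the scraped items beyond the console, a Scrapy item pipeline can write each yielded dict to a CSV row. The sketch below is only an illustration under assumptions: the file name zufang.csv, the class name CsvWriterPipeline, and the settings key are placeholders, not part of the original project.

    # pipelines.py (hypothetical file): write each scraped item as a CSV row
    import csv


    class CsvWriterPipeline:
        def open_spider(self, spider):
            # Open the output file once per crawl; 'zufang.csv' is an example name
            self.file = open('zufang.csv', 'w', newline='', encoding='utf-8')
            self.writer = csv.DictWriter(
                self.file,
                fieldnames=['title', 'size', 'chaoxiang', 'price', 'address'])
            self.writer.writeheader()

        def process_item(self, item, spider):
            # Every dict the spider yields passes through here
            self.writer.writerow(item)
            return item

        def close_spider(self, spider):
            self.file.close()

  To enable it, register the class in settings.py, e.g. ITEM_PIPELINES = {'myproject.pipelines.CsvWriterPipeline': 300} (the project name is a placeholder). Alternatively, Scrapy's built-in feed export achieves the same with no extra code: scrapy crawl gj -o zufang.csv.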
    

      

  • Original article: https://www.cnblogs.com/brady-wang/p/12505324.html