zoukankan      html  css  js  c++  java
  • 爬取58同城二手房数据并存储到Redis数据库和MySQL数据库

    主程序代码:

     1 import scrapy
     2 from  scrapyDemo.items  import  ScrapydemoItem
     3 
     class PostSpider(scrapy.Spider):
         """Spider that scrapes second-hand housing listings from a 58.com results page.

         Every ``<li>`` under the ``house-list-wrap`` list becomes one
         ``ScrapydemoItem`` carrying the listing's title and price text.
         """

         name = 'home'
         # Domain filtering is disabled; the placeholder value below does not
         # match the host used in start_urls.
         # allowed_domains = ['www.baidu.com']
         start_urls = ['https://bj.58.com/shahe/ershoufang/e4/?PGTID=0d30000c-0142-1050-f5c4-dad0a3db3138&ClickID=1']

         def parse(self, response):
             """Yield one item per listing found in the response.

             Args:
                 response: the downloaded listing page.

             Yields:
                 ScrapydemoItem with ``title`` and ``price`` set to the first
                 text node matched by the corresponding XPath.
             """
             # Item field -> XPath, relative to a single listing <li>.
             field_paths = (
                 ("title", './div[2]/h2/a/text()'),
                 ("price", './div[3]/p/b/text()'),
             )
             for entry in response.xpath('//ul[@class="house-list-wrap"]/li'):
                 listing = ScrapydemoItem()
                 for field, path in field_paths:
                     listing[field] = entry.xpath(path).extract_first()
                 yield listing
    爬虫代码

    管道代码:

     1 import  pymysql
     2 from redis import  Redis
     3 
     4 #存储到mysql数据库
     class Scrapydemotomysql(object):
         """Item pipeline that inserts each scraped item into the MySQL ``house`` table.

         The connection is opened when the spider starts and closed when it
         finishes; every item is written in its own transaction.
         """

         # Open pymysql connection; set in open_spider, None before that.
         conn = None
         # Cursor used for the most recent insert; None until the first item.
         cursor = None

         def open_spider(self, spider):
             """Open the MySQL connection used for the whole crawl."""
             # NOTE(review): host and credentials are hard-coded; consider
             # reading them from the project settings instead.
             self.conn = pymysql.Connect(host="127.0.0.1", port=3306, user="root", password="root", db="spider")

         def process_item(self, item, spider):
             """Insert the item's title and price, then pass the item on.

             A failed insert is logged and rolled back rather than raised, so
             one bad row does not stop the crawl or later pipelines.

             Args:
                 item: mapping with ``title`` and ``price`` keys.
                 spider: the running spider; its logger reports failures.

             Returns:
                 The same item, unchanged.
             """
             # Placeholders are bound by the driver instead of being spliced
             # into the SQL text, so quotes in scraped titles cannot break or
             # inject into the statement. A missing value (None) is bound as
             # NULL rather than the literal text "None".
             # Assumes ``house`` has exactly two columns, in (title, price)
             # order - TODO confirm against the table schema.
             sql = 'insert into house values (%s, %s)'
             params = (item["title"], item["price"])
             self.cursor = self.conn.cursor()
             try:
                 self.cursor.execute(sql, params)
                 self.conn.commit()
             except Exception as exc:
                 spider.logger.error("MySQL insert failed: %s", exc)
                 self.conn.rollback()
             finally:
                 # One cursor per item: release it as soon as the row is done.
                 self.cursor.close()
             return item

         def close_spider(self, spider):
             """Close the MySQL connection when the crawl ends."""
             # Cursors are already closed per item, and a crawl that yielded no
             # items never created one, so only the connection is released here.
             if self.conn is not None:
                 self.conn.close()
    29 
    30 #存储到redis数据库
    class Scrapydemotoredis(object):
        """Item pipeline that pushes each scraped item onto the Redis list ``home``."""

        # Redis client; set in open_spider, None before that.
        conn = None

        def open_spider(self, spider):
            """Create the Redis client used for the whole crawl."""
            self.conn = Redis(host='127.0.0.1', port=6379)

        def process_item(self, item, spider):
            """Push the item's title and price to Redis, then pass the item on.

            Args:
                item: mapping with ``title`` and ``price`` keys.
                spider: the running spider (unused).

            Returns:
                The same item, unchanged.
            """
            record = {
                'title': item["title"],
                'price': item["price"],
            }
            # The payload is the dict's Python repr, not JSON; that format is
            # kept so readers of entries already in the list see no change.
            self.conn.lpush('home', str(record))
            return item
    pipelines
  • 相关阅读:
    堆优化Dijkstra模版
    poj_1364King
    快速排序库函数qsort的使用
    CMD type命令
    开放地址法
    poj_3159Candies
    poj_1511Invitation Cards
    何谓数据结构
    div ul li添加文本自动自动
    java虚拟机使用内存
  • 原文地址:https://www.cnblogs.com/duanhaoxin/p/10138893.html
Copyright © 2011-2022 走看看