zoukankan      html  css  js  c++  java
  • scrapy爬取中关村在线手机频道

     1 # -*- coding: utf-8 -*-
     2 import scrapy
     3 from pyquery import PyQuery as pq
     4 
     5 from zolphone.items import ZolphoneItem
     6 
     7 
     8 class PhoneSpider(scrapy.Spider):
     9     name = "phone"
    10     # allowed_domains = ["www.zol.com.cn"]
    11     # start_url = 'http://detail.zol.com.cn/cell_phone_index/subcate57_0_list_1_0_1_1_0_1.html'
    12     start_url = 'http://detail.zol.com.cn/cell_phone_index/subcate57_0_list_1_0_1_1_0_'
    13 
    14     def start_requests(self):
    15 
    16         for page in range(1, 209):
    17             url = self.start_url + str(page) + '.html'
    18             yield scrapy.Request(url,callback=self.parse_index)
    19 
    20 
    21     def parse_index(self, response):
    22         base_url = 'http://detail.zol.com.cn'
    23         doc = pq(response.text)
    24         lis = doc('.list-box .list-item').items()
    25         for result in lis:
    26             detail_url = base_url + result.find('.pro-intro h3 a').attr('href')
    27             yield scrapy.Request(url=detail_url, callback=self.parse_detail)
    28 
    29     def parse_detail(self,response):
    30         doc = pq(response.text)
    31         title1 = response.css('.page-title h1::text').extract_first()
    32         title2 = doc('.page-title h2').text()
    33         price = doc('.product-price .price-type').text()
    34         release_time = doc('.section div h3 .showdate').text()
    35         print(title1, title2, price, release_time)
    36         item = ZolphoneItem()
    37         item['title1'] = title1
    38         item['title2'] = title2
    39         item['price'] = price
    40         item['release_time'] = release_time
    41 
    42         yield item
     1 import scrapy
     2 
     3 
     4 class ZolphoneItem(scrapy.Item):
     5     # define the fields for your item here like:
     6     # name = scrapy.Field()
     7     title1 = scrapy.Field()
     8     title2 = scrapy.Field()
     9     price = scrapy.Field()
    10     release_time = scrapy.Field()
  • 相关阅读:
    USACOZero Sum
    USACOControlling Companies
    USACOParty Lamps
    USACOMoney Systems
    UVa11292
    USACOLongest Prefix
    USACOThe Tamworth Two
    USACORunaround Numbers
    业内常见电子病历编辑器简单比较(1)编辑控件来源比较
    GB(国标)字典大全
  • 原文地址:https://www.cnblogs.com/themost/p/7072431.html
Copyright © 2011-2022 走看看