zoukankan      html  css  js  c++  java
  • scrapy crawl itcast -o teachers.json 爬虫案例

    1. spider.py文件配置
        1 
        2 # -*- coding: utf-8 -*-
        3 import scrapy
        4 from itTeachers.items import ItteachersItem
        5 
        6 
        7 class ItcastSpider(scrapy.Spider):
        8     name = 'itcast'
        9     allowed_domains = ['itcast.cn']
       10     start_urls = ['http://www.itcast.cn/channel/teacher.shtml#']
       11 
       12     def parse(self, response):
       13         #with open("teacher.html","w") as f:
       14             #f.write(response.body)
       15 
       16         items = []
       17 
       18         teacher_list = response.xpath('//div[@class="li_txt"]')
       19         for each in teacher_list:
       20 
       21             #我们将得到的数据封装到一个'ItcastItem'对象
       22             item = ItteachersItem()
       23             name = each.xpath('h3/text()').extract()
       24             title = each.xpath('h4/text()').extract()
       25             info = each.xpath('p/text()').extract()
       26 
       27             #xpath返回的是包含一个元素的列表
       28             item['name'] = name[0]
       29             item['title'] = title[0]
       30             item['info'] = info[0]
       31 
       32             items.append(item)
       33         #直接返回最后数据
       34         return items
      ~                         
    2. items.py文件配置
        1 # -*- coding: utf-8 -*-
        2 
        3 # Define here the models for your scraped items
        4 #
        5 # See documentation in:
        6 # https://doc.scrapy.org/en/latest/topics/items.html
        7 
        8 import scrapy
        9 
       10 
       11 class ItteachersItem(scrapy.Item):
       12     # One teacher profile; the fields are filled by ItcastSpider.parse
       13     # from the <h3> (name), <h4> (title) and <p> (info) text nodes.
       14     name = scrapy.Field()
       15     title = scrapy.Field()
       16     info = scrapy.Field()

  • 相关阅读:
    spring 装配核心笔记
    小明种苹果
    线性分类器
    报数
    编程计划2.0 //不断更新
    MySQL基础之存储过程
    MySQL基础之视图
    指令系统.传送类指令
    MySQL基础之索引
    寻址方式总结
  • 原文地址:https://www.cnblogs.com/hizf/p/8270008.html
Copyright © 2011-2022 走看看