# -*- coding: utf-8 -*-
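# This file contains only the spider. When running from the PyCharm terminal,
# start it by spider name on the command line, e.g. `scrapy crawl insists`.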
import scrapy
from insist.items import InsistItem
class InsistsSpider(scrapy.Spider):
    """Spider for the itcast.cn teacher page (``channel/teacher.shtml``).

    Every ``<div class="li_txt">`` node found in the response is converted
    into one ``InsistItem`` carrying ``name``, ``title`` and ``info``.
    """

    # Spider name; this is the identifier used to launch the spider.
    name = 'insists'
    allowed_domains = ['itcast.cn']
    start_urls = ['http://www.itcast.cn/channel/teacher.shtml']

    def parse(self, response):
        """Extract one item per ``li_txt`` node of *response*.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Returns:
            A list of ``InsistItem`` objects, one per matched node, in
            document order. The list is empty when no node matches.
        """
        items = []
        for node in response.xpath("//div[@class='li_txt']"):
            # InsistItem is the item class declared in insist.items.
            item = InsistItem()
            # extract_first() returns the first matched text as a string.
            # The default keeps a node that lacks one of the tags from
            # raising IndexError and discarding every item on the page.
            item['name'] = node.xpath("./h3/text()").extract_first(default='')
            item['title'] = node.xpath("./h4/text()").extract_first(default='')
            item['info'] = node.xpath("./p/text()").extract_first(default='')
            items.append(item)
        return items