shenzhensgxk - 走看看

zoukankan html css js c++ java

shenzhensgxk

# -*- coding: utf-8 -*-
import scrapy
import re

class SpiderShenzhenSpider(scrapy.Spider):
    """Crawl the ``sgxkList`` listing pages on portal.szjs.gov.cn and dump detail pages.

    Flow:
      1. ``start_requests`` requests listing pages ``first_page`` .. ``stop_page - 1``.
      2. ``get_parse`` reads the ``onclick`` handlers of the links in each listing
         table and turns their two arguments into a detail-page URL.
      3. ``get_one`` appends one text line per detail page to ``output_path``.

    NOTE(review): the pages presumably list construction permits ("sgxk");
    that is inferred from the URLs only -- confirm against the site.
    """

    name = 'spider_shenzhen'

    # Not requested by this spider: start_requests() below is overridden and
    # builds its own URLs. Kept so the public attribute stays available.
    start_urls = ['http://www.szjs.gov.cn/bsfw/jggs/sgxk/']

    # Half-open range of listing page numbers to request.
    first_page = 400
    stop_page = 600

    # Listing page, parameterised by page number, e.g. ...sgxkList.html?page=1&...
    list_url = ('http://portal.szjs.gov.cn:8888/gongshi/sgxkList.html'
                '?page={}&qymc=&ann_serial=&pro_name=')
    # Detail page, parameterised by the two arguments of the onclick handler.
    detail_url = ('http://portal.szjs.gov.cn:8888/gongshi/sgxkz.html'
                  '?instanceGuid={}&yxtywlsh={}')

    # NOTE(review): the file is written as plain UTF-8 text despite the .xlsx
    # extension, so spreadsheet software will not open it as a workbook. The
    # path is kept unchanged so existing output keeps being appended to.
    output_path = 'test400-600.xlsx'

    # Matches serachbyId('<guid>','<serial>'). The parentheses must be escaped
    # to be matched literally; the identifier's spelling is kept exactly as in
    # the original pattern.
    onclick_pattern = re.compile(r"serachbyId\('(.*?)','(.*?)'\)")

    def start_requests(self):
        """Yield one request per listing page in ``[first_page, stop_page)``."""
        for page in range(self.first_page, self.stop_page):
            yield scrapy.Request(
                url=self.list_url.format(page),
                callback=self.get_parse,
                priority=1,
            )

    def get_parse(self, response):
        """Yield a detail-page request for every matching link on a listing page.

        Handlers that do not match ``onclick_pattern`` are skipped (and logged
        at debug level) instead of aborting the whole page.
        """
        for onclick in response.xpath('//tr/td/a/@onclick').extract():
            match = self.onclick_pattern.match(onclick)
            if match is None:
                self.logger.debug('Skipping unrecognised onclick handler: %r', onclick)
                continue
            instance_guid, serial = match.groups()
            # Higher priority than listing pages so details are fetched first.
            yield scrapy.Request(
                url=self.detail_url.format(instance_guid, serial),
                callback=self.get_one,
                priority=4,
            )

    def get_one(self, response):
        """Append one line describing a detail page to ``output_path``.

        The line holds the page URL followed by every second table-cell text
        (indices 1, 3, 5, ...), separated by single spaces and terminated by a
        newline so that each page forms its own record.

        NOTE(review): taking the odd indices presumably selects the value cell
        of each label/value pair -- confirm against the page markup.
        """
        cells = response.xpath('//tr/td/text()').extract()
        self.logger.debug('%s: %d table cells', response.url, len(cells))

        # str.split() with no argument splits on any Unicode whitespace,
        # including the non-breaking space (\xa0), so re-joining the pieces
        # removes all whitespace from a cell in one pass.
        values = [''.join(cell.split()) for cell in cells[1::2]]
        record = ' '.join([response.url] + values)

        with open(self.output_path, 'a', encoding='utf-8') as f:
            f.write(record + '\n')

查看全文

相关阅读:
C#中的字符串处理
 c#复习
 git 取消对某个文件的跟踪
 react 脚手架立即可以写业务 react + react-router-dom + less + axios + antd
vue 脚手架立即可以写业务 vue + vue-router + less + axios + elementUI + moment
mac 在命令行用webstorm打开文件
 React create-react-app Build fails after eject: Cannot find module '@babel/plugin-transform-react-jsx'
微信小程序 canvas 文字自动换行
 微信小程序 canvas 文字居中
 微信小程序 canvas 绘制圆形状

原文地址：https://www.cnblogs.com/currynashinians000/p/9014851.html