zoukankan      html  css  js  c++  java
  • scrapy管道MySQL简记

    import pymysql
    from scrapy.exceptions import DropItem
    import time

    class ErshouchePipeline(object):
        """Scrapy item pipeline that stores scraped items in a MySQL table.

        On construction the pipeline connects to MySQL and loads every URL
        already stored in the ``路由网址`` column of table ``二手车之家``.
        ``process_item`` inserts items whose ``car_url`` has not been seen and
        drops the rest, so each URL is written at most once.
        """

        # Item fields in the order their values are supplied to the INSERT
        # statement. The statement passes a leading Null before these values
        # (presumably an auto-increment key -- confirm against the schema).
        _ITEM_FIELDS = (
            'city', 'trademark', 'model', 'colour', 'price', 'purpose',
            'vehicle_condition', 'drive_mode', 'Truck_kilometer', 'car_license',
            'Stop_displacemen', 'year_jian_due', 'insurance_policy_matures',
            'assurance_due', 'emission_standard', 'guohu_number', 'maintenance',
            'car_url',
        )

        def __init__(self):
            """Open the MySQL connection and cache the URLs already stored."""
            # NOTE(review): connection settings, including the password, are
            # hard-coded here; consider reading them from the Scrapy settings.
            self.conn = pymysql.connect(
                host='127.0.0.1',
                port=3306,
                user='root',
                passwd='mlpythonlmoi',
                db='ershouche',
                charset='utf8'
            )
            # DictCursor returns each row as a dict keyed by column name.
            self.cusor = self.conn.cursor(cursor=pymysql.cursors.DictCursor)
            # Read the URLs of the records that were already crawled.
            self.cusor.execute("select 路由网址 from 二手车之家")
            temp = self.cusor.fetchall()  # every row returned by the query
            print('返回查询得到的记录:', temp)
            self.url_list = [row['路由网址'] for row in temp]
            print('存在的:', self.url_list)

        def process_item(self, item, spider):
            """Insert ``item`` into the table unless its URL is already known.

            Args:
                item: mapping-like scraped item providing every key listed in
                    ``_ITEM_FIELDS``.
                spider: the spider that produced the item (unused).

            Returns:
                The same ``item``, after it has been inserted and committed.

            Raises:
                DropItem: if ``item['car_url']`` is already in ``url_list``.
                Exception: any error raised by the insert or commit is
                    re-raised after the transaction has been rolled back.
            """
            url = item['car_url']
            if url in self.url_list:
                raise DropItem('该item数据库中已经存在!')

            # Derive placeholders and values from the same field tuple so the
            # two can never get out of step.
            placeholders = ','.join(['%s'] * len(self._ITEM_FIELDS))
            sql = "insert into 二手车之家 values(Null," + placeholders + ")"
            values = tuple(item[field] for field in self._ITEM_FIELDS)
            try:
                self.cusor.execute(sql, values)
                self.conn.commit()
            except Exception:
                # Leave the shared connection in a clean state for later items.
                self.conn.rollback()
                raise

            # Remember the URL so a duplicate scraped later in this same run
            # is dropped instead of being inserted a second time.
            self.url_list.append(url)
            return item

        def close_spider(self, spider):
            """Close the cursor and the connection, then print the end time."""
            try:
                self.cusor.close()
            finally:
                # Close the connection even if closing the cursor failed.
                self.conn.close()
            print("操作结束!")
            print('结束时间:' + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
  • 相关阅读:
    @resource、@Autowired、@Service在一个接口多个实现类中的应用
    java 中 静态块的作用
    数组、List和ArrayList的区别
    C#优化
    Net XML操作
    Asp.Net MVC+EF+三层架构的完整搭建过程
    C#多线程
    算法
    详解java中的byte类型
    虚幻4蓝图编译剖析(三)
  • 原文地址:https://www.cnblogs.com/Army-Knife/p/10610831.html
Copyright © 2011-2022 走看看