zoukankan      html  css  js  c++  java
  • 世界500强

    import psycopg2
    import json
    from news_project.config.sql_log import log
    from news_project.middlewares import Deal_Content

    class NewsProjectPipeline(object):
        """Scrapy item pipeline that stores scraped news items in PostgreSQL.

        One database connection is opened when the spider starts and reused
        for every item; each item is written with its own short-lived cursor
        and committed individually. The connection is closed when the spider
        finishes.
        """

        # (table column, item key) pairs written for every row, in SQL order.
        _COLUMN_TO_ITEM_KEY = (
            ("type_cn", "type_cn"),
            ("source", "news"),
            ("level2", "id"),
            ("level1", "pid"),
            ("event_time", "time"),
            ("title", "title"),
            ("url", "title_url"),
            ("content", "content"),
            ("lable", "tags"),  # column name is spelled "lable" in the table
            ("type_no", "type_no"),
        )

        def _connect(self):
            """Open a new database connection using the settings from ``log()``."""
            l = self.l = log()
            self.conn = psycopg2.connect(
                database=l.database,
                user=l.user,
                password=l.password,
                host=l.host,
                port=l.port,
            )

        def _ensure_connection(self):
            """Connect lazily if there is no usable connection.

            Keeps ``process_item`` working even when ``open_spider`` was not
            called, or when the connection was closed in the meantime.
            """
            conn = getattr(self, "conn", None)
            if conn is None or conn.closed:
                self._connect()

        @classmethod
        def _build_insert(cls, item):
            """Return ``(sql, params)`` for inserting *item* into ``baoji_news``.

            The ``association_id`` column is included only when the item
            carries a truthy ``association_id``. Column names are fixed
            constants; all values are passed as query parameters.
            """
            columns = [column for column, _ in cls._COLUMN_TO_ITEM_KEY]
            params = [item[key] for _, key in cls._COLUMN_TO_ITEM_KEY]

            if item.get('association_id'):
                columns.append("association_id")
                params.append(item['association_id'])

            sql = "INSERT INTO bjzs_big_data.baoji_news({}) VALUES ({})".format(
                ",".join(columns),
                ",".join(["%s"] * len(columns)),
            )
            return sql, tuple(params)

        def open_spider(self, spider):
            """Open the database connection shared by all items of this run."""
            self._connect()

        def process_item(self, item, spider):
            """Normalize *item* and insert it into the news table.

            Returns the normalized item as a plain ``dict``. Items without
            content are not stored and ``0`` is returned instead.

            Raises whatever the database driver raises on failure; the
            transaction is rolled back first so the shared connection stays
            usable for the following items.
            """
            item = dict(item)
            d = Deal_Content()
            # Normalize the time format.
            item['time'] = d.handleTime(item['time'], item['title_url'])
            print("item*************************************///////////////////////", item['time'])

            # Store empty strings as NULL.
            for key, value in item.items():
                if value == "":
                    item[key] = None

            if item['type_cn'] is None:
                item['type_cn'] = "行业新闻"

            if item['type_no'] is None:
                item['type_no'] = 16

            if item['content'] is None:
                # NOTE(review): Scrapy pipelines normally drop items by raising
                # DropItem; returning 0 is kept for backward compatibility.
                return 0

            # Two storage shapes: with or without an association_id column.
            sql, params = self._build_insert(item)

            self._ensure_connection()
            cur = self.conn.cursor()
            try:
                cur.execute(sql, params)
                # Commit per item.
                self.conn.commit()
            except Exception:
                # Leave the shared connection in a clean state before re-raising.
                self.conn.rollback()
                raise
            finally:
                cur.close()

            return item

        def close_spider(self, spider):
            """Close the shared database connection, if one is open."""
            conn = getattr(self, "conn", None)
            if conn is not None and not conn.closed:
                conn.close()

  • 相关阅读:
    linux线程
    linux线程
    c++之堆、栈、数据段、
    fork()、僵死进程和孤儿进程
    linux之管理mysql
    linux之管理apache
    Django 时间与时区设置问题
    Django rest framework:__str__ returned non-string (type NoneType) 真正原因
    Django获取当前页面的URL——小记
    Django中出现:TemplateDoesNotExist at
  • 原文地址:https://www.cnblogs.com/yuanjia8888/p/10233834.html
Copyright © 2011-2022 走看看