zoukankan      html  css  js  c++  java
  • python+mysql抓取百度新闻的标题存到数据库

    #!/usr/bin/python
    # -*- coding:utf-8 -*-
    import urllib2

    import re

    import MySQLdb


    class BaiDuNews:
        """Scrape headline titles from the Baidu News front page into MySQL.

        Typical use chains the three steps:
        ``saveDB(getContents(getPage()))``.
        """

        # Matches the text of the first <a> inside each <li class="hd..."> item.
        # Compiled once at class-definition time instead of on every call.
        TITLE_PATTERN = re.compile('<li class="hd.*?<a.*?>(.*?)</a>', re.S)

        # Parameterized statement: the driver escapes the value itself, so a
        # title containing quotes can neither break nor inject into the SQL.
        INSERT_SQL = "INSERT INTO baidunews VALUES (NULL, %s)"

        def __init__(self):
            self.baseurl = 'http://news.baidu.com/'

        def getPage(self):
            """Download ``self.baseurl`` and return the body decoded as GBK.

            Raises whatever ``urllib2.urlopen`` raises on network failure, and
            ``UnicodeDecodeError`` if the body is not valid GBK.
            """
            request = urllib2.Request(self.baseurl)
            response = urllib2.urlopen(request)
            try:
                raw = response.read()
            finally:
                # Release the socket even if reading fails part-way.
                response.close()
            # NOTE(review): assumes the page is served as GBK -- confirm
            # against the response's Content-Type header.
            return raw.decode('gbk')

        def getContents(self, page):
            """Extract headline titles from the page's HTML.

            :param page: decoded HTML text of the news page.
            :return: list of titles, each encoded as UTF-8 bytes, in page
                order. Empty when nothing matches.
            """
            contents = []
            for item in self.TITLE_PATTERN.findall(page):
                # Echo each title so the run shows what was scraped.
                print(item)
                contents.append(item.encode('utf-8'))
            return contents

        def saveDB(self, contents):
            """Insert each title as a new row of the ``baidunews`` table.

            The table must already exist; the statement supplies NULL for the
            first column and the title for the second (presumably an
            auto-increment ``id`` followed by a text column -- verify against
            the actual schema).

            All rows are committed together. If any insert fails, nothing is
            committed and the connection is still closed.

            :param contents: iterable of UTF-8 encoded titles.
            """
            if not contents:
                # Nothing to store; skip opening a connection.
                return
            db = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                                 db='test', charset='utf8')
            try:
                cur = db.cursor()
                try:
                    for content in contents:
                        cur.execute(self.INSERT_SQL, (content,))
                finally:
                    cur.close()
                db.commit()
            finally:
                db.close()


    # Run the pipeline step by step: download the front page, pull out the
    # headline titles, then store them in MySQL.
    news = BaiDuNews()
    page = news.getPage()
    titles = news.getContents(page)
    news.saveDB(titles)
  • 相关阅读:
    hdu1066之数学题
    hdu1065计算几何
    hdu1060
    hdu1056
    appium安装说明
    LR安装说明
    网络编程
    读写excel
    dom
    HTML
  • 原文地址:https://www.cnblogs.com/luolizhi/p/5207557.html
Copyright © 2011-2022 走看看