zoukankan      html  css  js  c++  java
  • python+mysql抓取百度新闻的标题存到数据库

    #!/usr/bin/python
    # -*- coding:utf-8 -*-
    import urllib2

    import re

    import MySQLdb


    class BaiDuNews:
        """Scrape headline titles from the Baidu News front page into MySQL.

        The three methods are meant to be chained:
        ``saveDB(getContents(getPage()))``.
        """

        def __init__(self):
            # Page that getPage() downloads.
            self.baseurl = 'http://news.baidu.com/'

        def getPage(self):
            """Download ``self.baseurl`` and return its body decoded from GBK.

            Returns:
                The page HTML as a unicode string.

            Raises:
                urllib2.URLError: if the request fails.
                UnicodeDecodeError: if the body is not valid GBK.
            """
            request = urllib2.Request(self.baseurl)
            response = urllib2.urlopen(request)
            try:
                raw = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
            return raw.decode('gbk')

        def getContents(self, page):
            """Extract headline link texts from the page HTML.

            A headline is whatever lies between the first ``<a ...>`` tag that
            follows an ``<li class="hd...`` opening and the next ``</a>``; any
            markup nested inside the link is kept as-is.

            Args:
                page: Page HTML as a unicode string.

            Returns:
                List of headline texts in document order, each encoded as
                UTF-8. Empty when nothing matches.
            """
            # re.S lets '.' cross newlines, since the <li> and its <a> may sit
            # on different lines.
            pattern = re.compile('<li class="hd.*?<a.*?>(.*?)</a>', re.S)
            contents = []
            for item in pattern.findall(page):
                # Echo each headline as progress output. The parenthesised
                # form prints the same text under Python 2 and Python 3.
                print(item)
                contents.append(item.encode('utf-8'))
            return contents

        def saveDB(self, contents):
            """Insert every headline as a new row of the ``baidunews`` table.

            Connects to the MySQL database ``test`` on 127.0.0.1 as ``root``
            with an empty password. Each row is inserted as
            ``(NULL, <headline>)``, so the table presumably has an
            auto-generated id column followed by a text column -- confirm
            against the actual schema. All rows are committed together; if any
            insert fails the transaction is rolled back and the error is
            re-raised.

            Args:
                contents: Iterable of headline strings (UTF-8 encoded).
            """
            db = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                                 db='test', charset='utf8')
            try:
                cur = db.cursor()
                try:
                    # Pass the headline as a bound parameter so the driver
                    # escapes it. Interpolating scraped text into the SQL
                    # string breaks on quotes and allows SQL injection.
                    sql = 'INSERT INTO baidunews VALUES (NULL, %s)'
                    for content in contents:
                        cur.execute(sql, (content,))
                finally:
                    cur.close()
                db.commit()
            except Exception:
                # Do not leave a half-written batch behind.
                db.rollback()
                raise
            finally:
                db.close()


    # Script entry: download the front page, pull out the headlines and
    # store them in the database, one step at a time.
    news = BaiDuNews()
    page = news.getPage()
    titles = news.getContents(page)
    news.saveDB(titles)
  • 相关阅读:
    [并发编程] 进程、线程
    100. 相同的树
    Python 问题集
    this关键字在函数中的应用
    去除列表右边框
    JS——作用域
    javascript——值传递!!
    null和undefined的区别?
    浏览器内核——四大主流
    http常用状态码
  • 原文地址:https://www.cnblogs.com/luolizhi/p/5207557.html
Copyright © 2011-2022 走看看