zoukankan      html  css  js  c++  java
  • python爬虫学习(2)__抓取糗百段子,与存入mysql数据库

    import pymysql
    import requests
    from bs4 import BeautifulSoup
    # Open the shared pymysql connection and cursor used by store() below.
# (In the original the "conn =" assignment was fused onto the comment line,
# which is a syntax error -- reconstructed here as valid statements.)
# NOTE(review): host '127.0.1' kept from the original; presumably the
# loopback '127.0.0.1' was intended -- confirm before relying on it.
conn = pymysql.connect(host='127.0.1',
                       unix_socket='/tmp/mysql.sock',
                       user='root',
                       passwd='19950311',
                       db='mysql')
cur = conn.cursor()
# Select the database that holds the `pages` table.
cur.execute("USE scraping")
    # Store one joke's title and content in the `pages` table.
def store(title, content):
    # Parameterized query: the original wrote values("%s","%s") with the
    # placeholders inside quotes -- a syntax error, and string-building SQL
    # is an injection risk.  Let the driver escape the values instead.
    cur.execute("insert into pages(title,content) values(%s,%s)",
                (title, content))
    cur.connection.commit()


class QiuShi(object):
    """Scrape jokes from the qiushibaike.com front page."""

    def __init__(self, start_url):
        # URL fetched by crawing().
        self.url = start_url

    def crawing(self):
        """Fetch the start page; return the raw body, or '' on connection failure."""
        try:
            # The original passed 'lxml' as the second positional argument,
            # which requests.get() treats as query params -- dropped.
            html = requests.get(self.url)
            return html.content
        except requests.ConnectionError:
            # requests' ConnectionError is not the builtin one; catching the
            # builtin (or the Py2 NameError the original had) would miss it.
            return ''

    def extract(self, htmlContent):
        """Parse every joke block out of the page and store/print each one."""
        if len(htmlContent) > 0:
            bsobj = BeautifulSoup(htmlContent, 'lxml')
            jokes = bsobj.findAll('div', {'class': 'article block untagged mb15'})
            for j in jokes:
                text = j.find('h2').text
                content = j.find('div', {'class': 'content'}).string
                if text is not None and content is not None:
                    # Database/table encoding is utf-8.
                    store(text.encode('utf-8'), content.encode('utf-8'))
                    print(text, content)
                    print('------------------------------------------------------------------------------')
                else:
                    print('')

    def main(self):
        """Entry point: crawl the page, then extract and store its jokes."""
        text = self.crawing()
        self.extract(text)


try:
    qiushi = QiuShi('http://www.qiushibaike.com/')
    qiushi.main()
finally:
    # Always release the module-level cursor/connection opened above,
    # even when crawling or parsing fails.
    cur.close()
    conn.close()
  • 相关阅读:
    Visual SVN1.5以上版本下载及破解过程
    C#线程系列讲座(2):Thread类的应用
    a标签的href 和onclick
    Windows 服务快捷启动命令
    iframe 跨域自动适应高度
    修正认知:string、Empty和null三者的正确区别

    线索二叉树
    最大连续子序列
    寻找最近点对
  • 原文地址:https://www.cnblogs.com/yunwuzhan/p/5765963.html
Copyright © 2011-2022 走看看