一、数据库读书笔记
SQLite是一种嵌入式数据库,它的数据库就是一个文件。由于SQLite本身是C写的,而且体积很小,所以,经常被集成到各种应用程序中,iOS和Android的App中都可以集成。
注:Python就内置了SQLite3,所以,在Python中使用SQLite,不需要安装任何东西,直接使用,需要导入sqlite3库。
1.数据库的连接
import sqlite3 #导入模块 conn = sqlite3.connect('example.db') #连接数据库 # 连接到SQLite数据库 # 数据库文件是example.db # 如果文件不存在,会自动在当前目录创建: |
2.创建游标对象和表
成功创建Connection对象以后,再创建一个Cursor对象,并且调用Cursor对象的execute()方法来执行SQL语句创建数据表以及查询、插入、修改或删除数据库中的数据:
c = conn.cursor()
c.execute('''CREATE TABLE stocks (date text, trans text, symbol text, qty real, price real)''') # 创建表 c.execute("INSERT INTO stocks VALUES ('2006-01-05','BUY', 'RHAT', 100, 35.14)") # 插入一条记录
conn.commit()# 提交当前事务,保存数据
conn.close()# 关闭数据库连接
|
3.查询表中内容
select操作
下面的 Python 程序显示了如何从前面创建的 COMPANY 表中获取并显示记录:
#!/usr/bin/python
import sqlite3
conn = sqlite3.connect('test.db')
c = conn.cursor()
print "Opened database successfully";
cursor = c.execute("SELECT id, name, address, salary from COMPANY")
for row in cursor:
print "ID = ", row[0]
print "NAME = ", row[1]
print "ADDRESS = ", row[2]
print "SALARY = ", row[3], " "
print "Operation done successfully";
conn.close()
上述程序执行时,它会产生以下结果:
Opened database successfully
ID = 1
NAME = Paul
ADDRESS = California
SALARY = 20000.0
ID = 2
NAME = Allen
ADDRESS = Texas
SALARY = 15000.0
ID = 3
NAME = Teddy
ADDRESS = Norway
SALARY = 20000.0
ID = 4
NAME = Mark
ADDRESS = Rich-Mond
SALARY = 65000.0
Operation done successfully
二、爬取中国大学网
基本代码
import requests from bs4 import BeautifulSoup import bs4
def getHTMLText(url): try: r=requests.get(url,timeout=30) r.raise_for_status() r.encoding='utf-8' return r.text except: return ""
def fillUnivList(allUniv,soup): data=soup.find('tbody').children for tr in data: if isinstance(tr,bs4.element.Tag): ltd=tr('td') allUniv.append([ltd[0].string,ltd[1].string,ltd[2].string,ltd[3].string,ltd[4].string])
def printUnivList(allUniv,num): print("{1:^2} {2:{0}^10} {3:{0}^6} {4:{0}^4} {5:{0}^10}".format(chr(12288),"排名","学校名称","省份","总分","生涯质量")) for i in range(num): u=allUniv[i] print("{1:^4} {2:{0}^10} {3:{0}^5} {4:{0}^8.1f} {5:{0}^10}".format(chr(12288),u[0],u[1],u[2],eval(u[3]),u[4]))
def main(num): allo=[] url='http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html' html=getHTMLText(url) soup=BeautifulSoup(html,"html.parser") fillUnivList(allo,soup) printUnivList(allo,num) main(10) |
结果如下图
保存为csv文件
import requests from bs4 import BeautifulSoup import bs4 import pandas as pd def getHTMLText(url): try: r=requests.get(url,timeout=30) r.raise_for_status() r.encoding='utf-8' return r.text except: return ""
def fillUnivList(allUniv,soup): data=soup.find('tbody').children for tr in data: if isinstance(tr,bs4.element.Tag): ltd=tr('td') allUniv.append([ltd[0].string,ltd[1].string,ltd[2].string,ltd[3].string,ltd[4].string])
def printUnivList(allUniv,num): print("{1:^2} {2:{0}^10} {3:{0}^6} {4:{0}^4} {5:{0}^10}".format(chr(12288),"排名","学校名称","省份","总分","生涯质量")) one=[] for i in range(num): u=allUniv[i] one.append(u) print("{1:^4} {2:{0}^10} {3:{0}^5} {4:{0}^8.1f} {5:{0}^10}".format(chr(12288),u[0],u[1],u[2],eval(u[3]),u[4])) return one
def save(u): #存为csv文件 name=["排名","学校名称","省份","总分","生涯质量"] li=[u[0],u[1],u[2],u[3],u[4],u[5],u[6],u[7],u[8],u[9]] test=pd.DataFrame(columns=name,data=li) test.to_csv('D:/pan.csv',encoding="gbk")
def main(num): allo=[] url='http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html' html=getHTMLText(url) soup=BeautifulSoup(html,"html.parser") fillUnivList(allo,soup) k=printUnivList(allo,num) save(k) main(10) |
结果:
三、将csv文件写入数据库
代码
import sqlite3 import openpyxl lists=sqlite3.connect('panq.db') c=lists.cursor() c.execute('''CREATE TABLE q4an("序号","排名","学校名称","省市","总分","生源质量","培养结果","科研规模","科研质量","顶尖成果","顶尖人才","科技服务","产学研合作","成果转化","学生国际化" )''') listinsheet=openpyxl.load_workbook(r'D:/panqo.xlsx') datainlist=listinsheet.active #获取excel文件当前表格 data_truck=('''INSERT INTO q4an("序号","排名","学校名称","省市","总分","生源质量","培养结果","科研规模","科研质量","顶尖成果","顶尖人才","科技服务","产学研合作","成果转化","学生国际化") VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''') for row in datainlist.iter_rows(min_row=2,max_col=15,max_row=datainlist.max_row): #使excel各行数据成为迭代器 cargo=[cell.value for cell in row] #敲黑板!!使每行中单元格成为迭代器 c.execute(data_truck,cargo) #敲黑板!写入一行数据到数据库中表rankf for row in c.execute('SELECT * FROM q4an ORDER BY "序号"'): print(row)
lists.commit() lists.close() |
部分结果:
四、查询某学校排名
import sqlite3 import openpyxl lists=sqlite3.connect('panq.db') c=lists.cursor() c.execute('''CREATE TABLE q4bn("序号","排名","学校名称","省市","总分","生源质量","培养结果","科研规模","科研质量","顶尖成果","顶尖人才","科技服务","产学研合作","成果转化","学生国际化" )''') listinsheet=openpyxl.load_workbook(r'D:/panqo.xlsx') datainlist=listinsheet.active #获取excel文件当前表格 data_truck=('''INSERT INTO q4bn("序号","排名","学校名称","省市","总分","生源质量","培养结果","科研规模","科研质量","顶尖成果","顶尖人才","科技服务","产学研合作","成果转化","学生国际化") VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''') for row in datainlist.iter_rows(min_row=2,max_col=15,max_row=datainlist.max_row): #使excel各行数据成为迭代器 cargo=[cell.value for cell in row] #敲黑板!!使每行中单元格成为迭代器 c.execute(data_truck,cargo) #敲黑板!写入一行数据到数据库中表rankh c.execute('SELECT * FROM q4bn WHERE "学校名称"="华南理工大学"')#由于我们学校没有上榜就查询了广州大学的排名 r = c.fetchall() print(r) lists.commit() lists.close() |
结果