聚合
多数据文档进行整理统计
db.collectionName.aggregate()
功能:聚合函数,配合聚合条件进行数据整理统计
参数:聚合条件
聚合操作符
$group 分组 :和分组操作符配合使用,确定按什么分组
分组操作符
$sum 求和:{$sum:1}表示统计每组个数(即gender中每统计一个,sum加1次)
> db.class1.aggregate({$group:{_id:'$gender',total_num:{$sum:1}}})
---------聚合----------分组---------按gender分组-----统计结果
{ "_id" : null, "total_num" : 4 }
{ "_id" : "men", "total_num" : 2 }
{ "_id" : "women", "total_num" : 4 }
$sum 求和:{$sum:'$Age'}表示对每组文档的Age域求和(即每组中每统计一个文档,就把该文档的Age值累加到结果中)
> db.class1.aggregate({$group:{_id:'$gender',total_num:{$sum:'$Age'}}})
{ "_id" : null, "total_num" : 80 }
{ "_id" : "men", "total_num" : 85 }
{ "_id" : "women", "total_num" : 125 }
>
$avg 求平均数
> db.class1.aggregate({$group:{_id:'$gender',total_num:{$avg:'$Age'}}})
{ "_id" : null, "total_num" : 40 }
{ "_id" : "men", "total_num" : 42.5 }
{ "_id" : "women", "total_num" : 31.25 }
>
$min 求最小值
> db.class1.aggregate({$group:{_id:'$gender',total_num:{$min:'$Age'}}})
{ "_id" : null, "total_num" : 25 }
{ "_id" : "men", "total_num" : 30 }
{ "_id" : "women", "total_num" : 24 }
>
$max求最大值
> db.class1.aggregate({$group:{_id:'$gender',total_num:{$max:'$Age'}}})
{ "_id" : null, "total_num" : 55 }
{ "_id" : "men", "total_num" : 55 }
{ "_id" : "women", "total_num" : 45 }
>
$first返回每组第一个文档指定域值
> db.class1.aggregate({$group:{_id:'$gender',total_num:{$first:'$name'}}})
{ "_id" : null, "total_num" : "永强" }
{ "_id" : "men", "total_num" : "广坤" }
{ "_id" : "women", "total_num" : "刘英" }
>
$last返回每组最后一个文档指定域值
> db.class1.aggregate({$group:{_id:'$gender',total_num:{$last:'$name'}}})
{ "_id" : null, "total_num" : "王天来" }
{ "_id" : "men", "total_num" : "玉田" }
{ "_id" : "women", "total_num" : "小刘英" }
>
$project
用于修饰文档的显示结构
> db.class1.aggregate({$project:{_id:0,name:1}})
{ "name" : "刘英" }
{ "name" : "广坤" }
{ "name" : "谢大脚" }
{ "name" : "小梦" }
{ "name" : "永强" }
{ "name" : "玉田" }
{ "name" : "小刘英" }
{ "name" : "刘能" }
{ "name" : "大老乃" }
{ "name" : "王天来" }
>
可以修改显示的域的名字,但实际并未修改
> db.class1.aggregate({$project:{_id:0,NAME:'$name'}})
{ "NAME" : "刘英" }
{ "NAME" : "广坤" }
{ "NAME" : "谢大脚" }
{ "NAME" : "小梦" }
{ "NAME" : "永强" }
{ "NAME" : "玉田" }
{ "NAME" : "小刘英" }
{ "NAME" : "刘能" }
{ "NAME" : "大老乃" }
{ "NAME" : "王天来" }
$match 过滤数据
操作符的值,同find的query
> db.class1.aggregate({$match:{name:{$gt:'玉田'}}})
{ "_id" : ObjectId("5d315c6f9a271d2e4db00377"), "name" : "王天来", "faimly" : { "father" : "刘能", "mother" : "谢大脚" } }
{ "_id" : ObjectId("5d2fe2be6c7d5889fff5fb31"), "name" : "谢大脚", "hobby" : [ "跳舞", "睡觉", "喝酒", "唱歌" ], "Age" : 45, "location" : "象牙山", "gender" : "women" }
$skip 跳过前几条文档
> db.class1.aggregate({$skip:6})
{ "_id" : ObjectId("5d301d939646724b48d62ae5"), "name" : "小刘英", "hobby" : [ "学习", "看电影", "听音乐" ], "Age" : 24, "location" : "象牙山", "gender" : "women" }
{ "_id" : ObjectId("5d30261f9646724b48d62b0e"), "name" : "刘能", "Age" : 55, "hobby" : [ "喝酒", "吹牛", "打麻将" ], "telephone" : 138 }
{ "_id" : ObjectId("5d3158d59a271d2e4db00376"), "name" : "大老乃" }
{ "_id" : ObjectId("5d315c6f9a271d2e4db00377"), "name" : "王天来", "faimly" : { "father" : "刘能", "mother" : "谢大脚" } }
>
$limit显示几条文档
> db.class1.aggregate({$limit:3})
{ "_id" : ObjectId("5d2ed0c9226651eee5c79df3"), "name" : "刘英", "hobby" : [ "吃饭", "睡觉", "打豆豆" ], "Age" : 31, "location" : "象牙山", "gender" : "women" }
{ "_id" : ObjectId("5d2ee382226651eee5c79df6"), "name" : "广坤", "hobby" : [ "抽烟", "烫头" ], "Age" : 55, "location" : "象牙山", "gender" : "men" }
{ "_id" : ObjectId("5d2fe2be6c7d5889fff5fb31"), "name" : "谢大脚", "hobby" : [ "跳舞", "睡觉", "喝酒", "唱歌" ], "Age" : 45, "location" : "象牙山", "gender" : "women" }
>
$sort 排序
> db.class1.aggregate({$sort:{name:1}})
{ "_id" : ObjectId("5d30261f9646724b48d62b0e"), "name" : "刘能", "Age" : 55, "hobby" : [ "喝酒", "吹牛", "打麻将" ], "telephone" : 138 }
{ "_id" : ObjectId("5d2ed0c9226651eee5c79df3"), "name" : "刘英", "hobby" : [ "吃饭", "睡觉", "打豆豆" ], "Age" : 31, "location" : "象牙山", "gender" : "women" }
{ "_id" : ObjectId("5d3158d59a271d2e4db00376"), "name" : "大老乃" }
{ "_id" : ObjectId("5d301d939646724b48d62ae5"), "name" : "小刘英", "hobby" : [ "学习", "看电影", "听音乐" ], "Age" : 24, "location" : "象牙山", "gender" : "women" }
{ "_id" : ObjectId("5d2fe2f16c7d5889fff5fb32"), "name" : "小梦", "hobby" : [ "看电视", "睡觉", "打飞机" ], "Age" : 25, "location" : "象牙山", "gender" : "women" }
{ "_id" : ObjectId("5d2ee382226651eee5c79df6"), "name" : "广坤", "hobby" : [ "抽烟", "烫头" ], "Age" : 55, "location" : "象牙山", "gender" : "men" }
{ "_id" : ObjectId("5d2fe30a6c7d5889fff5fb33"), "name" : "永强", "hobby" : [ "看电视", "睡觉" ], "Age" : 25, "location" : "象牙山" }
{ "_id" : ObjectId("5d2fe49e6c7d5889fff5fb37"), "name" : "玉田", "hobby" : [ "打豆豆", "睡觉" ], "Age" : 30, "location" : "象牙山", "gender" : "men" }
{ "_id" : ObjectId("5d315c6f9a271d2e4db00377"), "name" : "王天来", "faimly" : { "father" : "刘能", "mother" : "谢大脚" } }
{ "_id" : ObjectId("5d2fe2be6c7d5889fff5fb31"), "name" : "谢大脚", "hobby" : [ "跳舞", "睡觉", "喝酒", "唱歌" ], "Age" : 45, "location" : "象牙山", "gender" : "women" }
>
聚合管道
将前一个聚合操作的结果,给下一个聚合操作,继续执行
db.collectionName.aggregate([聚合1,聚合2,...])
> db.class1.aggregate([{$match:{name:{$gt:'刘能'}}},{$project:{_id:0,display_name:'$name'}},{$sort:{name:-1}}])
{ "display_name" : "刘英" }
{ "display_name" : "大老乃" }
{ "display_name" : "小刘英" }
{ "display_name" : "小梦" }
{ "display_name" : "广坤" }
{ "display_name" : "永强" }
{ "display_name" : "玉田" }
{ "display_name" : "王天来" }
{ "display_name" : "谢大脚" }
>
> db.class1.aggregate([{$group:{_id:'$gender',total_num:{$sum:1}}},{$match:{$nor:[{_id:'men'},{_id:'women'}]}}])
{ "_id" : null, "total_num" : 4 }
>
固定集合
用途:日志处理或临时缓存
mongo中可以创建大小固定的集合,称之为固定集合,固定集合的性能出色,适用于很多场景
特点:1、插入速度快
2、顺序查询速度快
3、能够淘汰早期数据
4、可以控制集合空间
创建:
db.createCollection(collectionName,{capped:true,size:数值,max:数值})
size:设置固定集合的大小,单位字节(byte)
max:最多能容纳多少文档
> db.createCollection('Log',{capped:true,size:10,max:3})
> db.Log.insert({ID:'1',error:'文件错误'})
WriteResult({ "nInserted" : 1 })
> db.Log.insert({ID:'2',error:'文件错误'})
WriteResult({ "nInserted" : 1 })
> db.Log.insert({ID:'3',error:'输入错误'})
WriteResult({ "nInserted" : 1 })
> db.Log.insert({ID:'4',error:'输入错误'})
WriteResult({ "nInserted" : 1 })
> db.Log.find({},{_id:0})
{ "ID" : "2", "error" : "文件错误" }
{ "ID" : "3", "error" : "输入错误" }
{ "ID" : "4", "error" : "输入错误" }
>
文件存储
数据库存储文件的方式:
1、在数据库中以字符串的方式,存储文件在本地的路径
优点:节省数据库空间;
缺点:当数据库或文件位置发生变化,则需要相应修改数据库内容
2、将文件以二进制数据的方式,存放在数据库里
优点:文件存入数据库,只要数据库在,文件则不会丢失
缺点:当文件较大时,数据库空间占用大,提取困难
mongo中使用GridFS方法,进行大文件存储
GridFS:是mongodb中,大文件存储的一种方案,mongo中认为大于16M的文件为大文件
方案原理:
在mongo数据库中,创建两个集合,共同完成对文件的存储
fs.files:存储文件的相关信息,比如:文件名和文件类型
fs.chunks:实际存储文件的内容,以二进制方式分块存储,将大文件分为多个小块,每块占一个空间
缺点:由于多了分块和组合的过程,文件存储和读取慢
mongofiles -d dbname put file
yangrui@ubuntu:~/day9$ mongofiles -d grid_db put picture.jpg
2019-07-22T13:51:02.243+0800 connected to: localhost
added file: picture.jpg
如果数据库不存在,则自动创建。
> db.fs.files.find()
{ "_id" : ObjectId("5d354ec61d41c850f13fbecf"), "chunkSize" : 261120, "uploadDate" : ISODate("2019-07-22T05:51:02.508Z"), "length" : 6487058, "md5" : "df0a525105fc3b62bcddd62a52ed4926", "filename" : "picture.jpg" }
>
>db.fs.chunks.find()
{ "_id" : ObjectId("5d354ec61d41c850f13fbee3"), "files_id" : ObjectId("5d354ec61d41c850f13fbecf"), "n" : 19, "data" : BinData(0,"d/tY8sjsfXrSdmrhzXZIjzIxudxcFOSV6D39KkjkbzU8zDJt4I6sT04/z0qI71YoB5RCZBzlQ
fs.files和fs.chunks是通过objectid产生关联
从mongodb获取文件,存放到Linux
yangrui@ubuntu:~$ mongofiles -d grid_db get picture.jpg
优点:存储方便,方便数据库移植,对文件个数没有太多限制;
缺点:读写效率低
游标:
为什么使用游标:
1、防止网络拥塞,造成数据传输慢
2、提高用户解析体验,可以后端解析
> use stu
switched to db stu
> var cursor=db.class1.find() 创建游标
cursor.hasNext() 查看是否有下一个数据
> cursor.next() 获取下一个数据
{
"_id" : ObjectId("5d2ed0c9226651eee5c79df3"),
"name" : "刘英",
"hobby" : [
"吃饭",
"睡觉",
"打豆豆"
],
"Age" : 31,
"location" : "象牙山",
"gender" : "women"
}
>
通过Python操作MongoDB数据库
安装
sudo pip3 install pymongo
yangrui@ubuntu:~$ sudo apt install python3-pip安装pip3
yangrui@ubuntu:~$ sudo pip3 install pymongo
操作步骤:
1、创建mongo数据库的链接对象
conn=MongoClient('localhost',27017)
2、生成数据库对象
db=conn.stu
3、生成集合对象
my_set=db.class1
4、增删改查索引聚合操作
插入数据:
insert()
insert_many()
insert_one()
save()
from pymongo import MongoClient
#创建链接对象
conn=MongoClient('localhost',27017)
#创建集合对象和数据库对象
db=conn.stu
my_set=db.class1
#my_set.insert({'name':'刘大脑袋','Age':50,'hobby':['谢大脚','喝酒']})
#插入多个文档
#my_set.insert_many([{'name':'王大拿','Age':55},{'name':'老王','Age':76}])
#my_set.insert_one({'name':'隔壁老刘','hobby':['刘英','二人转']})
#my_set.save({'name':'赵四'})
删除数据
remove({},multi=True)
multi:默认为true ,表示删除所有符合条件的数据
设置为False 表示只删除一条
#my_set.remove({'name':'赵四'})
my_set.remove()删除所有文档
数据查找
find()
功能:查找数据库内容
参数:和mongo shell相同
返回值:返回一个游标
迭代器
cursor
next()
count()
limit()
skip()
sort()
mongoshell--------sort({'name':1})
pymongo------------sort([('name',1)])
***进行排序时,游标要确保没有被访问过
修改操作
update()
参数和mongo shell中的update相同
update_many()
匹配到多个文档时,全部修改
update_one()
只修改匹配到的第一条文档
如果要修改的文档没有匹配,通过upsert=True,直接添加一条文档
my_set.update({'name':'刘能'},{'$set':{'Age':'45','hobby':['跳舞','吹牛']}},upsert=True)
**编程过程中mongo的数据类型null,可以用python中的None替代