pymongo的聚合操作
数据类型样式
/* 1 */ { "_id" : ObjectId("5e5a32fe2a89d7c2fc05b9fc"), "user_id" : "1", "amount" : 500, "status" : "A" } /* 2 */ { "_id" : ObjectId("5e5a33092a89d7c2fc05ba07"), "user_id" : "1", "amount" : 250, "status" : "A" } /* 3 */ { "_id" : ObjectId("5e5a33152a89d7c2fc05ba13"), "user_id" : "2", "amount" : 200, "status" : "A" } /* 4 */ { "_id" : ObjectId("5e5a33262a89d7c2fc05ba1c"), "user_id" : "1", "amount" : 300, "status" : "B" }
$match:过滤数据,返回符合条件的数据
def aggregate(self):
    """Print the ``test_info`` documents whose ``status`` equals ``"A"``.

    Runs a single-stage ``$match`` pipeline, then prints the type of the
    returned cursor, the cursor object itself, and every document it yields.
    """
    pipeline = [{"$match": {"status": "A"}}]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for document in cursor:
        print(document)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FF1648>
# {'_id': ObjectId('5e5a32fe2a89d7c2fc05b9fc'), 'user_id': '1', 'amount': 500, 'status': 'A'}
# {'_id': ObjectId('5e5a33092a89d7c2fc05ba07'), 'user_id': '1', 'amount': 250, 'status': 'A'}
# {'_id': ObjectId('5e5a33152a89d7c2fc05ba13'), 'user_id': '2', 'amount': 200, 'status': 'A'}
$group:将过滤后的数据进行分组
def aggregate_match_group(self):
    """Filter ``test_info`` by ``status == "A"`` and group the rest by ``user_id``.

    Prints the cursor type, the cursor, and one document per distinct
    ``user_id`` (each document only carries the ``_id`` group key).
    """
    match_stage = {"$match": {"status": "A"}}
    group_stage = {"$group": {"_id": "$user_id"}}
    cursor = self.db["test_info"].aggregate([match_stage, group_stage])
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FEF708>
# {'_id': '2'}
# {'_id': '1'}
# 注意: {"$group":{"_id":"$user_id"}} 中分组字段的key必须是 _id;换成其他key(例如 user_id)或者自己重新命名key都会报错:pymongo.errors.OperationFailure: The field 'user_id' must be an accumulator object。原因是 $group 中除 _id 以外的字段都必须是累加器表达式(例如 $sum、$push)。
分组后,我们要求,每组的amount的总和是多少?
def aggregate_match_group(self):
    """Sum ``amount`` per ``user_id`` over the ``status == "A"`` documents.

    The ``$group`` stage keys on ``user_id`` and accumulates ``amount`` into
    ``amount_total`` with ``$sum``. Prints the cursor type, the cursor, and
    each grouped document.
    """
    pipeline = [
        {"$match": {"status": "A"}},
        {"$group": {"_id": "$user_id", "amount_total": {"$sum": "$amount"}}},
    ]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FECD48>
# {'_id': '2', 'amount_total': 200}
# {'_id': '1', 'amount_total': 750}
# 注意:虽然分了两组,但其中 user_id 为 '1' 的那一组实际包含了两条文档(500 + 250 = 750)
怎么才能显示每个分组里面成员(文档)的数量呢?
def aggregate_match_group(self):
    """Count the ``status == "A"`` documents in each ``user_id`` group.

    ``{"$sum": 1}`` adds one per grouped document, so ``part_quantity`` is
    the number of members in the group. Prints the cursor type, the cursor,
    and each grouped document.
    """
    only_status_a = {"$match": {"status": "A"}}
    count_per_user = {"$group": {"_id": "$user_id", "part_quantity": {"$sum": 1}}}
    cursor = self.db["test_info"].aggregate([only_status_a, count_per_user])
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FF0E08>
# {'_id': '2', 'part_quantity': 1}
# {'_id': '1', 'part_quantity': 2}
# 注意: {"$sum":1} 表示组内每有一条文档就累加1,因此结果就是组内文档的数量;如果写成 {"$sum":2},则每条文档累加2,结果就变成了 {'_id': '1', 'part_quantity': 4}!
如果我们想知道整个文档里面符合$match过滤条件的文档有多少个呢?
def aggregate_match_group(self):
    """Count all ``test_info`` documents whose ``status`` is ``"A"``.

    Grouping on ``_id: None`` puts every matched document into a single
    group, so ``part_quantity`` is the total number of matches. Prints the
    cursor type, the cursor, and the single resulting document.
    """
    pipeline = [
        {"$match": {"status": "A"}},
        {"$group": {"_id": None, "part_quantity": {"$sum": 1}}},
    ]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for summary in cursor:
        print(summary)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FEBFC8>
# {'_id': None, 'part_quantity': 3}
如果想知道整个collection里面有多少个文档呢?
def aggregate_match_group(self):
    """Count every document in the ``test_info`` collection.

    The empty ``$match`` filter lets all documents through and the
    ``_id: None`` group collapses them into one counted group. Prints the
    cursor type, the cursor, and the single resulting document.
    """
    match_everything = {"$match": {}}
    count_all = {"$group": {"_id": None, "part_quantity": {"$sum": 1}}}
    cursor = self.db["test_info"].aggregate([match_everything, count_all])
    print(type(cursor))
    print(cursor)
    for summary in cursor:
        print(summary)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FF1D48>
# {'_id': None, 'part_quantity': 4}
将$match过滤条件设置为{ },就可以作用于整个collection;$group分组条件"_id":None,表示不按任何字段分组,也就是把整个collection的所有文档当作一组!
/* 1 */ { "_id" : ObjectId("5e5a41b22a89d7c2fc05c1c5"), "user_id" : "1", "name" : "科比", "hometown" : "费城", "age" : "100", "gender" : "男" } /* 2 */ { "_id" : ObjectId("5e5a41db2a89d7c2fc05c1dc"), "user_id" : "2", "name" : "纳什", "hometown" : "加拿大", "age" : "100", "gender" : "男" } /* 3 */ { "_id" : ObjectId("5e5a42022a89d7c2fc05c1f1"), "user_id" : "3", "name" : "蔡徐坤", "hometown" : "不详", "age" : "100", "gender" : "女" } /* 4 */ { "_id" : ObjectId("5e5a42252a89d7c2fc05c204"), "user_id" : "4", "name" : "gigi", "hometown" : "洛杉矶", "age" : "100", "gender" : "女" }
怎么获取不同性别的人的所有user_id呢?
def aggregate_match_group(self):
    """Collect the ``user_id`` values of ``test_info`` documents per ``gender``.

    ``$push`` appends each document's ``user_id`` to the list stored under
    the ``user_id`` key of its gender group. Prints the cursor type, the
    cursor, and each grouped document.
    """
    pipeline = [
        {"$match": {}},
        {"$group": {"_id": "$gender", "user_id": {"$push": "$user_id"}}},
    ]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# {'_id': '女', 'user_id': ['3', '4']}
# {'_id': '男', 'user_id': ['1', '2']}
# 注意:$push:将结果追加到列表中
def aggregate_match_group(self):
    """Collect the full ``test_info`` documents of each ``gender`` group.

    ``$push`` with ``$$ROOT`` appends the whole source document, rather than
    a single field, to the list stored under the ``user_id`` key. Prints the
    cursor type, the cursor, and each grouped document.
    """
    match_everything = {"$match": {}}
    collect_documents = {"$group": {"_id": "$gender", "user_id": {"$push": "$$ROOT"}}}
    cursor = self.db["test_info"].aggregate([match_everything, collect_documents])
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FF0DC8>
# {'_id': '女', 'user_id': [{'_id': ObjectId('5e5a42022a89d7c2fc05c1f1'), 'user_id': '3', 'name': '蔡徐坤', 'hometown': '不详', 'age': '100', 'gender': '女'}, {'_id': ObjectId('5e5a42252a89d7c2fc05c204'), 'user_id': '4', 'name': 'gigi', 'hometown': '洛杉矶', 'age': '100', 'gender': '女'}]}
# {'_id': '男', 'user_id': [{'_id': ObjectId('5e5a41b22a89d7c2fc05c1c5'), 'user_id': '1', 'name': '科比', 'hometown': '费城', 'age': '100', 'gender': '男'}, {'_id': ObjectId('5e5a41db2a89d7c2fc05c1dc'), 'user_id': '2', 'name': '纳什', 'hometown': '加拿大', 'age': '100', 'gender': '男'}]}
# 注意:$$ROOT 表示当前的整个文档,{"$push":"$$ROOT"} 会将整个文档放入列表中
$group分组条件的 "_id" 多条件分组
def aggregate_match_group(self):
    """Group ``test_info`` by a compound key built from five fields.

    The ``_id`` of ``$group`` is itself a document combining ``user_id``,
    ``name``, ``hometown``, ``age`` and ``gender``; documents fall into the
    same group only when all five values agree. Prints the cursor type, the
    cursor, and each grouped document.
    """
    compound_key = {
        "user_id": "$user_id",
        "name": "$name",
        "hometown": "$hometown",
        "age": "$age",
        "gender": "$gender",
    }
    pipeline = [{"$match": {}}, {"$group": {"_id": compound_key}}]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# {'_id': {'user_id': '4', 'name': 'gigi', 'hometown': '洛杉矶', 'age': '100', 'gender': '女'}}
# {'_id': {'user_id': '3', 'name': '蔡徐坤', 'hometown': '不详', 'age': '100', 'gender': '女'}}
# {'_id': {'user_id': '2', 'name': '纳什', 'hometown': '加拿大', 'age': '100', 'gender': '男'}}
# {'_id': {'user_id': '1', 'name': '科比', 'hometown': '费城', 'age': '100', 'gender': '男'}}
def aggregate_match_group(self):
    """Group ``test_info`` by the combination of ``name``, ``age`` and ``gender``.

    Prints the cursor type, the cursor, and one document per distinct
    (name, age, gender) combination.
    """
    compound_key = {"name": "$name", "age": "$age", "gender": "$gender"}
    pipeline = [{"$match": {}}, {"$group": {"_id": compound_key}}]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002D4EE48>
# {'_id': {'name': 'gigi', 'age': '100', 'gender': '女'}}
# {'_id': {'name': '蔡徐坤', 'age': '100', 'gender': '女'}}
# {'_id': {'name': '纳什', 'age': '100', 'gender': '男'}}
# {'_id': {'name': '科比', 'age': '100', 'gender': '男'}}
多条件分组,并统计数量
def aggregate_match_group(self):
    """Count ``test_info`` documents per (age, gender) combination.

    The compound group key and the counter use Chinese field names
    ("年龄", "性别", "人数"), which appear verbatim in the output documents.
    Prints the cursor type, the cursor, and each grouped document.
    """
    group_key = {"年龄": "$age", "性别": "$gender"}
    pipeline = [
        {"$match": {}},
        {"$group": {"_id": group_key, "人数": {"$sum": 1}}},
    ]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FECD88>
# {'_id': {'年龄': '100', '性别': '女'}, '人数': 2}
# {'_id': {'年龄': '100', '性别': '男'}, '人数': 2}
对查询数据进行修改
/* 1 */ { "_id" : ObjectId("5e5a41b22a89d7c2fc05c1c5"), "user_id" : "1", "name" : "科比", "hometown" : "费城", "age" : "42", "gender" : "男" } /* 2 */ { "_id" : ObjectId("5e5a41db2a89d7c2fc05c1dc"), "user_id" : "2", "name" : "纳什", "hometown" : "加拿大", "age" : "40", "gender" : "男" } /* 3 */ { "_id" : ObjectId("5e5a42022a89d7c2fc05c1f1"), "user_id" : "3", "name" : "蔡徐坤", "hometown" : "不详", "age" : "3", "gender" : "女" } /* 4 */ { "_id" : ObjectId("5e5a42252a89d7c2fc05c204"), "user_id" : "4", "name" : "gigi", "hometown" : "洛杉矶", "age" : "14", "gender" : "女" }
获取年龄大于3岁,小于42岁的人的信息(下面的示例先只演示"大于3岁"这一个条件;如果要同时限制小于42岁,可以写成 {"age":{"$gt":3,"$lt":42}})
$match
def aggregate_match_group(self):
    """Match ``test_info`` documents whose ``age`` is ``$gt`` the string ``"3"``.

    The comparison value is deliberately a ``str`` here, matching the sample
    data in which ``age`` is stored as a string. Prints the cursor type, the
    cursor, and each matched document.
    """
    older_than_three = {"$match": {"age": {"$gt": "3"}}}
    cursor = self.db["test_info"].aggregate([older_than_three])
    print(type(cursor))
    print(cursor)
    for person in cursor:
        print(person)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FF1C48>
# {'_id': ObjectId('5e5a41b22a89d7c2fc05c1c5'), 'user_id': '1', 'name': '科比', 'hometown': '费城', 'age': '42', 'gender': '男'}
# {'_id': ObjectId('5e5a41db2a89d7c2fc05c1dc'), 'user_id': '2', 'name': '纳什', 'hometown': '加拿大', 'age': '40', 'gender': '男'}
# 查询错误:gigi的年龄(14)也大于3,却没有显示。原因是age字段是str类型,字符串按字符顺序逐位比较,"14" 的第一个字符 "1" 小于 "3",所以 "14" 不满足 $gt "3"。我们将数据里面的年龄类型从str换成int类型,继续查看
def aggregate_match_group(self):
    """Match ``test_info`` documents whose ``age`` is ``$gt`` the integer ``3``.

    Prints the cursor type, the cursor, and each matched document.
    """
    pipeline = [{"$match": {"age": {"$gt": 3}}}]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for person in cursor:
        print(person)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FF1C88>
# {'_id': ObjectId('5e5a41b22a89d7c2fc05c1c5'), 'user_id': '1', 'name': '科比', 'hometown': '费城', 'age': 42, 'gender': '男'}
# {'_id': ObjectId('5e5a41db2a89d7c2fc05c1dc'), 'user_id': '2', 'name': '纳什', 'hometown': '加拿大', 'age': 40, 'gender': '男'}
# {'_id': ObjectId('5e5a42252a89d7c2fc05c204'), 'user_id': '4', 'name': 'gigi', 'hometown': '洛杉矶', 'age': 14, 'gender': '女'}
# 查询正确:因此当进行数值大小比较的操作时,注意字段必须存储为数值类型(例如int),并且查询条件里的值也要使用相同的数值类型
获取年龄大于3岁,不同性别的人数
def aggregate_match_group(self):
    """Count, per ``gender``, the ``test_info`` documents with ``age`` above 3.

    The count is stored under the Chinese key "数量". Prints the cursor
    type, the cursor, and each grouped document.
    """
    older_than_three = {"$match": {"age": {"$gt": 3}}}
    count_by_gender = {"$group": {"_id": "$gender", "数量": {"$sum": 1}}}
    cursor = self.db["test_info"].aggregate([older_than_three, count_by_gender])
    print(type(cursor))
    print(cursor)
    for group in cursor:
        print(group)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FF1C88>
# {'_id': '女', '数量': 1}
# {'_id': '男', '数量': 2}
$project:类似于find里面的projection(字段投影)参数,需要显示的字段设置为1,不显示的字段设置为0
def aggregate_project(self):
    """Print only ``name`` and ``hometown`` of every ``test_info`` document.

    The ``$project`` stage keeps the two fields set to ``1`` and drops
    ``_id`` by setting it to ``0``. Prints the cursor type, the cursor, and
    each projected document.
    """
    visible_fields = {"_id": 0, "name": 1, "hometown": 1}
    cursor = self.db["test_info"].aggregate([{"$project": visible_fields}])
    print(type(cursor))
    print(cursor)
    for row in cursor:
        print(row)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FE9F88>
# {'name': '科比', 'hometown': '费城'}
# {'name': '纳什', 'hometown': '加拿大'}
# {'name': '蔡徐坤', 'hometown': '不详'}
# {'name': 'gigi', 'hometown': '洛杉矶'}
# 注意:其他字段没有赋值1就不显示,但是_id字段除外,不设置,默认显示
def aggregate_project(self):
    """Count ``test_info`` documents per ``gender`` and project the result.

    ``$group`` produces ``_id`` (the gender) and ``quantity``; the following
    ``$project`` stage explicitly keeps both fields. Prints the cursor type,
    the cursor, and each resulting document.
    """
    pipeline = [
        {"$group": {"_id": "$gender", "quantity": {"$sum": 1}}},
        {"$project": {"_id": 1, "quantity": 1}},
    ]
    cursor = self.db["test_info"].aggregate(pipeline)
    print(type(cursor))
    print(cursor)
    for row in cursor:
        print(row)


# Output shown in the original article:
# {'_id': '女', 'quantity': 2}
# {'_id': '男', 'quantity': 2}
$sort:排序命令
年龄从小到大返回排序好的数据
def aggregate_sort(self):
    """Print all ``test_info`` documents ordered by ``age`` ascending.

    A ``$sort`` value of ``1`` requests ascending order. Prints the cursor
    type, the cursor, and each document in sorted order.
    """
    youngest_first = {"$sort": {"age": 1}}
    cursor = self.db["test_info"].aggregate([youngest_first])
    print(type(cursor))
    print(cursor)
    for person in cursor:
        print(person)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000003012148>
# {'_id': ObjectId('5e5a42022a89d7c2fc05c1f1'), 'user_id': '3', 'name': '蔡徐坤', 'hometown': '不详', 'age': 3, 'gender': '女'}
# {'_id': ObjectId('5e5a42252a89d7c2fc05c204'), 'user_id': '4', 'name': 'gigi', 'hometown': '洛杉矶', 'age': 14, 'gender': '女'}
# {'_id': ObjectId('5e5a41db2a89d7c2fc05c1dc'), 'user_id': '2', 'name': '纳什', 'hometown': '加拿大', 'age': 40, 'gender': '男'}
# {'_id': ObjectId('5e5a41b22a89d7c2fc05c1c5'), 'user_id': '1', 'name': '科比', 'hometown': '费城', 'age': 42, 'gender': '男'}
年龄从大到小返回排序好的数据
def aggregate_sort(self):
    """Print all ``test_info`` documents ordered by ``age`` descending.

    A ``$sort`` value of ``-1`` requests descending order. Prints the cursor
    type, the cursor, and each document in sorted order.
    """
    oldest_first = {"$sort": {"age": -1}}
    cursor = self.db["test_info"].aggregate([oldest_first])
    print(type(cursor))
    print(cursor)
    for person in cursor:
        print(person)


# Output shown in the original article:
# <class 'pymongo.command_cursor.CommandCursor'>
# <pymongo.command_cursor.CommandCursor object at 0x0000000002FE5F88>
# {'_id': ObjectId('5e5a41b22a89d7c2fc05c1c5'), 'user_id': '1', 'name': '科比', 'hometown': '费城', 'age': 42, 'gender': '男'}
# {'_id': ObjectId('5e5a41db2a89d7c2fc05c1dc'), 'user_id': '2', 'name': '纳什', 'hometown': '加拿大', 'age': 40, 'gender': '男'}
# {'_id': ObjectId('5e5a42252a89d7c2fc05c204'), 'user_id': '4', 'name': 'gigi', 'hometown': '洛杉矶', 'age': 14, 'gender': '女'}
# {'_id': ObjectId('5e5a42022a89d7c2fc05c1f1'), 'user_id': '3', 'name': '蔡徐坤', 'hometown': '不详', 'age': 3, 'gender': '女'}
数据类型
/* 10 */ { "_id" : ObjectId("5e58c4102a89d7c2fc051ba4"), "vaccine_name" : "破伤风", "vaccine_id" : "2", "user_id" : "110", "farm_id" : "110", "fold_id" : "110", "farm_name" : "110牧场", "fold_name" : "110圈舍", "animal_number" : "133", "equipment_number" : "133", "type" : "goat", "inject_quantity" : "100", "vaccine_time" : ISODate("2020-06-15T15:45:22.000Z"), "is_delete" : "0" } /* 11 */ { "_id" : ObjectId("5e5a510d2a89d7c2fc05cac7"), "vaccine_name" : "破伤风", "vaccine_id" : "2", "user_id" : "110", "farm_id" : "110", "fold_id" : "110", "farm_name" : "110牧场", "fold_name" : "110圈舍", "animal_number" : "133", "equipment_number" : "133", "type" : "goat", "inject_quantity" : "100", "vaccine_time" : ISODate("2020-07-15T15:45:22.000Z"), "is_delete" : "0" } /* 12 */ { "_id" : ObjectId("5e5a511b2a89d7c2fc05cad2"), "vaccine_name" : "破伤风", "vaccine_id" : "2", "user_id" : "110", "farm_id" : "110", "fold_id" : "110", "farm_name" : "110牧场", "fold_name" : "110圈舍", "animal_number" : "133", "equipment_number" : "133", "type" : "goat", "inject_quantity" : "100", "vaccine_time" : ISODate("2020-08-15T15:45:22.000Z"), "is_delete" : "0" } /* 13 */ { "_id" : ObjectId("5e5a51282a89d7c2fc05cada"), "vaccine_name" : "破伤风", "vaccine_id" : "2", "user_id" : "110", "farm_id" : "110", "fold_id" : "110", "farm_name" : "110牧场", "fold_name" : "110圈舍", "animal_number" : "133", "equipment_number" : "133", "type" : "goat", "inject_quantity" : "100", "vaccine_time" : ISODate("2020-10-15T15:45:22.000Z"), "is_delete" : "0" } /* 14 */ { "_id" : ObjectId("5e5a51422a89d7c2fc05caec"), "vaccine_name" : "破伤风", "vaccine_id" : "2", "user_id" : "110", "farm_id" : "110", "fold_id" : "110", "farm_name" : "110牧场", "fold_name" : "110圈舍", "animal_number" : "133", "equipment_number" : "133", "type" : "goat", "inject_quantity" : "100", "vaccine_time" : ISODate("2020-11-15T15:45:22.000Z"), "is_delete" : "0" } /* 15 */ { "_id" : ObjectId("5e5a514d2a89d7c2fc05caf5"), "vaccine_name" : "破伤风", "vaccine_id" : "2", "user_id" : "110", 
"farm_id" : "110", "fold_id" : "110", "farm_name" : "110牧场", "fold_name" : "110圈舍", "animal_number" : "133", "equipment_number" : "133", "type" : "goat", "inject_quantity" : "100", "vaccine_time" : ISODate("2020-12-15T15:45:22.000Z"), "is_delete" : "0" }
需求:获取equipment_number=133(且type为goat)的数据,按照vaccine_time时间倒序排列,返回数据
def get_all_by_time_object(self, collection):
    """Print matching documents of ``collection`` sorted by a time-typed field.

    ``vaccine_time`` is stored as a date value such as
    ``ISODate("2020-12-15T15:45:22.000Z")``. Documents with
    ``equipment_number == "133"`` and ``type == "goat"`` are selected and
    printed newest first. Nothing happens when ``self.connect_result`` is
    falsy.

    Args:
        collection: Name of the collection to query on ``self.db``.
    """
    if not self.connect_result:
        return
    pipeline = [
        {"$match": {"equipment_number": "133", "type": "goat"}},
        {"$sort": {"vaccine_time": -1}},
    ]
    for record in self.db[collection].aggregate(pipeline):
        print(record)


# Output shown in the original article:
# {'_id': ObjectId('5e5a514d2a89d7c2fc05caf5'), 'vaccine_name': '破伤风', 'vaccine_id': '2', 'user_id': '110', 'farm_id': '110', 'fold_id': '110', 'farm_name': '110牧场', 'fold_name': '110圈舍', 'animal_number': '133', 'equipment_number': '133', 'type': 'goat', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 12, 15, 15, 45, 22), 'is_delete': '0'}
# {'_id': ObjectId('5e5a51422a89d7c2fc05caec'), 'vaccine_name': '破伤风', 'vaccine_id': '2', 'user_id': '110', 'farm_id': '110', 'fold_id': '110', 'farm_name': '110牧场', 'fold_name': '110圈舍', 'animal_number': '133', 'equipment_number': '133', 'type': 'goat', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 11, 15, 15, 45, 22), 'is_delete': '0'}
# {'_id': ObjectId('5e5a51282a89d7c2fc05cada'), 'vaccine_name': '破伤风', 'vaccine_id': '2', 'user_id': '110', 'farm_id': '110', 'fold_id': '110', 'farm_name': '110牧场', 'fold_name': '110圈舍', 'animal_number': '133', 'equipment_number': '133', 'type': 'goat', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 10, 15, 15, 45, 22), 'is_delete': '0'}
# {'_id': ObjectId('5e5a511b2a89d7c2fc05cad2'), 'vaccine_name': '破伤风', 'vaccine_id': '2', 'user_id': '110', 'farm_id': '110', 'fold_id': '110', 'farm_name': '110牧场', 'fold_name': '110圈舍', 'animal_number': '133', 'equipment_number': '133', 'type': 'goat', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 8, 15, 15, 45, 22), 'is_delete': '0'}
# {'_id': ObjectId('5e5a510d2a89d7c2fc05cac7'), 'vaccine_name': '破伤风', 'vaccine_id': '2', 'user_id': '110', 'farm_id': '110', 'fold_id': '110', 'farm_name': '110牧场', 'fold_name': '110圈舍', 'animal_number': '133', 'equipment_number': '133', 'type': 'goat', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 7, 15, 15, 45, 22), 'is_delete': '0'}
# {'_id': ObjectId('5e58c4102a89d7c2fc051ba4'), 'vaccine_name': '破伤风', 'vaccine_id': '2', 'user_id': '110', 'farm_id': '110', 'fold_id': '110', 'farm_name': '110牧场', 'fold_name': '110圈舍', 'animal_number': '133', 'equipment_number': '133', 'type': 'goat', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 6, 15, 15, 45, 22), 'is_delete': '0'}
过滤掉一些字段,选择性显示需要的字段
def get_all_by_time_object(self, collection):
    """Print selected fields of matching documents, newest ``vaccine_time`` first.

    ``vaccine_time`` is stored as a date value such as
    ``ISODate("2020-12-15T15:45:22.000Z")``. Documents with
    ``equipment_number == "133"`` and ``type == "goat"`` are sorted by
    ``vaccine_time`` descending, then reduced by ``$project`` to
    ``animal_number``, ``inject_quantity``, ``vaccine_time`` and
    ``vaccine_name`` (``_id`` is hidden). Nothing happens when
    ``self.connect_result`` is falsy.

    Args:
        collection: Name of the collection to query on ``self.db``.
    """
    if not self.connect_result:
        return
    visible_fields = {
        "_id": 0,
        "animal_number": 1,
        "inject_quantity": 1,
        "vaccine_time": 1,
        "vaccine_name": 1,
    }
    pipeline = [
        {"$match": {"equipment_number": "133", "type": "goat"}},
        {"$sort": {"vaccine_time": -1}},
        {"$project": visible_fields},
    ]
    for record in self.db[collection].aggregate(pipeline):
        print(record)


# Output shown in the original article:
# {'vaccine_name': '破伤风', 'animal_number': '133', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 12, 15, 15, 45, 22)}
# {'vaccine_name': '破伤风', 'animal_number': '133', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 11, 15, 15, 45, 22)}
# {'vaccine_name': '破伤风', 'animal_number': '133', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 10, 15, 15, 45, 22)}
# {'vaccine_name': '破伤风', 'animal_number': '133', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 8, 15, 15, 45, 22)}
# {'vaccine_name': '破伤风', 'animal_number': '133', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 7, 15, 15, 45, 22)}
# {'vaccine_name': '破伤风', 'animal_number': '133', 'inject_quantity': '100', 'vaccine_time': datetime.datetime(2020, 6, 15, 15, 45, 22)}
# TODO 待续
#