pymongo使用经验
MongoClient不要放到for循环里面,放到外面作为全局变量,速度快5倍。 #!/usr/bin/env python # -*- coding:utf-8 -*- # @Date:2021/9/7 # @Time:13:46 import time from pymongo import MongoClient from database.simplelogger import SimpleLogger …
·
1、MongoClient不要放到for循环里面,放到外面作为全局变量,速度快5倍;
2、出现 pymongo.errors.CursorNotFound: Cursor not found 错误,可能是由于创建了过多的数据库连接导致的;
3、no_cursor_timeout=True参数对于单次查询影响不大。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Date:2021/9/7
# @Time:13:46
import time
from pymongo import MongoClient
from database.simplelogger import SimpleLogger
from readwrite.writefile import FileWriter
# Module-level logger obtained from the project's SimpleLogger helper.
logger = SimpleLogger.getLogger(u"msgdeallogger")
# Timing sample noted by the author (a bare string expression with no runtime
# effect). NOTE(review): which configuration it was measured on is not stated.
"""1000 records consuming 43.94992995262146 s"""
# Shared FileWriter instance; main() uses it to write the Excel output.
FW = FileWriter()
# Do not create MongoClient inside a for loop; keep it as a module-level
# global instead -- the author measured this to be about 5x faster.
db_client = MongoClient('mongodb://user:password@ip:port/')
def main(database, collection, excel_path='company333.xlsx'):
    """Export every non-deleted company record to an Excel sheet.

    Reads all documents whose ``is_deleted`` field is not ``True`` from
    ``db_client[database][collection]``, resolves each record's company name
    with ``find_company_name``, converts its type codes with ``transfer_key``
    and writes one row per record through ``FW.excel_write_clean``.

    Nothing is read or written when ``check_mongodb`` returns a falsy value.

    :param database: name of the MongoDB database to read from
    :param collection: name of the collection inside ``database``
    :param excel_path: path of the Excel file to write; defaults to the file
        name that was previously hard-coded
    :return: None
    """
    # NOTE(review): check_mongodb is neither defined nor imported in the
    # visible part of this file -- confirm where it is provided.
    if not check_mongodb(db_client, database, collection):
        return

    source = db_client[database][collection]
    cursor = source.find({"is_deleted": {"$ne": True}}, no_cursor_timeout=True)
    try:
        # Pull everything into memory up front so the cursor is not left
        # open while each record triggers a further lookup below.
        records = list(cursor)
    finally:
        # A cursor opened with no_cursor_timeout=True is never reaped by the
        # server, so close it explicitly even if fetching fails.
        cursor.close()

    rows = []
    started = time.time()
    for processed, record in enumerate(records):
        if processed == 1000:
            # One-off timing report after the first 1000 records.
            print('1000 records consuming {} s'.format(time.time() - started))
        cid = record['company_id']
        company_type_list = record['company_types']
        company_name, is_deleted = find_company_name(cid)
        company_type = transfer_key(company_type_list)
        rows.append([cid, company_name, is_deleted, company_type])

    columns = ["cid", "company_name", "is_deleted", "company_type"]
    FW.excel_write_clean(excel_path=excel_path, sheet_name='Sheet1', insert_list=rows, columns=columns)
def find_company_name(cid):
    """Look up a company's name and deletion flag by its ``cid``.

    ``find()`` returns a cursor object, whereas ``find_one()`` returns a
    single document (a dict) or ``None``. Only the first match is used here,
    so ``find_one()`` is called instead of materialising a whole cursor.

    :param cid: value matched against the ``cid`` field of the collection
    :return: tuple ``(company_name, is_deleted)``. A placeholder name and
        ``True`` are returned when ``check_mongodb`` fails, when no document
        matches, or when the document lacks ``name`` or ``is_deleted``.
    """
    database = 'xxx'
    collection = 'yyy'
    # Fallback pair ("no such company", deleted) used for every miss.
    not_found = ('查无此公司', True)

    # Always return a 2-tuple: the caller unpacks the result, so returning
    # nothing on a failed check would crash it.
    # NOTE(review): check_mongodb is neither defined nor imported in the
    # visible part of this file -- confirm where it is provided.
    if not check_mongodb(db_client, database, collection):
        return not_found

    document = db_client[database][collection].find_one({"cid": cid})
    if document is None:
        return not_found

    try:
        return document['name'], document['is_deleted']
    except KeyError:
        # Incomplete documents are treated like missing ones.
        return not_found
def transfer_key(company_type_list):
    """Translate company type codes into a comma-separated label string.

    Each code is converted with ``str()`` and looked up in the built-in
    code-to-label table. Codes without an entry are kept visible in the
    output as a "label does not exist" marker followed by the code.

    :param company_type_list: iterable of type codes (any type accepted by
        ``str()``); ``None`` is treated as an empty list
    :return: the labels joined with ``','``; an empty string for no codes
    """
    type_dict = {u"80": u"金融"}
    if company_type_list is None:
        return ''

    labels = []
    for type_code in company_type_list:
        code = str(type_code)
        # dict.get replaces the former bare ``except:`` around the lookup,
        # which would also have hidden unrelated errors.
        labels.append(type_dict.get(code, "不存在标签" + code))
    return ','.join(labels)
if __name__ == '__main__':
    # Script entry point: run the export with the placeholder database and
    # collection names used in this example.
    main(database='database', collection='company_xxx')
https://blog.csdn.net/qq_42470170/article/details/121017679
更多推荐
所有评论(0)