去游标
mongo
游标机制:
在遍历全表、集合的情况下,当表、集合本身在增量时,游
刚开始是一致的,后续,就有效增量为0
1094295 / 1300000 ---- {'_id': ObjectId('5b03c2a99341f521755dd7c1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:47
1094295 / 1305000 ---- {'_id': ObjectId('5b03cc479341f521755deb49')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:49
1094295 / 1310000 ---- {'_id': ObjectId('5b03d61a9341f521755dfed1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:50
1094295 / 1315000 ---- {'_id': ObjectId('5b03d6249341f521755e1259')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:52
1094295 / 1320000 ---- {'_id': ObjectId('5b03e79d9341f521755e25e1')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:54
1094295 / 1325000 ---- {'_id': ObjectId('5b05107d9341f521755e3969')}
start:2018-11-23 17:16:03now:2018-11-23 17:20:55
from ProjectUtil.usingModuleTOMODIFY import getNow, mysql_write, mysql_fetch, time, randomSleep, return_logging import os, random from pymongo import MongoClient ''' logging INIT ''' this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os.sep)[ -1] f_log = '{}{}{}'.format(time.strftime('%Y%m%d', time.localtime(time.time())), this_file_name, '.log') logging = return_logging(f_log) # 打开mongo连接 host, username, password = '10.14.14.52', 'ain', 'adm' uri = "mongodb://%s:%s@%s" % (username, password, host,) mongo_ask_id_f = 'mongo_ask_id.txt' c = 0 start_ = getNow() os.remove(mongo_ask_id_f) id_l = [] while True: try: mongo_client = MongoClient(uri) db = mongo_client.superpub c_ask = db.ask # c_similar_keyword = db.similar_keyword cursor = c_ask.find({}, {'_id': 1}) while cursor.alive: for doc in cursor: c += 1 # 猜测游标机制 # 使得游标下移速度不小于数据增长速度(该表只增) if c % 100 == 0: # print(c,'猜测游标机制') continue id_ = doc['_id'] with open(mongo_ask_id_f, 'a', encoding='utf-8') as fa: s = '{} '.format(id_) fa.write(s) if c % 1000 == 0: print('----------------------------',c) with open(mongo_ask_id_f, 'r', encoding='utf-8') as fr: l = [i.replace(' ', '') for i in fr] print(len(set(l)), '/', c, '----', doc) del l s = '{}{}{}{} '.format('start:', start_, 'now:', getNow()) print(s) except Exception as e: print(e) try: mongo_client.close() except Exception as e: print(e)