使用
data$ mongo localhost:27017/jd_51job_raw updateName.js --shell
js 脚本: updateName.js
// Mongo shell script (run as: mongo <host>:<port>/<database> updateName.js --shell).
//
// For every non-system collection of the connected database, copies each
// document's top-level `jobDiploma` value into `jdJob.jobDiploma`.
// The top-level `jobDiploma` field is intentionally left in place; removing
// it is a separate step.
//
// Deliberately written in ES5 (`var`, function expressions): the script
// targets the legacy mongo shell (it refers to `system.indexes`), which is
// not guaranteed to support newer syntax.

/**
 * Copies `jobDiploma` into `jdJob.jobDiploma` for all documents of every
 * non-system collection in `database`.
 *
 * @param {Object} database - Database handle providing
 *   `getCollectionNames()` and `getCollection(name)` (the shell's `db`).
 * @returns {undefined}
 */
function copyJobDiplomaIntoJdJob(database) {
  var names = database.getCollectionNames();
  for (var index = 0; index < names.length; index++) {
    var name = names[index];
    // Skip every internal "system.*" collection, not only
    // system.indexes / system.profile: none of them hold job documents.
    if (name.indexOf("system.") === 0) {
      print("Find system");
      continue;
    }
    copyJobDiplomaInCollection(database.getCollection(name));
  }
}

/**
 * Copies `jobDiploma` into `jdJob.jobDiploma` for the documents of one
 * collection, printing the collection, its document count and one sample
 * document afterwards.
 *
 * @param {Object} collection - Collection handle providing `count()`,
 *   `find(query)`, `update(selector, modifier)` and `findOne()`.
 * @returns {undefined}
 */
function copyJobDiplomaInCollection(collection) {
  print("now processing...", collection);
  print(collection.count());

  // Only visit documents that actually carry the field. Without this
  // filter, documents lacking `jobDiploma` would get
  // `jdJob.jobDiploma` set to `undefined`.
  collection.find({ jobDiploma: { $exists: true } }).forEach(function (item) {
    collection.update(
      { _id: item._id },
      { $set: { "jdJob.jobDiploma": item.jobDiploma } }
    );
  });

  printjson(collection.findOne());
}

// `db` is the global database handle supplied by the mongo shell. The guard
// keeps the file loadable in environments where it is not defined.
if (typeof db !== "undefined") {
  copyJobDiplomaIntoJdJob(db);
}
updateName.py
#!/usr/bin/env python # coding=utf-8 from pymongo import MongoClient from bson.json_util import dumps from multiprocessing import Pool client = MongoClient("mongodb://localhost:xxxxx/") db = client.jd_51job_raw cols = db.collection_names(include_system_collections=False) cols = sorted(cols) def del_jobDiploma(colName): print "=="*20,colName col = db.get_collection(colName)
col.update({},{"$unset":{"jobDiploma":""}},multi=True) print dumps(col.find_one(),ensure_ascii=False,indent=4) print ""
# 10个进程一起来!
pools = Pool(10) pools.map(del_jobDiploma,cols)