zoukankan      html  css  js  c++  java
  • python读取es中的所有数据并计算md5然后进行持久化

    #!/usr/bin/python
    import threading
    import json
    import time
    from elasticsearch import Elasticsearch
    from elasticsearch import helpers
    import os
    import sys
    import argparse
    # Connection targets for the Elasticsearch client: one entry per node
    # address, each configured with port 9200.
    host_list = [
        {"host": address, "port": 9200}
        for address in ("1.58.55.11", "1.58.55.12", "1.58.55.13")
    ]

    # Client object used for the search/scroll calls later in the script.
    es = Elasticsearch(host_list)
    
    
    
    import hashlib

    # Number of hits requested per scroll page.
    size = 1000

    # Open a scroll context on the index. The search response itself already
    # carries the first page of hits, so that page must be processed before
    # asking for the next one.
    query = es.search(index='full_sight', scroll='1m', size=size)
    results = query['hits']['hits']  # first page of hits
    scroll_id = query['_scroll_id']  # cursor used to fetch the following pages

    # Running counter of processed documents (used only for progress output).
    num = 1

    # Walk every page, compute the MD5 of each hit on its own, and append one
    # JSON object {"<_id>": "<md5>"} per line to the output file.
    # The loop ends when a page comes back empty instead of relying on
    # hits.total, which is an object (not an int) on newer Elasticsearch
    # versions and may not reflect the exact number of documents.
    with open("test.text", "a") as f:
        while results:
            for m in results:
                s = json.dumps(m)
                # A fresh digest per document: reusing one md5 object and
                # calling update() repeatedly would yield a cumulative hash of
                # everything seen so far rather than this document's hash.
                v = hashlib.md5(bytes(s, encoding="utf-8")).hexdigest()
                k = m["_id"]

                f.write(json.dumps({k: v}))
                f.write("\n")

                print(k, num, sep="============>")
                num += 1

            # The scroll parameter must be passed on every call, otherwise the
            # request fails. Always continue from the most recent scroll id.
            query = es.scroll(scroll_id=scroll_id, scroll='1m')
            scroll_id = query['_scroll_id']
            results = query['hits']['hits']

    # Release the server-side scroll context instead of waiting for it to
    # time out.
    es.clear_scroll(scroll_id=scroll_id)
    
  • 相关阅读:
    Windows7共享设置
    13-运算符
    13-数据类型转换
    06-移动web之flex布局
    09-sass
    08-less预处理器
    移动端必须掌握知识点
    11-JS变量
    10-响应式
    07-rem
  • 原文地址:https://www.cnblogs.com/bainianminguo/p/10718713.html
Copyright © 2011-2022 走看看