zoukankan      html  css  js  c++  java
  • Python读取ES中的所有数据并计算MD5然后进行持久化

    #!/usr/bin/python
    import threading
    import json
    import time
    from elasticsearch import Elasticsearch
    from elasticsearch import helpers
    import os
    import sys
    import argparse
    import hashlib

    # Elasticsearch nodes handed to the client constructor.
    host_list = [
        {"host": "1.58.55.11", "port": 9200},
        {"host": "1.58.55.12", "port": 9200},
        {"host": "1.58.55.13", "port": 9200},
    ]

    # Number of hits requested per search/scroll page.
    size = 1000


    def doc_md5(hit):
        """Return the hex MD5 digest of a single hit.

        The hit is serialised with ``json.dumps`` and encoded as UTF-8.
        A fresh hash object is created on every call: ``update()`` on a
        shared ``hashlib.md5()`` accumulates, so reusing one object across
        documents would make each digest depend on all earlier documents.
        """
        return hashlib.md5(json.dumps(hit).encode("utf-8")).hexdigest()


    def iter_hits(es, index, page_size, scroll="1m"):
        """Yield every hit of ``index``, one scroll page at a time.

        Args:
            es: client object exposing ``search`` and ``scroll``.
            index: name of the index to read.
            page_size: number of hits requested per page.
            scroll: keep-alive passed with every request; the scroll
                parameter must be supplied on each call.

        Yields:
            Each hit dict from ``response["hits"]["hits"]``.
        """
        response = es.search(index=index, scroll=scroll, size=page_size)
        # The initial search response already carries the first page, so it
        # is consumed before any scroll request is made.  Iterating until an
        # empty page avoids depending on the shape of ``hits.total``.
        while response["hits"]["hits"]:
            yield from response["hits"]["hits"]
            # Use the scroll id of the most recent response for the next
            # request instead of reusing the first one forever.
            response = es.scroll(scroll_id=response["_scroll_id"], scroll=scroll)


    def export_md5(es, index="full_sight", path="test.text"):
        """Append one JSON line ``{_id: md5}`` per hit of ``index`` to ``path``.

        The file is opened once in append mode for the whole run.  After
        each line is written, the document id and a running 1-based counter
        are printed as progress output.
        """
        with open(path, "a") as f:
            for num, hit in enumerate(iter_hits(es, index, size), 1):
                k = hit["_id"]
                f.write(json.dumps({k: doc_md5(hit)}))
                f.write("\n")
                print(k, num, sep="============>")


    if __name__ == "__main__":
        export_md5(Elasticsearch(host_list))
    
  • 相关阅读:
    cocos2dx遇到的一些坑
    cocos2dx场景切换的坑
    整合quickx到普通cocos2dx
    Hadoop、spark
    Redis高级特性及应用场景
    wpf相关好资源
    MVVM模式的几个开源框架
    ASP.NET的IIS映射
    NET 开发者必备的工具箱
    C#开源汇总
  • 原文地址:https://www.cnblogs.com/bainianminguo/p/10718713.html
Copyright © 2011-2022 走看看