zoukankan      html  css  js  c++  java
  • 随机生成50个字段的elasticsearch的测试程序输入

    词典位置:https://raw.githubusercontent.com/jonbcard/scrabble-bot/master/src/dictionary.txt

    import json
    from random import sample, randint
    from uuid import uuid4
    
    def gen_random_words(path=r"D:\exp\test_data\dictionary.txt", count=3000):
        """Return a random selection of words read from a dictionary file.

        The file is expected to hold one word per line. Surrounding whitespace
        is stripped and blank lines are ignored, so no empty "word" can end up
        in the generated events.

        Args:
            path: Location of the dictionary file. The default is a raw string
                so that the backslashes of the Windows path are kept literally
                (a plain literal would turn the backslash-t pair into a TAB).
            count: Number of words to draw, sampled without replacement.

        Returns:
            A list of `count` words in random order.

        Raises:
            IOError/OSError: if the dictionary file cannot be opened.
            ValueError: if the file holds fewer than `count` non-blank lines.
        """
        # The with-statement closes the file; no explicit close() is needed.
        with open(path) as f:
            words = [word for word in (line.strip() for line in f) if word]
        return sample(words, count)
    
    
    # Running count of every word that has been offered to sample_words().
    total_words = 0


    def sample_words(search_words, random_words):
        """Fold `random_words` into the bounded sample kept in `search_words`.

        `search_words` is mutated in place and never grows beyond 1000 entries.
        While it is below that size each incoming word is appended. Once it is
        full, the n-th word seen overall (tracked in the module-level
        `total_words`) replaces a randomly chosen slot with probability
        1000 / n, i.e. classic reservoir sampling.

        Args:
            search_words: list holding the current sample; updated in place.
            random_words: iterable of words to offer to the sample.
        """
        global total_words
        capacity = 1000
        for candidate in random_words:
            total_words = total_words + 1
            if len(search_words) >= capacity:
                # Pool is full: keep the newcomer with probability
                # capacity / total_words, evicting a uniformly chosen entry.
                if randint(1, total_words) <= capacity:
                    search_words[randint(0, capacity - 1)] = candidate
            else:
                search_words.append(candidate)
    
    
    def gen_an_event(words, search_words):
        """Build one test event made of 50 randomly worded text fields.

        Each field "field-0" .. "field-49" holds between 1 and 10 words drawn
        from `words` and joined by single spaces. Every drawn word is also
        offered to `sample_words`, which maintains `search_words` in place.

        Args:
            words: sequence of candidate words to draw from.
            search_words: list passed through to `sample_words` for updating.

        Returns:
            A dict with the generated fields under "event" and the constant
            "sourcetype" value "hec_test2".
        """
        fields = {}
        for idx in range(50):
            how_many = randint(1, 10)
            picked = sample(words, how_many)
            sample_words(search_words, picked)
            fields["field-" + str(idx)] = " ".join(picked)
        payload = {"event": fields, "sourcetype": "hec_test2"}
        return payload
    
    
    if __name__ == "__main__":
        # Script entry point: writes 500 pairs of data files, each holding the
        # same 500 random events in two formats --
        #   * ES:     Elasticsearch bulk format (action line + document line),
        #   * Splunk: the full event objects concatenated back to back --
        # and finally dumps the sampled search words as a JSON list.
        search_words = []
        # The bulk action line is identical for every document, so build it once.
        index_head = json.dumps({"index": {"_index": "hec_test2", "_type": "hec_type2"}})
        for _ in range(500):
            words = gen_random_words()
            es_lines = []
            splunk_events = []
            for _ in range(500):
                event = gen_an_event(words, search_words)
                es_lines.append(index_head)
                es_lines.append(json.dumps(event["event"]))
                splunk_events.append(json.dumps(event))
            # Newline-delimited payload; the bulk API requires the last line to
            # be terminated by a newline as well.
            es_out_put = "\n".join(es_lines) + "\n"
            splunk_out_put = "".join(splunk_events)
            # Both variants share one random file name in their own directory.
            file_name = str(uuid4()) + ".json"
            for dir_name, payload in (("ES", es_out_put), ("Splunk", splunk_out_put)):
                # Raw string keeps the Windows backslashes literal (no TAB from \t).
                outfile = r"D:\test_data\%s\%s" % (dir_name, file_name)
                with open(outfile, "w") as f:
                    f.write(payload)
                print(outfile)
        outfile = r"D:\test_data\search_words.txt"
        with open(outfile, "w") as f:
            f.write(json.dumps(search_words))
  • 相关阅读:
    js常用函数(不断添加中。。。)
    【转】linux常用命令全集
    【转】VC++消息钩子编程
    beyond compare ftp 文件夹同步
    【转】notepad++ 应用学习 -- 列模式,十六进制模式
    网页嵌入视频播放器
    java中 正则表达式的使用
    【转】正则表达式30分钟入门教程
    【转】深入研究java.lang.Runtime类
    js 中map的几种实现方式
  • 原文地址:https://www.cnblogs.com/bonelee/p/6599974.html
Copyright © 2011-2022 走看看