#!/usr/bin/env python
#coding=utf-8
"""Small helper around the Elasticsearch scroll API for the ``test`` index.

Provides the ``ES`` class, which pages through all hits of a query and offers
two convenience queries keyed on the ``timestamp`` and ``src_ip`` fields.
"""
from __future__ import print_function

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search


class ES(object):
    """Thin wrapper around an ``Elasticsearch`` client bound to localhost:9200."""

    def __init__(self):
        """Create the client (60 second request timeout)."""
        self.es = Elasticsearch(hosts="localhost:9200", timeout=60)

    @staticmethod
    def _parse_total(total):
        """Normalise ``hits.total`` into ``(count, is_exact)``.

        ``hits.total`` is a plain integer on older servers and an object of
        the form ``{"value": N, "relation": "eq" | "gte"}`` on Elasticsearch
        7+. With relation ``"gte"`` the value is only a lower bound, so it
        must not be used to decide that every hit has been fetched.
        """
        if isinstance(total, dict):
            return total.get("value", 0), total.get("relation", "eq") == "eq"
        return total, True

    def get_es_data(self, query=""):
        """Return every hit of *query* against the ``test`` index.

        Results are fetched in pages of up to 10000 documents through the
        scroll API (1 minute keep-alive per page).

        :param query: request body passed to ``search``.
        :return: list of raw hit dicts (each carrying ``_source`` etc.).
        """
        resp = self.es.search(index="test", body=query, scroll="1m", size=10000)
        scroll_id = resp["_scroll_id"]
        try:
            total, total_is_exact = self._parse_total(resp["hits"]["total"])
            print(total)

            page = resp["hits"]["hits"]
            # Copy so that extending the result does not mutate the response.
            datas = list(page)

            # Keep scrolling until a page comes back empty, or - when the
            # reported total is exact - until that many hits were collected.
            while page and not (total_is_exact and len(datas) >= total):
                resp = self.es.scroll(scroll_id=scroll_id, scroll="1m")
                # The scroll id may change between pages; always use the latest.
                scroll_id = resp["_scroll_id"]
                page = resp["hits"]["hits"]
                datas.extend(page)
            return datas
        finally:
            # Release the server-side scroll context instead of letting it
            # linger until the keep-alive expires. A 404 only means the
            # context is already gone, so it is ignored.
            self.es.clear_scroll(body={"scroll_id": [scroll_id]}, ignore=(404,))

    def get_ip_data(self, start_time, end_time, ip):
        """Return the ``_source`` of every document for *ip* in a time window.

        :param start_time: inclusive lower bound for ``timestamp``.
        :param end_time: exclusive upper bound for ``timestamp``.
        :param ip: value matched (as a phrase) against ``src_ip``.
        :return: list of ``_source`` dicts.
        """
        query = {
            "query": {
                "bool": {
                    "filter": {
                        "range": {
                            "timestamp": {"gte": start_time, "lt": end_time}
                        }
                    },
                    "must": {"match_phrase": {"src_ip": ip}},
                }
            }
        }
        data = self.get_es_data(query)
        print(len(data))
        data = [d["_source"] for d in data]
        print(len(data))
        print(data[0:5])
        return data

    def get_ips(self, start_time, end_time):
        """Return the distinct ``src_ip`` values seen in a time window.

        :param start_time: inclusive lower bound for ``timestamp``.
        :param end_time: exclusive upper bound for ``timestamp``.
        :return: list of unique ``src_ip`` values (order not defined).
        """
        query = {
            "query": {
                "bool": {
                    "filter": {
                        "range": {
                            "timestamp": {"gte": start_time, "lt": end_time}
                        }
                    },
                    "must": {"exists": {"field": "src_ip"}},
                }
            }
        }
        data = self.get_es_data(query)
        ips = [d["_source"]["src_ip"] for d in data]
        print(len(ips))
        ips = list(set(ips))
        print(len(ips))
        print(ips)
        return ips


if __name__ == "__main__":
    es_obj = ES()
    #es_obj.get_ips("2017-06-01T00:00:00", "2017-06-01T01:00:00")
    es_obj.get_ip_data("2016-11-14T00:00:00", "2016-11-15T00:00:00", "192.168.0.45")