zoukankan      html  css  js  c++  java
  • tpot从elastic search拉攻击数据之三 用于拉取的java程序

    package download;
    
    import org.json.JSONArray;
    
    import java.io.*;
    import java.net.URL;
    import java.net.URLConnection;
    import java.nio.Buffer;
    import java.text.SimpleDateFormat;
    import java.util.*;
    import org.json.JSONObject;
    
    public class Downloader {
    
        /**
         * Entry point: runs the download and prints how many records were collected.
         *
         * @param args command-line arguments (not used)
         * @throws IOException if a request issued during the download fails
         */
        public static void main(String[] args) throws IOException {
            // Proxy setup is currently disabled: Configer.configProxy();
            final int fetched = getresult().size();
            System.out.println("爬取完成,条数:" + fetched);
        }
    
        // Name of the Elasticsearch index to query; getIndexStr() sets it to "logstash-" plus today's date.
        public static String indexstr = "";
        // Settings loaded by the static initializer from "downloader.properties";
        // remains null when that load fails with an IOException.
        public static Properties p;
    
        /**
         * Loads a properties resource from the classpath.
         *
         * @param filename resource name, resolved through this class's class loader
         * @return the properties parsed from the resource
         * @throws FileNotFoundException if no such resource exists on the classpath
         * @throws IOException if the resource cannot be read
         */
        public static Properties loadPropertiesFromFile(String filename) throws IOException {
            Properties loaded = new Properties();
            // try-with-resources closes the stream even when load() fails.
            try (InputStream input = Downloader.class.getClassLoader().getResourceAsStream(filename)) {
                if (input == null) {
                    // getResourceAsStream reports a missing resource by returning null; surface that
                    // as an IOException rather than a NullPointerException from load().
                    throw new FileNotFoundException("classpath resource not found: " + filename);
                }
                loaded.load(input);
            }
            return loaded;
        }
    
        // Load the downloader settings once, when the class is initialized.
        static {
            Properties loaded = null;
            try {
                loaded = loadPropertiesFromFile("downloader.properties");
            } catch (IOException loadFailure) {
                // Best effort: report the problem and continue with p left as null.
                System.out.println("downloader.properties读取失败");
                loadFailure.printStackTrace();
            }
            p = loaded;
        }
    
        /**
         * Opens the whitelist resource named by the {@code white_list_file} property.
         *
         * @return a stream over the resource, as returned by
         *         {@link ClassLoader#getResourceAsStream(String)}
         */
        public static  InputStream get_whitelist_inputstream(){
            // Look the resource up on the classpath and hand back its stream.
            final String resourceName = p.getProperty("white_list_file");
            return Downloader.class.getClassLoader().getResourceAsStream(resourceName);
            // To obtain the resource's path instead of a stream, use
            // classLoader.getResource(resourceName).getPath().
        }
    
        /**
         * Builds a single alternation regex from the whitelist resource: every non-blank line
         * becomes a parenthesised alternative and the alternatives are joined with {@code |}.
         *
         * @return the combined regex, or an empty string when the whitelist has no entries
         * @throws FileNotFoundException if the whitelist resource cannot be found
         * @throws IOException if the whitelist cannot be read
         */
        public static String get_whitelist_regex() throws IOException {
            InputStream whitelistStream = get_whitelist_inputstream();
            if (whitelistStream == null) {
                throw new FileNotFoundException("whitelist resource not found on the classpath");
            }

            StringBuilder regex = new StringBuilder();
            // Closing the reader also closes the wrapped stream, even if readLine() fails.
            try (BufferedReader reader =
                         new BufferedReader(new InputStreamReader(whitelistStream, "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.trim().isEmpty()) {
                        // A blank line would otherwise contribute an empty "()" alternative.
                        continue;
                    }
                    if (regex.length() > 0) {
                        regex.append('|');
                    }
                    regex.append('(').append(line).append(')');
                }
            }
            return regex.toString();
        }
    
        /**
         * Sends {@code param} as the body of a POST request and returns the response text.
         *
         * <p>The body is written as UTF-8 and the response is decoded as UTF-8. Response lines
         * are concatenated without their line terminators.
         *
         * @param url    target URL
         * @param param  request body; {@code null} is sent as the text {@code "null"}
         * @param header extra request headers, or {@code null} for none; the {@code accept},
         *               {@code connection} and {@code user-agent} headers are set afterwards
         *               and therefore override entries with the same name
         * @return the response body with line breaks removed
         * @throws IOException if connecting, writing the body or reading the response fails
         */
        public static String post(String url, String param, Map<String, String> header) throws IOException {
            URLConnection conn = new URL(url).openConnection();
            conn.setConnectTimeout(5000);
            conn.setReadTimeout(15000);

            if (header != null) {
                for (Map.Entry<String, String> entry : header.entrySet()) {
                    conn.setRequestProperty(entry.getKey(), entry.getValue());
                }
            }
            conn.setRequestProperty("accept", "*/*");
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestProperty("user-agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");

            // Both flags are required for URLConnection to send a request body and read a reply.
            conn.setDoOutput(true);
            conn.setDoInput(true);

            // A plain Writer (not PrintWriter) lets write failures propagate as IOException,
            // and the explicit charset keeps the request encoding independent of the platform.
            try (Writer out = new OutputStreamWriter(conn.getOutputStream(), "UTF-8")) {
                out.write(String.valueOf(param));
            }

            StringBuilder result = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
            }
            return result.toString();
        }
    
        /**
         * Fetches {@code url} and returns the response text.
         *
         * <p>The response is decoded as UTF-8, matching {@code post}. Response lines are
         * concatenated without their line terminators.
         *
         * @param url URL to fetch
         * @return the response body with line breaks removed
         * @throws IOException if connecting or reading the response fails
         */
        public static String get(String url) throws IOException {
            URLConnection connection = new URL(url).openConnection();
            connection.setRequestProperty("accept", "*/*");
            connection.setRequestProperty("connection", "Keep-Alive");
            connection.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
            connection.setConnectTimeout(5000);
            connection.setReadTimeout(5000);

            connection.connect();

            StringBuilder body = new StringBuilder();
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    body.append(line);
                }
            }
            return body.toString();
        }
    
        /**
         * Sets {@code indexstr} to {@code "logstash-"} followed by today's date formatted as
         * {@code yyyy.MM.dd}.
         */
        public static void getIndexStr() {
            // Locale.ROOT keeps the date Gregorian with ASCII digits regardless of the JVM's
            // default locale, so the index name is always in the expected form.
            // NOTE(review): the date is taken in the JVM's default time zone; confirm this matches
            // the time zone the daily indices are named in.
            indexstr = "logstash-" + new SimpleDateFormat("yyyy.MM.dd", Locale.ROOT).format(new Date());
            //indexstr = "tpot_test";//for test
        }
    
        /**
         * Refreshes the index name, requests that index's mapping and returns the keys found
         * under its {@code mappings} object.
         *
         * @return the key set of the {@code mappings} object of today's index
         * @throws IOException if the mapping request fails
         */
        public static Set<String> getAttackTypeSet() throws IOException {
            getIndexStr();

            final String mappingUrl = p.getProperty("els.host") + "/" + indexstr + "/" + "_mapping?pretty=true";
            System.out.println("【getting all types today】>>" + mappingUrl);

            // Navigate response -> <index name> -> "mappings" and return that object's keys.
            return new JSONObject(get(mappingUrl))
                    .getJSONObject(indexstr)
                    .getJSONObject("mappings")
                    .keySet();
        }
    
        public static LinkedList<NearRealtimeIntelligence> getresult() throws IOException {
            LinkedList<NearRealtimeIntelligence> result = new LinkedList<NearRealtimeIntelligence>();
            Set<String> attacktypeset = getAttackTypeSet();
    
            String param = "{
    " +
                    "  "query": {
    " +
                    "    "bool": {
    " +
                    "      "must_not": [
    " +
                    "        {
    " +
                    "          "regexp":{
    " +
                    "            "src_ip":"" + get_whitelist_regex() + ""
    " +
                    "          }
    " +
                    "        }
    " +
                    "      ]
    " +
                    "    }
    " +
                    "  },"size":" + p.getProperty("els.batch_size") + "
    " +
                    "}";
    
            for (String attacktype : attacktypeset) {
    
                //忽略default、syslog两个type
                if (attacktype.equals("_default_") || attacktype.equals("Syslog")) {
                    continue;
                }
    
                System.out.println("【getting "+attacktype+" data】");
                String req = p.getProperty("els.host") + "/" + indexstr + "/" + attacktype + "/_search?scroll=" + p.getProperty("scroll_timegap");
                System.out.println("posting url>>" + req);
                String res = post(req, param, null);
                //parse json
                JSONObject res_json = new JSONObject(res);
                JSONObject all_hits = res_json.getJSONObject("hits");
                JSONArray docu_array = all_hits.getJSONArray("hits");
    
                int total = all_hits.getInt("total");
                int pages = (int) Math.ceil(total / Double.parseDouble(p.getProperty("els.batch_size")));
                System.out.println("数据条数:"+total + " 页数:" + pages);
                String scroll_id = res_json.getString("_scroll_id");
    
    //            System.out.println("######################################batch0");
                for (int j = 0; j < docu_array.length(); j++) {
                    JSONObject docu = (JSONObject) docu_array.get(j);
                    JSONObject source = docu.getJSONObject("_source");
                    if (source.has("src_ip")) {
                        String src_ip = source.getString("src_ip");
                        System.out.println(src_ip);
                        NearRealtimeIntelligence adata=new NearRealtimeIntelligence();
                        adata.setName(src_ip);
                        adata.setSourceName(attacktype);
                        result.add(adata);
                    }
                }
    
                for (int i = 1; i < pages; i++) {
    //                System.out.println("######################################batch" + i);
                    req = p.getProperty("els.host") + "/_search/scroll";
    //                System.out.println("posting url>>" + req);
                    String param_scroll = "{
    " +
                            "  "scroll":"" + p.getProperty("scroll_timegap") + "",
    " +
                            "  "scroll_id":"" + scroll_id + ""
    " +
                            "}";
                    res = post(req, param_scroll, null);
                    //parse json
                    res_json = new JSONObject(res);
                    all_hits = res_json.getJSONObject("hits");
                    docu_array = all_hits.getJSONArray("hits");
    
                    for (int j = 0; j < docu_array.length(); j++) {
                        JSONObject docu = (JSONObject) docu_array.get(j);
                        JSONObject source = docu.getJSONObject("_source");
                        if (source.has("src_ip")) {
                            String src_ip = source.getString("src_ip");
    //                        System.out.println(src_ip);
                            NearRealtimeIntelligence adata=new NearRealtimeIntelligence();
                            adata.setName(src_ip);
                            adata.setSourceName(attacktype);
                            result.add(adata);
                        }
                    }
                }
            }
    
            return result;
        }
    }

    拉取过程中,注意:

    1、在请求参数中用must_not+regexp过滤掉白名单IP,并通过size设置每批条数,配合scroll分页读取

    url: http://xxx.xxx.xxx.xxx:8000/logstash-2018.07.30/Honeytrap/_search?scroll=3m
    String param = "{ " + " "query": { " + " "bool": { " + " "must_not": [ " + " { " + " "regexp":{ " + " "src_ip":"" + get_whitelist_regex() + "" " + " } " + " } " + " ] " + " } " + " },"size":" + p.getProperty("els.batch_size") + " " + "}";

    2、读取classpath下的白名单文件

    获得inputstream

    ClassLoader classLoader=Downloader.class.getClassLoader();
    InputStream whitelist_inputstream=classLoader.getResourceAsStream(p.getProperty("white_list_file"));

    使用inputstream按行读

    BufferedReader whitelist_reader=new BufferedReader(new InputStreamReader(whitelist_inputstream));
    
    String line=null;
    while((line=whitelist_reader.readLine())!=null){
    }

    3、读取properties配置文件

    Properties p = new Properties();
    InputStream input = Downloader.class.getClassLoader().getResourceAsStream(filename);
    p.load(input);

    4、解析json字符串

    JSONObject res_json = new JSONObject(res);
    JSONObject all_hits = res_json.getJSONObject("hits");
    JSONArray docu_array = all_hits.getJSONArray("hits");
  • 相关阅读:
    laravel疑难问题---5、laravel的api开发
    laravel报403错误
    JS数组常用方法---14、2个归并方法
    JS字符串常用方法(自)---10、总结
    JS字符串常用方法(自)---9、字符串匹配
    win7便笺元数据损坏,最新解决办法
    【转】OS X 中快速调出终端
    【转】实用API大全
    免费手机号码归属地API查询接口
    【转】Intellij IDEA 提交代码到远程GitHub仓库
  • 原文地址:https://www.cnblogs.com/zealousness/p/9391940.html
Copyright © 2011-2022 走看看