zoukankan      html  css  js  c++  java
  • pandas 绘图 机器学习看特征相关性

    pandas 绘图

    import numpy as np
    import tflearn
    from tflearn.layers.core import dropout
    from tflearn.layers.normalization import batch_normalization
    from tflearn.data_utils import to_categorical
    from sklearn.model_selection import train_test_split
    import sys 
    import pandas as pd
    from pandas import Series,DataFrame
    import matplotlib.pyplot as plt 
    
    # Load the feature table and print a summary of its columns.
    data_train = pd.read_csv("feature_with_dnn_todo2.dat")
    data_train.info()
    import matplotlib.pyplot as plt
    print(data_train.columns)

    # Legend name for each class value found in the "label" column.
    class_names = ((0.0, u'white'), (1.0, u'cdn'), (2.0, u'tunnel'), (3.0, u"msad"))

    # Draw one figure per feature, overlaying one curve per class so the
    # classes can be compared visually. columns[1:] skips the first column,
    # which is 'label' in the sample data shown with this script.
    for col in data_train.columns[1:]:
        # Create exactly one figure per feature; creating a second one here
        # would leave an empty window behind and detach the alpha setting
        # from the figure that is actually drawn on.
        fig = plt.figure()
        fig.set(alpha=0.2)
        ax = fig.add_subplot(111)

        plotted_any = False
        for value, name in class_names:
            series = data_train.loc[data_train["label"] == value, col]
            if series.empty:
                # Nothing to draw for this class; skipping it keeps the
                # legend aligned with the curves that are really present.
                continue
            # Attach the name to the curve itself instead of relying on the
            # positional order of a separate legend tuple.
            series.plot(ax=ax, label=name)
            plotted_any = True

        ax.set_xlabel(u"sample data id")
        ax.set_ylabel(col)
        ax.set_title(col)
        if plotted_any:
            ax.legend(loc='best')
        plt.show()
        # Release the figure so memory does not grow with the feature count.
        plt.close(fig)

    结果(print(data_train.columns) 的输出):

    Index(['label', 'flow_cnt', 'len(srcip_arr)', 'len(dstip_arr)',
           'subdomain_num', 'uniq_subdomain_ratio',
           'np.average(dns_request_len_arr)', 'np.average(dns_reply_len_arr)',
           'np.average(subdomain_tag_num_arr)', 'np.average(subdomain_len_arr)',
           'np.average(subdomain_weird_len_arr)',
           'np.average(subdomain_entropy_arr)', 'A_rr_type_ratio',
           'incommon_rr_type_rato', 'valid_ipv4_ratio', 'uniq_valid_ipv4_ratio',
           'request_reply_ratio', 'np.max(dns_request_len_arr)',
           'np.max(dns_reply_len_arr)', 'np.max(subdomain_tag_num_arr)',
           'np.max(subdomain_len_arr)', 'np.max(subdomain_weird_len_arr)',
           'np.max(subdomain_entropy_arr)', 'avg_distance', 'std_distance'],
          dtype='object')

    。。。。

    输入数据样例(feature_with_dnn_todo2.dat 的部分内容):

    label,flow_cnt,len(srcip_arr),len(dstip_arr),subdomain_num,uniq_subdomain_ratio,np.average(dns_request_len_arr),np.average(dns_reply_len_arr),np.average(subdomain_tag_num_arr),np.average(subdomain_len_arr),np.average(subdomain_weird_len_arr),np.average(subdomain_entropy_arr),A_rr_type_ratio,incommon_rr_type_rato,valid_ipv4_ratio,uniq_valid_ipv4_ratio,request_reply_ratio,np.max(dns_request_len_arr),np.max(dns_reply_len_arr),np.max(subdomain_tag_num_arr),np.max(subdomain_len_arr),np.max(subdomain_weird_len_arr),np.max(subdomain_entropy_arr),avg_distance,std_distance
    0.0,1.0,1.0,1.0,1.0,1.0,35.0,148.0,1.0,3.0,0.0,0.0,1.0,0.0,1.0,1.0,0.142857142857,35.0,148.0,1.0,3.0,0.0,-0.0,0,0
    0.0,10.0,1.0,3.0,6.0,0.6,42.7,143.5,1.83333333333,8.5,0.0,2.75986309274,0.6,0.0,0.2,0.2,0.0117096018735,44.0,287.0,2.0,10.0,0.0,2.94770277922,2.2,1.46969384567
    0.0,100.0,1.0,2.0,50.0,0.5,49.63,62.96,1.0,7.7,0.0,2.41418035734,0.51,0.0,0.26,0.01,0.00100745516825,56.0,134.0,1.0,14.0,0.0,3.27761343682,7.14285714286,2.65729646253
    0.0,100.0,1.0,3.0,17.0,0.17,46.11,70.53,1.0,4.47058823529,0.0,1.29411764706,0.4,0.0,0.13,0.05,0.00108436347864,54.0,631.0,1.0,12.0,0.0,3.0,5.1875,1.84454432042
    0.0,100.0,1.0,4.0,50.0,0.5,45.8,59.59,1.0,5.8,0.0,2.06068705052,0.5,0.0,0.22,0.01,0.00109170305677,46.0,126.0,1.0,6.0,0.0,2.58496250072,4.14285714286,0.925820099773
    0.0,100.0,11.0,7.0,26.0,0.26,42.64,58.51,1.0,4.80769230769,0.0,1.53846153846,0.84,0.0,0.7,0.25,0.00117260787992,47.0,201.0,1.0,5.0,0.0,2.0,1.28,0.825590697622

    。。。



  • 相关阅读:
    Unity 绘制带颜色的流线 streamline
    Tinyply 源码阅读
    题解 [BZOJ2952]长跑
    莫比乌斯反演技巧
    题解 pyh的求和
    Java Web基础
    后端常用数据持久层模板及框架以及一些工具类模板的配置使用集合
    12306火车订票系统(C++)
    C++/Java文件读写并执行相关操作、文件复制、文件格式转换等(举例)
    《Java EE编程技术》综合应用系统开发_作业管理系统_Struts2_设计报告
  • 原文地址:https://www.cnblogs.com/bonelee/p/8617654.html
Copyright © 2011-2022 走看看