zoukankan      html  css  js  c++  java
  • 数据离散化和归一化

    数据离散化和归一化

    在进行数据分析时，通常需要对数据进行归一化和离散化操作。下面的示例先用 MinMaxScaler 对数据做最小-最大归一化，再用 pandas.cut 按 0.25 的间隔把每个值划分到四个区间中。

    from pylab import *
    from numpy import *
    import codecs
    import matplotlib.pyplot as plt
    import operator                                      #新加了一个库,用于排序
    import pandas as pd
    from numpy.random import random
    from sklearn import preprocessing 
    
    
    # Path of the input file read by open_file (comma-separated values per line).
    url = "resultData.txt"
    # Stores all mutual-information values.
    nmi_all = []
    # Used for counting.
    data_number = 0
    # Number of feature columns to read from each line of the input file.
    FeatureNum = 6
    # One hundred data records.
    data_num = 100
    # Rows parsed by open_file are appended to this list.
    data = []
    def open_file(url):
        """Parse a comma-separated text file and append its rows to ``data``.

        Each non-blank line is stripped, split on ``,`` and its first
        ``FeatureNum`` fields are converted to ``float``. The resulting list is
        appended to the module-level ``data`` list.

        Args:
            url: Path of the text file to read.

        Raises:
            ValueError: If one of the first ``FeatureNum`` fields is not a
                valid float.
            IndexError: If a non-blank line has fewer than ``FeatureNum``
                fields.
        """
        # codecs.open() without an encoding simply delegates to the built-in
        # open(), so use the built-in directly.
        with open(url, "r") as f:
            # Iterate the file handle lazily instead of loading every line
            # into memory with readlines().
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # Blank lines (e.g. trailing ones at the end of the file)
                    # would otherwise fail in float('').
                    continue
                fields = stripped.split(',')
                data.append([float(fields[i]) for i in range(FeatureNum)])
    
    def gui_yi_hua(data):
        """Return ``data`` after min-max normalization.

        A new ``preprocessing.MinMaxScaler`` with default settings is fitted on
        ``data`` and applied to it in one ``fit_transform`` call; the
        transformed result is returned to the caller.
        """
        scaler = preprocessing.MinMaxScaler()
        return scaler.fit_transform(data)
    
    def arry_discretization(tseg_minMax):
        """Discretize every row of values into four bins of width 0.25.

        For each row this prints, in order: the row itself, the bin label
        assigned to each value, and how many values fell into each bin.

        Args:
            tseg_minMax: Iterable of one-dimensional numeric sequences. Values
                are binned against the edges 0, 0.25, 0.5, 0.75 and 1; values
                outside that range get no label (NaN).

        Returns:
            None. All results are written to standard output.
        """
        # The bin edges and labels do not change between rows, so build them
        # once outside the loop.
        bins = [0, 0.25, 0.5, 0.75, 1]
        group_names = ['这个属于0-0.25', '这个属于0.25-0.5', '这个属于0.5-0.75', '这个属于0.75-1']
        for row in tseg_minMax:
            print(row)
            # pd.cut uses right-closed intervals by default, so the first bin
            # would be (0, 0.25] and an exact 0 would become NaN.
            # include_lowest=True closes the first bin on the left as well.
            cuts = pd.cut(row, bins, labels=group_names, include_lowest=True)
            print(cuts)
            # The top-level pd.value_counts() is deprecated; the Series method
            # gives the same counts, sorted by frequency.
            print(pd.Series(cuts).value_counts())
        
        
        
    if __name__ == '__main__':
        # Script entry point: fill the module-level ``data`` list from the
        # input file, normalize it, then print the discretization of each row.
        open_file(url)
        normalized = gui_yi_hua(data)
        arry_discretization(normalized)
    View Code
  • 相关阅读:
    js技巧大全
    DOM
    网页页面跳转几种方法
    JavaScript中创建对象的几种方式
    web本地存储-UserData
    图片预加载
    闭包
    JavaScript高级编程学习笔记(第三章之一)
    .net core 部署到ubuntu
    fiddler autoresponder 动态修改响应内容
  • 原文地址:https://www.cnblogs.com/xingnie/p/10334763.html
Copyright © 2011-2022 走看看