zoukankan      html  css  js  c++  java
  • python数据预处理

    缺失值处理

    import pandas as pda
    import numpy as npy
    import matplotlib.pylab as pyl
    # data=pda.read_excel("D:/taobao2.xls")
    def index(data):
        """Fill in the missing values of a table supplied as a list of rows.

        The first row of ``data`` holds the column names and the remaining rows
        hold the records. A price (column "价格") of 0 is treated as missing.
        Every missing cell, in every column, is then replaced with the mean of
        the non-missing prices, and a running count of filled cells is printed.

        Args:
            data: Sequence of rows; ``data[0]`` is the header row and must
                contain a "价格" column.

        Returns:
            The cleaned ``DataFrame``.

        Raises:
            KeyError: If the header has no "价格" column.
        """
        data = pda.DataFrame(data[1:], columns=data[0])
        print(data)
        # Series.mask builds a new column. Writing through
        # data["价格"][...] is a chained assignment, which pandas does not
        # guarantee to reach the frame (and never does under Copy-on-Write).
        data["价格"] = data["价格"].mask(data["价格"] == 0)
        print(data)
        # Filling gaps with the mean leaves the mean unchanged, so it is
        # computed once instead of once per filled cell.
        price_mean = data["价格"].mean()
        x = 0
        for column in data.columns:
            # One null mask per column rather than one per cell.
            missing = data[column].isnull().values
            for row_label in data.index[missing]:
                # NOTE(review): non-price columns are filled with the mean
                # price as well, exactly as before - confirm this is intended.
                data.at[row_label, column] = price_mean
                x += 1
                print(x)
        return data
      
    # Script entry point: runs only when the file is executed directly.
    # It obtains the raw table from nosupervision_read_data() and hands it to
    # index().
    # NOTE(review): nosupervision_read_data is neither defined nor imported in
    # the visible source - confirm where it is provided.
    if __name__ == "__main__":
      data = nosupervision_read_data()
      index(data)
    

    数据离散化处理

    #离散化
    #连续型数据离散化
    #等宽离散化
    import pandas as pda
    import numpy as npy
    import matplotlib.pylab as pyl
    # data=pda.read_excel("D:/taobao2.xls")
    def index(data):
        """Discretise the price column with equal-width and fixed-interval bins.

        The first row of ``data`` holds the column names and the remaining rows
        hold the records. The values of the third column (position 2) are
        sorted and then binned twice with ``pandas.cut``: once into five
        equal-width bins and once into the intervals (0, 50], (50, 100] and
        (100, max]. The sorted prices, the bin edges and both results are
        printed.

        Args:
            data: Sequence of rows; ``data[0]`` is the header row and the third
                column holds the prices.

        Returns:
            Tuple ``(c1, c2)`` with the equal-width and the fixed-interval
            categorical results, in sorted-price order.
        """
        data = pda.DataFrame(data[1:], columns=data[0])
        da = data.values
        # npy.sort returns a sorted copy. An in-place sort of this slice fails
        # when ``values`` is a read-only view of the frame (Copy-on-Write).
        price = npy.sort(da[:, 2])
        print(price)
        # Equal-width discretisation.
        k = 5
        c1 = pda.cut(price, k, labels=["太便宜", "便宜", "适中", "贵", "太贵"])
        print(c1)
        # Discretisation over explicitly specified intervals. Bin edges must
        # increase strictly, so the top edge is the maximum price only when it
        # exceeds 100; otherwise the last bin is left open-ended.
        highest = price.max()
        k = [0, 50, 100, highest if highest > 100 else npy.inf]
        print(k)
        # Bins are right-closed, so a price of exactly 0 receives no label.
        c2 = pda.cut(price, k, labels=["非常便宜", "适中", "贵"])
        print(c2)
        return c1, c2
    # Script entry point: runs only when the file is executed directly.
    # It obtains the raw table from nosupervision_read_data() and hands it to
    # index().
    # NOTE(review): nosupervision_read_data is neither defined nor imported in
    # the visible source - confirm where it is provided.
    if __name__ == "__main__":
       data = nosupervision_read_data()
       index(data)
    

    数据集成处理

    # -*- coding:utf-8 -*-
    # 数据集成处理
    import pandas as pda
    import numpy as npy
    def index(data):
        """Demonstrate data integration by stacking two slices of a table.

        The first row of ``data`` holds the column names and the remaining rows
        hold the records. Rows 0-9 and rows 10-19 are taken as two separate
        arrays and concatenated back into one. Both parts, the merged table
        and the result dict are printed.

        Args:
            data: Sequence of rows; ``data[0]`` is the header row.

        Returns:
            Dict with the single key 'data_数据集成' mapping to the merged rows
            as a list of lists.
        """
        frame = pda.DataFrame(data[1:], columns=data[0])
        values = frame.values
        # Data integration: two row ranges joined into a single table.
        first_part, second_part = values[:10], values[10:20]
        merged = pda.DataFrame(npy.concatenate((first_part, second_part)))
        # The result has to be handed back as a dict.
        output = {'data_数据集成': merged.values.tolist()}
        for part in (first_part, second_part):
            print(pda.DataFrame(part))
        print(merged)
        print(output)
        return output
    # Script entry point: runs only when the file is executed directly.
    # It obtains the raw table from nosupervision_read_data() and hands it to
    # index(); the dict returned by index() is not used here.
    # NOTE(review): nosupervision_read_data is neither defined nor imported in
    # the visible source - confirm where it is provided.
    if __name__ == "__main__":
       data = nosupervision_read_data()
       index(data)
    

      

  • 相关阅读:
    (转)classload和class.forname()区别
    (转)HashMap和HashTable源码
    (转)spring 框架介绍
    [Spring入门学习笔记][创建网站URL]
    [spring入门学习笔记][spring的IoC原理]
    [J2EE学习][post,get乱码处理]
    [J2EE框架][Debug]
    [SQL学习笔记][用exists代替全称量词 ]
    [Spring入门学习笔记][Spring Boot]
    [Spring入门学习笔记][maven]
  • 原文地址:https://www.cnblogs.com/wei23/p/10890609.html
Copyright © 2011-2022 走看看