数据集官网下载;
jupyter notebook 实现;
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Raw string: the Windows path is full of backslashes, and sequences such as
# "\p" or "\d" are invalid escapes in a normal string literal.
fname = r'E:\pythonwork\project\Deeplearning\Task\data\iris.data'

# Read the comma-separated text file into a list of rows (lists of strings).
# The file is only read, so plain 'r' is enough.  Blank lines are skipped here
# so they never become bogus rows, and only the line terminator is stripped so
# a final line without a trailing newline keeps its last character.
with open(fname, 'r', encoding='utf-8') as f:
    s = [line.rstrip('\n').split(',') for line in f if line.strip()]

# Build the DataFrame: four measurements followed by the species name.
names = ['slength', 'swidth', 'plength', 'pwidth', 'name']
iris = pd.DataFrame(data=s, columns=names)

# Safety net: drop any row that is still incomplete (fewer than five fields).
iris.dropna(axis=0, how='any', inplace=True)

# The values were read as strings; convert the four measurement columns to
# float so they can be plotted.  Whole columns are replaced (rather than
# assigned through `.iloc[:, :4]`) so that they really get a float dtype
# instead of remaining object columns.
feature_cols = names[:4]
iris[feature_cols] = iris[feature_cols].astype('float')

# Split into the three species by row position (first, second and third run of
# 50 rows).  This is done after the float conversion so the subsets hold
# numeric values.
seto = iris.iloc[0:50, :]
vers = iris.iloc[50:100, :]
virg = iris.iloc[100:150, :]
print(seto.shape)
print(vers.shape)

# Count how many samples each species has.
print(iris['name'].value_counts())

# Plot slength against swidth for all samples.
plt.scatter(x=iris['slength'], y=iris['swidth'])
plt.show()
#-------------------
# Scatter plot of slength against swidth, one colour/marker per species.
# Every species is plotted against its OWN swidth column; the subsets and
# their styles are paired in one table so the x and y data cannot get out of
# step between the three calls.
species_styles = (
    (seto, {'color': 'red'}),
    (vers, {'color': 'blue', 'marker': '+'}),
    (virg, {'color': 'green', 'marker': '*'}),
)
for subset, style in species_styles:
    # The subsets are sliced from data that was read as text, so the values
    # may still be strings; cast to float so matplotlib draws numeric axes
    # instead of treating each distinct string as a category.
    plt.scatter(
        x=subset['slength'].astype(float),
        y=subset['swidth'].astype(float),
        **style,
    )
plt.xlabel('s length')
plt.ylabel('s width')
plt.show()