"""Data transformation: encode the categorical columns of a small DataFrame.

Steps performed by this script:
1. map the ordinal 'size' column to integers,
2. convert the textual 'prize' column to floats,
3. map the 'class label' column to integer ids,
4. one-hot encode the remaining nominal column ('color').
"""
import pandas as pd

df = pd.DataFrame([
    ['green', 'M', '10.2', 'class1'],
    ['red', 'L', '13.5', 'class2'],
    ['blue', 'XL', '15.3', 'class1'],
])
df.columns = ['color', 'size', 'prize', 'class label']
print(df)

# 'size' is ordinal, so map it to integers that keep the order M < L < XL.
size_mapping = {
    'XL': 3,
    'L': 2,
    'M': 1,
}
df['size'] = df['size'].map(size_mapping)

# 'prize' is entered as text above. Convert it to float so that get_dummies()
# below keeps it as a single numeric column instead of creating one indicator
# column per distinct value (prize_10.2, prize_13.5, ...).
df['prize'] = df['prize'].astype(float)
print(df)

# Number the class labels in sorted order. Iterating a bare set of strings is
# not stable between interpreter runs (string hashing is randomized), which
# would make the label -> id assignment change from run to run.
class_mapping = {
    label: idx
    for idx, label in enumerate(sorted(set(df['class label'])))
}
df['class label'] = df['class label'].map(class_mapping)
print(df)

# Only 'color' is still non-numeric here, so it is the only column that gets
# one-hot encoded. dtype=int keeps the indicators printed as 0/1 regardless of
# the library's default indicator dtype.
result = pd.get_dummies(df, dtype=int)
print(result)

# Output:
#
#    color size prize class label
# 0  green    M  10.2      class1
# 1    red    L  13.5      class2
# 2   blue   XL  15.3      class1
#
#    color  size  prize class label
# 0  green     1   10.2      class1
# 1    red     2   13.5      class2
# 2   blue     3   15.3      class1
#
#    color  size  prize  class label
# 0  green     1   10.2            0
# 1    red     2   13.5            1
# 2   blue     3   15.3            0
#
#    size  prize  class label  color_blue  color_green  color_red
# 0     1   10.2            0           0            1          0
# 1     2   13.5            1           0            0          1
# 2     3   15.3            0           1            0          0
# Data transformation: discretizing continuous data (binning).
import pandas as pd
import matplotlib.pyplot as mp

ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]

# Goal stated by the author: split this set of ages into the groups
# '18-25', '25-35', '35-60' and 'over 60'.
# NOTE(review): the first edge below is 0 rather than 18, so the first bin is
# (0, 25]; confirm whether ages below 18 are meant to be included.
bins = [0, 25, 35, 60, 100]

# Each age is assigned to the half-open interval (left, right] it falls into.
cut_1 = pd.cut(ages, bins)
print(cut_1)

# Count how many ages fall into each bin. The top-level pd.value_counts()
# helper is deprecated, so the categorical is wrapped in a Series and its
# value_counts() method is used instead.
data = pd.Series(cut_1).value_counts()
data.plot(kind='bar', rot=30)
mp.show()

# Output recorded by the author (the dtype text on the last line may differ
# between pandas versions):
#
# [(0, 25], (0, 25], (0, 25], (25, 35], (0, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]
# Length: 12
# Categories (4, interval[int64]): [(0, 25] < (25, 35] < (35, 60] < (60, 100]]