def _feature_names(transfer):
    """Return the feature names learned by a fitted vectorizer.

    Prefers ``get_feature_names_out()`` and falls back to the legacy
    ``get_feature_names()`` when the newer method is missing, so the demos
    work on both old and current scikit-learn releases.
    """
    getter = getattr(transfer, "get_feature_names_out", None)
    if getter is None:
        # Older scikit-learn: only the legacy accessor exists.
        getter = transfer.get_feature_names
    return getter()


# Dictionary feature extraction
def dict_dome():
    """Vectorize a small list of dicts and print the result.

    Prints the transformed feature matrix followed by the feature names
    learned by the vectorizer. Returns None.
    """
    data = [
        {"city": "北京", "temperature": 100},
        {"city": "上海", "temperature": 100},
        {"city": "深圳", "temperature": 100},
    ]
    # 1. Instantiate a transformer.
    # sparse=False asks for a dense array; a sparse matrix would instead
    # record only the non-zero values by position.
    transfer = DictVectorizer(sparse=False)
    # 2. Call fit_transform()
    data_new = transfer.fit_transform(data)
    print(data_new)
    print(_feature_names(transfer))


# Feature extraction on text
def count_dome():
    """Vectorize two short sentences and print the result.

    Prints the transformed matrix (converted with ``toarray()``) followed
    by the feature names learned by the vectorizer. Returns None.
    """
    data = ["Life is short,i like python", "Life is too long,i dislike python"]
    # 1. Instantiate a transformer object; stop_words is the stop-word list
    # (empty here).
    transfer = CountVectorizer(stop_words=[])
    # 2. Call fit_transform()
    data_new = transfer.fit_transform(data)
    print(data_new.toarray())
    print(_feature_names(transfer))
# Data preprocessing: normalization
def minmax_demo():
    """Scale the first three columns of dating.txt with MinMaxScaler.

    Reads "dating.txt" with pandas, keeps the first three columns, fits
    and applies a MinMaxScaler, and prints the transformed data.
    Returns None.
    """
    # Load the file and keep only the first three columns.
    features = pd.read_csv("dating.txt").iloc[:, :3]
    # Build the transformer, then fit and transform in one call.
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(features)
    print(scaled)


# Data preprocessing: standardization
def stand_demo():
    """Scale the first three columns of dating.txt with StandardScaler.

    Reads "dating.txt" with pandas, keeps the first three columns, fits
    and applies a StandardScaler, and prints the transformed data.
    Returns None.
    """
    # Load the file and keep only the first three columns.
    features = pd.read_csv("dating.txt").iloc[:, :3]
    # Build the transformer, then fit and transform in one call.
    scaler = StandardScaler()
    scaled = scaler.fit_transform(features)
    print(scaled)