Going straight to the code:
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def load_data(filename):
    data = np.genfromtxt(filename, delimiter=' ')
    x = data[:, 1:]  # feature columns
    y = data[:, 0].astype(int)  # label column
    scaler = StandardScaler()  # standardize features (zero mean, unit variance)
    x_std = scaler.fit_transform(x)
    # Split into training and test sets; test_size=.5 reserves 50% for testing
    x_train, x_test, y_train, y_test = train_test_split(x_std, y, test_size=.5)
    print(len(x_train), len(x_test), len(y_train), len(y_test))
    return x_train, x_test, y_train, y_test
def svm_c(x_train, x_test, y_train, y_test):
    predictor = SVC(gamma='scale', C=1.0, decision_function_shape='ovr', kernel='rbf')
    predictor.fit(x_train, y_train)
    # answer = predictor.predict(x_test)  # predict labels for the test set
    print(predictor.score(x_test, y_test))  # mean accuracy on the test set
    # print(predictor.support_vectors_)  # the support vectors themselves
    # print(predictor.support_)  # indices of the support vectors
    # print(predictor.n_support_)  # number of support vectors per class

if __name__ == '__main__':
    svm_c(*load_data('txt/10/frame505/all.txt'))
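
If you want more detail than the single accuracy number that score() prints, per-class metrics are easy to add. Here is a minimal sketch (not part of my original code; svm_report is just a hypothetical helper name) using sklearn.metrics:

from sklearn.metrics import classification_report, confusion_matrix

def svm_report(x_train, x_test, y_train, y_test):
    predictor = SVC(gamma='scale', C=1.0, kernel='rbf')
    predictor.fit(x_train, y_train)
    answer = predictor.predict(x_test)
    print(confusion_matrix(y_test, answer))       # rows: true labels, columns: predicted labels
    print(classification_report(y_test, answer))  # precision / recall / F1 for each class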
In my txt dataset the first column is the label, i.e. the final classification result, and the remaining columns are the features, so the column slicing in load_data() should now be easy to follow.
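
If you don't have a dataset in this layout yet, the following sketch writes a compatible file and runs the pipeline on it; the file name demo.txt and the random values are made up purely to illustrate the format (label first, space-separated features after):

import numpy as np

rng = np.random.default_rng(0)
labels = rng.integers(0, 2, size=(100, 1))  # first column: class label
features = rng.normal(size=(100, 4))        # remaining columns: features
np.savetxt('demo.txt', np.hstack([labels, features]), delimiter=' ')
svm_c(*load_data('demo.txt'))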