传统最小二乘法缺乏稳定性
岭回归的做法就是在损失函数中增加正则项:
\( \arg\min_w \; \|Xw - y\|_2^2 + \alpha \|w\|_2^2 \)
对应矩阵的求解方法为
\( w = (X^T X + \alpha I)^{-1} X^T y \)
其实就是添加正则项
sklearn.linear_model.Ridge
#### 主要参数
- alpha
- fit_intercept
车流量分析
# -*- coding: utf-8 -*-
"""
Created on Sun May 28 12:05:23 2017
@author: sfzyk
"""
import numpy as np
from sklearn.linear_model import Ridge
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split is provided by sklearn.model_selection instead.
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

# Load the numeric table, skipping the header row.
data = np.genfromtxt(r"岭回归.csv", delimiter=',', skip_header=1)
X = data[:, 1:5]  # columns 1..4 are used as the input features
y = data[:, 5]    # column 5 is used as the regression target

# Expand the features into all polynomial terms up to degree 6.
ploy = PolynomialFeatures(6)
X = ploy.fit_transform(X)

# Hold out 20% of the rows for evaluation. No random_state is given, so the
# split (and therefore the score) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit ridge regression with its default settings and score it on the
# held-out rows.
R = Ridge()
R.fit(X_train, y_train)
a = R.score(X_test, y_test)
## 手写数字识别
利用全连接神经网络(fully connected NN)完成任务,
也叫做多层感知机(MLP, multilayer perceptron)
神经网络实现手写数字识别
(sklearn 实现)
# -*- coding: utf-8 -*-
"""
Created on Sun May 28 12:30:11 2017
@author: sfzyk
"""
import os
import numpy as np
import sklearn.neural_network as sklnn
def img2vector(fileName):
    """Read a 32x32 grid of digit characters into a flat integer vector.

    The first 32 characters of each of the first 32 lines of the file are
    converted to integers and stored row by row.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A 1-D integer NumPy array of length 1024, where element
        ``i * 32 + j`` holds the value of character ``j`` on line ``i``.

    Raises:
        IndexError: If the file has fewer than 32 lines, or one of the first
            32 lines has fewer than 32 characters.
        ValueError: If a character in the grid is not a valid integer literal.
    """
    retMat = np.zeros([1024], int)
    # Use a context manager so the handle is closed even if parsing fails;
    # the previous version never closed the file.
    with open(fileName) as fr:
        lines = fr.readlines()
    for i in range(32):
        row = lines[i]
        for j in range(32):
            retMat[i * 32 + j] = int(row[j])
    return retMat
def readDataSet(path):
    """Load every file in a directory as a digit sample with a one-hot label.

    Each directory entry is passed to ``img2vector`` to obtain its feature
    row. The label is the integer before the first underscore in the file
    name.

    Args:
        path: Directory containing the sample files.

    Returns:
        A tuple ``(dataSet, hwLabels)``: ``dataSet`` is an integer array of
        shape ``(numFiles, 1024)`` and ``hwLabels`` is a float array of shape
        ``(numFiles, 10)`` with a single 1.0 per row at the label's index.
    """
    # Every entry found in the directory is treated as a sample file.
    names = os.listdir(path)
    count = len(names)
    # One 1024-element feature row per file.
    dataSet = np.zeros([count, 1024], int)
    # One 10-element one-hot label row per file.
    hwLabels = np.zeros([count, 10])
    for row, name in enumerate(names):
        label = int(name.split("_")[0])
        hwLabels[row, label] = 1.0
        dataSet[row] = img2vector(path + '/' + name)
    return dataSet, hwLabels
# NOTE(review): the original path literals had lost their backslashes (the
# "\t" in front of "rainingDigits"/"estDigits" had turned into whitespace) and
# did not look like valid directories. They are reconstructed here; confirm
# against the actual dataset location.
train_dataSet, train_hwLabels = readDataSet(r"D:\mechine_learning\mooc_data\trainingDigits")
test_dataSet, test_hwLabels = readDataSet(r"D:\mechine_learning\mooc_data\testDigits")

# hidden_layer_sizes is a tuple whose i-th element is the number of neurons
# in the i-th hidden layer.
clf = sklnn.MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='logistic',
    learning_rate_init=0.0001,
    max_iter=2000,
    solver='adam',
    verbose=True,
)
clf.fit(train_dataSet, train_hwLabels)
res = clf.predict(test_dataSet)

# A sample counts as an error when its predicted label row differs from the
# expected one-hot row in at least one position.
error_num = int(np.sum(np.any(res != test_hwLabels, axis=1)))
print("%f" % (error_num / len(test_hwLabels)))