python风控建模实战lendingClub(博主录制,catboost,lightgbm建模,2K超清分辨率)
https://study.163.com/course/courseMain.htm?courseId=1005988013&share=2&shareId=400000000398149
机器学习,统计项目联系:QQ:231469242

# -*- coding: utf-8 -*-
"""
Created on Mon Jul 10 11:04:51 2017
@author: toby
"""
# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
def fitLine(x, y, alpha=0.05, newx=[], plotFlag=1):
''' Fit a curve to the data using a least squares 1st order polynomial fit '''
# Summary data
n = len(x) # number of samples
Sxx = np.sum(x**2) - np.sum(x)**2/n
# Syy = np.sum(y**2) - np.sum(y)**2/n # not needed here
Sxy = np.sum(x*y) - np.sum(x)*np.sum(y)/n
mean_x = np.mean(x)
mean_y = np.mean(y)
# Linefit
b = Sxy/Sxx
a = mean_y - b*mean_x
# Residuals
fit = lambda xx: a + b*xx
residuals = y - fit(x)
var_res = np.sum(residuals**2)/(n-2)
sd_res = np.sqrt(var_res)
# Confidence intervals
se_b = sd_res/np.sqrt(Sxx)
se_a = sd_res*np.sqrt(np.sum(x**2)/(n*Sxx))
df = n-2 # degrees of freedom
tval = stats.t.isf(alpha/2., df) # appropriate t value
ci_a = a + tval*se_a*np.array([-1,1])
ci_b = b + tval*se_b*np.array([-1,1])
# create series of new test x-values to predict for
npts = 100
px = np.linspace(np.min(x),np.max(x),num=npts)
se_fit = lambda x: sd_res * np.sqrt( 1./n + (x-mean_x)**2/Sxx)
se_predict = lambda x: sd_res * np.sqrt(1+1./n + (x-mean_x)**2/Sxx)
print(('Summary: a={0:5.4f}+/-{1:5.4f}, b={2:5.4f}+/-{3:5.4f}'.format(a,tval*se_a,b,tval*se_b)))
print(('Confidence intervals: ci_a=({0:5.4f} - {1:5.4f}), ci_b=({2:5.4f} - {3:5.4f})'.format(ci_a[0], ci_a[1], ci_b[0], ci_b[1])))
print(('Residuals: variance = {0:5.4f}, standard deviation = {1:5.4f}'.format(var_res, sd_res)))
print(('alpha = {0:.3f}, tval = {1:5.4f}, df={2:d}'.format(alpha, tval, df)))
# Return info
ri = {'residuals': residuals,
'var_res': var_res,
'sd_res': sd_res,
'alpha': alpha,
'tval': tval,
'df': df}
if plotFlag == 1:
# Plot the data
plt.figure()
plt.plot(px, fit(px),'k', label='Regression line')
#plt.plot(x,y,'k.', label='Sample observations', ms=10)
plt.plot(x,y,'k.')
x.sort()
limit = (1-alpha)*100
plt.plot(x, fit(x)+tval*se_fit(x), 'r--', lw=2, label='Confidence limit ({0:.1f}%)'.format(limit))
plt.plot(x, fit(x)-tval*se_fit(x), 'r--', lw=2 )
plt.plot(x, fit(x)+tval*se_predict(x), '--', lw=2, color=(0.2,1,0.2), label='Prediction limit ({0:.1f}%)'.format(limit))
plt.plot(x, fit(x)-tval*se_predict(x), '--', lw=2, color=(0.2,1,0.2))
plt.xlabel('X values')
plt.ylabel('Y values')
plt.title('Linear regression and confidence limits')
# configure legend
plt.legend(loc=0)
leg = plt.gca().get_legend()
ltext = leg.get_texts()
plt.setp(ltext, fontsize=14)
# show the plot
outFile = 'regression_wLegend.png'
plt.savefig(outFile, dpi=200)
print('Image saved to {0}'.format(outFile))
plt.show()
if newx != []:
try:
newx.size
except AttributeError:
newx = np.array([newx])
print(('Example: x = {0}+/-{1} => se_fit = {2:5.4f}, se_predict = {3:6.5f}'
.format(newx[0], tval*se_predict(newx[0]), se_fit(newx[0]), se_predict(newx[0]))))
newy = (fit(newx), fit(newx)-se_predict(newx), fit(newx)+se_predict(newx))
return (a,b,(ci_a, ci_b), ri, newy)
else:
return (a,b,(ci_a, ci_b), ri)
def Draw_confidenceInterval(x,y):
x=np.array(x)
y=np.array(y)
goodIndex = np.invert(np.logical_or(np.isnan(x), np.isnan(y)))
(a,b,(ci_a, ci_b), ri,newy) = fitLine(x[goodIndex],y[goodIndex], alpha=0.01,newx=np.array([1,4.5]))
y=[6.47,6.13,6.19,4.89,5.63,4.52,5.89,4.79,5.27,6.08]
x=[4.03,3.76,3.77,3.34,3.47,2.92,3.20,2.71,3.53,4.51]
Draw_confidenceInterval(x,y)
https://study.163.com/provider/400000000398149/index.htm?share=2&shareId=400000000398149( 欢迎关注博主主页,学习python视频资源,还有大量免费python经典文章)

