import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
"""
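1. Plain linear prediction is rarely used in real-world applications.
   Classification algorithms built on linear regression --> logistic regression, softmax regression (works very well)
2. More often, prediction is done with gradient-boosted decision trees (better results).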
"""
# Load the insurance data set; the `with` block closes the file handle as soon
# as pandas has finished parsing it.
# NOTE(review): the path has no directory separators after the drive letter,
# so on Windows it resolves relative to the current directory of drive F: --
# confirm it points at the intended file.
with open(r'F:数据分析专用数据分析与机器学习insurance.csv') as f:
    data = pd.read_csv(f)
# print(data.describe())
# print(data.head())

# Frequency of each age value (only consumed by the optional bar chart below).
data_count = data['age'].value_counts()
# print(data_count)
# data_count[: 10].plot(kind='bar')
# plt.show()
# Correlation between columns
# print(data.corr())

reg = LinearRegression()

# 'age' stays first so that it is also the first column of the expanded
# polynomial feature matrix.
numeric_cols = ['age', 'bmi', 'children']
# presumably these hold text labels (e.g. male/female, yes/no, region names);
# verify against the CSV.
categorical_cols = ['sex', 'smoker', 'region']

# Numeric features: entries that cannot be parsed as numbers become NaN.
numeric_part = data[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Categorical features: one-hot encode text labels. Coercing them with
# pd.to_numeric would turn every label into NaN (then 0), silently removing
# these columns' information from the model. drop_first avoids a dummy set
# that is perfectly collinear with the intercept. Columns that are already
# numeric are passed through unchanged by get_dummies.
categorical_part = pd.get_dummies(
    data[categorical_cols], drop_first=True, dtype=float
)

# Remaining gaps (unparsable numbers, missing values) are filled with 0.
x = pd.concat([numeric_part, categorical_part], axis=1).fillna(0)
y = pd.to_numeric(data['charges'], errors='coerce').fillna(0)

# Expand the features with all polynomial/interaction terms up to degree 3;
# the bias column is omitted because LinearRegression fits its own intercept.
poly_fe = PolynomialFeatures(degree=3, include_bias=False)
# print(x)
# print(y)
X_poly = poly_fe.fit_transform(x)
reg.fit(X_poly, y)
# print(reg.coef_)
# print(reg.intercept_)

# In-sample predictions (evaluated on the same rows used for fitting).
y_pre = reg.predict(X_poly)

# Blue: observed charges by age; red: fitted charges by age.
plt.plot(x['age'], y, 'b.')
plt.plot(x['age'], y_pre, 'r.')
plt.show()