一 海洋距离与温度实例
1 导包
import numpy as np
import pandas as pd
from pandas import DataFrame,Series
import matplotlib.pyplot as plt
# Make matplotlib render Chinese text
from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['FangSong'] # set the default font
mpl.rcParams['axes.unicode_minus'] = False # keep the minus sign '-' from showing as a square box in saved figures
# Import the machine-learning library
import sklearn
from sklearn.linear_model import LinearRegression
2 数据导入
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
# Load the three CSV exports of every city and stack them into one DataFrame
# per city. The file-name suffixes are presumably dates -- confirm against
# the data set.
_CSV_SUFFIXES = ('150715', '250715', '270615')


def _read_city_parts(city_name):
    """Read ``./<city_name>_<suffix>.csv`` for each suffix in ``_CSV_SUFFIXES``.

    Returns the DataFrames as a list, in the same order as ``_CSV_SUFFIXES``.
    """
    return [
        pd.read_csv('./{}_{}.csv'.format(city_name, suffix))
        for suffix in _CSV_SUFFIXES
    ]


ferrara1, ferrara2, ferrara3 = _read_city_parts('ferrara')
# Fix: the original concatenated ferrara1 three times, so the data from the
# second and third files was never included.
ferrara = pd.concat([ferrara1, ferrara2, ferrara3], ignore_index=True)

torino1, torino2, torino3 = _read_city_parts('torino')
torino = pd.concat([torino1, torino2, torino3], ignore_index=True)

mantova1, mantova2, mantova3 = _read_city_parts('mantova')
mantova = pd.concat([mantova1, mantova2, mantova3], ignore_index=True)

milano1, milano2, milano3 = _read_city_parts('milano')
milano = pd.concat([milano1, milano2, milano3], ignore_index=True)

ravenna1, ravenna2, ravenna3 = _read_city_parts('ravenna')
ravenna = pd.concat([ravenna1, ravenna2, ravenna3], ignore_index=True)

asti1, asti2, asti3 = _read_city_parts('asti')
asti = pd.concat([asti1, asti2, asti3], ignore_index=True)

bologna1, bologna2, bologna3 = _read_city_parts('bologna')
bologna = pd.concat([bologna1, bologna2, bologna3], ignore_index=True)

piacenza1, piacenza2, piacenza3 = _read_city_parts('piacenza')
piacenza = pd.concat([piacenza1, piacenza2, piacenza3], ignore_index=True)

cesena1, cesena2, cesena3 = _read_city_parts('cesena')
cesena = pd.concat([cesena1, cesena2, cesena3], ignore_index=True)

faenza1, faenza2, faenza3 = _read_city_parts('faenza')
faenza = pd.concat([faenza1, faenza2, faenza3], ignore_index=True)
View Code
3 去除没用的列
# Collect every city's DataFrame so the following steps can loop over them.
city_list = [
    ferrara, torino, mantova, milano, ravenna,
    asti, bologna, piacenza, cesena, faenza,
]
for city in city_list:
    # Remove the 'Unnamed: 0' column in place (presumably a leftover index
    # column from the CSV export -- confirm against the data files).
    # The loop body has to be indented; as pasted, it sat at column 0 and
    # the cell could not be parsed.
    city.drop(columns=['Unnamed: 0'], inplace=True)
4 数据清洗(获取距离和最高温度列表)
# For each city, in city_list order: the maximum of its 'temp' column and
# the 'dist' value of its first row.
# .iloc[0] takes the first row by position, so it does not depend on the
# index happening to contain the label 0.
city_temp = [city['temp'].max() for city in city_list]
city_dist = [city['dist'].iloc[0] for city in city_list]
5 绘制图像展示
# Scatter plot: one point per city, distance on x, maximum temperature on y.
plt.scatter(x=city_dist, y=city_temp)
plt.title('距离-最高温度之间的关系')
plt.xlabel('距离')
plt.ylabel('最高温度')
6 数据划分条件
- 观察发现,离海近的城市大致落在一条直线上,离海远的城市也大致落在另一条直线上。
- 分别以 100 公里和 50 公里为分界点,划分为近海和远海两组数据(近海:距离小于 100 公里;远海:距离大于 50 公里),两组数据在 50~100 公里之间有重叠。
# Convert the per-city lists to arrays so they can be filtered with boolean masks.
np_city_dist = np.array(city_dist)
np_city_temp = np.array(city_temp)

# Near-sea group: cities whose distance is below 100.
near_condition = np_city_dist < 100
near_dist = np_city_dist[near_condition]
near_temp = np_city_temp[near_condition]
plt.scatter(near_dist, near_temp)

# Create the linear-regression model for the near-sea group.
linner = LinearRegression()

# scikit-learn expects a 2-D feature matrix (n_samples, n_features), so the
# 1-D distance array is reshaped into a single column.
feature = near_dist.reshape(-1, 1)
target = near_temp

# Train the model.
linner.fit(feature, target)

# Score the fit on the training data.
linner.score(feature, target)

# Predict the maximum temperature at distance 10.
# Fix: predict() needs 2-D input like fit(); a bare scalar is rejected.
linner.predict([[10]])

# Draw the fitted line as 100 evenly spaced points over the distance range 0..75.
x1 = np.linspace(0, 75, num=100).reshape(-1, 1)
y1 = linner.predict(x1)
plt.scatter(near_dist, near_temp)
plt.scatter(x1, y1)
![](https://img2018.cnblogs.com/blog/1521877/201902/1521877-20190221201123756-2085987556.png)
# Far-sea group: cities whose distance is above 50, as stated in the text
# for this section. Fix: far_dist / far_temp were used below without ever
# being defined.
far_condition = np_city_dist > 50
far_dist = np_city_dist[far_condition]
far_temp = np_city_temp[far_condition]

# Create the model instance for the far-sea group.
linner2 = LinearRegression()

# Train the model; the distances are reshaped into a single feature column.
linner2.fit(far_dist.reshape(-1, 1), far_temp)

# Score the fit on the training data.
linner2.score(far_dist.reshape(-1, 1), far_temp)

# Predict the maximum temperature at distance 120.
# Fix: use the far-sea model (the original called the near-sea model
# `linner` here) and pass 2-D input, which predict() requires.
linner2.predict([[120]])

# Draw the far-sea regression line as 100 evenly spaced points over 50..370.
x2 = np.linspace(50, 370, num=100)
y2 = linner2.predict(x2.reshape(-1, 1))
plt.scatter(far_dist, far_temp)
plt.scatter(x2, y2)
7 最终数据展示
# Combined figure: both groups of cities together with their fitted lines.
fig = plt.figure()
for xs, ys in (
    (near_dist, near_temp),
    (x1, y1),
    (far_dist, far_temp),
    (x2, y2),
):
    plt.scatter(xs, ys)
# Save the figure to disk.
fig.savefig('距离-最高温度.png')
![](https://img2018.cnblogs.com/blog/1521877/201902/1521877-20190221201302481-746397290.png)
本文数据和代码:https://github.com/angleboygo/data_ansys