# coding: utf-8
#
# Notebook export: fit and compare two GraphLab Create linear-regression
# models of housing 'price' -- one using only 'size', one using a wider
# feature list -- and inspect their predictions.
#
# The statements below were previously collapsed onto a single comment line
# (so nothing executed); they are restored one per line, following the
# original "# In[..]" notebook cell markers.

# ### Import graphlab

# In[112]:

import graphlab

# In[113]:

graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)

# ### Read the CSV file

# In[114]:

# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
houses = graphlab.SFrame.read_csv('/Users/Redheat/Downloads/lianjia.csv')

# In[115]:

# Single-argument print(...) behaves the same as the Python 2 print statement.
print(houses)

# ### Open in the browser and choose the x and y axes

# In[116]:

graphlab.canvas.set_target('browser')  # render visualisations in the browser
houses.show(view="Scatter Plot", x="size", y="price")

# ### Split into training and test sets by fraction, then create a linear
# ### regression model

# In[117]:

# Fraction .8 with a fixed seed so the split is reproducible; the first
# returned part is used for training, the second for testing.
train_data, test_data = houses.random_split(.8, seed=0)

# In[118]:

# Single-feature model: predict 'price' from 'size' only.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['size'],
    validation_set=None,
)

# In[119]:

print(test_data['price'].mean())  # mean price of the test set

# In[120]:

print(sqft_model.evaluate(test_data))  # evaluate the model on the test set

# ### Plotting

# In[121]:

import matplotlib.pyplot as plt

# Draw plots inline in the notebook. get_ipython() is not defined in this
# file; presumably it is supplied by the IPython/Jupyter runtime.
get_ipython().magic(u'matplotlib inline')

# In[122]:

# Actual test prices as dots ('.') and the model's predictions as a line ('-').
plt.plot(
    test_data['size'], test_data['price'], '.',
    test_data['size'], sqft_model.predict(test_data), '-',
)

# In[123]:

# Fetch the learned weights. As a bare expression this is only displayed when
# run as a notebook cell.
sqft_model.get('coefficients')

# ### Add more features

# In[124]:

# NOTE(review): 'id' is included as a model feature; if it is a unique
# per-listing key it probably should not be -- confirm against the data.
house_features = ['village', 'room', 'size', 'direction', 'age', 'area', 'position', 'id']

# In[125]:

houses[house_features].show()

# In[99]:

houses.show(view='BoxWhisker Plot', x='area', y='price')

# ### Create a linear regression based on the new features

# In[110]:

# Multi-feature model trained on the same training split.
house_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=house_features,
    validation_set=None,
)

# In[101]:

print(sqft_model.evaluate(test_data))            # single-feature model evaluation
print(house_features_model.evaluate(test_data))  # multi-feature model evaluation

# ### Look up a price and predict it

# In[102]:

house1 = houses[houses['id'] == 'BJ0004399001']

# In[103]:

print(house1['price'])  # actual price

# In[104]:

print(sqft_model.predict(house1))  # price predicted by the single-feature model

# In[111]:

print(house_features_model.predict(house1))  # price predicted by the multi-feature model

# <img src="https://image1.ljcdn.com/lianjia-data-sync/ziroom/15289570226877_2440114952_0.jpg.600x450.jpg">

# In[ ]:
# Code repository (includes assignment answers): https://github.com/RedheatWei/aiproject/tree/master/Machine%20Learning%20Specialization/week2