zoukankan      html  css  js  c++  java
  • TensorFlow(六) KNN最近邻域法的使用

    import tensorflow as tf
    import numpy as np
    import matplotlib.pyplot as plt
    import requests
    
    # TF1 graph-mode session; the ops defined later in the script are evaluated through it.
    sess=tf.Session()

    # Download the raw housing table (whitespace-separated numeric text).
    house_url='https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
    # The timeout keeps the script from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being parsed as data.
    file=requests.get(house_url,timeout=30)
    file.raise_for_status()

    # Column names of the table, in file order, and the subset used as features.
    housing_header=['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT','MEDV']
    col_used=['CRIM','INDUS','NOX','RM','AGE','DIS','TAX','PTRATIO','B','LSTAT']
    num_features=len(col_used)

    # One list of floats per non-blank line. str.split() without an argument
    # collapses runs of whitespace, so no empty tokens need filtering, and
    # whitespace-only lines are skipped instead of producing an empty row.
    housing_data=[[float(x) for x in y.split()] for y in file.text.splitlines() if y.strip()]

    # Target: the 'MEDV' column (index 13 in housing_header) as an (n_rows, 1) array.
    target_index=housing_header.index('MEDV')
    y_vals=np.array([[y[target_index]] for y in housing_data])

    # Features: the columns named in col_used, kept in housing_header order.
    used_indices=[i for i,name in enumerate(housing_header) if name in col_used]
    x_vals=np.array([[y[i] for i in used_indices] for y in housing_data])

    # Min-max scale each feature column to [0, 1]. np.ptp (max - min) is used
    # as a function because the ndarray.ptp method was removed in NumPy 2.0.
    x_vals=(x_vals-x_vals.min(0))/np.ptp(x_vals,axis=0)
    
    
    
    # Split into training and test sets: 80% of the rows are drawn at random
    # (without replacement) for training, the rest are held out for testing.
    n_samples=len(x_vals)
    n_train=round(n_samples*0.8)
    train_indices=np.random.choice(n_samples,n_train,replace=False)
    # Every row index that was not drawn for training.
    test_indices=list(set(range(n_samples))-set(train_indices))

    # Selecting rows with an index array/list requires x_vals and y_vals to be
    # NumPy arrays. 'y_vals_trian' (sic) keeps its spelling because the
    # evaluation loop further down refers to it by that name.
    x_vals_train,x_vals_test=x_vals[train_indices],x_vals[test_indices]
    y_vals_trian,y_vals_test=y_vals[train_indices],y_vals[test_indices]

    # Number of nearest neighbours consulted per prediction.
    k=4
    # The whole test set is evaluated as a single batch.
    batch_size=len(x_vals_test)
    
    # Placeholders: feature matrices for the reference (training) rows and the
    # query (test) rows, plus their single-column targets.
    x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
    x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
    y_data_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
    y_data_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)

    # L1 distance between every test row and every training row.
    # expand_dims makes x_data_test (n_test, 1, num_features) so the subtraction
    # broadcasts to (n_test, n_train, num_features); summing the last axis
    # yields (n_test, n_train). `axis` replaces the deprecated `reduction_indices`.
    distance=tf.reduce_sum(tf.abs(tf.subtract(x_data_train,tf.expand_dims(x_data_test,1))),axis=2)

    # top_k returns the largest entries, so negating the distances selects the
    # k smallest ones. top_k_xvals holds the negated distances and top_k_indices
    # the matching training-row indices; both are (n_test, k).
    top_k_xvals,top_k_indices=tf.nn.top_k(tf.negative(distance),k=k)

    # Per-row sum of the k negated distances, shape (n_test, 1).
    x_sums=tf.expand_dims(tf.reduce_sum(top_k_xvals,axis=1),1)
    # Normalise each row so its k weights sum to 1 (the negative signs cancel).
    # Broadcasting (n_test, k) / (n_test, 1) replaces the explicit repeat via
    # matmul with a ones matrix. The extra axis gives (n_test, 1, k) so it can
    # be matrix-multiplied with the (n_test, k, 1) neighbour targets.
    # NOTE(review): the weight is proportional to distance, so the farther of
    # the k neighbours counts more, and a row whose k distances are all zero
    # divides by zero. Confirm this weighting is intended.
    x_vals_weight=tf.expand_dims(tf.divide(top_k_xvals,x_sums),1)
    # Targets of the k neighbours of each test row: (n_test, k, 1).
    top_k_yvals=tf.gather(y_data_train,top_k_indices)
    # Weighted sum of neighbour targets: (n_test, 1, 1) squeezed to (n_test, 1).
    # `axis` replaces the deprecated `squeeze_dims`.
    prediction=tf.squeeze(tf.matmul(x_vals_weight,top_k_yvals),axis=[1])

    # Mean squared error over the rows actually fed. reduce_mean divides by the
    # real batch length, so a final batch shorter than batch_size is not
    # under-weighted as it was when dividing by the constant batch_size.
    mes=tf.reduce_mean(tf.square(tf.subtract(prediction,y_data_test)))
    # Number of batches needed to cover the test set.
    num_loops=int(np.ceil(len(x_vals_test)/batch_size))
    
    # Evaluate the test set batch by batch and report each batch's MSE.
    for i in range(num_loops):
        # Row range of this batch; the last batch is clipped to the test-set length.
        min_index=i*batch_size
        max_index=min((i+1)*batch_size,len(x_vals_test))
        x_batch=x_vals_test[min_index:max_index]
        y_batch=y_vals_test[min_index:max_index]
        # Fetch predictions and MSE in a single run so the distance graph is
        # evaluated once per batch instead of twice.
        predictions,batch_mse=sess.run([prediction,mes], feed_dict={x_data_train: x_vals_train, x_data_test: x_batch, y_data_train: y_vals_trian,y_data_test:y_batch})
        print('Batch #'+str(i+1)+' MSE: '+str(np.round(batch_mse,3)))
    # After the loop, `predictions` and `y_batch` hold the values of the last
    # batch only; the plotting code below reads them.
    
    
    # Frequency histograms of predicted vs. actual values, overlaid on shared bins.
    bins=np.linspace(5,50,num=101)

    # Draw both series half-transparent so the overlap stays visible.
    for series,series_label in ((predictions,'Prediciton '),(y_batch,'Actual')):
        plt.hist(series, bins, alpha=0.5, label=series_label)

    # Titles, axis labels and legend.
    plt.title('Histogram of Predicted and Actual Value')
    plt.xlabel('home value')
    plt.ylabel('Frequency')
    plt.legend(loc='lower right')

    plt.show()

  • 相关阅读:
    sqlhelper使用指南
    大三学长带我学习JAVA。作业1. 第1讲.Java.SE入门、JDK的下载与安装、第一个Java程序、Java程序的编译与执行 大三学长带我学习JAVA。作业1.
    pku1201 Intervals
    hdu 1364 king
    pku 3268 Silver Cow Party
    pku 3169 Layout
    hdu 2680 Choose the best route
    hdu 2983
    pku 1716 Integer Intervals
    pku 2387 Til the Cows Come Home
  • 原文地址:https://www.cnblogs.com/x0216u/p/9225560.html
Copyright © 2011-2022 走看看