zoukankan      html  css  js  c++  java
  • TensorFlow(六) KNN最近邻域法的使用

    import tensorflow as tf
    import numpy as np
    import matplotlib.pyplot as plt
    import requests
    
    # Session used to evaluate the graph built further down.
    sess = tf.Session()

    # Download the Boston housing data set (whitespace-separated numeric columns).
    house_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
    file = requests.get(house_url, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    file.raise_for_status()

    # Column names in file order; MEDV is the regression target.
    housing_header = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                      'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
    # Columns used as input features.
    col_used = ['CRIM', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT']
    num_features = len(col_used)

    # One list of floats per non-blank line. `str.split()` with no argument
    # collapses runs of whitespace, so no empty tokens need to be filtered out.
    housing_data = [
        [float(token) for token in line.split()]
        for line in file.text.splitlines()
        if line.strip()
    ]

    # Target as a column vector, shape (n_samples, 1).
    target_col = housing_header.index('MEDV')
    y_vals = np.array([[row[target_col]] for row in housing_data])

    # Feature matrix, shape (n_samples, num_features), columns in header order.
    feature_cols = [i for i, name in enumerate(housing_header) if name in col_used]
    x_vals = np.array([[row[i] for i in feature_cols] for row in housing_data])

    # Min-max scale every feature column to [0, 1].
    # np.ptp is used because the ndarray.ptp method no longer exists in NumPy 2.
    x_vals = (x_vals - x_vals.min(axis=0)) / np.ptp(x_vals, axis=0)
    
    
    
    # Split the samples into a training set (80%) and a test set (the rest).
    sample_count = len(x_vals)
    train_count = round(sample_count * 0.8)
    train_indices = np.random.choice(sample_count, train_count, replace=False)
    test_indices = list(set(range(sample_count)) - set(train_indices))

    # Selecting rows with an index array/list requires x_vals and y_vals to be
    # NumPy arrays (plain lists do not support this).
    x_vals_train, x_vals_test = x_vals[train_indices], x_vals[test_indices]
    # The spelling `y_vals_trian` is kept because the evaluation loop feeds
    # the training targets under exactly this name.
    y_vals_trian, y_vals_test = y_vals[train_indices], y_vals[test_indices]

    # Number of nearest neighbours used for each prediction.
    k = 4
    # Evaluate the whole test set as a single batch.
    batch_size = len(x_vals_test)
    
    # Placeholders: feature matrices are (rows, num_features), targets are (rows, 1).
    x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
    x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
    y_data_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
    y_data_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)

    # L1 distance between every test row and every training row.
    # (n_test, 1, F) broadcast against (n_train, F) gives (n_test, n_train, F);
    # summing over the feature axis leaves (n_test, n_train).
    distance = tf.reduce_sum(
        tf.abs(tf.subtract(x_data_train, tf.expand_dims(x_data_test, 1))),
        axis=2)

    # top_k returns the largest entries, so negate the distances to pick the
    # k smallest. Both results have shape (n_test, k); top_k_xvals holds the
    # NEGATED distances of the selected neighbours.
    top_k_xvals, top_k_indices = tf.nn.top_k(tf.negative(distance), k=k)

    # Per-row sum of the k (negated) distances, shape (n_test, 1).
    x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, axis=1), 1)

    # Normalise each row so the k weights sum to 1 (negative / negative is
    # positive). Broadcasting (n_test, k) / (n_test, 1) replaces the explicit
    # matmul-with-ones tiling. The result is expanded to (n_test, 1, k) for
    # the batched matmul below.
    # NOTE(review): each weight is proportional to the neighbour's distance, so
    # farther neighbours count more; inverse-distance weighting may have been
    # intended -- confirm before changing. If all k distances of a row are
    # zero, this division yields NaN.
    x_vals_weight = tf.expand_dims(tf.divide(top_k_xvals, x_sums), 1)

    # Targets of the selected neighbours: gather((n_train, 1), (n_test, k))
    # gives (n_test, k, 1).
    top_k_yvals = tf.gather(y_data_train, top_k_indices)

    # Weighted sum of the neighbour targets: (n_test, 1, k) @ (n_test, k, 1)
    # gives (n_test, 1, 1), squeezed to (n_test, 1).
    prediction = tf.squeeze(tf.matmul(x_vals_weight, top_k_yvals), axis=1)

    # Mean squared error over the rows actually fed. Using reduce_mean rather
    # than dividing by the fixed batch_size keeps the value correct when the
    # last batch is shorter than batch_size.
    mes = tf.reduce_mean(tf.square(tf.subtract(prediction, y_data_test)))

    # Number of batches needed to cover the test set.
    num_loops = int(np.ceil(len(x_vals_test) / batch_size))
    
    # Evaluate the test set batch by batch and report the MSE of each batch.
    for i in range(num_loops):
        # Row range of the test set covered by this batch; the final batch is
        # clipped to the end of the test set.
        min_index = i * batch_size
        max_index = min((i + 1) * batch_size, len(x_vals_test))
        x_batch = x_vals_test[min_index:max_index]
        y_batch = y_vals_test[min_index:max_index]

        batch_feed = {
            x_data_train: x_vals_train,
            x_data_test: x_batch,
            y_data_train: y_vals_trian,
            y_data_test: y_batch,
        }
        # Fetch predictions and MSE in a single run so the distance / top-k
        # graph is evaluated once per batch instead of twice.
        predictions, batch_mse = sess.run([prediction, mes], feed_dict=batch_feed)
        print('Batch #' + str(i + 1) + ' MSE: ' + str(np.round(batch_mse, 3)))
    
    
    # Overlaid frequency histograms of predicted vs. actual values.
    # `predictions` and `y_batch` are whatever the last evaluated batch left behind.
    bins = np.linspace(start=5, stop=50, num=101)

    # Draw both series with the same bin edges and transparency so they can
    # be compared directly.
    for series, legend_label in ((predictions, 'Prediciton '), (y_batch, 'Actual')):
        plt.hist(series, bins, alpha=0.5, label=legend_label)

    plt.title('Histogram of Predicted and Actual Value')
    plt.xlabel('home value')
    plt.ylabel('Frequency')
    plt.legend(loc='lower right')

    plt.show()

  • 相关阅读:
    phpmyadmin详细的图文使用教程
    从入门到深入FIDDLER 2
    TestNG学习-001-基础理论知识
    TestNG学习-002-annotaton 注解概述及其执行顺序
    自动化测试如何解决验证码的问题
    自动化测试 -- 通过Cookie跳过登录验证码
    JMeter学习-012-JMeter 配置元件之-HTTP Cookie管理器-实现 Cookie 登录
    自动化测试框架
    并发和并行概念及原理
    匿名内部实现多线程的两种方式创建
  • 原文地址:https://www.cnblogs.com/x0216u/p/9225560.html
Copyright © 2011-2022 走看看