zoukankan      html  css  js  c++  java
  • 归一化问题

    原理

     进行缩放的原因和使用神经网络时的考虑是一样的:RBF网络是用样本数据之间的欧氏距离来计算的。缩放的主要优点是避免数值范围较大的属性支配数值范围较小的属性;另一个优点是避免计算时出现数值困难(numerical difficulties),因为核值通常依赖特征向量的内积(inner product),而较大的属性值可能导致数值问题。因此推荐把每个属性缩放到[-1, 1]或者[0, 1]之间,而且前一个范围要比后一个好,即对列向量进行规范化,其详细解释和计算公式见 http://www.faqs.org/faqs/ai-faq/neural-nets/part2/ 中的“Should I standardize the input variables (column vectors)?”。libsvm中没有考虑属性的类型(效益、成本、固定、偏离、区间、偏离区间这 6 种不同属性类型的规范化计算公式是不一样的,详见:徐泽水,《不确定多属性决策方法及应用》,清华大学出版社,2004),而是采用了统一的线性缩放,笔者认为此处可以改进一下。

        需要注意的是,在进行测试之前,要对测试数据进行同样的缩放操作。其实在libsvm中有程序(svmscale.exe)来进行缩放操作。

        上面这两种方法基本上可以完成所有的样本的预处理了。

    解决方法:

    源:A[]
    结果:B[]
    A的最大最小值 MaxVal,MinVal
    B中希望的最大最小值 MaxOut,MinOut

    循环
    {
      B[] = MinOut + (MaxOut - MinOut) * (A[] - MinVal) / (MaxVal - MinVal);  // 当 MinOut=0、MaxOut=1 时即 (A[] - MinVal) / (MaxVal - MinVal)

    }

    思路:准备把数据从txt读入vector二维数组进行处理!

    一点想法,想保存起来:

    主要是得到了转置矩阵,不过后来想想貌似不起神马作用:

    #include <iostream>
    #include <string>
    #include <fstream>
    #include <sstream>
    #include <exception>
    #include <stdexcept>
    #include<vector>
    using namespace std;
    // Stream a 1-D vector: every element is written followed by a single
    // space, so the output ends with a trailing space and no newline.
    // The vector is taken by const reference so printing never copies it, and
    // a plain iterator loop is used so no <algorithm>/<iterator> include is
    // required.
    template <class T>
    ostream& operator << (ostream& os, const vector<T>& vec)
    {
        for (typename vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
        {
            os << *it << " ";
        }
        return os;
    }

    // Stream a 2-D vector: one inner vector per line. Each row is formatted by
    // the 1-D overload and terminated by a newline character.
    template <class T>
    ostream& operator << (ostream& os, const vector<vector<T> >& vec)
    {
        // `typename` is mandatory: const_iterator is a type that depends on T.
        for (typename vector<vector<T> >::const_iterator row = vec.begin(); row != vec.end(); ++row)
        {
            os << *row << '\n';
        }
        return os;
    }
    // Min-max scale *data relative to the range [*minval, *maxval]:
    //     (*data - *minval) / (*maxval - *minval)
    // A value equal to *minval maps to 0 and a value equal to *maxval maps to 1.
    //
    // The pointed-to VALUES define the range; the pointers themselves are never
    // subtracted. When the range is empty (*maxval == *minval) T() is returned
    // instead of dividing by zero.
    // Note: with an integral T the division truncates, so a floating-point T
    // is the sensible choice.
    template <class T>
    T normalization(T *minval,T *maxval,int *data)
    {
        const T range = *maxval - *minval;
        if (range == T())   // exact zero check: only guards the division below
            return T();
        return (*data - *minval) / range;
    }
    // Return the TRANSPOSE of ivecvec: result[c][r] == ivecvec[r][c].
    // (Despite its name this function does not compute a matrix inverse.)
    //
    // The number of columns is taken from the first row. An empty input yields
    // an empty result. If a row is shorter than the first one, the missing
    // cells stay value-initialized (T()); entries beyond the first row's length
    // are ignored. The input is taken by const reference, so it is not copied.
    template <class T>
    vector<vector<T> > InverseMatrix(const vector<vector<T> >& ivecvec)
    {
        if (ivecvec.empty())
            return vector<vector<T> >();

        const size_t rows = ivecvec.size();
        const size_t cols = ivecvec[0].size();

        // cols x rows result; the inner vector is spelled out because the
        // size-only vector constructor is explicit.
        vector<vector<T> > transposed(cols, vector<T>(rows));
        for (size_t r = 0; r < rows; ++r)
        {
            const size_t usable = ivecvec[r].size() < cols ? ivecvec[r].size() : cols;
            for (size_t c = 0; c < usable; ++c)
            {
                transposed[c][r] = ivecvec[r][c];
            }
        }
        return transposed;
    }
    int main()
    {
    	vector<double> ivec;
    	vector<double>::iterator iter;
    	vector<vector<double> > ivecvec;
    	vector<vector<double> >::iterator iiter;
    	ifstream infile("e:\\test_data.txt");
    	string temp;
    	double a;
    	while(getline(infile, temp))
    	{
    		stringstream line(temp);
    		while(line >> a)
    		{
    			ivec.push_back(a);
    		}
    		ivecvec.push_back(ivec);
    		ivec.clear();
    	}
    	//存储数列行数row
    	int row = ivecvec.size();
    	//数列列数
    	int line = ivecvec[0].size();
    	//存储每列的最值
    	vector<vector<double> > m_val(line-1,2);
        cout << InverseMatrix(ivecvec);
    	return 0;
    }
    

      修改思路,终于搞定,就是麻烦点,算法效率低点吧,继续改进!

    #include <iostream>
    #include <string>
    #include <fstream>
    #include <sstream>
    #include <exception>
    #include <stdexcept>
    #include<vector>
    using namespace std;
    // Stream a 1-D vector: every element is written followed by a single
    // space, so the output ends with a trailing space and no newline.
    // The vector is taken by const reference so printing never copies it, and
    // a plain iterator loop is used so no <algorithm>/<iterator> include is
    // required.
    template <class T>
    ostream& operator << (ostream& os, const vector<T>& vec)
    {
        for (typename vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
        {
            os << *it << " ";
        }
        return os;
    }

    // Stream a 2-D vector: one inner vector per line. Each row is formatted by
    // the 1-D overload and terminated by a newline character.
    template <class T>
    ostream& operator << (ostream& os, const vector<vector<T> >& vec)
    {
        // `typename` is mandatory: const_iterator is a type that depends on T.
        for (typename vector<vector<T> >::const_iterator row = vec.begin(); row != vec.end(); ++row)
        {
            os << *row << '\n';
        }
        return os;
    }
    // Min-max scale `data` relative to the range [minval, maxval]:
    //     (data - minval) / (maxval - minval)
    // so minval maps to 0 and maxval maps to 1.
    // When the range is empty (maxval == minval) T() is returned instead of
    // dividing by zero. With an integral T the division truncates.
    template <class T>
    T normalization(T minval,T maxval,T data)
    {
        const T range = maxval - minval;
        if (range == T())   // exact zero check: only guards the division below
            return T();
        return (data - minval) / range;
    }

    // Column-wise min-max normalization of a row-major table.
    //
    // For every column the minimum and maximum are taken over the values that
    // are actually present, then each value of that column is replaced by
    // normalization(min, max, value). The scaled table is returned; the
    // caller's table is untouched because the argument is passed by value.
    //
    // The extremes are seeded from the first value of the column (not from 0),
    // so all-positive and all-negative columns get their true range.
    // An empty table is returned unchanged. Rows may have different lengths:
    // a row simply does not contribute to columns it does not have.
    template <class T>
    vector<vector<T> > get_vec_normalization(vector<vector<T> > ivecvec)
    {
        const size_t rows = ivecvec.size();

        // Column count = length of the longest row.
        size_t cols = 0;
        for (size_t r = 0; r < rows; ++r)
        {
            if (ivecvec[r].size() > cols)
                cols = ivecvec[r].size();
        }

        for (size_t c = 0; c < cols; ++c)
        {
            // Pass 1: find the extremes of column c.
            bool seen = false;
            T min_val = T();
            T max_val = T();
            for (size_t r = 0; r < rows; ++r)
            {
                if (c >= ivecvec[r].size())
                    continue;
                const T& v = ivecvec[r][c];
                if (!seen)
                {
                    min_val = max_val = v;
                    seen = true;
                }
                else if (v < min_val)
                    min_val = v;
                else if (max_val < v)
                    max_val = v;
            }

            // Pass 2: rescale column c in place.
            for (size_t r = 0; r < rows; ++r)
            {
                if (c < ivecvec[r].size())
                    ivecvec[r][c] = normalization(min_val, max_val, ivecvec[r][c]);
            }
        }
        return ivecvec;
    }
    
    // Read a whitespace-separated numeric table from e:\train.txt (one row per
    // text line), min-max normalize every column and print the result to
    // standard output.
    // Returns 1 when the file cannot be opened or holds no numeric data.
    int main()
    {
        ifstream infile("e:\\train.txt");
        if (!infile)
        {
            cerr << "cannot open e:\\train.txt" << endl;
            return 1;
        }

        vector<vector<double> > ivecvec;
        string temp;
        while (getline(infile, temp))
        {
            stringstream line(temp);
            vector<double> ivec;
            double a;
            while (line >> a)
            {
                ivec.push_back(a);
            }
            // Blank (or non-numeric) lines would otherwise become empty rows.
            if (!ivec.empty())
                ivecvec.push_back(ivec);
        }

        if (ivecvec.empty())
        {
            cerr << "no numeric data in e:\\train.txt" << endl;
            return 1;
        }

        cout << get_vec_normalization(ivecvec);
        return 0;
    }
    

      

    还是存储到文件比较好,便于以后进行处理,继续改:

    #include <iostream>
    #include <string>
    #include <fstream>
    #include <sstream>
    #include <exception>
    #include <stdexcept>
    #include<vector>
    using namespace std;
    // Stream a 1-D vector: every element is written followed by a single
    // space, so the output ends with a trailing space and no newline.
    // The vector is taken by const reference so printing never copies it, and
    // a plain iterator loop is used so no <algorithm>/<iterator> include is
    // required.
    template <class T>
    ostream& operator << (ostream& os, const vector<T>& vec)
    {
        for (typename vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
        {
            os << *it << " ";
        }
        return os;
    }

    // Stream a 2-D vector: one inner vector per line. Each row is formatted by
    // the 1-D overload and terminated by a newline character.
    template <class T>
    ostream& operator << (ostream& os, const vector<vector<T> >& vec)
    {
        // `typename` is mandatory: const_iterator is a type that depends on T.
        for (typename vector<vector<T> >::const_iterator row = vec.begin(); row != vec.end(); ++row)
        {
            os << *row << '\n';
        }
        return os;
    }
    // Min-max scale `data` relative to the range [minval, maxval]:
    //     (data - minval) / (maxval - minval)
    // so minval maps to 0 and maxval maps to 1.
    // When the range is empty (maxval == minval) T() is returned instead of
    // dividing by zero. With an integral T the division truncates.
    template <class T>
    T normalization(T minval,T maxval,T data)
    {
        const T range = maxval - minval;
        if (range == T())   // exact zero check: only guards the division below
            return T();
        return (data - minval) / range;
    }

    // Column-wise min-max normalization of a row-major table, with the result
    // also written to a text file.
    //
    // ivecvec   table to scale (passed by value; the caller's copy is untouched)
    // out_path  file that receives the scaled table; defaults to the path the
    //           function originally had hard-coded
    // returns   the scaled table
    // throws    runtime_error("openfile error") if out_path cannot be opened
    //
    // For every column the minimum and maximum are taken over the values that
    // are actually present, then each value is replaced by
    // normalization(min, max, value). The extremes are seeded from the first
    // value of the column (not from 0), so all-positive and all-negative
    // columns get their true range. Rows may have different lengths: a row does
    // not contribute to columns it does not have.
    //
    // File format: one table row per line, every value followed by one space.
    template <class T>
    vector<vector<T> > get_vec_normalization(vector<vector<T> > ivecvec,
                                             const string& out_path = "e:\\outfile.txt")
    {
        ofstream outfile(out_path.c_str());
        if (!outfile)
            throw runtime_error("openfile error");

        const size_t rows = ivecvec.size();

        // Column count = length of the longest row.
        size_t cols = 0;
        for (size_t r = 0; r < rows; ++r)
        {
            if (ivecvec[r].size() > cols)
                cols = ivecvec[r].size();
        }

        for (size_t c = 0; c < cols; ++c)
        {
            // Pass 1: find the extremes of column c.
            bool seen = false;
            T min_val = T();
            T max_val = T();
            for (size_t r = 0; r < rows; ++r)
            {
                if (c >= ivecvec[r].size())
                    continue;
                const T& v = ivecvec[r][c];
                if (!seen)
                {
                    min_val = max_val = v;
                    seen = true;
                }
                else if (v < min_val)
                    min_val = v;
                else if (max_val < v)
                    max_val = v;
            }

            // Pass 2: rescale column c in place.
            for (size_t r = 0; r < rows; ++r)
            {
                if (c < ivecvec[r].size())
                    ivecvec[r][c] = normalization(min_val, max_val, ivecvec[r][c]);
            }
        }

        // Dump the scaled table; the stream is closed by its destructor.
        for (size_t r = 0; r < rows; ++r)
        {
            for (size_t c = 0; c < ivecvec[r].size(); ++c)
            {
                outfile << ivecvec[r][c] << " ";
            }
            outfile << '\n';
        }
        return ivecvec;
    }
    
    // Read a whitespace-separated numeric table from e:\train.txt (one row per
    // text line), min-max normalize every column and print the result to
    // standard output. get_vec_normalization also saves the result to a file.
    // Returns 1 when the input cannot be opened, holds no numeric data, or the
    // normalization step reports a runtime_error.
    int main()
    {
        ifstream infile("e:\\train.txt");
        if (!infile)
        {
            cerr << "cannot open e:\\train.txt" << endl;
            return 1;
        }

        vector<vector<double> > ivecvec;
        string temp;
        while (getline(infile, temp))
        {
            stringstream line(temp);
            vector<double> ivec;
            double a;
            while (line >> a)
            {
                ivec.push_back(a);
            }
            // Blank (or non-numeric) lines would otherwise become empty rows.
            if (!ivec.empty())
                ivecvec.push_back(ivec);
        }

        if (ivecvec.empty())
        {
            cerr << "no numeric data in e:\\train.txt" << endl;
            return 1;
        }

        try
        {
            cout << get_vec_normalization(ivecvec);
        }
        catch (const runtime_error& e)
        {
            // Raised when the output file cannot be opened.
            cerr << e.what() << endl;
            return 1;
        }
        return 0;
    }
    

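      根据公式 $y' = y_{lower} + (y_{upper} - y_{lower}) \cdot \dfrac{y - y_{min}}{y_{max} - y_{min}}$(其中 $y_{min}$、$y_{max}$ 为该列的最小值和最大值,$[y_{lower}, y_{upper}]$ 为目标区间)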

    对原来的代码进行修改:

    #include <iostream>
    #include <string>
    #include <fstream>
    #include <sstream>
    #include <exception>
    #include <stdexcept>
    #include<vector>
    using namespace std;
    // Stream a 1-D vector: every element is written followed by a single
    // space, so the output ends with a trailing space and no newline.
    // The vector is taken by const reference so printing never copies it, and
    // a plain iterator loop is used so no <algorithm>/<iterator> include is
    // required.
    template <class T>
    ostream& operator << (ostream& os, const vector<T>& vec)
    {
        for (typename vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
        {
            os << *it << " ";
        }
        return os;
    }

    // Stream a 2-D vector: one inner vector per line. Each row is formatted by
    // the 1-D overload and terminated by a newline character.
    template <class T>
    ostream& operator << (ostream& os, const vector<vector<T> >& vec)
    {
        // `typename` is mandatory: const_iterator is a type that depends on T.
        for (typename vector<vector<T> >::const_iterator row = vec.begin(); row != vec.end(); ++row)
        {
            os << *row << '\n';
        }
        return os;
    }
    // Linearly map `value` from the source range [y_min, y_max] onto the
    // target range [y_lower, y_upper]:
    //     y_lower + (y_upper - y_lower) * (value - y_min) / (y_max - y_min)
    //
    // y_min, y_max      extremes of the source data (e.g. of one column)
    // value             the value to scale
    // y_upper, y_lower  bounds of the target range (note the argument order)
    //
    // A value exactly equal to y_min / y_max returns exactly y_lower / y_upper,
    // which keeps the end points free of rounding error. If the source range
    // is empty (y_min == y_max) and the value is not equal to it, y_lower is
    // returned instead of dividing by zero.
    template <class T>
    T normalization(T y_min,T y_max,T value,int y_upper,int y_lower)
    {
        if (value == y_min)
            return static_cast<T>(y_lower);
        if (value == y_max)
            return static_cast<T>(y_upper);
        if (y_max == y_min)   // degenerate range: nothing to interpolate over
            return static_cast<T>(y_lower);
        return y_lower + (y_upper - y_lower) *
            (value - y_min) / (y_max - y_min);
    }

    // Column-wise scaling of a row-major table to the range [-1, 1], with the
    // result also written to a text file.
    //
    // ivecvec   table to scale (passed by value; the caller's copy is untouched)
    // out_path  file that receives the scaled table; defaults to the path the
    //           function originally had hard-coded
    // returns   the scaled table
    // throws    runtime_error("openfile error") if out_path cannot be opened
    //
    // For every column the minimum and maximum are taken over the values that
    // are actually present, then each value is replaced by
    // normalization(min, max, value, 1, -1). The extremes are seeded from the
    // first value of the column (not from 0), so all-positive and all-negative
    // columns get their true range. Rows may have different lengths: a row does
    // not contribute to columns it does not have.
    //
    // File format: one table row per line, every value followed by one space.
    template <class T>
    vector<vector<T> > get_vec_normalization(vector<vector<T> > ivecvec,
                                             const string& out_path = "e:\\outfile.txt")
    {
        ofstream outfile(out_path.c_str());
        if (!outfile)
            throw runtime_error("openfile error");

        const size_t rows = ivecvec.size();

        // Column count = length of the longest row.
        size_t cols = 0;
        for (size_t r = 0; r < rows; ++r)
        {
            if (ivecvec[r].size() > cols)
                cols = ivecvec[r].size();
        }

        for (size_t c = 0; c < cols; ++c)
        {
            // Pass 1: find the extremes of column c.
            bool seen = false;
            T min_val = T();
            T max_val = T();
            for (size_t r = 0; r < rows; ++r)
            {
                if (c >= ivecvec[r].size())
                    continue;
                const T& v = ivecvec[r][c];
                if (!seen)
                {
                    min_val = max_val = v;
                    seen = true;
                }
                else if (v < min_val)
                    min_val = v;
                else if (max_val < v)
                    max_val = v;
            }

            // Pass 2: rescale column c in place onto [-1, 1].
            for (size_t r = 0; r < rows; ++r)
            {
                if (c < ivecvec[r].size())
                    ivecvec[r][c] = normalization(min_val, max_val, ivecvec[r][c], 1, -1);
            }
        }

        // Dump the scaled table; the stream is closed by its destructor.
        for (size_t r = 0; r < rows; ++r)
        {
            for (size_t c = 0; c < ivecvec[r].size(); ++c)
            {
                outfile << ivecvec[r][c] << " ";
            }
            outfile << '\n';
        }
        return ivecvec;
    }
    
    // Read a whitespace-separated numeric table from e:\features.txt (one row
    // per text line), scale every column and print the result to standard
    // output. get_vec_normalization also saves the result to a file.
    // Returns 1 when the input cannot be opened, holds no numeric data, or the
    // scaling step reports a runtime_error.
    int main()
    {
        ifstream infile("e:\\features.txt");
        if (!infile)
        {
            cerr << "cannot open e:\\features.txt" << endl;
            return 1;
        }

        vector<vector<double> > ivecvec;
        string temp;
        while (getline(infile, temp))
        {
            stringstream line(temp);
            vector<double> ivec;
            double a;
            while (line >> a)
            {
                ivec.push_back(a);
            }
            // Blank (or non-numeric) lines would otherwise become empty rows.
            if (!ivec.empty())
                ivecvec.push_back(ivec);
        }

        if (ivecvec.empty())
        {
            cerr << "no numeric data in e:\\features.txt" << endl;
            return 1;
        }

        try
        {
            cout << get_vec_normalization(ivecvec);
        }
        catch (const runtime_error& e)
        {
            // Raised when the output file cannot be opened.
            cerr << e.what() << endl;
            return 1;
        }
        return 0;
    }
    

      


  • 相关阅读:
    关于线程池,那些你还不知道的事
    Java发送邮件
    原来实现项目多环境打包部署是如此的简单
    史上最全的maven的pom.xml文件详解
    Linux系统基础知识整理(一)
    计算机启动过程的简单介绍 计算机启动流程 计算机BIOS作用 POST 开机自检 计算机启动顺序 分区表 操作系统启动
    交换机工作原理、MAC地址表、路由器工作原理详解
    $(function(){})和$(document).ready(function(){}) 的区别
    关于RAM与ROM的区别与理解
    CDN的作用与基本过程
  • 原文地址:https://www.cnblogs.com/xiangshancuizhu/p/2165428.html
Copyright © 2011-2022 走看看