zoukankan      html  css  js  c++  java
  • 【作业一】林轩田机器学习技术

    关注的是编程题目Q15~Q20

    这里需要借用libsvm的python包,需要以下几个处理步骤:

    (1)到libsvm官网(http://www.csie.ntu.edu.tw/~cjlin/libsvm/)下载.gz包,解压缩到一个目录

    (2)在解压后的根目录下执行make命令

    (3)再到解压后根目录下的python文件夹中执行make命令

    (4)把根目录下的libsvm.so.2,python文件夹下的svm.py和svmutil.py,三个文件提取出来,存放到一个新的文件夹(我的文件夹命名为libsvm)下面

    (5)为了让libsvm目录被识别为一个包,需要建立名为__init__.py的空文件

    (6)修改svm.py的Line 19为 “libsvm = CDLL(path.join(dirname, 'libsvm.so.2'))”

    (7)如果要在新建的.py文件中使用libsvm的包,需要加入如下的语句(蓝色字的部分为你建立libsvm目录的路径):  

          import sys
          sys.path.append('/Users/xiabofei/Documents/libsvm')
          from svmutil import *

    整体代码如下(读数据共用一个函数,其余的按照每个Question的要求,各自写成一块)

    #encoding=utf8
    import sys
    sys.path.append('/Users/xiabofei/Documents/libsvm')
    from svmutil import *
    import numpy as np
    import math
    from random import *
    
    # read raw data from local file
    # return scaled data
    def read_input_data(path, target_class, scale=False):
        """Load a whitespace-separated data file as a one-vs-rest problem.

        Every non-blank line is parsed as a numeric label followed by the
        feature values; blank lines are skipped.

        Args:
            path: path of the data file to read.
            target_class: label treated as the positive class. Rows whose
                label equals it are labelled +1, all other rows -1.
            scale: when True, min-max scale each feature column to [0, 1]
                in place (a constant column becomes 0.0). The default,
                False, returns the raw feature values.

        Returns:
            A tuple ``(x, y, x_size)``: ``x`` is a list of feature rows
            (lists of floats), ``y`` the matching list of +1/-1 labels and
            ``x_size`` the number of feature columns on the first data
            line (-1 when the file contains no data line).

        Raises:
            ValueError: if a field cannot be converted to float.
        """
        x = []
        y = []
        x_size = -1
        with open(path) as f:
            for line in f:
                items = line.split()
                if not items:
                    # tolerate blank lines (e.g. a trailing empty line)
                    continue
                if x_size < 0:
                    # the first data line defines the feature column count
                    x_size = len(items) - 1
                y.append(1 if float(items[0]) == target_class else -1)
                x.append([float(item) for item in items[1:]])

        if scale and x:
            width = max(len(row) for row in x)
            lows = [float("inf")] * width
            highs = [float("-inf")] * width
            # Two independent checks: a value can be both the running
            # minimum and the running maximum (e.g. the first row).
            for row in x:
                for i, val in enumerate(row):
                    if val < lows[i]:
                        lows[i] = val
                    if val > highs[i]:
                        highs[i] = val
            for row in x:
                for i, val in enumerate(row):
                    span = highs[i] - lows[i]
                    # a constant column has zero span; map it to 0.0
                    row[i] = (val - lows[i]) / span if span else 0.0
        return x, y, x_size
    
    
    if __name__ == '__main__':
        # Driver for the homework experiments. Only the Q20 section is live;
        # the triple-quoted blocks are earlier experiments kept disabled as
        # bare string literals.
        '''
        print "success import"
        y,x = svm_read_problem('heart_scale')
        model = svm_train(y,x,'-c 4')
        print "success train"
        p_label, p_acc, p_val = svm_predict(y, x, model)
        print "success predict"
        '''
        ## Q15~Q17
        '''
        fw = open('result','w')
        # read raw data & reset labels according to the problem
        max_sum_of_alhpha = float("-inf")
        for target_class in range(0,10,2):
            x,y,x_size = read_input_data('train.dat',target_class)
            problem = svm_problem(y,x)
            # set SVM parameter
            params = svm_parameter('-c 0.01 -t 1 -g 1 -r 1')
            model = svm_train(problem, params)
            svm_save_model('model',model)
            # get W
            f = open('model')
            W = [0 for i in range(x_size)];
            sum_of_alpha = 0
            if_reach_SV = False
            for line in f.readlines():
                if line.strip()=="SV":
                    if_reach_SV = True
                    continue
                if if_reach_SV:
                    items = line.strip().split(' ')
                    alphan_yn = float(items[0])
                    sum_of_alpha = sum_of_alpha + abs(alphan_yn)
                    for i in range(x_size):
                        W[i] = W[i] + alphan_yn*float(items[i+1].split(':')[1].strip())
            fw.writelines(str(sum_of_alpha)+'\n')
            max_sum_of_alhpha = sum_of_alpha if sum_of_alpha>max_sum_of_alhpha else max_sum_of_alhpha
            f.close()
            #test_x,test_y,test_x_size = read_input_data('test.dat',target_class)
            #if x_size!= test_x_size: sys.exit(-1)
            p_label, p_acc, p_val = svm_predict(y, x, model)
            fw.writelines("class:"+str(target_class)+";Ein:"+str(1-p_acc[0]/100.0)+'\n')
        fw.writelines(str(max_sum_of_alhpha)+'\n')
        fw.close()
        '''
        '''
        ## Q18
        fw = open('result','w')
        c = 0.001
        while c<=10:
            x,y,x_size = read_input_data('train.dat',0)
            problem = svm_problem(y,x)
            params = svm_parameter("-c "+str(c)+" -t 2 -g 100")
            model = svm_train(problem, params)
            SV = model.get_SV()
            print SV
            svm_save_model('model',model)
            test_x,test_y,test_x_size = read_input_data('test.dat',0)
            if x_size!= test_x_size: sys.exit(-1)
            # calculate Eout
            p_label, p_acc, p_val = svm_predict(test_y, test_x, model)
            fw.writelines("Eout:"+str(1-p_acc[0]/100.0)+'\n')
            # calculate sum of 
            p_label, p_acc, p_val = svm_predict(test_y, test_x, model)
            c = c*10
        fw.close()
        '''
        '''
        ## Q19
        fw = open('result','w')
        gamma = 1
        x,y,x_size = read_input_data('train.dat',0)
        test_x,test_y,test_x_size = read_input_data('test.dat',0)
        if test_x_size!=x_size: sys.exit(-1)
        while gamma<=10000:
            problem = svm_problem(y,x)
            params = svm_parameter("-c 0.1 -t 2 -g "+str(gamma))
            model = svm_train(problem, params)
            p_label, p_acc, p_val = svm_predict(test_y, test_x, model)
            fw.writelines("gamma:"+str(gamma)+"\tEout:"+str(1-p_acc[0]/100.0)+'\n')
            gamma = gamma * 10
        fw.close()
        '''
        ## Q20
        # Repeat T times: hold out test_size randomly chosen training examples
        # for validation, train one SVM per gamma in {1, 10, ..., 10000} on the
        # rest, and count how often each gamma reaches the lowest validation
        # error (Eval). Per-run errors and the final counts go to 'result'.
        T = 50
        test_size = 1000
        gamma_minEvalTimes = dict()
        x,y,x_size = read_input_data('train.dat',0)
        if len(x) <= test_size:
            # nothing would be left to train on
            sys.exit("train.dat must contain more than %d examples" % test_size)
        with open('result','w') as fw:
            for _round in range(T):
                # Draw the validation indices WITHOUT replacement so the
                # validation set holds test_size distinct examples and the
                # training set is exactly the remainder.
                test_indexs = np.random.permutation(len(x))[:test_size].tolist()
                held_out = set(test_indexs)
                train_x = [row for j, row in enumerate(x) if j not in held_out]
                train_y = [label for j, label in enumerate(y) if j not in held_out]
                test_x = [x[j] for j in test_indexs]
                test_y = [y[j] for j in test_indexs]
                problem = svm_problem(train_y,train_x)
                # find which gamma gives the lowest Eval in this round
                min_Eval = float("inf")
                min_gamma = -1
                gamma = 1
                while gamma<=10000:
                    params = svm_parameter("-c 0.1 -t 2 -g "+str(gamma))
                    model = svm_train(problem, params)
                    p_label, p_acc, p_val = svm_predict(test_y, test_x, model)
                    # p_acc[0] is used as an accuracy percentage
                    Eval = 1-p_acc[0]/100.0
                    fw.write("gamma:"+str(gamma)+"\t Eval:"+str(Eval)+'\n')
                    # strict comparison: on a tie the smaller gamma wins
                    if min_Eval>Eval:
                        min_Eval = Eval
                        min_gamma = gamma
                    gamma = gamma * 10
                # update how many times this gamma performed best
                gamma_minEvalTimes[min_gamma] = gamma_minEvalTimes.get(min_gamma, 0) + 1
            # one summary line per gamma, in ascending gamma order
            for k,v in sorted(gamma_minEvalTimes.items()):
                fw.write("gamma:"+str(k)+"\ttimes:"+str(v)+'\n')
        '''
        print W
        w_1_F = 0
        for i in range(x_size):
            w_1_F = w_1_F + math.pow(W[i],2)
        print math.sqrt(w_1_F)
        '''
    
        '''
        print len(x)
        print len(y)
        for i in range(len(x)):
            out_str = str(y[i])
            for j in range(len(x[i])):
                out_str = out_str + '\t' + str(x[i][j])
            print out_str
        '''
  • 相关阅读:
    帧框架总结
    Java中Excel表格的上传与下载
    一种解决eclipse中安装maven出错的方法
    Eclipse中如何忽略报错的js文件
    如何解决jQuery easyui中locale文件下easyui-lang-zh_CN中文乱码问题
    java面试题
    登录功能(MD5加密)
    CSS基础
    java框架之spring
    基于Docker搭建Maven私服环境
  • 原文地址:https://www.cnblogs.com/xbf9xbf/p/4628737.html
Copyright © 2011-2022 走看看