zoukankan      html  css  js  c++  java
  • SVM

    支持向量机

    本节将依SMO算法训练线性SVM, 核方法的使用可以很方便的进行扩展.

    import numpy as np
    import matplotlib.pyplot as plt
    

    下面的Point的类中的:

    [g(x) = sum_{i=1}^N alpha_i y_i k(x_i, x) + b, ]

    [e = g - y, yg = y * g ]

    class Point:
        """
        因为在SM0算法中,我们需要不断更新以及判断数据点的
        各种属性,所以创建了一个新的数据类型Point.
        """
        def __init__(self, x, y, k, order,
                     g, alpha, c):
            assert y == 1 or y == -1, "Invalid y"
            self.x = x  #数据向量
            self.y = y  # 类
            self.order = order  #序
            self.k = k  #距离矩阵
            self.c = c  
            self.g = g
            self.yg = g * y #衡量违背条件的指标
            self.alpha = alpha
            self.e = g - y
    
        def kkt(self, epsilon=0.001):
            """
            检查是否符合KKT条件
            :epsilon: 条件判断时允许的误差
            :return: 0 表示符合, 1 表示 不符合 其原因是便于统计不满足kkt的数据点的总量
            """
            alpha = self.alpha
            cond = self.y * self.g
            if alpha == 0. and cond < 1 - epsilon:
                self.satisfy_kkt = False
            elif 0 < alpha < self.c and (cond <1 - epsilon or cond > 1 + epsilon):
                self.satisfy_kkt = False
            elif alpha == self.c and cond > 1 + epsilon:
                self.satisfy_kkt = False
            else:
                self.satisfy_kkt = True
            if self.satisfy_kkt:
                return 0
            else:
                return 1
        def update_g_e(self, dog1, dog2, oldb, newb, newalpha1, newalpha2):
            """
            更新g, e参数
            """
            k1 = self.k[self.order, dog1.order]
            k2 = self.k[self.order, dog2.order]
            self.g = self.g + (newalpha1- dog1.alpha) * dog1.y * k1 
                            + (newalpha2- dog2.alpha) * dog2.y * k2 
                            + newb - oldb
            self.e = self.g - self.y
            self.yg = self.g * self.y
    
    

    下面是Points类的定义, 用训练和预测,值得一提的是在initial函数中对于alpha的初始化,从([0, self.c / self.size])中依照均匀分布选取,并且对最后一个样本的进行特别处理,以保证(sum alpha_i y_i=0), 这意味着(alpha)将是一个可行解, 一些文章中似乎是将alpha初始化为0的,不知道按照这种方式怎么进行更新迭代,因为:

    [alpha_2^{new, unc} = alpha_2^{old} + frac{y_2(E_1-E_2)}{eta}, ]

    (alpha^{old} = 0, b = 0, Rightarrow E=0 Rightarrow alpha^{new}=0). 意味着样本的参数(alpha)是不会变化的.

    另外calc_func_value,计算的是:

    [frac{1}{2} sum_{i=1}^Nsum_{j=1}^N alpha_i alpha_j y_i y_j k(x_i, x_j) - sum_{i=1}^N alpha_i ]

    这个值越小收敛的越好

    class Points:
        """
        数据点的集合
        """
        def __init__(self, points, labels, c=1.):
            self.points = []  #用于保存Point对象
            self.size = len(points)
            self.initial(points, labels, c=c)
    
    
        def distance(self, point1, point2):
            """
            计算内积矩阵, 可以在这个函数中加入核方法而达到
            核SVM的目的.
            """
            return point1 @ point2
    
        def initial(self, points, labels, b=0., c=1.):
            """
            初始化 w, b
            w: 权重
            b: 截距
            c: 正则化项的系数
            :return:
            """
            self.b =  b
            self.c = c
            
            # alpha 进行初始化
            alpha = np.random.uniform(0, self.c / self.size, size=self.size)
            y = labels
            the_sum = np.sum(y * alpha)
            alpha[-1] = alpha[-1] - the_sum * y[-1]
            while not (0 < alpha[-1] < self.c):
                alpha = np.random.uniform(0, self.c / self.size, size=self.size)
                the_sum = np.sum(y * alpha)
                alpha[-1] = alpha[-1] - the_sum * y[-1]
                
            alphay = alpha * y
            k = np.zeros((self.size, self.size))
            for i in range(self.size):
                for j in range(i, self.size):
                    k[i, j] = self.distance(points[i], points[j])
                    k[j, i] = self.distance(points[j], points[i])
            self.k = k #内积矩阵
            g = k @ alphay + self.b
            for i in range(self.size):
                point = Point(points[i], labels[i], k, i,
                              g[i], alpha[i], c)
                self.points.append(point)
    
    
        def find_dog1(self, points):
            """
            寻找第一个点
            """
            lucky_dog1 = None
            lucky_value1 = 99999999999.
            dogs = points.copy()
            #首先我们在不满足的kkt条件的支持向量中寻找
            for dog in dogs:
                if not dog.satisfy_kkt and dog.alpha > 0:
                    cond =np.abs(dog.yg - 1)
                    if cond < lucky_value1:
                        lucky_dog1 = dog
                        lucky_value1 = cond
                        
            #如果没找到,再在整个训练集中找违背kkt最严重的点.
            if lucky_dog1 is None:
                for dog in dogs:
                    if not dog.satisfy_kkt:
                        if dog.yg < lucky_value1:
                            lucky_dog1 = dog
                            lucky_value1 = dog.yg
            return lucky_dog1
    
        def find_dog2(self, points, dog1):
            """
            寻找第二个点
            """
            dogs = points.copy()
            e1 = dog1.e
            lucky_dog2 = None
            lucky_value2 = -1.
            #寻找使得|e2-e1|最大的点
            for dog in dogs:
                e2 = dog.e
                value = np.abs(e2 - e1)
                if value > lucky_value2:
                    lucky_dog2 = dog
                    lucky_value2 = value
            return lucky_dog2
    
    
        def calc_l_h(self, dogs):
            """
            alpha需要裁剪,所以要计算L, H
            :param dogs:
            :return:
            """
            dog1 = dogs[0]
            dog2 = dogs[1]
            if dog1.y != dog2.y:
                l = max(0, dog2.alpha-dog1.alpha)
                h = min(self.c, self.c + dog2.alpha-dog1.alpha)
            else:
                l = max(0, dog2.alpha+dog1.alpha-self.c)
                h = min(self.c, dog2.alpha+dog1.alpha)
            return (l, h)
    
        def update(self, dogs, l, h):
            """
            更新alpha, b, g, e
            """
            dog1 = dogs[0]
            dog2 = dogs[1]
            e1 = dog1.e
            e2 = dog2.e
            k11 = self.k[dog1.order, dog1.order]
            k12 = self.k[dog1.order, dog2.order]
            k21 = self.k[dog2.order, dog1.order]
            k22 = self.k[dog2.order, dog2.order]
            eta =  k11 + k22 - 2 * k12
            alpha2_unc = dog2.alpha + dog2.y * (e1 - e2) / eta  #未裁剪的alpha2
            if alpha2_unc < l:
                alpha2 = l
            elif l < alpha2_unc < h:
                alpha2 = alpha2_unc
            else:
                alpha2 = h
            alpha1 = dog1.alpha + dog1.y * dog2.y * (dog2.alpha - alpha2)
            b1 = -e1 - dog1.y * k11 * (alpha1 - dog1.alpha) - 
                 dog2.y * k21 * (alpha2 - dog2.alpha) + self.b
            b2 = -e2 - dog1.y * k12 * (alpha1 - dog1.alpha) - 
                 dog2.y * k22 * (alpha2 - dog2.alpha) + self.b
            b = (b1 + b2) / 2
            #更新g, e
            for point in self.points:
                point.update_g_e(dog1, dog2, self.b, b, alpha1, alpha2)
    
            #更新alpha, b
            dog1.alpha = alpha1
            dog2.alpha = alpha2
            self.b = b
            return (alpha1, alpha2, b)
    
        def update_kkt(self, epsilon):
            """
            检查每个Point是否符合kkt条件,并返回
            不合格的Point的数量
            """
            count = 0
            for point in self.points:
                count += point.kkt(epsilon)
            return count
    
        def calc_func_value(self):
            """
            计算函数的值
            """
            alpha = np.array([point.alpha for point in self.points])
            y = np.array([point.y for point in self.points])
            alphay = alpha * y
            value = alphay @ self.k @ alphay - np.sum(alpha)
            return value
    
        def calc_w(self):
            """
            计算w
            """
            w = 0.0
            for point in self.points:
                w += point.alpha * point.y * point.x
            self.w = w
            return w
    
    
        def smo(self, max_iter, x, y1, y2, epsilon=1e-3, enough_num=10):
            """
            事实上,并不需要传入x, y1, y2, 这里只是做可视化用,
            w 也仅在线性分类器时是有用的.
            enough_num: 当不满足kkt条件的数目小于enough_num是退出
            """
            iteration = 0
            value_pre = None
            points1 = self.points.copy()
            points2 = self.points.copy()
            while True:
                iteration += 1
                count = self.update_kkt(epsilon)
                value = self.calc_func_value()
                #print(iteration, value, count)
                if count < enough_num or iteration > max_iter:
                    break
                
                #下面部分的含义是如果我们所选的Point不能函数值下降,那么遍历其它的点
                #直到目标函数有足够的下降,如果还不行,那么抛弃第一个Point,选择其它的Point
                if value_pre == value:
                    points2.remove(dog2)
                    if len(points2) > 1:
                        dog2 = self.find_dog2(points2, dog1)
                    else:
                        points1.remove(dog1)
                        points2 = points1.copy()
                        dog1 = self.find_dog1(points1)
                        points2.remove(dog1)
                        dog2 = self.find_dog2(points2, dog1)
                else:
                    points1 = self.points.copy()
                    points2 = self.points.copy()
                    dog1 = self.find_dog1(points1)
                    points2.remove(dog1)
                    dog2 = self.find_dog2(points2, dog1)
    
                value_pre = value
                l, h = self.calc_l_h((dog1, dog2))
                self.update((dog1, dog2), l, h)
                
                """
                可视化部分
                if iteration % 10 == 9:
                    self.calc_w()
                    plt.cla()
                    plt.scatter(x, y1)
                    plt.scatter(x, y2)
                    y = -self.w[0] / self.w[1] * x - self.b
                    plt.title("SVM")
                    plt.plot(x, y, color="red")
                    plt.pause(0.01)
                """
            self.calc_w()
    
            return self.w, self.b
        
        def pre(self, x):
            """
            给定x 预测其分类, %%sh上,如果是线性分类器,
            只需计算self.w,再计算self.w @ x + self.b即可,
            为了方便扩展,这里做了一般化.
            """
            value = self.b
            for point in self.points:
                value += point.alpha * point.y * 
                        self.distance(x, point.x)
            sign = 1 if value >= 0 else -1
            return sign
    

    准备数据

    蓝色部分:

    [y1 = 2x + 5 + epsilon_1, epsilon1 sim mathcal{N}(0, sigma^2). ]

    红色部分

    [y2 = 2x - 5 + epsilon_2, epsilon2 sim mathcal{N}(0, sigma^2) ]

    np.random.seed(10086)
    x = np.linspace(0, 10, 100)
    e1 = np.random.randn(100) * 2
    e2 = np.random.randn(100) * 2
    y1 = 2 * x + 5 + e1
    y2 = 2 * x - 5 + e2
    data1 = np.hstack((x.reshape(-1, 1), y1.reshape(-1, 1)))
    data2 = np.hstack((x.reshape(-1, 1), y2.reshape(-1, 1)))
    data = np.vstack((data1, data2))
    labels = np.hstack((np.ones(100), -np.ones(100)))
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    plt.show()
    

    在这里插入图片描述

    测试

    test = Points(data, labels, c=10)
    w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
    print(w, b)
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    y = -w[0] / w[1] * x - b
    plt.plot(x, y, color="red")
    plt.show()
    
    [-1.25243632  0.5887332 ] -1.1498779874793863
    

    在这里插入图片描述

    有些时候结果是不会那么好的, 在收敛过程中还是会有波动的, 而且数据越是混越是波动大

    test.pre(np.array([10., 0.]))
    
    -1
    
    test.pre(np.array([0., 10.]))
    
    1
    
    test = Points(data, labels, c=1)
    w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
    print(w, b)
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    y = -w[0] / w[1] * x - b
    plt.plot(x, y, color="red")
    plt.show()
    
    [-1.33721113  0.83955356] -1.6680372246340474
    

    在这里插入图片描述

    test = Points(data, labels, c=0.1)
    w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
    print(w, b)
    plt.scatter(x, y1)
    plt.scatter(x, y2)
    y = -w[0] / w[1] * x - b
    plt.plot(x, y, color="red")
    plt.show()
    
    [-0.78780019  0.44522279] -0.6061702858422205
    

    在这里插入图片描述

    实验中发现, c越大波动越大,或许在训练过程可以动态调整c

  • 相关阅读:
    Python动态展示遗传算法求解TSP旅行商问题
    MOEAD算法中均匀权向量的实现---Python
    HDU 5294 多校第一场1007题 最短路+最小割
    POJ 3261 Milk Patterns sa+二分
    HDU 4292 FOOD 2012 ACM/ICPC Asia Regional Chengdu Online
    CodeForces 201A Clear Symmetry
    POJ 1679 The Unique MST 确定MST是否唯一
    POJ 3268 Silver Cow Party 最短路 基础题
    POJ 2139 SIx Degrees of Cowvin Bacon 最短路 水題
    POJ2229 Sumsets 基礎DP
  • 原文地址:https://www.cnblogs.com/MTandHJ/p/11384061.html
Copyright © 2011-2022 走看看