之前在博客中写了三个语言版本的(matlab的有点小问题)。
P.S.把代码中表达式的函数改为回调函数,这样可以自己定制方程,对于各种函数也就可以仅调用gradient_descent()方法来进行求解。
下面是一个python的对函数 $ f(\vec{x}) = 2 x_1^2 + x_2^2 $ 求梯度下降的实现:
# PEP 8 (E731): named functions, not lambdas bound to names, so tracebacks
# and repr() show a useful function name.
def square(x):
    """Return x squared."""
    return x ** 2


def side(a, b):
    """Return the Euclidean norm (hypotenuse length) of the vector (a, b)."""
    return (a ** 2 + b ** 2) ** (1 / 2)
def d(x_1, x_2):
    """Descent direction: the negative gradient of f(x) = 2*x_1**2 + x_2**2."""
    grad_1, grad_2 = 4 * x_1, 2 * x_2
    return (-grad_1, -grad_2)
def evaluate(x_1, x_2, a, b):
    """Exact line-search step size from (x_1, x_2) along direction (a, b)
    for f(x) = 2*x_1**2 + x_2**2.
    """
    numerator = 2 * a * x_1 + b * x_2
    denominator = 2 * a ** 2 + b ** 2
    return -numerator / denominator
def gradient_descent(x_1, x_2, epsilon, max_iters):
    """Minimize f(x) = 2*x_1**2 + x_2**2 by steepest descent with exact line search.

    Prints each iterate, one per line.

    Args:
        x_1, x_2: starting point.
        epsilon: stop once the gradient norm is <= epsilon.
        max_iters: hard cap on the number of iterations.

    Returns:
        The final (x_1, x_2) point (new; previously returned None, so
        existing callers that ignore the result are unaffected).
    """
    iters = 0
    a, b = 1, 1  # seed so the first norm check passes (sqrt(2) > any sane epsilon)
    # FIX: use the boolean operator `and`, not bitwise `&`; `&` on
    # comparison results only works by accident on bools and evaluates
    # both sides eagerly.
    while side(a, b) > epsilon and iters < max_iters:
        print('%d (%.6f, %.6f)' % (iters + 1, x_1, x_2))
        a, b = d(x_1, x_2)                 # descent direction (negative gradient)
        gamma = evaluate(x_1, x_2, a, b)   # optimal step length along (a, b)
        x_1, x_2 = x_1 + a * gamma, x_2 + b * gamma
        iters += 1
    return x_1, x_2
if __name__ == '__main__':
    # Descend from (1, 1); stop when the gradient norm drops below 1e-3
    # or after 100 iterations, whichever comes first.
    start = (1, 1)
    tolerance = 0.001
    iteration_cap = 100
    gradient_descent(start[0], start[1], tolerance, iteration_cap)
C/C++版( $ \epsilon = 10^{-6} = \text{1e-6} $ ,lambda函数就是求参数 $ \lambda $ ,因为迭代公式: $ \vec{x}_{i+1} = \vec{x}_i + \lambda_i \cdot \vec{d} $ ,这里 $ \vec{d} $ 为函数在 $ (x_{1_i}, x_{2_i}) $ 处的负梯度方向):
#include <cmath>
#include <cstdio>
#include <iostream>
// Square of x.
double square(double x)
{
    const double sq = x * x;
    return sq;
}
// First component of the negative gradient of f(x) = 2*x1^2 + x2^2,
// i.e. -d/dx1 f = -4*x1.
double part_a(double x)
{
    return -4.0 * x;
}
// Second component of the negative gradient of f(x) = 2*x1^2 + x2^2,
// i.e. -d/dx2 f = -2*x2.
double part_b(double x)
{
    return -2.0 * x;
}
// Euclidean length of the vector (a, b).
double hypotenuse(double a, double b)
{
    const double sum_of_squares = a * a + b * b;
    return sqrt(sum_of_squares);
}
// Optimal step size (exact line search) along direction (a, b) from the
// point (x_1, x_2) for f(x) = 2*x_1^2 + x_2^2.
double lambda(double x_1, double x_2, double a, double b)
{
    const double numerator = 2.0 * a * x_1 + b * x_2;
    const double denominator = 2.0 * (a * a) + b * b;
    return -numerator / denominator;
}
// Steepest descent with exact line search on f(x) = 2*x_1^2 + x_2^2.
//   x_1, x_2  : starting point
//   epsilon   : stop once the gradient norm is <= epsilon
//   max_iters : hard cap on the number of iterations
// Prints each iterate; the do/while guarantees the start point is printed.
void gradient_descent(
    double x_1,
    double x_2,
    double epsilon,
    unsigned max_iters)
{
    // FIX: renamed the local from the non-ASCII identifier 'λ', which many
    // compilers reject, to the portable name 'lam'.
    double a, b, lam;
    unsigned iters = 0;
    do
    {
        // FIX: the newline must be the escape sequence "\n" -- a raw line
        // break inside a string literal does not compile.
        printf("(%f, %f)\n", x_1, x_2);
        a = part_a(x_1), b = part_b(x_2);   // negative gradient at the current point
        lam = lambda(x_1, x_2, a, b);       // optimal step along (a, b)
        x_1 += a * lam;
        x_2 += b * lam;
        iters++;
    } while (hypotenuse(a, b) > epsilon && iters < max_iters);
}
int main()
{
    // Descend from (1, 1) until the gradient norm falls below 1e-6,
    // giving up after at most 1000 iterations.
    const double epsilon = 1e-6;
    const double x_1 = 1.;
    const double x_2 = 1.;
    gradient_descent(x_1, x_2, epsilon, 1000);
    getchar();  // keep the console window open until a key is pressed
    return 0;
}
output:
(1.000000, 1.000000) (-0.111111, 0.444444) (0.074074, 0.074074) (-0.008230, 0.032922) (0.005487, 0.005487) (-0.000610, 0.002439) (0.000406, 0.000406) (-0.000045, 0.000181) (0.000030, 0.000030) (-0.000003, 0.000013) (0.000002, 0.000002) (-0.000000, 0.000001) (0.000000, 0.000000)