x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = 1.0  # initial guess for the weight
# our model forward pass
def forward(x):
    return x * w
# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)
# compute gradient: loss = (x*w - y)^2, so d_loss/d_w = 2 * x * (x*w - y)
def gradient(x, y):
    return 2 * x * (x * w - y)
# Before training
print("predict (before training)", 4, forward(4))
# Training loop
for epoch in range(100):
    for x_val, y_val in zip(x_data, y_data):
        grad = gradient(x_val, y_val)
        w = w - 0.01 * grad
        print(" grad: ", x_val, y_val, grad)
        l = loss(x_val, y_val)
    print("progress:", epoch, "w=", w, "loss=", l)
# After training
print("predict (after training)", "4 hours", forward(4))
=========================================
Training output
predict (before training) 4 4.0
grad: 1.0 2.0 -2.0
grad: 2.0 4.0 -7.84
grad: 3.0 6.0 -16.2288
progress: 0 w= 1.260688 loss= 4.919240100095999
grad: 1.0 2.0 -1.478624
grad: 2.0 4.0 -5.796206079999999
grad: 3.0 6.0 -11.998146585599997
progress: 1 w= 1.453417766656 loss= 2.688769240265834
grad: 1.0 2.0 -1.093164466688
grad: 2.0 4.0 -4.285204709416961
grad: 3.0 6.0 -8.87037374849311
progress: 2 w= 1.5959051959019805 loss= 1.4696334962911515
grad: 1.0 2.0 -0.8081896081960389
grad: 2.0 4.0 -3.1681032641284723
grad: 3.0 6.0 -6.557973756745939
progress: 3 w= 1.701247862192685 loss= 0.8032755585999681
grad: 1.0 2.0 -0.59750427561463
grad: 2.0 4.0 -2.3422167604093502
grad: 3.0 6.0 -4.848388694047353
progress: 4 w= 1.7791289594933983 loss= 0.43905614881022015
grad: 1.0 2.0 -0.44174208101320334
grad: 2.0 4.0 -1.7316289575717576
grad: 3.0 6.0 -3.584471942173538
progress: 5 w= 1.836707389300983 loss= 0.2399802903801062
grad: 1.0 2.0 -0.3265852213980338
grad: 2.0 4.0 -1.2802140678802925
grad: 3.0 6.0 -2.650043120512205
progress: 6 w= 1.8792758133988885 loss= 0.1311689630744999
grad: 1.0 2.0 -0.241448373202223
grad: 2.0 4.0 -0.946477622952715
grad: 3.0 6.0 -1.9592086795121197
progress: 7 w= 1.910747160155559 loss= 0.07169462478267678
grad: 1.0 2.0 -0.17850567968888198
grad: 2.0 4.0 -0.6997422643804168
grad: 3.0 6.0 -1.4484664872674653
progress: 8 w= 1.9340143044689266 loss= 0.03918700813247573
grad: 1.0 2.0 -0.13197139106214673
grad: 2.0 4.0 -0.5173278529636143
grad: 3.0 6.0 -1.0708686556346834
progress: 9 w= 1.9512159834655312 loss= 0.021418922423117836
grad: 1.0 2.0 -0.09756803306893769
grad: 2.0 4.0 -0.38246668963023644
grad: 3.0 6.0 -0.7917060475345892
progress: 10 w= 1.9639333911678687 loss= 0.01170720245384975
grad: 1.0 2.0 -0.07213321766426262
grad: 2.0 4.0 -0.2827622132439096
grad: 3.0 6.0 -0.5853177814148953
......
progress: 90 w= 1.9999999999988431 loss= 1.2047849775995315e-23
grad: 1.0 2.0 -2.3137047833188262e-12
grad: 2.0 4.0 -9.070078021977679e-12
grad: 3.0 6.0 -1.8779644506139448e-11
progress: 91 w= 1.9999999999991447 loss= 6.5840863393251405e-24
grad: 1.0 2.0 -1.7106316363424412e-12
grad: 2.0 4.0 -6.7057470687359455e-12
grad: 3.0 6.0 -1.3882228699912957e-11
progress: 92 w= 1.9999999999993676 loss= 3.5991747246272455e-24
grad: 1.0 2.0 -1.2647660696529783e-12
grad: 2.0 4.0 -4.957811938766099e-12
grad: 3.0 6.0 -1.0263789818054647e-11
progress: 93 w= 1.9999999999995324 loss= 1.969312363793734e-24
grad: 1.0 2.0 -9.352518759442319e-13
grad: 2.0 4.0 -3.666400516522117e-12
grad: 3.0 6.0 -7.58859641791787e-12
progress: 94 w= 1.9999999999996543 loss= 1.0761829795642296e-24
grad: 1.0 2.0 -6.914468997365475e-13
grad: 2.0 4.0 -2.7107205369247822e-12
grad: 3.0 6.0 -5.611511255665391e-12
progress: 95 w= 1.9999999999997444 loss= 5.875191475205477e-25
grad: 1.0 2.0 -5.111466805374221e-13
grad: 2.0 4.0 -2.0037305148434825e-12
grad: 3.0 6.0 -4.1460168631601846e-12
progress: 96 w= 1.999999999999811 loss= 3.2110109830478153e-25
grad: 1.0 2.0 -3.779199175824033e-13
grad: 2.0 4.0 -1.4814816040598089e-12
grad: 3.0 6.0 -3.064215547965432e-12
progress: 97 w= 1.9999999999998603 loss= 1.757455879087579e-25
grad: 1.0 2.0 -2.793321129956894e-13
grad: 2.0 4.0 -1.0942358130705543e-12
grad: 3.0 6.0 -2.2648549702353193e-12
progress: 98 w= 1.9999999999998967 loss= 9.608404711682446e-26
grad: 1.0 2.0 -2.0650148258027912e-13
grad: 2.0 4.0 -8.100187187665142e-13
grad: 3.0 6.0 -1.6786572132332367e-12
progress: 99 w= 1.9999999999999236 loss= 5.250973729513143e-26
predict (after training) 4 7.9999999999996945
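
The loop converges to w = 2, which can be cross-checked in closed form: for a one-parameter model y ≈ w * x with squared-error loss, the optimum is w* = sum(x*y) / sum(x*x). The snippet below is an added cross-check, not part of the original notes.

# Added cross-check (not in the original notes): closed-form least-squares optimum.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w_star = sum(x * y for x, y in zip(x_data, y_data)) / sum(x * x for x in x_data)
print(w_star)  # 28.0 / 14.0 = 2.0, the value the training loop converges to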
=================================
Computing the gradient automatically
Instead of deriving d_loss/d_w by hand, PyTorch's autograd records the operations performed in forward and loss and computes the gradient with l.backward().
import torch
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = torch.tensor([1.0], requires_grad=True)  # initial guess for the weight
# our model forward pass
def forward(x):
    return x * w
# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)
# Before training
print("predict (before training)", 4, forward(4).data[0])
# Training loop
for epoch in range(10):
    for x_val, y_val in zip(x_data, y_data):
        l = loss(x_val, y_val)
        l.backward()  # compute d_loss/d_w and store it in w.grad
        print(" grad: ", x_val, y_val, w.grad.data[0])
        w.data = w.data - 0.01 * w.grad.data
        # Manually zero the gradients after updating weights
        w.grad.data.zero_()
    print("progress:", epoch, l.data[0])
# After training
print("predict (after training)", 4, forward(4).data[0])
---------------------------------
predict (before training) 4 tensor(4.)
grad: 1.0 2.0 tensor(-2.)
grad: 2.0 4.0 tensor(-7.8400)
grad: 3.0 6.0 tensor(-16.2288)
progress: 0 tensor(7.3159)
grad: 1.0 2.0 tensor(-1.4786)
grad: 2.0 4.0 tensor(-5.7962)
grad: 3.0 6.0 tensor(-11.9981)
progress: 1 tensor(3.9988)
grad: 1.0 2.0 tensor(-1.0932)
grad: 2.0 4.0 tensor(-4.2852)
grad: 3.0 6.0 tensor(-8.8704)
progress: 2 tensor(2.1857)
grad: 1.0 2.0 tensor(-0.8082)
grad: 2.0 4.0 tensor(-3.1681)
grad: 3.0 6.0 tensor(-6.5580)
progress: 3 tensor(1.1946)
grad: 1.0 2.0 tensor(-0.5975)
grad: 2.0 4.0 tensor(-2.3422)
grad: 3.0 6.0 tensor(-4.8484)
progress: 4 tensor(0.6530)
grad: 1.0 2.0 tensor(-0.4417)
grad: 2.0 4.0 tensor(-1.7316)
grad: 3.0 6.0 tensor(-3.5845)
progress: 5 tensor(0.3569)
grad: 1.0 2.0 tensor(-0.3266)
grad: 2.0 4.0 tensor(-1.2802)
grad: 3.0 6.0 tensor(-2.6500)
progress: 6 tensor(0.1951)
grad: 1.0 2.0 tensor(-0.2414)
grad: 2.0 4.0 tensor(-0.9465)
grad: 3.0 6.0 tensor(-1.9592)
progress: 7 tensor(0.1066)
grad: 1.0 2.0 tensor(-0.1785)
grad: 2.0 4.0 tensor(-0.6997)
grad: 3.0 6.0 tensor(-1.4485)
progress: 8 tensor(0.0583)
grad: 1.0 2.0 tensor(-0.1320)
grad: 2.0 4.0 tensor(-0.5173)
grad: 3.0 6.0 tensor(-1.0709)
progress: 9 tensor(0.0319)
predict (after training) 4 tensor(7.8049)
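
Note that this run uses only 10 epochs, so w has only reached about 1.95 and the prediction for x = 4 is 7.80 rather than 8; running 100 epochs converges to w = 2 as in the manual version. For reference, current idiomatic PyTorch would let an optimizer perform the update instead of manipulating w.data directly. The following is a minimal sketch under that assumption, not part of the original lecture code:

import torch

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w = torch.tensor([1.0], requires_grad=True)
optimizer = torch.optim.SGD([w], lr=0.01)  # same learning rate as above

for epoch in range(100):
    for x_val, y_val in zip(x_data, y_data):
        l = (x_val * w - y_val) ** 2  # squared-error loss, as above
        optimizer.zero_grad()  # clear the gradient from the previous step
        l.backward()           # compute d_loss/d_w into w.grad
        optimizer.step()       # w <- w - lr * w.grad

print("predict (after training)", 4, (4 * w).item())  # ~8.0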