Version 1.0
import torch

dtype = torch.float
# device = torch.device("cpu")
device = torch.device("cuda:0")  # Swap with the line above to run on CPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1, 100, 1

# Learning rate
LR = 1e-5

# Create random input and output data
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = x * 10

# Randomly initialize weights
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

for t in range(500):
    # --- Manual forward/backward pass, kept for reference ---
    # h = x.mm(w1)
    # # ReLU activation function
    # h_relu = h.clamp(min=0)
    # pred_y = h_relu.mm(w2)
    # # Calculate the loss
    # loss = (pred_y - y).pow(2).sum().item()
    # print(t, loss)
    # # Backpropagation: compute the gradients by hand
    # grad_y = (pred_y - y) * 2
    # grad_w2 = h_relu.t().mm(grad_y)
    # grad_h_relu = grad_y.mm(w2.t())  # chain rule: push the gradient back through w2
    # grad_h = grad_h_relu.clone()
    # # ReLU: if x < 0 the output is 0, so the gradient there is 0
    # grad_h[h < 0] = 0
    # grad_w1 = x.t().mm(grad_h)
    # # Update weights using gradient descent
    # w1 -= LR * grad_w1
    # w2 -= LR * grad_w2

    # Autograd version
    pred_y = x.mm(w1).clamp(min=0).mm(w2)
    loss = (pred_y - y).pow(2).sum()
    print(t, loss.item())
    loss.backward()
    with torch.no_grad():
        w1 -= LR * w1.grad
        w2 -= LR * w2.grad
        # Manually zero the gradients after updating the weights
        w1.grad.zero_()
        w2.grad.zero_()

# Show the result
print(w1)
print(w2)
print(w1.mm(w2))

# Test
test_x = torch.randn(N, D_in, device=device, dtype=dtype)
h = test_x.mm(w1)
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
print(pred_y)
print(test_x)
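The line the manual version puzzled over, grad_y.mm(w2.t()), is just the chain rule: it propagates the loss gradient back through w2 to the hidden activations. One way to convince yourself the hand-derived gradients are right is to compare them against what loss.backward() computes. A minimal sketch of that check (single forward/backward pass; the CPU fallback and default torch.allclose tolerances are my choices, not part of the original):

import torch

dtype = torch.float
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

N, D_in, H, D_out = 64, 1, 100, 1
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = x * 10

w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

# Forward pass, keeping intermediates for the manual gradients
h = x.mm(w1)
h_relu = h.clamp(min=0)
pred_y = h_relu.mm(w2)
loss = (pred_y - y).pow(2).sum()

# Manual gradients via the chain rule
grad_y = (pred_y - y) * 2
grad_w2 = h_relu.t().mm(grad_y)
grad_h_relu = grad_y.mm(w2.t())   # backprop through w2
grad_h = grad_h_relu.clone()
grad_h[h < 0] = 0                 # ReLU zeroes the gradient where its input was negative
grad_w1 = x.t().mm(grad_h)

# Autograd gradients
loss.backward()

print(torch.allclose(grad_w1, w1.grad))  # expect True
print(torch.allclose(grad_w2, w2.grad))  # expect True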
Version 2.0
import torch
import torch.nn as nn
import random

# device = torch.device("cpu")
device = torch.device("cuda:0")  # Swap with the line above to run on CPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1, 100, 1

# Learning rate
LR = 1e-4

# Create random input and output data
x = torch.randn(N, D_in, device=device)
y = x * 10

class MyLinearRegression(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(MyLinearRegression, self).__init__()
        self.input_layer = nn.Linear(D_in, H).to(device)
        self.middle_layer = nn.Linear(H, H).to(device)
        self.output_layer = nn.Linear(H, D_out).to(device)

    def forward(self, x):
        x = x.to(device)
        h_relu = self.input_layer(x).clamp(min=0)
        # Reuse the same middle layer a random number of times (weight sharing)
        for _ in range(random.randint(0, 3)):
            h_relu = self.middle_layer(h_relu).clamp(min=0)
        y_pred = self.output_layer(h_relu)
        return y_pred

model = MyLinearRegression(D_in, H, D_out)
criterion = nn.MSELoss(reduction='sum')  # size_average=False is deprecated; reduction='sum' is the equivalent
optimizer = torch.optim.SGD(model.parameters(), lr=LR)

for t in range(500):
    pred_y = model(x)
    loss = criterion(pred_y, y)
    print(t, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Test: the residual should be close to zero if the model learned y = 10x
test_x = torch.randn(N, D_in, device=device)
pred_y = model(test_x)
print(pred_y - 10 * test_x)
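The only thing in version 2.0 that forces a hand-written forward() is the random reuse of middle_layer; a fixed-depth network can be expressed more compactly with nn.Sequential. A minimal sketch of that variant, assuming the random-depth behavior is not needed (same data, loss, and training loop as above):

import torch
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
N, D_in, H, D_out = 64, 1, 100, 1

# Fixed-depth equivalent: nn.ReLU() replaces the .clamp(min=0) calls
model = nn.Sequential(
    nn.Linear(D_in, H),
    nn.ReLU(),
    nn.Linear(H, D_out),
).to(device)

criterion = nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

x = torch.randn(N, D_in, device=device)
y = x * 10

for t in range(500):
    pred_y = model(x)
    loss = criterion(pred_y, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()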