基本运算
▪ Add/subtract/multiply/divide
▪ Matmul
▪ Pow
▪ Sqrt/rsqrt
▪ Round
加法
import torch
a=torch.rand(3,4)
b=torch.rand(4)
a+b
tensor([[0.4990, 1.6506, 1.1205, 0.9656],
[0.5472, 1.9627, 0.8368, 0.5020],
[0.6576, 1.1725, 0.8564, 1.0940]])
torch.add(a, b)
tensor([[0.4990, 1.6506, 1.1205, 0.9656],
[0.5472, 1.9627, 0.8368, 0.5020],
[0.6576, 1.1725, 0.8564, 1.0940]])
torch.all(torch.eq(a-b, torch.sub(a,b)))
tensor(True)
torch.all(torch.eq(a*b, torch.mul(a,b)))
tensor(True)
torch.all(torch.eq(a/b, torch.div(a,b)))
tensor(True)
乘法
a = torch.tensor([[3.,3.],[3.,3.]])
a
tensor([[3., 3.],
[3., 3.]])
b=torch.ones(2,2)
b
tensor([[1., 1.],
[1., 1.]])
torch.mm(a,b) # only for 2d
tensor([[6., 6.],
[6., 6.]])
torch.matmul(a,b)
tensor([[6., 6.],
[6., 6.]])
a@b
tensor([[6., 6.],
[6., 6.]])
An example of matmul
a=torch.rand(4, 784)
x=torch.rand(4, 784)
w=torch.rand(512, 784)
# [4,784] @ [512,784].t() => [4,784] @ [784, 512]
(x@w.t()).shape
torch.Size([4, 512])
a=torch.rand(4,3,28,64)
b=torch.rand(4,3,64,32)
torch.mm(a,b).shape # only for 2d
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-23-2699c069c540> in <module>
1 a=torch.rand(4,3,28,64)
2 b=torch.rand(4,3,64,32)
----> 3 torch.mm(a,b).shape
RuntimeError: matrices expected, got 4D, 4D tensors at ..\aten\src\TH/generic/THTensorMath.cpp:36
# 4,3,28,64
# 4,3,64,32
# [28,64] @ [64,32] => [28,32]
torch.matmul(a,b).shape
torch.Size([4, 3, 28, 32])
# 4,3,28,64
# 4,1,64,32
# 4,1,64,32 => broadcast: 4,3,64,32
b=torch.rand(4,1,64,32)
torch.matmul(a,b).shape
torch.Size([4, 3, 28, 32])
# 4,3,28,64
# 4,64,32
b=torch.rand(4,64,32)
torch.matmul(a,b).shape
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-28-5feacf49e007> in <module>
1 b=torch.rand(4,64,32)
----> 2 torch.matmul(a,b).shape
RuntimeError: The size of tensor a (3) must match the size of tensor b (4) at non-singleton dimension 1
Power 次方
a=torch.full([2,2],3.0)
print(a)
a.pow(2)
tensor([[3., 3.],
[3., 3.]])
tensor([[9., 9.],
[9., 9.]])
a**2
tensor([[9., 9.],
[9., 9.]])
aa = a**2
aa.sqrt()
tensor([[3., 3.],
[3., 3.]])
aa.rsqrt() # 平方根的倒数 (reciprocal of square root, 1/sqrt(x))
tensor([[0.3333, 0.3333],
[0.3333, 0.3333]])
aa**(0.5)
tensor([[3., 3.],
[3., 3.]])
exp log
a=torch.exp(torch.ones(2,2))
a
tensor([[2.7183, 2.7183],
[2.7183, 2.7183]])
torch.log(a)
tensor([[1., 1.],
[1., 1.]])
Approximation
▪ .floor() .ceil()
▪ .round()
▪ .trunc() .frac()
a=torch.tensor(3.14)
a
tensor(3.1400)
a.floor() # 向下取整
tensor(3.)
a.ceil() # 向上取整
tensor(4.)
a.round() # 四舍五入
tensor(3.)
a.frac() # 取小数部分
tensor(0.1400)
a.trunc() # 取整数部分
tensor(3.)
a=torch.tensor(3.499)
a.floor() # 向下取整
tensor(3.)
a.ceil()
tensor(4.)
a.trunc() # 取整数部分
tensor(3.)
a.frac() # 取小数部分
tensor(0.4990)
a.round() # 四舍五入
tensor(3.)
a=torch.tensor(3.5)
a
tensor(3.5000)
a.round() # 四舍五入(注意: torch 对 .5 采用"四舍六入五成双"/round-half-to-even, 3.5 → 4 因为 4 是偶数)
tensor(4.)
clamp
▪ gradient clipping
▪ (min)
▪ (min, max)
grad = torch.rand(2,3)*15
grad
tensor([[ 1.9749, 5.4683, 3.6834],
[14.8391, 1.1953, 9.1021]])
grad.max()
tensor(14.8391)
grad.median()
tensor(3.6834)
grad.clamp(10) # 小于10的,替换为10
tensor([[10.0000, 10.0000, 10.0000],
[14.8391, 10.0000, 10.0000]])
grad
tensor([[ 1.9749, 5.4683, 3.6834],
[14.8391, 1.1953, 9.1021]])
grad.clamp(0, 10) # 限定到(0, 10)之间
tensor([[ 1.9749, 5.4683, 3.6834],
[10.0000, 1.1953, 9.1021]])