# Convolutional layers: image convolution.
import torch
from torch import nn
from d2l import torch as d2l


def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with kernel ``K``.

    Args:
        X: 2-D input tensor.
        K: 2-D kernel tensor of shape ``(h, w)``.

    Returns:
        A tensor of shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)``
        allocated with ``torch.zeros``; entry ``[i, j]`` is the sum of the
        element-wise product of ``K`` with the ``h x w`` window of ``X``
        whose top-left corner is ``(i, j)``.
    """
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y
试验一下
# Small worked example: a 3x3 input and a 2x2 kernel give a 2x2 output.
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
corr2d(X, K)
tensor([[19., 25.],
[37., 43.]])
实现二维卷积层
class Conv2D(nn.Module):
    """Two-dimensional convolutional layer built on ``corr2d``.

    Args:
        kernel_size: Shape passed to ``torch.rand`` to initialise the kernel.
    """

    def __init__(self, kernel_size):
        super().__init__()
        # ``nn.Parameter`` (capital P) is the tensor subclass that registers
        # the value as a learnable parameter; lowercase ``nn.parameter`` is a
        # module and calling it raises TypeError.
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Return the cross-correlation of ``x`` with the kernel plus bias."""
        return corr2d(x, self.weight) + self.bias
应用:检测图像中不同颜色的边缘
# 6x8 "image": ones everywhere except columns 2..5, which are zero,
# giving two vertical edges (1 -> 0 and 0 -> 1).
X = torch.ones((6, 8))
X[:, 2:6] = 0
X
tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.],
[1., 1., 0., 0., 0., 0., 1., 1.]])
# 1x2 kernel that outputs the difference between horizontally adjacent
# elements: non-zero only where neighbouring columns differ.
K = torch.tensor([[1.0, -1.0]])
# Apply the kernel; ``Y`` is the target used later when learning the kernel.
Y = corr2d(X, K)
Y
我们的目的是通过卷积将边界部分识别出来
tensor([[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.],
[ 0., 1., 0., 0., 0., -1., 0.]])
可以看到交界处已经被识别出来了
但是我们这个核矩阵只能检验列向的分界线,如果想检测行向的分界线,必须要换一个核矩阵。例如把同一个核作用在转置后的输入上(`corr2d(X.t(), K)`),输出全为零:
tensor([[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0.]])
可见无法识别
现在给定X,Y,我们要通过X和Y来学习K(核矩阵)
# Learn the 1x2 kernel from (X, Y) with plain gradient descent.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d takes 4-D input, so add leading dimensions of size 1.
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

for i in range(10):
    Y_hat = conv2d(X)
    loss = (Y_hat - Y) ** 2  # element-wise squared error
    conv2d.zero_grad()
    loss.sum().backward()
    # Manual gradient step with learning rate 3e-2.
    conv2d.weight.data[:] -= 3e-2 * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        # ``:3f`` is a minimum width of 3 with the default six decimals,
        # which is what the recorded output shows.
        print(f'batch{i+1},loss{loss.sum():3f}')
batch2,loss4.527621
batch4,loss1.422135
batch6,loss0.509966
batch8,loss0.196711
batch10,loss0.078531
看一下我们学的卷积核的张量权重
# View the learned kernel as a 1x2 tensor.
conv2d.weight.data.reshape((1, 2))
tensor([[ 0.9644, -1.0217]])
可以看到和[[1,-1]]很接近了
填充
import torch
from torch import nn


def comp_conv2d(conv2d, X):
    """Apply ``conv2d`` to a tensor and strip the two leading dimensions.

    Args:
        conv2d: Callable applied to the reshaped input.
        X: Input tensor; it is reshaped to ``(1, 1) + X.shape`` before the
            call.

    Returns:
        The output of ``conv2d`` reshaped to its shape from dimension 2 on.
    """
    X = X.reshape((1, 1) + X.shape)
    Y = conv2d(X)
    return Y.reshape(Y.shape[2:])


# 3x3 kernel with one element of padding on every side keeps an 8x8 input 8x8.
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)
X = torch.rand(size=(8, 8))
comp_conv2d(conv2d, X).shape
torch.Size([8, 8])
并不是那么对称的情况
# Different kernel height and width, with matching per-dimension padding.
conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape
torch.Size([8, 8])
步幅
# A stride of 2 in both dimensions halves the output height and width.
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape
torch.Size([4, 4])
# 3x5 kernel with no padding and the default stride.
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5))
comp_conv2d(conv2d, X).shape
torch.Size([6, 4])
通道 彩色图片RGB格式是三个通道
灰度图片是单通道
下面从零开始实现
import torch
from d2l import torch as d2l


def corr2d_multi_in(X, K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    ``X`` and ``K`` are iterated in lockstep along their first dimension;
    each pair is passed to ``d2l.corr2d`` and the results are summed.
    """
    return sum(d2l.corr2d(x, k) for x, k in zip(X, K))
# Two-channel 3x3 input and two-channel 2x2 kernel.
X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
                  [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]],
                  [[1.0, 2.0], [3.0, 4.0]]])

corr2d_multi_in(X, K)
tensor([[ 56., 72.],
[104., 120.]])
# NOTE(review): ``inspect.stack`` is not used below (``torch.stack`` is);
# this looks like an accidental auto-import - confirm before removing.
from inspect import stack


def corr2d_multi_in_out(X, K):
    """Cross-correlate ``X`` with each output-channel kernel in ``K``.

    Iterates over the first dimension of ``K``, applies ``corr2d_multi_in``
    to each slice, and stacks the results along a new dimension 0.
    """
    return torch.stack([corr2d_multi_in(X, k) for k in K], 0)


# Build a kernel with three output channels from K, K + 1 and K + 2.
K = torch.stack((K, K + 1, K + 2), 0)
K.shape
torch.Size([3, 2, 2, 2])
# One output channel per kernel stacked into K.
corr2d_multi_in_out(X, K)
tensor([[[ 56., 72.],
[104., 120.]],
[[ 76., 100.],
[148., 172.]],
[[ 96., 128.],
[192., 224.]]])
1×1的卷积
def corr2d_multi_in_out_1x1(X, K):
    """Compute a 1x1 convolution as a single matrix multiplication.

    Args:
        X: Tensor of shape ``(c_i, h, w)``.
        K: Kernel whose first dimension is ``c_o`` and which is reshaped to
            ``(c_o, c_i)``, so it must hold exactly ``c_o * c_i`` elements.

    Returns:
        Tensor of shape ``(c_o, h, w)``.
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # Flatten the spatial dimensions so that each column is one pixel.
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = torch.matmul(K, X)
    return Y.reshape(c_o, h, w)


# Check the matmul form against the general multi-in/multi-out routine.
X = torch.normal(0, 1, (3, 3, 3))
K = torch.normal(0, 1, (2, 3, 1, 1))

Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
assert float(torch.abs(Y1 - Y2).sum()) < 1e-6
# Pooling.
import torch
from torch import nn
from d2l import torch as d2l


def pool2d(X, pool_size, mode='max'):
    """Apply 2-D max or average pooling with stride 1 and no padding.

    Args:
        X: 2-D input tensor.
        pool_size: Pair ``(p_h, p_w)`` giving the pooling window size.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        Tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # Reject unknown modes up front; previously they silently produced an
    # all-zero result.
    if mode not in ('max', 'avg'):
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i:i + p_h, j:j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y
验证值
# Check max pooling on a 3x3 input with a 2x2 window.
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
pool2d(X, (2, 2), mode='max')
tensor([[4., 5.],
[7., 8.]])
# Same input, average pooling.
pool2d(X, (2, 2), mode='avg')
tensor([[2., 3.],
[5., 6.]])
tensor([[0., 1., 2.],
[3., 4., 5.],
[6., 7., 8.]])
tensor([[[[0., 1., 2.],
[3., 4., 5.],
[6., 7., 8.]]]])
# Values 0..15 laid out as a 4-D tensor of shape (1, 1, 4, 4).
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
X
tensor([[[[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.],
[12., 13., 14., 15.]]]])
使用框架
深度学习框架中,步幅默认与池化窗口的大小相同
# 3x3 max pooling; only the window size is given.
# Note: this rebinds the name ``pool2d`` to an nn.MaxPool2d instance.
pool2d = nn.MaxPool2d(3)

pool2d(X)
tensor([[[[10.]]]])
# Rectangular 2x3 window with explicit per-dimension padding and stride.
pool2d = nn.MaxPool2d((2, 3), padding=(1, 1), stride=(2, 3))

pool2d(X)
tensor([[[[ 1., 3.],
[ 9., 11.],
[13., 15.]]]])
# Concatenate X and X + 1 along dimension 1 to get two channels.
X = torch.cat((X, X + 1), 1)

X
tensor([[[[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.],
[12., 13., 14., 15.]],
[[ 1., 2., 3., 4.],
[ 5., 6., 7., 8.],
[ 9., 10., 11., 12.],
[13., 14., 15., 16.]]]])
# Assign (``=``) the new pooling layer. The original ``==`` only compared
# objects and discarded the result, so the previous (2, 3) pooling layer was
# still the one applied; the output recorded after this cell comes from that
# old layer.
# With this 3x3 window, padding 1 and stride 2, the 4x4 channels pool to
# [[5, 7], [13, 15]] and [[6, 8], [14, 16]].
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)
tensor([[[[ 1., 3.],
[ 9., 11.],
[13., 15.]],
[[ 2., 4.],
[10., 12.],
[14., 16.]]]])