叠加卷积层似乎很有用！

# 从0实现

import torch
from torch import nn

def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with kernel ``K``.

    The kernel is slid over every position where it fits entirely inside
    ``X`` (no padding, stride 1). Each output element is the sum of the
    element-wise product of the kernel and the window currently under it.

    Args:
        X: 2-D input tensor of shape ``(h, w)``.
        K: 2-D kernel tensor of shape ``(kh, kw)``; it does not have to be
            square.

    Returns:
        A tensor of shape ``(h - kh + 1, w - kw + 1)`` allocated with
        ``torch.zeros`` and filled with the window sums.
    """
    h, w = X.shape[-2], X.shape[-1]
    kh, kw = K.shape[0], K.shape[1]
    Y = torch.zeros(h - kh + 1, w - kw + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # The window spans kh rows and kw columns; using kh for the
            # column extent would be wrong for any non-square kernel.
            Y[i, j] = (X[i:i + kh, j:j + kw] * K).sum()

    return Y

class Conv2D(nn.Module):
    """Single-channel 2-D cross-correlation layer with a learnable kernel.

    The kernel is initialised uniformly at random with the requested shape
    and the scalar bias starts at zero; both are registered as parameters.
    """

    def __init__(self, kernel_size) -> None:
        super().__init__()
        initial_kernel = torch.rand(kernel_size)
        initial_bias = torch.zeros(1)
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Cross-correlate ``x`` with the kernel, then add the bias."""
        response = corr2d(x, self.weight)
        return response + self.bias

# The kernel k below acts as an edge detector, one of the early hand-crafted
# ways of extracting image features. Note that in a convolutional neural
# network the kernels are learned during training rather than written by hand.
x = torch.cat((torch.ones((6, 3)), torch.zeros((6, 3))), dim=1)
k = torch.tensor([[-1, 1],
                  [-1, 1]])

# Cross-correlating x with k gives the target that the training loop fits.
y = corr2d(x, k)


# Learn the kernel k above from the (x, y) pair by gradient descent.
net = Conv2D((2, 2))
lr = 0.02

for i in range(10):
    y_hat = net(x)
    l = (y - y_hat) ** 2  # element-wise squared error
    net.zero_grad()
    l.sum().backward()
    # Manual SGD step on the kernel only; the bias is never updated here.
    # The in-place update runs under no_grad rather than through `.data`,
    # so autograd does not record it but still tracks the modification.
    with torch.no_grad():
        net.weight -= lr * net.weight.grad

    print('epoch:{}, loss:{}'.format(i, l.sum()))

train FashionMNIST

import torch
from torch import nn
import torchvision
from torchvision import transforms

# Build the FashionMNIST train and test datasets used by the network below.
# Both are stored under ../data, requested with download=True, and every
# sample goes through transforms.ToTensor().
mnist_train = torchvision.datasets.FashionMNIST(
    root='../data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.FashionMNIST(
    root='../data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

class CnnNet(nn.Module):
    """Small CNN classifier producing 10 logits per image.

    Four convolutional stages (Conv2d -> optional 2x2 MaxPool -> BatchNorm
    -> ReLU) are followed by a two-layer fully connected head with dropout
    on the hidden layer. The head expects 1152 flattened features, i.e.
    128 channels x 3 x 3, which is what the stages produce for 28x28 inputs.

    Args:
        input_channels: Number of channels of the input images.
    """

    def __init__(self, input_channels=1) -> None:
        super().__init__()
        # Legacy switch kept for backward compatibility: when False, the
        # dropout layer is skipped regardless of the module's train/eval mode.
        self.is_training = True
        self.drop_layer = nn.Dropout(0.5)
        self.conn2d1 = self._conv_stage(input_channels, 16, kernel_size=(3, 3))
        self.conn2d2 = self._conv_stage(16, 32, kernel_size=(5, 5))
        self.conn2d3 = self._conv_stage(32, 64, kernel_size=(5, 5))
        # The last stage has no pooling.
        self.conn2d4 = self._conv_stage(64, 128, kernel_size=(2, 2), pool=False)
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.line1 = nn.Linear(1152, 256)
        self.line2 = nn.Linear(256, 10)

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, pool=True):
        """Build one Conv2d -> [MaxPool2d] -> BatchNorm2d -> ReLU stage."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                      stride=1, padding=1),
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=2))
        layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = self.conn2d4(self.conn2d3(self.conn2d2(self.conn2d1(x))))
        hidden = self.relu(self.line1(self.flatten(features)))
        if self.is_training:
            hidden = self.drop_layer(hidden)
        return self.line2(hidden)

    def predict(self, x):
        """Run inference on ``x`` and return the logits.

        The module is switched to eval mode for the call, so BatchNorm uses
        its running statistics and does not update them from evaluation
        data, and dropout is disabled. No autograd graph is built. The
        previous train/eval mode and ``is_training`` flag are restored
        afterwards, even if the forward pass raises.
        """
        previous_mode = self.training
        previous_flag = self.is_training
        self.is_training = False
        self.eval()
        try:
            with torch.no_grad():
                return self.forward(x)
        finally:
            self.is_training = previous_flag
            self.train(previous_mode)

from torch.utils import data

def ac(data_iter, net, device):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Accuracy is the number of correctly classified samples divided by the
    total number of samples, so a smaller final batch is weighted by its
    actual size instead of counting as much as a full batch.

    Args:
        data_iter: Iterable yielding ``(x, y)`` batches of inputs and labels.
        net: Model exposing ``predict(x)`` that returns per-class scores with
            the classes along dimension 1.
        device: Device the batches are moved to before prediction.

    Returns:
        A 0-dim tensor holding the fraction of correct predictions.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no batches.
    """
    correct = 0
    total = 0
    # Evaluation needs no gradients.
    with torch.no_grad():
        for x, y in data_iter:
            x = x.to(device)
            y = y.to(device)
            predicted = net.predict(x).argmax(dim=1)
            correct = correct + y.eq(predicted).sum()
            total += predicted.shape[0]
    return correct / total

# Hyper-parameters
batch_size = 256
num_epochs = 20
lr = 0.1

# Mini-batch loaders over the datasets built above.
train_iter = data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
test_iter = data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

# Model, target device, loss and optimiser.
net = CnnNet()
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
net.to(device)
loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)

# Training: one SGD step per mini-batch. After each epoch, print the loss of
# the last mini-batch and the accuracy on the test loader.
for i in range(num_epochs):
    for x, y in train_iter:
        x, y = x.to(device), y.to(device)
        trainer.zero_grad()
        y_hat = net(x)
        l = loss(y_hat, y)
        l.backward()
        trainer.step()
    print(l)
    print(ac(test_iter, net, device))


from torchinfo import summary

# Summarise a freshly constructed model for a single 1x28x28 input; note
# that this rebinds `net` to the new, untrained instance.
net = CnnNet()
summary(model=net, input_size=(1, 1, 28, 28))

# 输出net大小
==========================================================================================
Layer (type:depth-idx)                   Output Shape              Param #
==========================================================================================
CnnNet                                   [1, 10]                   --
├─Sequential: 1-1                        [1, 16, 14, 14]           --
│    └─Conv2d: 2-1                       [1, 16, 28, 28]           160
│    └─MaxPool2d: 2-2                    [1, 16, 14, 14]           --
│    └─BatchNorm2d: 2-3                  [1, 16, 14, 14]           32
│    └─ReLU: 2-4                         [1, 16, 14, 14]           --
├─Sequential: 1-2                        [1, 32, 6, 6]             --
│    └─Conv2d: 2-5                       [1, 32, 12, 12]           12,832
│    └─MaxPool2d: 2-6                    [1, 32, 6, 6]             --
│    └─BatchNorm2d: 2-7                  [1, 32, 6, 6]             64
│    └─ReLU: 2-8                         [1, 32, 6, 6]             --
├─Sequential: 1-3                        [1, 64, 2, 2]             --
│    └─Conv2d: 2-9                       [1, 64, 4, 4]             51,264
│    └─MaxPool2d: 2-10                   [1, 64, 2, 2]             --
│    └─BatchNorm2d: 2-11                 [1, 64, 2, 2]             128
│    └─ReLU: 2-12                        [1, 64, 2, 2]             --
├─Sequential: 1-4                        [1, 128, 3, 3]            --
│    └─Conv2d: 2-13                      [1, 128, 3, 3]            32,896
│    └─BatchNorm2d: 2-14                 [1, 128, 3, 3]            256
│    └─ReLU: 2-15                        [1, 128, 3, 3]            --
├─Flatten: 1-5                           [1, 1152]                 --
├─Linear: 1-6                            [1, 256]                  295,168
├─ReLU: 1-7                              [1, 256]                  --
├─Dropout: 1-8                           [1, 256]                  --
├─Linear: 1-9                            [1, 10]                   2,570
==========================================================================================
Total params: 395,370
Trainable params: 395,370
Non-trainable params: 0
Total mult-adds (M): 3.39
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.20
Params size (MB): 1.58
Estimated Total Size (MB): 1.79
==========================================================================================