# 1. ResNet 内部结构
import torch
import torchvision.models as models
from torchinfo import summary #之前的内容说了,推荐用他来可视化模型结构,信息最全
# Load ResNet18 with the pretrained ImageNet (IMAGENET1K_V1) weights.
# torchvision >= 0.13 selects weights through the `weights=` enum and
# deprecates `pretrained=`; fall back to the legacy flag on older releases.
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)
model.eval()  # switch to evaluation mode before inspecting the model
# Print the layer-by-layer structure and parameter summary for a single
# 3-channel 224x224 input.
summary(model, input_size=(1, 3, 224, 224))
# 2. 加入 CBAM
import torch
import torch.nn as nn
from torchvision import models
# 自定义ResNet18模型,插入CBAM模块
class ResNet18_CBAM(nn.Module):
    """ResNet18 backbone with a CBAM module after each residual stage.

    The stem is adapted for small images (the shape comments below assume
    32x32 CIFAR-10 inputs): the first convolution becomes 3x3 with stride 1
    and the stem max-pool is replaced by an identity, so the stem performs
    no downsampling.

    Args:
        num_classes: Output size of the replacement classification head.
        pretrained: Whether to load pretrained ImageNet weights into the
            backbone. The replaced ``conv1`` and ``fc`` layers are newly
            created (not pretrained) either way.
        cbam_ratio: Forwarded as ``ratio`` to every ``CBAM`` module.
        cbam_kernel: Forwarded as ``kernel_size`` to every ``CBAM`` module.

    Note:
        ``CBAM`` is not defined in this block; it must already be in scope
        and accept ``in_channels``, ``ratio`` and ``kernel_size``. It is
        assumed to return a tensor of the same shape as its input, since
        each output feeds the next ResNet stage directly.
    """

    def __init__(self, num_classes=10, pretrained=True, cbam_ratio=16, cbam_kernel=7):
        super().__init__()

        # torchvision >= 0.13 selects weights through the `weights=` enum and
        # deprecates `pretrained=`; fall back to the legacy flag when the
        # enum does not exist.
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.backbone = models.resnet18(weights=weights)
        else:
            self.backbone = models.resnet18(pretrained=pretrained)

        # Replace the stem convolution with a 3x3 / stride-1 one so that
        # small inputs are not downsampled immediately.
        self.backbone.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        # Drop the stem max-pool for the same reason (small input size).
        self.backbone.maxpool = nn.Identity()

        # One CBAM per residual stage, sized to that stage's output channels.
        self.cbam_layer1 = CBAM(in_channels=64, ratio=cbam_ratio, kernel_size=cbam_kernel)
        self.cbam_layer2 = CBAM(in_channels=128, ratio=cbam_ratio, kernel_size=cbam_kernel)
        self.cbam_layer3 = CBAM(in_channels=256, ratio=cbam_ratio, kernel_size=cbam_kernel)
        self.cbam_layer4 = CBAM(in_channels=512, ratio=cbam_ratio, kernel_size=cbam_kernel)

        # Replace the classification head; read the feature width from the
        # existing head instead of hard-coding it.
        self.backbone.fc = nn.Linear(
            in_features=self.backbone.fc.in_features,
            out_features=num_classes,
        )

    def forward(self, x):
        """Run the stem, the four stage + CBAM pairs, then pool and classify.

        Shape comments assume a [B, 3, 32, 32] input.
        """
        net = self.backbone

        # Stem; the max-pool is intentionally not called (it is an Identity).
        x = net.relu(net.bn1(net.conv1(x)))  # [B, 64, 32, 32]

        # Residual stages, each followed by its CBAM module.
        x = self.cbam_layer1(net.layer1(x))  # [B, 64, 32, 32]
        x = self.cbam_layer2(net.layer2(x))  # [B, 128, 16, 16]
        x = self.cbam_layer3(net.layer3(x))  # [B, 256, 8, 8]
        x = self.cbam_layer4(net.layer4(x))  # [B, 512, 4, 4]

        # Global average pooling + classification head.
        x = net.avgpool(x)  # [B, 512, 1, 1]
        x = torch.flatten(x, 1)  # [B, 512]
        return net.fc(x)  # [B, num_classes]
# Build the model and move it to the target device.
# NOTE(review): `device` is not defined in this section; it is assumed to be
# created earlier (e.g. a torch.device) -- confirm before running standalone.
model = ResNet18_CBAM().to(device)
criterion = nn.CrossEntropyLoss()
# Use torch.optim directly: it is reachable through `import torch`, so this
# section does not depend on a separate `optim` import.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate when the monitored value (lower is better) has
# stopped improving for more than 3 consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", patience=3, factor=0.5
)