Python Training Camp Check-in Log: Day 54

Published: 2025-06-20
Assignment: a slightly academic-flavored one:
  1. Evaluate the accuracy of an Inception network on CIFAR-10.
  2. Ablation study: introduce a residual mechanism and a CBAM module, ablating each separately.

Accuracy Test

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time

# Configure a font with CJK support (only needed if plots use Chinese labels)
plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei, commonly available on Windows
plt.rcParams['axes.unicode_minus'] = False    # render minus signs correctly

# Data preprocessing
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
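# Note: with per-channel mean=std=0.5, Normalize maps ToTensor's [0, 1] output to [-1, 1]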

# Load the CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Basic Inception module
class Inception(nn.Module):
    def __init__(self, in_channels):
        super(Inception, self).__init__()
        self.branch1x1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 48, kernel_size=3, padding=1),
            nn.BatchNorm2d(48),
            nn.ReLU()
        )
        
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 24, kernel_size=5, padding=2),
            nn.BatchNorm2d(24),
            nn.ReLU()
        )
        
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
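        # The four branches concatenate to 32 + 48 + 24 + 32 = 136 output
        # channels, which is why downstream layers take 136 input channels.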

    def forward(self, x):
        branch1x1 = self.branch1x1(x)
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)
        return torch.cat([branch1x1, branch3x3, branch5x5, branch_pool], dim=1)

# Basic InceptionNet
class InceptionNet(nn.Module):
    def __init__(self, num_classes=10):
        super(InceptionNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.inception2 = Inception(64)
        self.inception3 = Inception(136)
        
        self.conv4 = nn.Sequential(
            nn.Conv2d(136, 256, kernel_size=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.inception2(x)
        x = self.inception3(x)
        x = self.conv4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
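
# Optional sanity check (illustrative): a dummy forward pass verifies the shapes.
#   m = InceptionNet()
#   m.inception2(torch.randn(1, 64, 16, 16)).shape  # torch.Size([1, 136, 16, 16])
#   m(torch.randn(1, 3, 32, 32)).shape              # torch.Size([1, 10])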

# Training and evaluation functions
def train_model(model, criterion, optimizer, scheduler, num_epochs, device):
    train_losses = []
    test_accuracies = []
    start_time = time.time()
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        epoch_loss = running_loss / len(trainloader)
        train_losses.append(epoch_loss)
        
        # Evaluation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        accuracy = 100 * correct / total
        test_accuracies.append(accuracy)
        
        # Step the learning-rate scheduler
        if scheduler:
            scheduler.step()
        
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Acc: {accuracy:.2f}%')
    
    print(f"训练完成,总耗时: {time.time() - start_time:.2f}秒")
    return train_losses, test_accuracies

# Main function
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")
    
    model = InceptionNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
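    # Note: CosineAnnealingLR is periodic. With T_max=20 but num_epochs=30, the
    # learning rate decays for the first 20 epochs and then rises again over
    # epochs 21-30, which may contribute to the late-run accuracy dip in the logs.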
    
    print("=== 基础Inception网络训练 ===")
    train_losses, test_accuracies = train_model(
        model, criterion, optimizer, scheduler, num_epochs=30, device=device
    )
    
    np.savez("inception_cifar10_results.npz", train_loss=train_losses, test_acc=test_accuracies)
    
    # Plot the curves
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses)
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    
    plt.subplot(1, 2, 2)
    plt.plot(test_accuracies)
    plt.title('Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.tight_layout()
    plt.savefig('inception_cifar10_results.png')
    plt.show()

if __name__ == "__main__":
    main()
Files already downloaded and verified
Files already downloaded and verified
Using device: cuda
=== Training: base Inception network ===
Epoch 1/30, Loss: 1.3211, Acc: 60.02%
Epoch 2/30, Loss: 0.9590, Acc: 64.79%
Epoch 3/30, Loss: 0.8217, Acc: 65.41%
Epoch 4/30, Loss: 0.7225, Acc: 74.71%
Epoch 5/30, Loss: 0.6500, Acc: 72.88%
Epoch 6/30, Loss: 0.5940, Acc: 75.04%
Epoch 7/30, Loss: 0.5424, Acc: 73.14%
Epoch 8/30, Loss: 0.4988, Acc: 77.44%
Epoch 9/30, Loss: 0.4590, Acc: 78.95%
Epoch 10/30, Loss: 0.4292, Acc: 78.36%
Epoch 11/30, Loss: 0.3947, Acc: 79.96%
Epoch 12/30, Loss: 0.3637, Acc: 79.75%
Epoch 13/30, Loss: 0.3385, Acc: 81.03%
Epoch 14/30, Loss: 0.3129, Acc: 81.56%
Epoch 15/30, Loss: 0.2921, Acc: 82.36%
Epoch 16/30, Loss: 0.2748, Acc: 82.65%
Epoch 17/30, Loss: 0.2577, Acc: 82.72%
Epoch 18/30, Loss: 0.2485, Acc: 83.03%
Epoch 19/30, Loss: 0.2403, Acc: 83.08%
Epoch 20/30, Loss: 0.2362, Acc: 83.29%
Epoch 21/30, Loss: 0.2349, Acc: 83.04%
Epoch 22/30, Loss: 0.2376, Acc: 83.17%
Epoch 23/30, Loss: 0.2394, Acc: 83.07%
Epoch 24/30, Loss: 0.2408, Acc: 82.98%
Epoch 25/30, Loss: 0.2477, Acc: 82.85%
Epoch 26/30, Loss: 0.2512, Acc: 82.55%
Epoch 27/30, Loss: 0.2565, Acc: 82.25%
Epoch 28/30, Loss: 0.2598, Acc: 80.66%
Epoch 29/30, Loss: 0.2669, Acc: 79.90%
Epoch 30/30, Loss: 0.2695, Acc: 79.95%
Training finished, total time: 583.32s

Residual Ablation Experiment

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time

# Data preprocessing
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Basic Inception module
class Inception(nn.Module):
    def __init__(self, in_channels):
        super(Inception, self).__init__()
        self.branch1x1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 48, kernel_size=3, padding=1),
            nn.BatchNorm2d(48),
            nn.ReLU()
        )
        
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 24, kernel_size=5, padding=2),
            nn.BatchNorm2d(24),
            nn.ReLU()
        )
        
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )

    def forward(self, x):
        branch1x1 = self.branch1x1(x)
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)
        return torch.cat([branch1x1, branch3x3, branch5x5, branch_pool], dim=1)

# Inception module with a residual connection
class InceptionWithResidual(nn.Module):
    def __init__(self, in_channels):
        super(InceptionWithResidual, self).__init__()
        self.inception = Inception(in_channels)
        
        # Shortcut branch: project the input when channel counts differ
        if in_channels != 136:
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, 136, kernel_size=1),
                nn.BatchNorm2d(136)
            )
        else:
            self.residual = nn.Identity()
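        # With in_channels=64, the 1x1 conv + BN projects the input to the
        # module's 136 output channels so the element-wise add is shape-valid;
        # with 136 channels an identity shortcut suffices (analogous to
        # ResNet's projection shortcuts).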
        
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.inception(x)
        residual = self.residual(x)
        out += residual
        out = self.relu(out)
        return out

# InceptionNet with residual connections
class InceptionResNet(nn.Module):
    def __init__(self, num_classes=10):
        super(InceptionResNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.inception2 = InceptionWithResidual(64)
        self.inception3 = InceptionWithResidual(136)
        
        self.conv4 = nn.Sequential(
            nn.Conv2d(136, 256, kernel_size=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.inception2(x)
        x = self.inception3(x)
        x = self.conv4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Training and evaluation functions
def train_model(model, criterion, optimizer, scheduler, num_epochs, device):
    train_losses = []
    test_accuracies = []
    start_time = time.time()
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        epoch_loss = running_loss / len(trainloader)
        train_losses.append(epoch_loss)
        
        # Evaluation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        accuracy = 100 * correct / total
        test_accuracies.append(accuracy)
        
        # Step the learning-rate scheduler
        if scheduler:
            scheduler.step()
        
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Acc: {accuracy:.2f}%')
    
    print(f"训练完成,总耗时: {time.time() - start_time:.2f}秒")
    return train_losses, test_accuracies

# Main function
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")
    
    model = InceptionResNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
    
    print("=== 带残差连接的Inception网络训练 ===")
    train_losses, test_accuracies = train_model(
        model, criterion, optimizer, scheduler, num_epochs=30, device=device
    )
    
    np.savez("inception_residual_cifar10_results.npz", train_loss=train_losses, test_acc=test_accuracies)
    
    # Plot the curves
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses)
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    
    plt.subplot(1, 2, 2)
    plt.plot(test_accuracies)
    plt.title('Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.tight_layout()
    plt.savefig('inception_residual_results.png')
    plt.show()

if __name__ == "__main__":
    main()
Files already downloaded and verified
Files already downloaded and verified
Using device: cuda
=== Training: Inception with residual connections ===
Epoch 1/30, Loss: 1.3716, Acc: 52.39%
Epoch 2/30, Loss: 0.9994, Acc: 58.58%
Epoch 3/30, Loss: 0.8643, Acc: 63.51%
Epoch 4/30, Loss: 0.7778, Acc: 63.31%
Epoch 5/30, Loss: 0.7052, Acc: 64.05%
Epoch 6/30, Loss: 0.6486, Acc: 73.64%
Epoch 7/30, Loss: 0.6004, Acc: 74.42%
Epoch 8/30, Loss: 0.5514, Acc: 71.82%
Epoch 9/30, Loss: 0.5091, Acc: 75.17%
Epoch 10/30, Loss: 0.4714, Acc: 76.08%
Epoch 11/30, Loss: 0.4362, Acc: 76.79%
Epoch 12/30, Loss: 0.4053, Acc: 78.26%
Epoch 13/30, Loss: 0.3799, Acc: 79.23%
Epoch 14/30, Loss: 0.3547, Acc: 78.54%
Epoch 15/30, Loss: 0.3287, Acc: 81.67%
Epoch 16/30, Loss: 0.3094, Acc: 82.08%
Epoch 17/30, Loss: 0.2942, Acc: 81.93%
Epoch 18/30, Loss: 0.2831, Acc: 82.40%
Epoch 19/30, Loss: 0.2742, Acc: 82.30%
Epoch 20/30, Loss: 0.2704, Acc: 82.51%
Epoch 21/30, Loss: 0.2681, Acc: 82.47%
Epoch 22/30, Loss: 0.2699, Acc: 82.60%
Epoch 23/30, Loss: 0.2730, Acc: 82.15%
Epoch 24/30, Loss: 0.2753, Acc: 82.27%
Epoch 25/30, Loss: 0.2789, Acc: 81.96%
Epoch 26/30, Loss: 0.2840, Acc: 81.58%
Epoch 27/30, Loss: 0.2880, Acc: 80.90%
Epoch 28/30, Loss: 0.2963, Acc: 81.09%
Epoch 29/30, Loss: 0.2984, Acc: 79.69%
Epoch 30/30, Loss: 0.3036, Acc: 78.55%
Training finished, total time: 617.45s

CBAM Ablation Experiment

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time

# Data preprocessing (same as the base model)
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the CIFAR-10 dataset (same as the base model)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Basic Inception module (same as the base model)
class Inception(nn.Module):
    def __init__(self, in_channels):
        super(Inception, self).__init__()
        self.branch1x1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 48, kernel_size=3, padding=1),
            nn.BatchNorm2d(48),
            nn.ReLU()
        )
        
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 24, kernel_size=5, padding=2),
            nn.BatchNorm2d(24),
            nn.ReLU()
        )
        
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )

    def forward(self, x):
        branch1x1 = self.branch1x1(x)
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)
        return torch.cat([branch1x1, branch3x3, branch5x5, branch_pool], dim=1)

# CBAM attention modules
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction_ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction_ratio, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(in_channels // reduction_ratio, in_channels, 1, bias=False)
        )
        
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = self.conv(out)
        return self.sigmoid(out)

class CBAM(nn.Module):
    def __init__(self, in_channels, reduction_ratio=16, kernel_size=7):
        super(CBAM, self).__init__()
        self.channel_att = ChannelAttention(in_channels, reduction_ratio)
        self.spatial_att = SpatialAttention(kernel_size)

    def forward(self, x):
        x = x * self.channel_att(x)
        x = x * self.spatial_att(x)
        return x
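
# Shape walk-through (illustrative) for an input of shape (N, 136, 16, 16):
#   channel_att(x) -> (N, 136, 1, 1), broadcast-multiplied across H and W;
#   spatial_att(x) -> (N, 1, 16, 16), broadcast-multiplied across channels.
# Channel attention is applied before spatial attention, following the CBAM paper.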

# Inception module with CBAM
class InceptionWithCBAM(nn.Module):
    def __init__(self, in_channels):
        super(InceptionWithCBAM, self).__init__()
        self.inception = Inception(in_channels)
        self.cbam = CBAM(136)  # matches the Inception output channel count

    def forward(self, x):
        x = self.inception(x)
        x = self.cbam(x)
        return x

# InceptionNet with CBAM
class InceptionCBAMNet(nn.Module):
    def __init__(self, num_classes=10):
        super(InceptionCBAMNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.inception2 = InceptionWithCBAM(64)
        self.inception3 = InceptionWithCBAM(136)
        
        self.conv4 = nn.Sequential(
            nn.Conv2d(136, 256, kernel_size=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.inception2(x)
        x = self.inception3(x)
        x = self.conv4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Training and evaluation functions (same as the base model)
def train_model(model, criterion, optimizer, scheduler, num_epochs, device):
    train_losses = []
    test_accuracies = []
    start_time = time.time()
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        epoch_loss = running_loss / len(trainloader)
        train_losses.append(epoch_loss)
        
        # Evaluation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        accuracy = 100 * correct / total
        test_accuracies.append(accuracy)
        
        # Step the learning-rate scheduler
        if scheduler:
            scheduler.step()
        
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Acc: {accuracy:.2f}%')
    
    print(f"训练完成,总耗时: {time.time() - start_time:.2f}秒")
    return train_losses, test_accuracies

# Main function
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")
    
    model = InceptionCBAMNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
    
    print("=== 带CBAM注意力的Inception网络训练 ===")
    train_losses, test_accuracies = train_model(
        model, criterion, optimizer, scheduler, num_epochs=30, device=device
    )
    
    np.savez("inception_cbam_cifar10_results.npz", train_loss=train_losses, test_acc=test_accuracies)
    
    # Plot the curves
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses)
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    
    plt.subplot(1, 2, 2)
    plt.plot(test_accuracies)
    plt.title('Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.tight_layout()
    plt.savefig('inception_cbam_results.png')
    plt.show()

if __name__ == "__main__":
    main()
Files already downloaded and verified
Files already downloaded and verified
Using device: cuda
=== Training: Inception with CBAM attention ===
Epoch 1/30, Loss: 1.2840, Acc: 57.81%
Epoch 2/30, Loss: 0.9160, Acc: 66.33%
Epoch 3/30, Loss: 0.7706, Acc: 65.81%
Epoch 4/30, Loss: 0.6706, Acc: 71.49%
Epoch 5/30, Loss: 0.5867, Acc: 68.41%
Epoch 6/30, Loss: 0.5256, Acc: 75.48%
Epoch 7/30, Loss: 0.4689, Acc: 74.60%
Epoch 8/30, Loss: 0.4129, Acc: 72.80%
Epoch 9/30, Loss: 0.3707, Acc: 77.56%
Epoch 10/30, Loss: 0.3219, Acc: 77.48%
Epoch 11/30, Loss: 0.2784, Acc: 75.32%
Epoch 12/30, Loss: 0.2299, Acc: 77.47%
Epoch 13/30, Loss: 0.1893, Acc: 77.54%
Epoch 14/30, Loss: 0.1527, Acc: 77.38%
Epoch 15/30, Loss: 0.1218, Acc: 77.80%
Epoch 16/30, Loss: 0.1001, Acc: 77.66%
Epoch 17/30, Loss: 0.0827, Acc: 77.70%
Epoch 18/30, Loss: 0.0728, Acc: 77.55%
Epoch 19/30, Loss: 0.0636, Acc: 77.74%
Epoch 20/30, Loss: 0.0600, Acc: 77.76%
Epoch 21/30, Loss: 0.0595, Acc: 77.80%
Epoch 22/30, Loss: 0.0596, Acc: 77.72%
Epoch 23/30, Loss: 0.0611, Acc: 77.75%
Epoch 24/30, Loss: 0.0624, Acc: 77.77%
Epoch 25/30, Loss: 0.0651, Acc: 77.63%
Epoch 26/30, Loss: 0.0710, Acc: 76.49%
Epoch 27/30, Loss: 0.0885, Acc: 77.00%
Epoch 28/30, Loss: 0.1021, Acc: 76.08%
Epoch 29/30, Loss: 0.1225, Acc: 75.91%
Epoch 30/30, Loss: 0.1389, Acc: 76.24%
Training finished, total time: 681.57s

Comparative Analysis: Visualizing the Results of the Three Models

import numpy as np
import matplotlib.pyplot as plt

# Compare the results of the different models
def compare_models():
    # Load the saved results (make sure the paths are correct)
    try:
        base_results = np.load("inception_cifar10_results.npz")
        residual_results = np.load("inception_residual_cifar10_results.npz")
        cbam_results = np.load("inception_cbam_cifar10_results.npz")
    except FileNotFoundError:
        print("错误:未找到模型结果文件,请先运行基础模型、残差模型和CBAM模型的训练")
        return
    
    # Print the final results
    print("\n=== Model performance comparison ===")
    print(f"Base Inception network: final accuracy {base_results['test_acc'][-1]:.2f}%")
    print(f"Inception+Residual network: final accuracy {residual_results['test_acc'][-1]:.2f}%")
    print(f"Inception+CBAM network: final accuracy {cbam_results['test_acc'][-1]:.2f}%")
    
    # Change relative to the baseline
    base_acc = base_results['test_acc'][-1]
    residual_gain = residual_results['test_acc'][-1] - base_acc
    cbam_gain = cbam_results['test_acc'][-1] - base_acc
    print(f"Residual mechanism gain: {residual_gain:.2f}%")
    print(f"CBAM attention gain: {cbam_gain:.2f}%")
    
    # Plot the comparison
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    plt.plot(base_results['train_loss'], label='Base Inception')
    plt.plot(residual_results['train_loss'], label='Inception+Residual')
    plt.plot(cbam_results['train_loss'], label='Inception+CBAM')
    plt.title('Training Loss Comparison')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    plt.subplot(1, 2, 2)
    plt.plot(base_results['test_acc'], label='Base Inception')
    plt.plot(residual_results['test_acc'], label='Inception+Residual')
    plt.plot(cbam_results['test_acc'], label='Inception+CBAM')
    plt.title('Test Accuracy Comparison')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('model_comparison.png')
    plt.show()

if __name__ == "__main__":
    compare_models()
=== Model performance comparison ===
Base Inception network: final accuracy 79.95%
Inception+Residual network: final accuracy 78.55%
Inception+CBAM network: final accuracy 76.24%
Residual mechanism gain: -1.40%
CBAM attention gain: -3.71%

Summary

I. Training Trend Analysis

1. Training loss (left panel)
  • Common trend: the training losses of all three models fall steadily with each epoch, showing that every model is learning useful features and that the optimizer and loss configuration are basically sound.
  • Differences:
    • Base Inception (blue): the most gradual loss decline and the slowest convergence;
    • Inception+Residual (orange): a slightly faster decline, but the plateau it settles into is higher than the base model's;
    • Inception+CBAM (green): the fastest decline and the lowest final loss, suggesting that CBAM's feature selection makes learning more efficient.
2. Test accuracy (right panel)
  • Common trend: accuracy climbs quickly over the first five or so epochs as the models pick up the basic classification patterns;
  • Differences:
    • Base Inception (blue): the highest final accuracy (79.95%), but a noisy curve, so generalization is only moderate;
    • Inception+Residual (orange): reasonably high accuracy early on, but a clear decline later in training; the residual connections may have introduced redundancy or overfitting;
    • Inception+CBAM (green): the lowest accuracy (76.24%) and the largest swings; the CBAM attention mechanism may not suit this task, or the added model complexity led to overfitting.

II. Effect of the Model Improvements

1. Problems with the residual mechanism (Inception+Residual)
  • Expected effect: residual connections should ease vanishing gradients and allow deeper networks, yet accuracy actually dropped (-1.40%). Possible reasons:
    • the 1x1 projection used to match channels in the shortcut adds extra parameters and raises the overfitting risk;
    • CIFAR-10 is a simple dataset, so the "depth advantage" of residuals never materializes, and the extra complexity instead hurts generalization.
2. Problems with CBAM attention (Inception+CBAM)
  • Expected effect: CBAM should strengthen key features and suppress redundant ones, yet the accuracy drop is even larger (-3.71%). Possible reasons:
    • CBAM's forced re-weighting over channels and spatial positions may push the model to over-attend to local detail and lose global features;
    • on a small dataset such as CIFAR-10, the attention's "adaptive learning" overfits easily, leaving it less robust than the plain baseline. (A small script comparing peak rather than final accuracy follows below.)
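
One caveat about the comparison above: it ranks the models by last-epoch accuracy, yet all three accuracy curves peak around epoch 20 and then decay (consistent with the cosine schedule's T_max=20 turning point noted in the code). Below is a minimal sketch, assuming the three .npz result files saved by the scripts above are present, that reports each model's peak accuracy alongside its final value:

import numpy as np

results = {
    "Base Inception": "inception_cifar10_results.npz",
    "Inception+Residual": "inception_residual_cifar10_results.npz",
    "Inception+CBAM": "inception_cbam_cifar10_results.npz",
}
for name, path in results.items():
    acc = np.load(path)["test_acc"]
    # Report the best epoch as well as the final epoch for a fairer comparison
    print(f"{name}: best {acc.max():.2f}% (epoch {acc.argmax() + 1}), final {acc[-1]:.2f}%")

On the logs above this would report peaks of 83.29%, 82.60%, and 77.80% respectively: the qualitative ranking is unchanged, but the gap between the base and residual models narrows to under one point.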

@浙大疏锦行

