import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dataset.mnist import load_mnist
# sigmoid
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def sigmoid_grad(x):
s = sigmoid(x)
return s * (1 - s)
# relu
def relu(x):
return np.maximum(0, x)
def relu_grad(x):
    grad = np.zeros_like(x)
grad[x >= 0] = 1
return grad
# softmax
def softmax(x):
if x.ndim == 2:
x = x.T
x = x - np.max(x, axis=0)
y = np.exp(x) / np.sum(np.exp(x), axis=0)
return y.T
x = x - np.max(x)
return np.exp(x) / np.sum(np.exp(x))
def cross_entropy_error(y, t):
if y.ndim == 1:
t = t.reshape(1, t.size)
y = y.reshape(1, y.size)
if t.size == y.size:
t = t.argmax(axis=1)
batch_size = y.shape[0]
return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
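# A hedged sanity check, not part of the original notebook: softmax rows should sum
# to 1 even for very large logits (thanks to the max-subtraction trick), and
# cross_entropy_error accepts either one-hot or label-index targets. The _demo_*
# arrays below are made-up values used purely for illustration.
_demo_logits = np.array([[1000.0, 1001.0, 1002.0],
                         [0.2, 0.1, -1.0]])
_demo_probs = softmax(_demo_logits)
print(_demo_probs.sum(axis=1))                            # -> [1. 1.]
_demo_t_onehot = np.array([[0, 0, 1], [1, 0, 0]])
print(cross_entropy_error(_demo_probs, _demo_t_onehot))   # mean negative log-likelihood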
def accuracy(y, t):
pred = np.argmax(y, axis=1)
true = np.argmax(t, axis=1)
return np.mean(pred == true)
class BatchNorm:
def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
self.gamma = gamma
self.beta = beta
self.momentum = momentum
self.input_shape = None
self.running_mean = running_mean
self.running_var = running_var
self.batch_size = None
self.xc = None
self.std = None
self.dgamma = None
self.dbeta = None
def forward(self, x, train_flg=True):
self.input_shape = x.shape
if x.ndim != 2:
N, C, H, W = x.shape
x = x.reshape(N, -1)
out = self.__forward(x, train_flg)
return out.reshape(*self.input_shape)
def __forward(self, x, train_flg):
if self.running_mean is None:
N, D = x.shape
self.running_mean = np.zeros(D)
self.running_var = np.zeros(D)
if train_flg:
mu = x.mean(axis=0)
xc = x - mu
var = np.mean(xc**2, axis=0)
std = np.sqrt(var + 10e-7)
xn = xc / std
self.batch_size = x.shape[0]
self.xc = xc
self.xn = xn
self.std = std
self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
self.running_var = self.momentum * self.running_var + (1-self.momentum) * var
else:
xc = x - self.running_mean
xn = xc / ((np.sqrt(self.running_var + 10e-7)))
out = self.gamma * xn + self.beta
return out
def backward(self, dout):
if dout.ndim != 2:
N, C, H, W = dout.shape
dout = dout.reshape(N, -1)
dx = self.__backward(dout)
dx = dx.reshape(*self.input_shape)
return dx
def __backward(self, dout):
dbeta = dout.sum(axis=0)
dgamma = np.sum(self.xn * dout, axis=0)
dxn = self.gamma * dout
dxc = dxn / self.std
dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
dvar = 0.5 * dstd / self.std
dxc += (2.0 / self.batch_size) * self.xc * dvar
dmu = np.sum(dxc, axis=0)
dx = dxc - dmu / self.batch_size
self.dgamma = dgamma
self.dbeta = dbeta
return dx
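# A minimal hedged shape check for the BatchNorm layer above (not part of the
# original notebook); _bn, _x_demo and the sizes are illustrative choices only.
_x_demo = np.random.randn(8, 5)
_bn = BatchNorm(gamma=np.ones(5), beta=np.zeros(5))
_out = _bn.forward(_x_demo, train_flg=True)
_dx = _bn.backward(np.ones_like(_out))
print(_out.shape, _dx.shape)   # both should be (8, 5)
print(_out.mean(axis=0))       # per-feature mean should be close to 0 after normalization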
# Dropout layer
class Dropout:
def __init__(self, dropout_ratio=0.08):
self.dropout_ratio = dropout_ratio
self.mask = None
def forward(self, x, train_flg=True):
if train_flg:
self.mask = np.random.rand(*x.shape) > self.dropout_ratio
return x * self.mask
else:
return x * (1.0 - self.dropout_ratio)
def backward(self, dout):
return dout * self.mask
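# A hedged usage sketch, not in the original notebook: during training Dropout zeroes
# roughly dropout_ratio of the activations, while at test time it keeps every unit and
# rescales by (1 - dropout_ratio). _drop and _x_demo are illustrative names.
_drop = Dropout(dropout_ratio=0.5)
_x_demo = np.ones((4, 6))
print(_drop.forward(_x_demo, train_flg=True))    # about half the entries become 0
print(_drop.forward(_x_demo, train_flg=False))   # every entry scaled to 0.5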
# Five-layer network implementation
class FiveLayerNet:
def __init__(self, input_size=784, hidden_sizes=[100, 100, 50, 50], output_size=10,
weight_init_std='he', use_batchnorm=False, use_dropout=False, dropout_ratio=0.08,
weight_decay_lambda=0.0):
self.use_batchnorm = use_batchnorm
self.use_dropout = use_dropout
self.weight_decay_lambda = weight_decay_lambda
        # Initialize weights and biases
self.params = {}
all_sizes = [input_size] + hidden_sizes + [output_size]
        # Weight initialization: He, Xavier, or a fixed standard deviation
for i in range(len(all_sizes)-1):
if weight_init_std == 'he':
                # He initialization, suited to ReLU activations
scale = np.sqrt(2.0 / all_sizes[i])
self.params['W'+str(i+1)] = scale * np.random.randn(all_sizes[i], all_sizes[i+1])
elif weight_init_std == 'xavier':
                # Xavier initialization, suited to sigmoid-like activations
scale = np.sqrt(1.0 / all_sizes[i])
self.params['W'+str(i+1)] = scale * np.random.randn(all_sizes[i], all_sizes[i+1])
else:
self.params['W'+str(i+1)] = weight_init_std * np.random.randn(all_sizes[i], all_sizes[i+1])
self.params['b'+str(i+1)] = np.zeros(all_sizes[i+1])
        # Initialize BatchNorm layers
if self.use_batchnorm:
self.bn_layers = {}
for i in range(len(hidden_sizes)):
gamma = np.ones(hidden_sizes[i])
beta = np.zeros(hidden_sizes[i])
self.bn_layers['bn' + str(i+1)] = BatchNorm(gamma, beta)
        # Initialize Dropout layers
if self.use_dropout:
self.dropout_layers = {}
for i in range(len(hidden_sizes)):
self.dropout_layers['dropout' + str(i+1)] = Dropout(dropout_ratio)
def forward(self, x, train_flg=True):
self.x = x
self.layers = {}
self.activations = {}
a = x
L = len(self.params) // 2
for i in range(1, L):
W = self.params['W'+str(i)]
b = self.params['b'+str(i)]
a = np.dot(a, W) + b
if self.use_batchnorm:
a = self.bn_layers['bn'+str(i)].forward(a, train_flg)
a = relu(a)
if self.use_dropout:
a = self.dropout_layers['dropout'+str(i)].forward(a, train_flg)
self.activations['layer' + str(i)] = a
        # Final (output) layer
W = self.params['W'+str(L)]
b = self.params['b'+str(L)]
score = np.dot(a, W) + b
y = softmax(score)
self.score = score
self.y = y
return y
def loss(self, x, t):
y = self.forward(x, train_flg=True)
weight_decay = 0
for i in range(len(self.params)//2):
W = self.params['W'+str(i+1)]
weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
return cross_entropy_error(y, t) + weight_decay
def accuracy(self, x, t):
y = self.forward(x, train_flg=False)
return accuracy(y, t)
def gradient(self, x, t):
        # Forward pass
self.loss(x, t)
batch_num = x.shape[0]
grads = {}
        # Gradient of softmax + cross-entropy at the output
dout = self.y.copy()
dout[np.arange(batch_num), np.argmax(t, axis=1)] -= 1
dout = dout / batch_num
L = len(self.params)//2
        # Backpropagate through the last layer
grads['W'+str(L)] = np.dot(self.activations['layer'+str(L-1)].T, dout)
grads['b'+str(L)] = np.sum(dout, axis=0)
if self.weight_decay_lambda > 0:
grads['W'+str(L)] += self.weight_decay_lambda * self.params['W'+str(L)]
dout = np.dot(dout, self.params['W'+str(L)].T)
        # Backpropagate through the hidden layers
for i in reversed(range(1, L)):
            # Dropout backward
if self.use_dropout:
dout = self.dropout_layers['dropout'+str(i)].backward(dout)
dout = dout * (self.activations['layer'+str(i)] > 0)
            # BatchNorm backward (applied to every hidden layer, matching the forward pass)
            if self.use_batchnorm:
                dout = self.bn_layers['bn'+str(i)].backward(dout)
if i == 1:
a_prev = self.x
else:
a_prev = self.activations['layer'+str(i-1)]
grads['W'+str(i)] = np.dot(a_prev.T, dout)
grads['b'+str(i)] = np.sum(dout, axis=0)
if self.weight_decay_lambda > 0:
grads['W'+str(i)] += self.weight_decay_lambda * self.params['W'+str(i)]
dout = np.dot(dout, self.params['W'+str(i)].T)
return grads
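# A hedged sketch (not part of the original notebook) of a crude numerical gradient
# check for the backprop implementation above. It is run on a tiny network with
# BatchNorm and Dropout disabled so the loss is deterministic, and only on a couple of
# parameters so it finishes quickly. _numerical_grad and all _demo names are
# illustrative helpers, not part of the original code.
def _numerical_grad(f, param, eps=1e-4):
    grad = np.zeros_like(param)
    it = np.nditer(param, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig_val = param[idx]
        param[idx] = orig_val + eps
        fxh1 = f()
        param[idx] = orig_val - eps
        fxh2 = f()
        grad[idx] = (fxh1 - fxh2) / (2 * eps)
        param[idx] = orig_val        # restore the parameter
        it.iternext()
    return grad

_net = FiveLayerNet(input_size=6, hidden_sizes=[4, 4, 3, 3], output_size=2,
                    weight_init_std='he', use_batchnorm=False, use_dropout=False)
_x_demo = np.random.randn(5, 6)
_t_demo = np.eye(2)[np.random.randint(0, 2, 5)]   # one-hot targets
_analytic = _net.gradient(_x_demo, _t_demo)
for _key in ('W1', 'b3'):
    _num = _numerical_grad(lambda: _net.loss(_x_demo, _t_demo), _net.params[_key])
    print(_key, np.max(np.abs(_num - _analytic[_key])))   # expected to be tiny (e.g. < 1e-6)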
# Optimizer
class SGD:
"""随机梯度下降法(Stochastic Gradient Descent)"""
def __init__(self, lr=0.01):
self.lr = lr
def update(self, params, grads):
for key in params.keys():
params[key] -= self.lr * grads[key]
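# A hedged usage sketch, not in the original notebook: SGD.update simply applies
# params[key] -= lr * grads[key] for every key. _p and _g are illustrative dicts.
_p = {'W1': np.array([1.0, 2.0])}
_g = {'W1': np.array([0.5, 0.5])}
SGD(lr=0.1).update(_p, _g)
print(_p['W1'])   # -> [0.95 1.95]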
# Load the data
(x_train_full, t_train_full), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)
# Use a subset of the data for training and testing
train_size = 500
test_size = 125
x_train = x_train_full[:train_size]
t_train = t_train_full[:train_size]
x_test = x_test[:test_size]
t_test = t_test[:test_size]
print(f"训练集: {x_train.shape[0]},测试集: {x_test.shape[0]}")
# Hyperparameters
iters_num = 400
batch_size = 25
learning_rate = 0.01
train_size = x_train.shape[0]
iter_per_epoch = max(train_size // batch_size, 1)
epochs = iters_num // iter_per_epoch
Training set: 500, Test set: 125
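# Quick check of the schedule these settings imply: iter_per_epoch = 500 // 25 = 20,
# so epochs = 400 // 20 = 20, which matches the "Epoch x/20" lines printed below.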
# Experiment 1: Batch Normalization comparison
# Network without Batch Normalization
network1 = FiveLayerNet(
input_size=784,
hidden_sizes=[100, 100, 50, 50],
output_size=10,
weight_init_std='he',
use_batchnorm=False
)
# Network with Batch Normalization
network2 = FiveLayerNet(
input_size=784,
hidden_sizes=[100, 100, 50, 50],
output_size=10,
weight_init_std='he',
use_batchnorm=True
)
optimizer1 = SGD(lr=learning_rate)
optimizer2 = SGD(lr=learning_rate)
# Training records
train_loss1, train_acc1, test_acc1 = [], [], []
train_loss2, train_acc2, test_acc2 = [], [], []
for i in range(iters_num):
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
    # Without Batch Normalization
grad1 = network1.gradient(x_batch, t_batch)
optimizer1.update(network1.params, grad1)
loss1 = network1.loss(x_batch, t_batch)
train_loss1.append(loss1)
    # With Batch Normalization
grad2 = network2.gradient(x_batch, t_batch)
optimizer2.update(network2.params, grad2)
loss2 = network2.loss(x_batch, t_batch)
train_loss2.append(loss2)
if i % iter_per_epoch == 0:
train_acc1.append(network1.accuracy(x_train, t_train))
test_acc1.append(network1.accuracy(x_test, t_test))
train_acc2.append(network2.accuracy(x_train, t_train))
test_acc2.append(network2.accuracy(x_test, t_test))
print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
print(f"Without BatchNorm - Loss: {loss1:.4f}, Train Acc: {train_acc1[-1]:.4f}, Test Acc: {test_acc1[-1]:.4f}")
print(f"With BatchNorm - Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")
Epoch 1/20
Without BatchNorm - Loss: 2.3055, Train Acc: 0.0940, Test Acc: 0.0800
With BatchNorm - Loss: 2.5311, Train Acc: 0.0440, Test Acc: 0.0240
Epoch 2/20
Without BatchNorm - Loss: 2.2174, Train Acc: 0.2040, Test Acc: 0.2160
With BatchNorm - Loss: 1.9168, Train Acc: 0.2520, Test Acc: 0.1200
Epoch 3/20
Without BatchNorm - Loss: 1.7899, Train Acc: 0.4020, Test Acc: 0.3440
With BatchNorm - Loss: 1.6802, Train Acc: 0.4760, Test Acc: 0.2880
Epoch 4/20
Without BatchNorm - Loss: 1.7844, Train Acc: 0.5360, Test Acc: 0.4320
With BatchNorm - Loss: 1.4192, Train Acc: 0.5800, Test Acc: 0.3600
Epoch 5/20
Without BatchNorm - Loss: 1.2317, Train Acc: 0.6180, Test Acc: 0.5120
With BatchNorm - Loss: 1.1777, Train Acc: 0.6280, Test Acc: 0.4080
Epoch 6/20
Without BatchNorm - Loss: 1.3574, Train Acc: 0.6980, Test Acc: 0.6160
With BatchNorm - Loss: 1.1342, Train Acc: 0.7180, Test Acc: 0.4640
Epoch 7/20
Without BatchNorm - Loss: 1.1397, Train Acc: 0.7360, Test Acc: 0.6560
With BatchNorm - Loss: 1.0016, Train Acc: 0.7480, Test Acc: 0.5360
Epoch 8/20
Without BatchNorm - Loss: 0.9572, Train Acc: 0.7840, Test Acc: 0.6960
With BatchNorm - Loss: 0.9753, Train Acc: 0.7940, Test Acc: 0.5280
Epoch 9/20
Without BatchNorm - Loss: 0.9413, Train Acc: 0.8160, Test Acc: 0.7040
With BatchNorm - Loss: 1.1090, Train Acc: 0.8260, Test Acc: 0.5760
Epoch 10/20
Without BatchNorm - Loss: 0.7149, Train Acc: 0.8400, Test Acc: 0.7280
With BatchNorm - Loss: 0.7278, Train Acc: 0.8440, Test Acc: 0.5840
Epoch 11/20
Without BatchNorm - Loss: 0.5245, Train Acc: 0.8500, Test Acc: 0.7280
With BatchNorm - Loss: 0.7085, Train Acc: 0.8520, Test Acc: 0.5440
Epoch 12/20
Without BatchNorm - Loss: 0.7069, Train Acc: 0.8700, Test Acc: 0.7280
With BatchNorm - Loss: 0.9384, Train Acc: 0.8840, Test Acc: 0.6080
Epoch 13/20
Without BatchNorm - Loss: 0.5311, Train Acc: 0.8880, Test Acc: 0.7360
With BatchNorm - Loss: 0.8163, Train Acc: 0.9160, Test Acc: 0.6480
Epoch 14/20
Without BatchNorm - Loss: 0.3427, Train Acc: 0.8960, Test Acc: 0.7920
With BatchNorm - Loss: 0.7136, Train Acc: 0.9000, Test Acc: 0.6400
Epoch 15/20
Without BatchNorm - Loss: 0.4568, Train Acc: 0.9240, Test Acc: 0.7840
With BatchNorm - Loss: 0.6349, Train Acc: 0.9280, Test Acc: 0.7040
Epoch 16/20
Without BatchNorm - Loss: 0.3327, Train Acc: 0.9240, Test Acc: 0.7840
With BatchNorm - Loss: 0.5653, Train Acc: 0.9340, Test Acc: 0.6720
Epoch 17/20
Without BatchNorm - Loss: 0.2246, Train Acc: 0.9280, Test Acc: 0.8160
With BatchNorm - Loss: 0.6319, Train Acc: 0.9360, Test Acc: 0.6960
Epoch 18/20
Without BatchNorm - Loss: 0.1577, Train Acc: 0.9340, Test Acc: 0.8320
With BatchNorm - Loss: 0.4392, Train Acc: 0.9340, Test Acc: 0.7120
Epoch 19/20
Without BatchNorm - Loss: 0.2883, Train Acc: 0.9380, Test Acc: 0.8240
With BatchNorm - Loss: 0.5763, Train Acc: 0.9540, Test Acc: 0.7280
Epoch 20/20
Without BatchNorm - Loss: 0.2185, Train Acc: 0.9480, Test Acc: 0.8320
With BatchNorm - Loss: 0.4844, Train Acc: 0.9540, Test Acc: 0.6880
plt.figure(figsize=(16, 6))
plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss1)), train_loss1, label='Without BatchNorm')
plt.plot(np.arange(len(train_loss2)), train_loss2, label='With BatchNorm')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()
plt.subplot(1, 2, 2)
x = np.arange(len(train_acc1))
plt.plot(x, train_acc1, label='Without BatchNorm (train)')
plt.plot(x, test_acc1, label='Without BatchNorm (test)', linestyle='--')
plt.plot(x, train_acc2, label='With BatchNorm (train)')
plt.plot(x, test_acc2, label='With BatchNorm (test)', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Comparison of training and testing accuracy")
plt.legend()
plt.tight_layout()
plt.show()
# Experiment 2: weight decay comparison
network1 = FiveLayerNet(
input_size=784,
hidden_sizes=[100, 100, 50, 50],
output_size=10,
weight_init_std='he',
use_batchnorm=True,
weight_decay_lambda=0
)
network2 = FiveLayerNet(
input_size=784,
hidden_sizes=[100, 100, 50, 50],
output_size=10,
weight_init_std='he',
use_batchnorm=True,
weight_decay_lambda=0.001
)
optimizer1 = SGD(lr=learning_rate)
optimizer2 = SGD(lr=learning_rate)
train_loss1, train_acc1, test_acc1 = [], [], []
train_loss2, train_acc2, test_acc2 = [], [], []
for i in range(iters_num):
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
# No weight decay
grad1 = network1.gradient(x_batch, t_batch)
optimizer1.update(network1.params, grad1)
loss1 = network1.loss(x_batch, t_batch)
train_loss1.append(loss1)
# With weight decay
grad2 = network2.gradient(x_batch, t_batch)
optimizer2.update(network2.params, grad2)
loss2 = network2.loss(x_batch, t_batch)
train_loss2.append(loss2)
if i % iter_per_epoch == 0:
train_acc1.append(network1.accuracy(x_train, t_train))
test_acc1.append(network1.accuracy(x_test, t_test))
train_acc2.append(network2.accuracy(x_train, t_train))
test_acc2.append(network2.accuracy(x_test, t_test))
print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
print(f"No weight decay - Loss: {loss1:.4f}, Train Acc: {train_acc1[-1]:.4f}, Test Acc: {test_acc1[-1]:.4f}")
print(f"With weight decay- Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")
Epoch 1/20
No weight decay - Loss: 2.3354, Train Acc: 0.1680, Test Acc: 0.1600
With weight decay- Loss: 2.7179, Train Acc: 0.1100, Test Acc: 0.1520
Epoch 2/20
No weight decay - Loss: 1.9522, Train Acc: 0.2880, Test Acc: 0.1840
With weight decay- Loss: 2.4056, Train Acc: 0.3000, Test Acc: 0.2240
Epoch 3/20
No weight decay - Loss: 1.3362, Train Acc: 0.4400, Test Acc: 0.2880
With weight decay- Loss: 1.8403, Train Acc: 0.4840, Test Acc: 0.3360
Epoch 4/20
No weight decay - Loss: 1.3748, Train Acc: 0.5420, Test Acc: 0.3440
With weight decay- Loss: 1.7578, Train Acc: 0.5840, Test Acc: 0.3840
Epoch 5/20
No weight decay - Loss: 1.2470, Train Acc: 0.6200, Test Acc: 0.4160
With weight decay- Loss: 1.4355, Train Acc: 0.6820, Test Acc: 0.4400
Epoch 6/20
No weight decay - Loss: 1.3875, Train Acc: 0.6620, Test Acc: 0.4400
With weight decay- Loss: 1.7909, Train Acc: 0.7280, Test Acc: 0.4800
Epoch 7/20
No weight decay - Loss: 1.1725, Train Acc: 0.7260, Test Acc: 0.5040
With weight decay- Loss: 1.5532, Train Acc: 0.7860, Test Acc: 0.5200
Epoch 8/20
No weight decay - Loss: 1.1724, Train Acc: 0.7600, Test Acc: 0.5120
With weight decay- Loss: 1.5477, Train Acc: 0.8120, Test Acc: 0.5760
Epoch 9/20
No weight decay - Loss: 0.7348, Train Acc: 0.7980, Test Acc: 0.5440
With weight decay- Loss: 1.0268, Train Acc: 0.8340, Test Acc: 0.5840
Epoch 10/20
No weight decay - Loss: 0.7982, Train Acc: 0.7960, Test Acc: 0.5680
With weight decay- Loss: 1.0995, Train Acc: 0.8580, Test Acc: 0.6080
Epoch 11/20
No weight decay - Loss: 0.9589, Train Acc: 0.8500, Test Acc: 0.5600
With weight decay- Loss: 1.1479, Train Acc: 0.8760, Test Acc: 0.6240
Epoch 12/20
No weight decay - Loss: 0.7069, Train Acc: 0.8660, Test Acc: 0.5920
With weight decay- Loss: 1.0813, Train Acc: 0.8900, Test Acc: 0.6960
Epoch 13/20
No weight decay - Loss: 0.6297, Train Acc: 0.8940, Test Acc: 0.6240
With weight decay- Loss: 0.9254, Train Acc: 0.8900, Test Acc: 0.6800
Epoch 14/20
No weight decay - Loss: 0.8401, Train Acc: 0.8920, Test Acc: 0.6400
With weight decay- Loss: 1.0330, Train Acc: 0.9040, Test Acc: 0.6640
Epoch 15/20
No weight decay - Loss: 0.7174, Train Acc: 0.8980, Test Acc: 0.6240
With weight decay- Loss: 1.0208, Train Acc: 0.9160, Test Acc: 0.6960
Epoch 16/20
No weight decay - Loss: 0.6756, Train Acc: 0.9040, Test Acc: 0.6480
With weight decay- Loss: 1.1875, Train Acc: 0.9140, Test Acc: 0.7040
Epoch 17/20
No weight decay - Loss: 0.5438, Train Acc: 0.9260, Test Acc: 0.7120
With weight decay- Loss: 0.9118, Train Acc: 0.9280, Test Acc: 0.7040
Epoch 18/20
No weight decay - Loss: 0.6022, Train Acc: 0.9300, Test Acc: 0.6800
With weight decay- Loss: 0.7961, Train Acc: 0.9360, Test Acc: 0.7120
Epoch 19/20
No weight decay - Loss: 0.7079, Train Acc: 0.9320, Test Acc: 0.7120
With weight decay- Loss: 0.8546, Train Acc: 0.9260, Test Acc: 0.6720
Epoch 20/20
No weight decay - Loss: 0.6250, Train Acc: 0.9540, Test Acc: 0.7280
With weight decay- Loss: 0.9674, Train Acc: 0.9420, Test Acc: 0.7120
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss1)), train_loss1, label='No weight decay')
plt.plot(np.arange(len(train_loss2)), train_loss2, label='With weight decay')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()
plt.subplot(1, 2, 2)
x = np.arange(len(train_acc1))
plt.plot(x, train_acc1, label='No weight decay (train)')
plt.plot(x, test_acc1, label='No weight decay (test)', linestyle='--')
plt.plot(x, train_acc2, label='With weight decay (train)')
plt.plot(x, test_acc2, label='With weight decay (test)', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Training and testing accuracy")
plt.legend()
plt.tight_layout()
plt.show()
# Experiment 3: Dropout
# Without Dropout
network1 = FiveLayerNet(
input_size=784,
hidden_sizes=[100, 100, 50, 50],
output_size=10,
weight_init_std='he',
use_batchnorm=True,
use_dropout=False
)
# With Dropout
network2 = FiveLayerNet(
input_size=784,
hidden_sizes=[100, 100, 50, 50],
output_size=10,
weight_init_std='he',
use_batchnorm=True,
use_dropout=True,
dropout_ratio=0.08
)
optimizer1 = SGD(lr=learning_rate)
optimizer2 = SGD(lr=learning_rate)
train_loss1, train_acc1, test_acc1 = [], [], []
train_loss2, train_acc2, test_acc2 = [], [], []
for i in range(iters_num):
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
    # Without Dropout
grad1 = network1.gradient(x_batch, t_batch)
optimizer1.update(network1.params, grad1)
loss1 = network1.loss(x_batch, t_batch)
train_loss1.append(loss1)
    # With Dropout
grad2 = network2.gradient(x_batch, t_batch)
optimizer2.update(network2.params, grad2)
loss2 = network2.loss(x_batch, t_batch)
train_loss2.append(loss2)
if i % iter_per_epoch == 0:
train_acc1.append(network1.accuracy(x_train, t_train))
test_acc1.append(network1.accuracy(x_test, t_test))
train_acc2.append(network2.accuracy(x_train, t_train))
test_acc2.append(network2.accuracy(x_test, t_test))
print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
print(f"无Dropout - Loss: {loss1:.4f}, Train Acc: {train_acc1[-1]:.4f}, Test Acc: {test_acc1[-1]:.4f}")
print(f"有Dropout - Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")
Epoch 1/20
Without Dropout - Loss: 2.8517, Train Acc: 0.1180, Test Acc: 0.0880
With Dropout - Loss: 2.2606, Train Acc: 0.1240, Test Acc: 0.1440
Epoch 2/20
Without Dropout - Loss: 1.9766, Train Acc: 0.2780, Test Acc: 0.2640
With Dropout - Loss: 2.1891, Train Acc: 0.2380, Test Acc: 0.1600
Epoch 3/20
Without Dropout - Loss: 1.6820, Train Acc: 0.4260, Test Acc: 0.3280
With Dropout - Loss: 2.1212, Train Acc: 0.3920, Test Acc: 0.2160
Epoch 4/20
Without Dropout - Loss: 1.6266, Train Acc: 0.5420, Test Acc: 0.3440
With Dropout - Loss: 1.8294, Train Acc: 0.5400, Test Acc: 0.3200
Epoch 5/20
Without Dropout - Loss: 1.4466, Train Acc: 0.6400, Test Acc: 0.4240
With Dropout - Loss: 1.5504, Train Acc: 0.5960, Test Acc: 0.3440
Epoch 6/20
Without Dropout - Loss: 1.1496, Train Acc: 0.6960, Test Acc: 0.4640
With Dropout - Loss: 1.5032, Train Acc: 0.6660, Test Acc: 0.4240
Epoch 7/20
Without Dropout - Loss: 1.2502, Train Acc: 0.7280, Test Acc: 0.4720
With Dropout - Loss: 1.7346, Train Acc: 0.7120, Test Acc: 0.4320
Epoch 8/20
Without Dropout - Loss: 0.8974, Train Acc: 0.7760, Test Acc: 0.5200
With Dropout - Loss: 1.5057, Train Acc: 0.7420, Test Acc: 0.5040
Epoch 9/20
Without Dropout - Loss: 0.9743, Train Acc: 0.8000, Test Acc: 0.5440
With Dropout - Loss: 1.2010, Train Acc: 0.7700, Test Acc: 0.5120
Epoch 10/20
Without Dropout - Loss: 1.0423, Train Acc: 0.8220, Test Acc: 0.5520
With Dropout - Loss: 1.4685, Train Acc: 0.8060, Test Acc: 0.5200
Epoch 11/20
Without Dropout - Loss: 0.7758, Train Acc: 0.8480, Test Acc: 0.5840
With Dropout - Loss: 1.0502, Train Acc: 0.8180, Test Acc: 0.5440
Epoch 12/20
Without Dropout - Loss: 0.7487, Train Acc: 0.8660, Test Acc: 0.6000
With Dropout - Loss: 0.9353, Train Acc: 0.8280, Test Acc: 0.5920
Epoch 13/20
Without Dropout - Loss: 0.9809, Train Acc: 0.8840, Test Acc: 0.6320
With Dropout - Loss: 1.3208, Train Acc: 0.8360, Test Acc: 0.5680
Epoch 14/20
Without Dropout - Loss: 0.6833, Train Acc: 0.9000, Test Acc: 0.6080
With Dropout - Loss: 0.9210, Train Acc: 0.8540, Test Acc: 0.6240
Epoch 15/20
Without Dropout - Loss: 0.7784, Train Acc: 0.9080, Test Acc: 0.6480
With Dropout - Loss: 1.0822, Train Acc: 0.8700, Test Acc: 0.6080
Epoch 16/20
Without Dropout - Loss: 0.6455, Train Acc: 0.9120, Test Acc: 0.6800
With Dropout - Loss: 0.9547, Train Acc: 0.8900, Test Acc: 0.6240
Epoch 17/20
Without Dropout - Loss: 0.6330, Train Acc: 0.9140, Test Acc: 0.6640
With Dropout - Loss: 0.8852, Train Acc: 0.8780, Test Acc: 0.6240
Epoch 18/20
Without Dropout - Loss: 0.7478, Train Acc: 0.9340, Test Acc: 0.7200
With Dropout - Loss: 0.9836, Train Acc: 0.8860, Test Acc: 0.6720
Epoch 19/20
Without Dropout - Loss: 0.6356, Train Acc: 0.9360, Test Acc: 0.6960
With Dropout - Loss: 1.1165, Train Acc: 0.9080, Test Acc: 0.6480
Epoch 20/20
Without Dropout - Loss: 0.5600, Train Acc: 0.9400, Test Acc: 0.7360
With Dropout - Loss: 0.9190, Train Acc: 0.9180, Test Acc: 0.6480
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss1)), train_loss1, label='Without Dropout')
plt.plot(np.arange(len(train_loss2)), train_loss2, label='With Dropout')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()
plt.subplot(1, 2, 2)
x = np.arange(len(train_acc1))
plt.plot(x, train_acc1, label='Without Dropout (train)')
plt.plot(x, test_acc1, label='Without Dropout (test)', linestyle='--')
plt.plot(x, train_acc2, label='With Dropout (train)')
plt.plot(x, test_acc2, label='With Dropout (test)', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Training and testing accuracy")
plt.legend()
plt.tight_layout()
plt.show()
# Experiment 4: combined model
# Combined model (BatchNorm + Dropout + weight decay)
network2 = FiveLayerNet(
input_size=784,
hidden_sizes=[50, 50, 50, 50],
output_size=10,
weight_init_std=0.01,
use_batchnorm=True,
use_dropout=True,
dropout_ratio=0.1,
weight_decay_lambda=0.01
)
optimizer2 = SGD(lr=learning_rate)
train_loss2, train_acc2, test_acc2 = [], [], []
for i in range(iters_num):
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
    # Combined model
grad2 = network2.gradient(x_batch, t_batch)
optimizer2.update(network2.params, grad2)
loss2 = network2.loss(x_batch, t_batch)
train_loss2.append(loss2)
if i % iter_per_epoch == 0:
train_acc2.append(network2.accuracy(x_train, t_train))
test_acc2.append(network2.accuracy(x_test, t_test))
print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
print(f"Combination Model - Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")
Epoch 1/20
Combination Model - Loss: 2.3020, Train Acc: 0.1520, Test Acc: 0.0960
Epoch 2/20
Combination Model - Loss: 2.2120, Train Acc: 0.5200, Test Acc: 0.4160
Epoch 3/20
Combination Model - Loss: 2.0455, Train Acc: 0.5600, Test Acc: 0.5360
Epoch 4/20
Combination Model - Loss: 1.8359, Train Acc: 0.5960, Test Acc: 0.5840
Epoch 5/20
Combination Model - Loss: 1.7061, Train Acc: 0.6400, Test Acc: 0.6240
Epoch 6/20
Combination Model - Loss: 1.5048, Train Acc: 0.6540, Test Acc: 0.6160
Epoch 7/20
Combination Model - Loss: 1.5717, Train Acc: 0.6260, Test Acc: 0.6240
Epoch 8/20
Combination Model - Loss: 1.5191, Train Acc: 0.6800, Test Acc: 0.6560
Epoch 9/20
Combination Model - Loss: 1.2902, Train Acc: 0.7160, Test Acc: 0.6640
Epoch 10/20
Combination Model - Loss: 1.2222, Train Acc: 0.7140, Test Acc: 0.6480
Epoch 11/20
Combination Model - Loss: 1.0935, Train Acc: 0.7440, Test Acc: 0.7040
Epoch 12/20
Combination Model - Loss: 0.9825, Train Acc: 0.7780, Test Acc: 0.7120
Epoch 13/20
Combination Model - Loss: 0.9710, Train Acc: 0.8080, Test Acc: 0.7600
Epoch 14/20
Combination Model - Loss: 1.1035, Train Acc: 0.8460, Test Acc: 0.7440
Epoch 15/20
Combination Model - Loss: 1.0875, Train Acc: 0.9020, Test Acc: 0.7520
Epoch 16/20
Combination Model - Loss: 0.9283, Train Acc: 0.8960, Test Acc: 0.7760
Epoch 17/20
Combination Model - Loss: 0.7865, Train Acc: 0.9160, Test Acc: 0.7840
Epoch 18/20
Combination Model - Loss: 0.7991, Train Acc: 0.9300, Test Acc: 0.8080
Epoch 19/20
Combination Model - Loss: 0.6010, Train Acc: 0.9300, Test Acc: 0.8240
Epoch 20/20
Combination Model - Loss: 0.6647, Train Acc: 0.9460, Test Acc: 0.8320
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss2)), train_loss2, label='Combination Model')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()
plt.subplot(1, 2, 2)
x = np.arange(len(train_acc2))  # experiment 4 only tracks the combined model's accuracy
plt.plot(x, train_acc2, label='Combination Model (train)')
plt.plot(x, test_acc2, label='Combination Model (test)', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Training and testing accuracy")
plt.legend()
plt.tight_layout()
plt.show()