import numpy as np
from collections import OrderedDict

class TwoLayerNet:
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
# First, define the weight and bias parameters for the input->hidden and hidden->output layers.
self.params = {}
self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
self.params['b1'] = np.zeros(hidden_size)
self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
self.params['b2'] = np.zeros(output_size)
# np.random.randn() draws samples from a standard Gaussian (normal) distribution.
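# A minimal sketch of this initialization (the sizes 784 and 50 are assumed, MNIST-like, not from the original):
'''import numpy as np
W1 = 0.01 * np.random.randn(784, 50)   # Gaussian samples scaled down by weight_init_std=0.01
print(W1.shape)   # (784, 50)
print(W1.mean())  # close to 0
print(W1.std())   # close to 0.01
'''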
# Build the layers
self.layers = OrderedDict()  # an OrderedDict remembers the order in which keys are inserted
self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
self.layers['Relu1'] = Relu()
self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
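# A small sketch (not part of the original code) of why the insertion order matters: forward propagation will
# simply walk the layers in the order they were added, Affine1 -> Relu1 -> Affine2.
'''from collections import OrderedDict
layers = OrderedDict()
layers['Affine1'] = 'first affine layer'
layers['Relu1'] = 'activation'
layers['Affine2'] = 'second affine layer'
print(list(layers.keys()))  # ['Affine1', 'Relu1', 'Affine2'] -- insertion order preserved
'''
# Since Python 3.7 a plain dict also preserves insertion order; OrderedDict just makes the intent explicit.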
'''class Affine:
def __init__(self, W, b):
self.W = W
self.b = b
self.x = None
self.dW = None
self.db = None
def forward(self, x):
self.x = x
out = np.dot(x, self.W) + self.b
return out
def backward(self, dout):
dx = np.dot(dout, self.W.T)
self.dW = np.dot(self.x.T, dout)
self.db = np.sum(dout, axis=0)
return dx'''
# The Affine layer's backward pass propagates the gradient between the inputs and the weights:
# dx, dW and db are the gradients of the loss with respect to x, W and b.
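# A minimal shape check (illustrative only, with made-up sizes): for a batch of 3 samples with 4 features
# mapped to 2 outputs, the backward formulas above return gradients whose shapes match x, W and b.
'''import numpy as np
x = np.random.randn(3, 4)     # batch of 3 samples, 4 features each
W = np.random.randn(4, 2)
b = np.zeros(2)
out = np.dot(x, W) + b        # forward output: shape (3, 2)
dout = np.ones_like(out)      # stand-in for the upstream gradient
dx = np.dot(dout, W.T)        # shape (3, 4), same as x
dW = np.dot(x.T, dout)        # shape (4, 2), same as W
db = np.sum(dout, axis=0)     # shape (2,),   same as b
'''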
self.lastLayer = SoftmaxWithLoss()
'''class SoftmaxWithLoss:
def __init__(self):
self.loss = None # the loss value
self.y = None # output of softmax
self.t = None # supervised labels (one-hot vectors)
def forward(self, x, t):
self.t = t
self.y = softmax(x)
self.loss = cross_entropy_error(self.y, self.t)
return self.loss
def backward(self, dout=1):
batch_size = self.t.shape[0]
dx = (self.y - self.t) / batch_size
return dx '''
#SoftmaxWithLoss() combines the softmax function with the cross-entropy loss and implements their backward pass (the derivation is not discussed here).
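# SoftmaxWithLoss relies on two helpers, softmax and cross_entropy_error, that are not shown here.
# A minimal sketch of what batch-oriented versions might look like (an assumption, not the original code):
'''import numpy as np

def softmax(x):
    # subtract the row-wise maximum for numerical stability before exponentiating
    x = x - np.max(x, axis=-1, keepdims=True)
    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    # average cross-entropy over the batch; t is assumed to be one-hot
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size
'''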
def predict(self, x):
for layer in self.layers.values():
x = layer.forward(x)
return x
# self.layers.values() yields the layer objects (Affine1, Relu1, Affine2) in insertion order; calling each layer's forward() in turn passes x through the whole network and produces the prediction.
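# An illustrative sketch (with dummy layers, not the real ones) of how this loop chains forward() calls:
'''from collections import OrderedDict

class AddOne:
    def forward(self, x):
        return x + 1

layers = OrderedDict()
layers['a'] = AddOne()
layers['b'] = AddOne()
x = 0
for layer in layers.values():   # iterate over the layer objects, in order
    x = layer.forward(x)
print(x)  # 2 -- each layer's output becomes the next layer's input
'''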
def loss(self, x, t):
y = self.predict(x)
return self.lastLayer.forward(y, t)
# self.lastLayer.forward(y, t) takes the prediction y and the labels t and returns the loss value.
def accuracy(self, x, t):
y = self.predict(x)
y = np.argmax(y, axis=1)
if t.ndim != 1 :
t = np.argmax(t, axis=1)
accuracy = np.sum(y == t) / float(x.shape[0])
return accuracy
# t.ndim is the number of dimensions of t: 1 for plain label indices, 2 for one-hot vectors.
# shape[0] is the number of rows (samples) and shape[1] the number of columns.
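# A tiny worked example (made-up numbers) of the argmax-and-compare logic used in accuracy():
'''import numpy as np
y = np.array([[0.1, 0.8, 0.1],    # predicted class 1
              [0.6, 0.3, 0.1]])   # predicted class 0
t = np.array([[0, 1, 0],          # true class 1 (one-hot)
              [0, 0, 1]])         # true class 2 (one-hot)
y_idx = np.argmax(y, axis=1)      # [1, 0]
t_idx = np.argmax(t, axis=1)      # [1, 2]
print(np.sum(y_idx == t_idx) / float(y.shape[0]))  # 0.5 -- one of the two predictions is correct
'''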
def numerical_gradient(self, x, t):
loss_W = lambda W: self.loss(x, t)
# loss_W ignores its argument W: it simply recomputes the loss for the current parameters, which is all numerical_gradient needs.
# (Aside on lambda syntax: f = lambda a, b: a * b defines a function, so f(5, 6) evaluates to 30.)
grads = {}
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
return grads
# What we compute here is the gradient of the loss function with respect to each weight parameter.
# Intuitively, we follow the gradient so that the loss moves toward its minimum:
# treating loss(x, t) as a function of the parameters in self.params, the gradient of each parameter tells us
# in which direction the loss changes, so stepping against it reduces the loss.
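# The numerical_gradient() called above (inside grads[...] = ...) is an external helper function, not this
# method itself. A minimal central-difference sketch of what it might look like (an assumption about its implementation):
'''import numpy as np

def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]
        x[idx] = tmp + h
        fxh1 = f(x)            # f(x + h)
        x[idx] = tmp - h
        fxh2 = f(x)            # f(x - h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp           # restore the original value
        it.iternext()
    return grad
'''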
def gradient(self, x, t):
# forward
self.loss(x, t)  # run the forward pass; this computes the loss and lets each layer cache the values it needs for backward
# backward
dout = 1
dout = self.lastLayer.backward(dout)
# lastLayer.backward(dout) runs the backward pass of the softmax-with-loss layer, producing (y - t) / batch_size.
layers = list(self.layers.values())
layers.reverse()
# reverse() reverses a list in place: after [1, 2, 3, 4].reverse() the list is [4, 3, 2, 1] (the call itself returns None).
# Backpropagation must visit the layers in the opposite order of the forward pass.
for layer in layers:
dout = layer.backward(dout)
# Each layer's backward() stores its gradients (dW, db, ...) as attributes on the layer object.
grads = {}
grads['W1'] = self.layers['Affine1'].dW
grads['b1'] = self.layers['Affine1'].db
grads['W2'] = self.layers['Affine2'].dW
grads['b2'] = self.layers['Affine2'].db
return grads
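# A common way to use the two gradient methods together (a sketch that assumes Affine, Relu, SoftmaxWithLoss
# and the numerical_gradient helper are all defined; the tiny sizes are made up so the check runs quickly):
# compare the backprop gradients with the numerical ones to confirm the backward pass is implemented correctly.
'''import numpy as np
net = TwoLayerNet(input_size=4, hidden_size=3, output_size=2)
x_batch = np.random.rand(5, 4)                    # a small random batch stands in for real data
t_batch = np.eye(2)[np.random.randint(0, 2, 5)]   # random one-hot labels
grad_backprop = net.gradient(x_batch, t_batch)
grad_numerical = net.numerical_gradient(x_batch, t_batch)
for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key, diff)   # each difference should be tiny, e.g. on the order of 1e-10
'''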