Notes on Implementing Error Backpropagation (1)

Published: 2022-10-18

import numpy as np
from collections import OrderedDict

# Affine, Relu, SoftmaxWithLoss, softmax, cross_entropy_error, and the module-level
# numerical_gradient() helper are assumed to be defined or imported elsewhere.

class TwoLayerNet:

        def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):

                # First, define the input, hidden, and output layer sizes, the weight parameters,
                # and the hyperparameter controlling the scale of the initial weights.

                self.params = {}
                self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)

                self.params['b1'] = np.zeros(hidden_size)

                self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)

                self.params['b2'] = np.zeros(output_size)

                # np.random.randn() draws random numbers from a standard Gaussian distribution
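For intuition, here is a quick sketch of what the freshly initialized parameters look like, assuming MNIST-style sizes input_size=784, hidden_size=50, output_size=10 (these particular numbers are just an illustrative choice, not fixed by the class):

import numpy as np

W1 = 0.01 * np.random.randn(784, 50)   # small Gaussian-distributed weights, shape (784, 50)
b1 = np.zeros(50)                      # biases start at zero, shape (50,)
print(W1.shape, round(W1.std(), 3))    # (784, 50) and a standard deviation of roughly 0.01
print(b1.shape)                        # (50,)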

                # Build the layers

                self.layers = OrderedDict()  # an ordered dictionary: keys stay in insertion order (a quick check of this follows the layer assignments below)

                self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])

                self.layers['Relu1'] = Relu()

                self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
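Because an OrderedDict remembers insertion order, iterating over self.layers later visits Affine1 -> Relu1 -> Affine2, exactly the order forward propagation needs. Since Python 3.7 a plain dict also preserves insertion order, but OrderedDict makes the intent explicit:

from collections import OrderedDict

layers = OrderedDict()
layers['Affine1'] = 'affine1'   # placeholders for the real layer objects
layers['Relu1'] = 'relu1'
layers['Affine2'] = 'affine2'
print(list(layers.keys()))      # ['Affine1', 'Relu1', 'Affine2'] -- insertion order is preserved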

        '''class Affine:
                def __init__(self, W, b):
                        self.W = W
                        self.b = b
                        self.x = None
                        self.dW = None
                        self.db = None

                def forward(self, x):
                        self.x = x
                        out = np.dot(x, self.W) + self.b
                        return out

                def backward(self, dout):
                        dx = np.dot(dout, self.W.T)
                        self.dW = np.dot(self.x.T, dout)
                        self.db = np.sum(dout, axis=0)
                        return dx
        '''
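A minimal numeric sketch of the quoted Affine layer (the batch size and layer sizes are arbitrary), checking that each gradient comes back with the same shape as the array it is a gradient of:

import numpy as np

W = np.random.randn(3, 2)      # weights: 3 inputs -> 2 outputs
b = np.zeros(2)
x = np.random.randn(4, 3)      # a batch of 4 samples

out = np.dot(x, W) + b         # forward: (4, 3) dot (3, 2) + (2,) -> (4, 2)
dout = np.ones_like(out)       # a stand-in for the gradient flowing back from the next layer

dx = np.dot(dout, W.T)         # shape (4, 3), same as x
dW = np.dot(x.T, dout)         # shape (3, 2), same as W
db = np.sum(dout, axis=0)      # shape (2,),   same as b
print(dx.shape, dW.shape, db.shape)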

                # The Affine backward pass returns dx (the gradient w.r.t. the input) and stores dW and db.
                self.lastLayer = SoftmaxWithLoss()

        '''class SoftmaxWithLoss:
                def __init__(self):
                        self.loss = None  # the loss value
                        self.y = None     # the output of softmax
                        self.t = None     # the teacher labels (one-hot vectors)

                def forward(self, x, t):
                        self.t = t
                        self.y = softmax(x)
                        self.loss = cross_entropy_error(self.y, self.t)
                        return self.loss

                def backward(self, dout=1):
                        batch_size = self.t.shape[0]
                        dx = (self.y - self.t) / batch_size
                        return dx
        '''

                # SoftmaxWithLoss() combines softmax with the cross-entropy loss; its backward pass
                # simplifies to (y - t) / batch_size (the derivation is not covered here).
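softmax() and cross_entropy_error() are assumed to come from a shared utilities module that is not shown in this note; they typically look something like the sketch below (batch-friendly, with the usual numerical-stability tweaks), though the exact versions used here may differ:

import numpy as np

def softmax(x):
        # subtract the row-wise max before exponentiating so np.exp cannot overflow
        x = x - np.max(x, axis=-1, keepdims=True)
        return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

def cross_entropy_error(y, t):
        # mean cross-entropy over the batch; 1e-7 avoids log(0)
        if y.ndim == 1:
                y = y.reshape(1, -1)
                t = t.reshape(1, -1)
        batch_size = y.shape[0]
        return -np.sum(t * np.log(y + 1e-7)) / batch_size

y = softmax(np.array([[0.3, 2.9, 4.0]]))
t = np.array([[0, 0, 1]])
print(y.round(3), cross_entropy_error(y, t))   # roughly [0.018 0.245 0.737] and a loss of about 0.31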

        def predict(self, x):
                for layer in self.layers.values():
                        x = layer.forward(x)
                return x

        # self.layers.values() returns the layer objects stored in the OrderedDict (Affine1, Relu1, Affine2)
        # in insertion order; calling each one's forward() in turn pushes x through the whole network
        # and produces the prediction (the output scores).
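To make self.layers.values() concrete: it yields the stored layer objects themselves, and predict() just feeds x through each one's forward(). A toy illustration with two made-up layers:

from collections import OrderedDict

class AddOne:
        def forward(self, x):
                return x + 1

class Double:
        def forward(self, x):
                return x * 2

layers = OrderedDict()
layers['add'] = AddOne()
layers['double'] = Double()

x = 3
for layer in layers.values():   # the layer objects, visited in insertion order
        x = layer.forward(x)
print(x)                        # (3 + 1) * 2 = 8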

        def loss(self, x, t):

                y = self.predict(x)

                return self.lastLayer.forward(y, t)

# self.lastLayer.forward(y, t) applies softmax to the scores y and returns the cross-entropy loss against t.

        def accuracy(self, x, t):

                y = self.predict(x)

                y = np.argmax(y, axis=1)

                if t.ndim != 1 :

                        t = np.argmax(t, axis=1)

                accuracy = np.sum(y == t) / float(x.shape[0])

                return accuracy

# t.ndim is the number of dimensions of t: 1 for plain label indices, 2 for one-hot labels.
# shape[0] is the number of rows (here, the number of samples); shape[1] is the number of columns.
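A small worked example of the argmax/ndim logic in accuracy(), with made-up scores and one-hot labels:

import numpy as np

y = np.array([[0.1, 0.8, 0.1],
              [0.6, 0.3, 0.1]])                   # predicted scores for 2 samples
t = np.array([[0, 1, 0],
              [1, 0, 0]])                         # one-hot labels, so t.ndim == 2

pred = np.argmax(y, axis=1)                       # [1, 0] -- predicted class per row
true = np.argmax(t, axis=1)                       # [1, 0] -- one-hot converted to class indices
print(np.sum(pred == true) / float(y.shape[0]))   # 1.0 -- both samples classified correctly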

        def numerical_gradient(self, x, t):
                # wrap the loss as a function of the weights; a lambda is an anonymous function,
                # e.g. f = lambda a, b: a * b makes f(5, 6) return 30
                loss_W = lambda W: self.loss(x, t)

                # the calls below go to the module-level numerical_gradient() helper, not to this method
                grads = {}
                grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
                grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
                grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
                grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
                return grads

# What we compute here is the gradient of the loss function with respect to the weight parameters!
# Think of it this way: we follow the (negative) gradient to drive the loss function toward its minimum.
# loss(x, t) is wrapped as a pseudo-function of the parameters: each params[...] array is treated as
# the variable of loss_W, so this gradient tells us which direction in parameter space reduces the loss.
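The numerical_gradient(f, x) helper called above is assumed to be defined elsewhere; a central-difference sketch of what such a helper usually looks like (slow, but handy as a correctness check):

import numpy as np

def numerical_gradient(f, x):
        # central difference: df/dx_i is approximated by (f(x + h*e_i) - f(x - h*e_i)) / (2h)
        h = 1e-4
        grad = np.zeros_like(x)
        it = np.nditer(x, flags=['multi_index'])
        while not it.finished:
                idx = it.multi_index
                original = x[idx]
                x[idx] = original + h
                fxh1 = f(x)                   # f(x + h)
                x[idx] = original - h
                fxh2 = f(x)                   # f(x - h)
                grad[idx] = (fxh1 - fxh2) / (2 * h)
                x[idx] = original             # restore the original value
                it.iternext()
        return grad

# example: the gradient of f(v) = v0**2 + v1**2 at (3, 4) is approximately (6, 8)
print(numerical_gradient(lambda v: v[0]**2 + v[1]**2, np.array([3.0, 4.0])))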

        def gradient(self, x, t):

                # forward

                self.loss(x, t)  # run a forward pass first, so each layer caches the values it needs for backward

                # backward

                dout = 1

                dout = self.lastLayer.backward(dout)

                # lastLayer.backward(dout) is the backward pass of the SoftmaxWithLoss layer: it returns (y - t) / batch_size

                layers = list(self.layers.values())

                layers.reverse()

                # reverse() reverses the list in place (and returns None),
                # e.g. a = [1, 2, 3, 4]; a.reverse() turns a into [4, 3, 2, 1]
                for layer in layers:

                        dout = layer.backward(dout)

                # each layer's backward() call fills in its dW and db attributes

                grads = {}

                grads['W1'] = self.layers['Affine1'].dW

                grads['b1'] = self.layers['Affine1'].db

                grads['W2'] = self.layers['Affine2'].dW

                grads['b2'] = self.layers['Affine2'].db

                return grads 
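A typical way to use both gradient methods together is a gradient check: compare the analytic (backprop) gradients from gradient() against the numerical ones from numerical_gradient() on a tiny batch. The snippet below is a hypothetical usage sketch; it assumes the Affine, Relu, and SoftmaxWithLoss layer classes and the helper functions are actually importable, and it uses random stand-in data rather than real MNIST images:

import numpy as np

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = np.random.rand(3, 784)        # stand-in for 3 flattened 28x28 images
t_batch = np.zeros((3, 10))
t_batch[np.arange(3), [1, 4, 7]] = 1    # stand-in one-hot labels

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical:
        diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(key + ':', diff)          # should be tiny (around 1e-10) if backprop is implemented correctly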
