import numpy as np


class Tensor(object):

    def __init__(self, data, autograd=False, creators=None,
                 creation_op=None, id=None):
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None
        if (id is None):
            self.id = np.random.randint(0, 100000)
        else:
            self.id = id

        self.creators = creators
        self.creation_op = creation_op
        self.children = {}

        # record how many gradients each creator should expect to
        # receive from this tensor during backprop
        if (creators is not None):
            for c in creators:
                if (self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1

    def all_children_grads_accounted_for(self):
        for id, cnt in self.children.items():
            if (cnt != 0):
                return False
        return True

    def backward(self, grad=None, grad_origin=None):
        if (self.autograd):
            if (grad is None):
                grad = Tensor(np.ones_like(self.data))

            if (grad_origin is not None):
                if (self.children[grad_origin.id] == 0):
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1

            if (self.grad is None):
                self.grad = grad
            else:
                self.grad += grad

            # grads must not have grads of their own
            assert grad.autograd == False

            # only continue backpropping if there's something to
            # backprop into and if all gradients (from children)
            # are accounted for; override waiting for children if
            # "backprop" was called on this variable directly
            if (self.creators is not None and
                    (self.all_children_grads_accounted_for() or
                     grad_origin is None)):

                if (self.creation_op == "add"):
                    self.creators[0].backward(self.grad, self)
                    self.creators[1].backward(self.grad, self)

                if (self.creation_op == "sub"):
                    self.creators[0].backward(Tensor(self.grad.data), self)
                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)

                if (self.creation_op == "mul"):
                    new = self.grad * self.creators[1]
                    self.creators[0].backward(new, self)
                    new = self.grad * self.creators[0]
                    self.creators[1].backward(new, self)

                if (self.creation_op == "mm"):
                    c0 = self.creators[0]
                    c1 = self.creators[1]
                    new = self.grad.mm(c1.transpose())
                    c0.backward(new)
                    new = self.grad.transpose().mm(c0).transpose()
                    c1.backward(new)

                if (self.creation_op == "transpose"):
                    self.creators[0].backward(self.grad.transpose())

                if ("sum" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.expand(dim, self.creators[0].data.shape[dim]))

                if ("expand" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))

                if (self.creation_op == "neg"):
                    self.creators[0].backward(self.grad.__neg__())

    # addition
    def __add__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    # negation
    def __neg__(self):
        if (self.autograd):
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)

    # subtraction
    def __sub__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="sub")
        return Tensor(self.data - other.data)

    # multiplication
    def __mul__(self, other):
        if (self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self, other],
                          creation_op="mul")
        return Tensor(self.data * other.data)

    # sum the underlying numpy array along dimension dim
    def sum(self, dim):
        if (self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_" + str(dim))
        return Tensor(self.data.sum(dim))

    # expand: repeat the data `copies` times along a new dimension dim
    # (the counterpart of sum, used to backprop through it)
    def expand(self, dim, copies):
        trans_cmd = list(range(0, len(self.data.shape)))
        trans_cmd.insert(dim, len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)
        if (self.autograd):
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_" + str(dim))
        return Tensor(new_data)

    # transpose
    def transpose(self):
        if (self.autograd):
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")
        return Tensor(self.data.transpose())

    # matrix multiplication
    def mm(self, x):
        if (self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self, x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))

    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())

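# A quick, optional sketch of how the graph above behaves: adding two
# autograd Tensors records "add" as the creation_op, and calling backward()
# with a gradient of ones sends that same gradient to both parents,
# since d(x + y)/dx = d(x + y)/dy = 1.
x = Tensor(np.array([1, 2, 3, 4, 5]), autograd=True)
y = Tensor(np.array([2, 2, 2, 2, 2]), autograd=True)
z = x + y
z.backward(Tensor(np.array([1, 1, 1, 1, 1])))
print(x.grad)  # [1 1 1 1 1]
print(y.grad)  # [1 1 1 1 1]
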
class SGD(object):

    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters
        self.alpha = alpha

    def zero(self):
        for p in self.parameters:
            p.grad.data *= 0

    def step(self, zero=True):
        for p in self.parameters:
            p.data -= p.grad.data * self.alpha
            if (zero):
                p.grad.data *= 0


class Layer(object):

    def __init__(self):
        self.parameters = list()

    def get_parameters(self):
        return self.parameters


class Linear(Layer):

    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        # He-style initialization: scale random weights by sqrt(2 / n_inputs)
        W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / (n_inputs))
        self.weight = Tensor(W, autograd=True)
        self.bias = Tensor(np.zeros(n_outputs), autograd=True)

        self.parameters.append(self.weight)
        self.parameters.append(self.bias)

    def forward(self, input):
        return input.mm(self.weight) + self.bias.expand(0, len(input.data))


class Sequential(Layer):

    def __init__(self, layers=list()):
        super().__init__()
        self.layers = layers

    def add(self, layer):
        self.layers.append(layer)

    def forward(self, input):
        for layer in self.layers:
            input = layer.forward(input)
        return input

    def get_parameters(self):
        params = list()
        for l in self.layers:
            params += l.get_parameters()
        return params


np.random.seed(1)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), autograd=True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd=True)

w = list()
'''
w.append(Tensor(np.random.rand(2, 3), autograd=True))
This line would create a random Tensor of shape (2, 3), sampled from the
uniform distribution [0, 1), and append it to the list w. Because it is
created with autograd=True, gradients can later be computed for it, which
is how the parameters of a deep learning model are typically initialized.
'''
weights_0_1 = np.array([[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]])
weights_1_2 = np.array([[0.1], [0.2], [0.3]])
w.append(Tensor(weights_0_1, autograd=True))
w.append(Tensor(weights_1_2, autograd=True))

model = Sequential([Linear(2, 3), Linear(3, 1)])
optim = SGD(parameters=model.get_parameters(), alpha=0.05)

for i in range(10):
    # Predict
    pred = model.forward(data)

    # Compare
    loss = ((pred - target) * (pred - target)).sum(0)

    # Learn
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)
'''
[4.33222765]
[0.06584977]
[0.01869537]
[0.01068846]
[0.00609207]
[0.00360451]
[0.00210719]
[0.00126275]
[0.00075884]
[0.00046488]
'''
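# Optionally inspect the trained model: given the loss values above (the
# final sum-of-squares error is roughly 5e-4), the predictions should sit
# close to the target column [[0], [1], [0], [1]].
final_pred = model.forward(data)
print(final_pred)
print(np.abs(final_pred.data - target.data).max())  # largest per-example error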