From 0b40285a04bfbf2d73f7a7154eacb4613f08b350 Mon Sep 17 00:00:00 2001
From: Martin Pitt
Date: Sun, 30 Aug 2020 10:03:25 +0200
Subject: [PATCH] Simplify code

Eliminate unnecessary persistent variables and initializations.
---
 nnet.py  | 44 +++++++++++++++++++-------------------------
 train.py |  6 +++---
 2 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/nnet.py b/nnet.py
index 72d6783..ddf8ba6 100644
--- a/nnet.py
+++ b/nnet.py
@@ -10,8 +10,6 @@ class LinearLayer:
         ini_type: initialization type for weight parameters: plain, xavier, or he
     '''
     def __init__(self, input_shape, n_out, ini_type="plain"):
-        self.m = input_shape[1]  # number of examples in training data
-
         # initialize weights
         n_in = input_shape[0]
         if ini_type == 'plain':
@@ -26,42 +24,39 @@ class LinearLayer:
             self.W = rg.standard_normal(size=(n_out, n_in)) * np.sqrt(2/n_in)  # set variance of W to 2/n
 
         self.b = np.zeros((n_out, 1))
-        self.Z = np.zeros((self.W.shape[0], input_shape[1]))
+        self.shape = (self.W.shape[0], input_shape[1])
 
     def forward(self, A_prev):
         self.A_prev = A_prev
-        self.Z = self.W @ self.A_prev + self.b
-        return self.Z
+        return self.W @ self.A_prev + self.b
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # derivative of Cost w.r.t W
-        self.dW = upstream_grad @ self.A_prev.T
+        dW = upstream_grad @ self.A_prev.T
         # derivative of Cost w.r.t b, sum across rows
-        self.db = np.sum(upstream_grad, axis=1, keepdims=True)
+        db = np.sum(upstream_grad, axis=1, keepdims=True)
         # derivative of Cost w.r.t A_prev
-        self.dA_prev = self.W.T @ upstream_grad
-        return self.dA_prev
+        dA_prev = self.W.T @ upstream_grad
+
+        # update parameters
+        self.W -= learning_rate * dW
+        self.b -= learning_rate * db
 
-    def update_params(self, learning_rate=0.1):
-        self.W -= learning_rate * self.dW
-        self.b -= learning_rate * self.db
+        return dA_prev
 
 
 class SigmoidLayer:
     def __init__(self, shape):
-        self.A = np.zeros(shape)
+        self.shape = shape
 
     def forward(self, Z):
+        assert Z.shape == self.shape
         self.A = 1 / (1 + np.exp(-Z))  # compute activations
         return self.A
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # couple upstream gradient with local gradient, the result will be sent back to the Linear layer
-        self.dZ = upstream_grad * self.A * (1 - self.A)
-        return self.dZ
-
-    def update_params(self, learning_rate=0.1):
-        pass
+        return upstream_grad * self.A * (1 - self.A)
 
 
 def label_vectors(labels, n):
@@ -72,7 +67,7 @@ def label_vectors(labels, n):
 
 
 def forward(layers, X):
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
     cur = X
     for layer in layers:
         cur = layer.forward(cur)
@@ -87,8 +82,8 @@ def classify(y):
 
 def accuracy(layers, X, labels):
     '''Count percentage of test inputs which are being recognized correctly'''
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
-    assert layers[0].m == labels.size, f'first layer width {layers[0].m} does not match number of labels {labels.size}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
+    assert layers[0].shape[1] == labels.size, f'first layer width {layers[0].shape[1]} does not match number of labels {labels.size}'
     output = forward(layers, X)
     classes = classify(output)[0]
     return 100 * (np.sum(classes == labels) / classes.size)
@@ -120,7 +115,6 @@ def train(layers, X, Y, learning_rate=0.1, cost_fn=cost_sqe):
 
     cur = dOutput
     for layer in reversed(layers):
-        cur = layer.backward(cur)
-        layer.update_params(learning_rate)
+        cur = layer.backward(cur, learning_rate)
 
     return cost
diff --git a/train.py b/train.py
index 5ff7666..8510624 100755
--- a/train.py
+++ b/train.py
@@ -14,9 +14,9 @@ nnet_batch = 10000
 # (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output)
 # handle 10,000 vectors at a time
 Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=80)
-A1 = nnet.SigmoidLayer(Z1.Z.shape)
-ZO = nnet.LinearLayer(input_shape=A1.A.shape, n_out=10)
-AO = nnet.SigmoidLayer(ZO.Z.shape)
+A1 = nnet.SigmoidLayer(Z1.shape)
+ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10)
+AO = nnet.SigmoidLayer(ZO.shape)
 net = (Z1, A1, ZO, AO)
 
 res = nnet.forward(net, test_images[:, 0:10000])
-- 
2.39.5
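
After this change a layer's backward() applies its own parameter update, so callers no longer need a separate update_params() pass, and layers expose their output shape via .shape instead of a preallocated Z/A array. A minimal sketch of the resulting call pattern, mirroring train.py and nnet.train() (dOutput stands for the cost gradient that train() computes; sizes and names are illustrative only):

    Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=80)
    A1 = nnet.SigmoidLayer(Z1.shape)
    ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10)
    AO = nnet.SigmoidLayer(ZO.shape)
    net = (Z1, A1, ZO, AO)

    output = nnet.forward(net, X)                      # forward pass through all layers
    cur = dOutput                                      # derivative of the cost w.r.t. the network output
    for layer in reversed(net):                        # each backward() call also updates W and b
        cur = layer.backward(cur, learning_rate=0.1)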