From: Martin Pitt
Date: Sun, 30 Aug 2020 08:03:25 +0000 (+0200)
Subject: Simplify code
X-Git-Url: https://piware.de/gitweb/?p=handwriting-recognition.git;a=commitdiff_plain;h=0b40285a04bfbf2d73f7a7154eacb4613f08b350;hp=72f90468f63b736bca28d0fc5ebf3f7d1989de4f

Simplify code

Eliminate unnecessary persistent variables and initializations.
---

diff --git a/nnet.py b/nnet.py
index 72d6783..ddf8ba6 100644
--- a/nnet.py
+++ b/nnet.py
@@ -10,8 +10,6 @@ class LinearLayer:
     ini_type: initialization type for weight parameters: plain, xavier, or he
     '''
     def __init__(self, input_shape, n_out, ini_type="plain"):
-        self.m = input_shape[1]  # number of examples in training data
-
         # initialize weights
         n_in = input_shape[0]
         if ini_type == 'plain':
@@ -26,42 +24,39 @@ class LinearLayer:
             self.W = rg.standard_normal(size=(n_out, n_in)) * np.sqrt(2/n_in)  # set variance of W to 2/n
 
         self.b = np.zeros((n_out, 1))
-        self.Z = np.zeros((self.W.shape[0], input_shape[1]))
+        self.shape = (self.W.shape[0], input_shape[1])
 
     def forward(self, A_prev):
         self.A_prev = A_prev
-        self.Z = self.W @ self.A_prev + self.b
-        return self.Z
+        return self.W @ self.A_prev + self.b
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # derivative of Cost w.r.t W
-        self.dW = upstream_grad @ self.A_prev.T
+        dW = upstream_grad @ self.A_prev.T
         # derivative of Cost w.r.t b, sum across rows
-        self.db = np.sum(upstream_grad, axis=1, keepdims=True)
+        db = np.sum(upstream_grad, axis=1, keepdims=True)
         # derivative of Cost w.r.t A_prev
-        self.dA_prev = self.W.T @ upstream_grad
-        return self.dA_prev
+        dA_prev = self.W.T @ upstream_grad
+
+        # update parameters
+        self.W -= learning_rate * dW
+        self.b -= learning_rate * db
 
-    def update_params(self, learning_rate=0.1):
-        self.W -= learning_rate * self.dW
-        self.b -= learning_rate * self.db
+        return dA_prev
 
 
 class SigmoidLayer:
     def __init__(self, shape):
-        self.A = np.zeros(shape)
+        self.shape = shape
 
     def forward(self, Z):
+        assert Z.shape == self.shape
         self.A = 1 / (1 + np.exp(-Z))  # compute activations
         return self.A
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # couple upstream gradient with local gradient, the result will be sent back to the Linear layer
-        self.dZ = upstream_grad * self.A * (1 - self.A)
-        return self.dZ
-
-    def update_params(self, learning_rate=0.1):
-        pass
+        return upstream_grad * self.A * (1 - self.A)
 
 
 def label_vectors(labels, n):
@@ -72,7 +67,7 @@ def label_vectors(labels, n):
 
 
 def forward(layers, X):
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
     cur = X
     for layer in layers:
         cur = layer.forward(cur)
@@ -87,8 +82,8 @@ def classify(y):
 
 def accuracy(layers, X, labels):
     '''Count percentage of test inputs which are being recognized correctly'''
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
-    assert layers[0].m == labels.size, f'first layer width {layers[0].m} does not match number of labels {labels.size}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
+    assert layers[0].shape[1] == labels.size, f'first layer width {layers[0].shape[1]} does not match number of labels {labels.size}'
     output = forward(layers, X)
     classes = classify(output)[0]
     return 100 * (np.sum(classes == labels) / classes.size)
@@ -120,7 +115,6 @@ def train(layers, X, Y, learning_rate=0.1, cost_fn=cost_sqe):
 
     cur = dOutput
     for layer in reversed(layers):
-        cur = layer.backward(cur)
-        layer.update_params(learning_rate)
+        cur = layer.backward(cur, learning_rate)
 
     return cost
diff --git a/train.py b/train.py
index 5ff7666..8510624 100755
--- a/train.py
+++ b/train.py
@@ -14,9 +14,9 @@ nnet_batch = 10000
 # (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output)
 # handle 10,000 vectors at a time
 Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=80)
-A1 = nnet.SigmoidLayer(Z1.Z.shape)
-ZO = nnet.LinearLayer(input_shape=A1.A.shape, n_out=10)
-AO = nnet.SigmoidLayer(ZO.Z.shape)
+A1 = nnet.SigmoidLayer(Z1.shape)
+ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10)
+AO = nnet.SigmoidLayer(ZO.shape)
 net = (Z1, A1, ZO, AO)
 
 res = nnet.forward(net, test_images[:, 0:10000])
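
For illustration (not part of the commit): a minimal sketch of one training pass
against the simplified API, using random stand-in data instead of the MNIST arrays
that train.py loads. The rows/cols/batch values and the generated X/labels arrays
are assumptions; the nnet calls are the functions shown in the diff. After this
change each layer's backward() both returns the gradient for the layer below and
applies the weight update, so the former update_params() step disappears.

    import numpy as np
    import nnet

    rg = np.random.default_rng()

    rows, cols, batch = 28, 28, 10000       # assumed input geometry, as in train.py
    X = rg.random((rows * cols, batch))     # stand-in for a batch of flattened images
    labels = rg.integers(0, 10, batch)      # stand-in for the digit labels
    Y = nnet.label_vectors(labels, 10)      # desired output vector per example

    # (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output), as in train.py
    Z1 = nnet.LinearLayer(input_shape=(rows * cols, batch), n_out=80)
    A1 = nnet.SigmoidLayer(Z1.shape)
    ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10)
    AO = nnet.SigmoidLayer(ZO.shape)
    net = (Z1, A1, ZO, AO)

    # one training pass: forward, cost gradient, then a single backward sweep;
    # backward(grad, learning_rate) now also updates W and b, with no separate
    # update_params() call
    cost = nnet.train(net, X, Y, learning_rate=0.1)
    print(cost, nnet.accuracy(net, X, labels))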