Simplify code
diff --git a/nnet.py b/nnet.py
index 72d67836c4db59e99fbc7ed63015bfb5898fa042..ddf8ba68b948bfda759c4d8b5696f0efd6c39559 100644
--- a/nnet.py
+++ b/nnet.py
@@ -10,8 +10,6 @@ class LinearLayer:
     ini_type: initialization type for weight parameters: plain, xavier, or he
     '''
     def __init__(self, input_shape, n_out, ini_type="plain"):
-        self.m = input_shape[1]  # number of examples in training data
-
         # initialize weights
         n_in = input_shape[0]
         if ini_type == 'plain':
@@ -26,42 +24,39 @@ class LinearLayer:
             self.W = rg.standard_normal(size=(n_out, n_in)) * np.sqrt(2/n_in)  # set variance of W to 2/n
 
         self.b = np.zeros((n_out, 1))
-        self.Z = np.zeros((self.W.shape[0], input_shape[1]))
+        self.shape = (self.W.shape[0], input_shape[1])
 
     def forward(self, A_prev):
         self.A_prev = A_prev
-        self.Z = self.W @ self.A_prev + self.b
-        return self.Z
+        return self.W @ self.A_prev + self.b
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # derivative of Cost w.r.t W
-        self.dW = upstream_grad @ self.A_prev.T
+        dW = upstream_grad @ self.A_prev.T
         # derivative of Cost w.r.t b, sum across rows
-        self.db = np.sum(upstream_grad, axis=1, keepdims=True)
+        db = np.sum(upstream_grad, axis=1, keepdims=True)
         # derivative of Cost w.r.t A_prev
-        self.dA_prev = self.W.T @ upstream_grad
-        return self.dA_prev
+        dA_prev = self.W.T @ upstream_grad
+
+        # update parameters
+        self.W -= learning_rate * dW
+        self.b -= learning_rate * db
 
-    def update_params(self, learning_rate=0.1):
-        self.W -= learning_rate * self.dW
-        self.b -= learning_rate * self.db
+        return dA_prev
 
 
 class SigmoidLayer:
     def __init__(self, shape):
-        self.A = np.zeros(shape)
+        self.shape = shape
 
     def forward(self, Z):
+        assert Z.shape == self.shape
         self.A = 1 / (1 + np.exp(-Z))  # compute activations
         return self.A
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # couple the upstream gradient with the local gradient; the result is sent back to the Linear layer
-        self.dZ = upstream_grad * self.A * (1 - self.A)
-        return self.dZ
-
-    def update_params(self, learning_rate=0.1):
-        pass
+        return upstream_grad * self.A * (1 - self.A)
 
 
 def label_vectors(labels, n):
@@ -72,7 +67,7 @@ def label_vectors(labels, n):
 
 
 def forward(layers, X):
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
     cur = X
     for layer in layers:
         cur = layer.forward(cur)
@@ -87,8 +82,8 @@ def classify(y):
 def accuracy(layers, X, labels):
     '''Count percentage of test inputs which are being recognized correctly'''
 
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
-    assert layers[0].m == labels.size, f'first layer width {layers[0].m} does not match number of labels {labels.size}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
+    assert layers[0].shape[1] == labels.size, f'first layer width {layers[0].shape[1]} does not match number of labels {labels.size}'
     output = forward(layers, X)
     classes = classify(output)[0]
     return 100 * (np.sum(classes == labels) / classes.size)
@@ -120,7 +115,6 @@ def train(layers, X, Y, learning_rate=0.1, cost_fn=cost_sqe):
 
     cur = dOutput
     for layer in reversed(layers):
-        cur = layer.backward(cur)
-        layer.update_params(learning_rate)
+        cur = layer.backward(cur, learning_rate)
 
     return cost
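
For context, here is a minimal sketch (not part of the commit) of the changed backward() contract: the gradient step now happens inside backward() itself, which takes the learning rate and returns only the gradient with respect to the layer input, so the separate update_params() call disappears. The module name `nnet` and the shapes below are assumptions for illustration.

    # Sketch of the new backward() contract, assuming nnet.py imports as `nnet`.
    # backward() applies the parameter update itself and returns dA_prev;
    # there is no separate update_params() step any more. Shapes are made up.
    import numpy as np
    import nnet

    lin = nnet.LinearLayer((4, 8), 3)            # 4 inputs, 8 examples, 3 outputs
    A_prev = np.ones((4, 8))
    Z = lin.forward(A_prev)                      # shape (3, 8)

    W_before = lin.W.copy()
    upstream = np.full_like(Z, 0.01)             # pretend gradient from the next layer
    dA_prev = lin.backward(upstream, learning_rate=0.1)

    assert dA_prev.shape == A_prev.shape         # gradient for the previous layer
    assert not np.array_equal(lin.W, W_before)   # W and b were already updated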
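
And a sketch of end-to-end use with the new shape-based wiring, where each layer exposes its output shape via .shape and feeds it to the next layer. The layer sizes, the random toy data, and the assumption that label_vectors() returns the target matrix expected by train() are invented for illustration.

    # End-to-end usage sketch, assuming nnet.py imports as `nnet` and that
    # label_vectors() builds the target matrix expected by train().
    # The sizes and random toy data below are invented for illustration.
    import numpy as np
    import nnet

    rng = np.random.default_rng(0)
    n_in, n_hidden, n_classes, n_examples = 64, 16, 10, 200

    X = rng.random((n_in, n_examples))              # columns are examples
    labels = rng.integers(0, n_classes, n_examples)
    Y = nnet.label_vectors(labels, n_classes)

    # Each layer advertises its output shape via .shape, which feeds the next
    # layer and the assertions in forward()/accuracy().
    l1 = nnet.LinearLayer((n_in, n_examples), n_hidden, ini_type='xavier')
    s1 = nnet.SigmoidLayer(l1.shape)
    l2 = nnet.LinearLayer((n_hidden, n_examples), n_classes, ini_type='xavier')
    s2 = nnet.SigmoidLayer(l2.shape)
    layers = [l1, s1, l2, s2]

    for epoch in range(100):
        cost = nnet.train(layers, X, Y, learning_rate=0.1)

    print(f'final cost: {cost:.4f}')
    print(f'training accuracy: {nnet.accuracy(layers, X, labels):.1f}%')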