Simplify code
diff --git a/nnet.py b/nnet.py
index 72d67836c4db59e99fbc7ed63015bfb5898fa042..ddf8ba68b948bfda759c4d8b5696f0efd6c39559 100644
--- a/nnet.py
+++ b/nnet.py
@@ -10,8 +10,6 @@ class LinearLayer:
     ini_type: initialization type for weight parameters: plain, xavier, or he
     '''
     def __init__(self, input_shape, n_out, ini_type="plain"):
-        self.m = input_shape[1]  # number of examples in training data
-
         # initialize weights
         n_in = input_shape[0]
         if ini_type == 'plain':
@@ -26,42 +24,39 @@ class LinearLayer:
             self.W = rg.standard_normal(size=(n_out, n_in)) * np.sqrt(2/n_in)  # set variance of W to 2/n
 
         self.b = np.zeros((n_out, 1))
-        self.Z = np.zeros((self.W.shape[0], input_shape[1]))
+        self.shape = (self.W.shape[0], input_shape[1])
 
     def forward(self, A_prev):
         self.A_prev = A_prev
-        self.Z = self.W @ self.A_prev + self.b
-        return self.Z
+        return self.W @ self.A_prev + self.b
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # derivative of Cost w.r.t W
-        self.dW = upstream_grad @ self.A_prev.T
+        dW = upstream_grad @ self.A_prev.T
         # derivative of Cost w.r.t b, sum across rows
-        self.db = np.sum(upstream_grad, axis=1, keepdims=True)
+        db = np.sum(upstream_grad, axis=1, keepdims=True)
         # derivative of Cost w.r.t A_prev
-        self.dA_prev = self.W.T @ upstream_grad
-        return self.dA_prev
+        dA_prev = self.W.T @ upstream_grad
+
+        # update parameters
+        self.W -= learning_rate * dW
+        self.b -= learning_rate * db
 
-    def update_params(self, learning_rate=0.1):
-        self.W -= learning_rate * self.dW
-        self.b -= learning_rate * self.db
+        return dA_prev
 
 
 class SigmoidLayer:
     def __init__(self, shape):
-        self.A = np.zeros(shape)
+        self.shape = shape
 
     def forward(self, Z):
+        assert Z.shape == self.shape
         self.A = 1 / (1 + np.exp(-Z))  # compute activations
         return self.A
 
-    def backward(self, upstream_grad):
+    def backward(self, upstream_grad, learning_rate=0.1):
         # couple the upstream gradient with the local gradient; the result is sent back to the Linear layer
-        self.dZ = upstream_grad * self.A * (1 - self.A)
-        return self.dZ
-
-    def update_params(self, learning_rate=0.1):
-        pass
+        return upstream_grad * self.A * (1 - self.A)
 
 
 def label_vectors(labels, n):
@@ -72,7 +67,7 @@ def label_vectors(labels, n):
 
 
 def forward(layers, X):
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
     cur = X
     for layer in layers:
         cur = layer.forward(cur)
@@ -87,8 +82,8 @@ def classify(y):
 def accuracy(layers, X, labels):
     '''Count percentage of test inputs which are being recognized correctly'''
 
-    assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
-    assert layers[0].m == labels.size, f'first layer width {layers[0].m} does not match number of labels {labels.size}'
+    assert X.shape[1] == layers[0].shape[1], f'input length {X.shape[1]} does not match first layer width {layers[0].shape[1]}'
+    assert layers[0].shape[1] == labels.size, f'first layer width {layers[0].shape[1]} does not match number of labels {labels.size}'
     output = forward(layers, X)
     classes = classify(output)[0]
     return 100 * (np.sum(classes == labels) / classes.size)
@@ -120,7 +115,6 @@ def train(layers, X, Y, learning_rate=0.1, cost_fn=cost_sqe):
 
     cur = dOutput
     for layer in reversed(layers):
-        cur = layer.backward(cur)
-        layer.update_params(learning_rate)
+        cur = layer.backward(cur, learning_rate)
 
     return cost
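
For context, here is a minimal sketch (not part of the commit) of the changed backward() contract: the gradient step now happens inside backward() itself, which takes the learning rate and returns only the gradient with respect to the layer input, so the separate update_params() call disappears. The module name `nnet` and the shapes below are assumptions for illustration.

    # Sketch of the new backward() contract, assuming nnet.py imports as `nnet`.
    # backward() applies the parameter update itself and returns dA_prev;
    # there is no separate update_params() step any more. Shapes are made up.
    import numpy as np
    import nnet

    lin = nnet.LinearLayer((4, 8), 3)            # 4 inputs, 8 examples, 3 outputs
    A_prev = np.ones((4, 8))
    Z = lin.forward(A_prev)                      # shape (3, 8)

    W_before = lin.W.copy()
    upstream = np.full_like(Z, 0.01)             # pretend gradient from the next layer
    dA_prev = lin.backward(upstream, learning_rate=0.1)

    assert dA_prev.shape == A_prev.shape         # gradient for the previous layer
    assert not np.array_equal(lin.W, W_before)   # W and b were already updated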
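
And a sketch of end-to-end use with the new shape-based wiring, where each layer exposes its output shape via .shape and feeds it to the next layer. The layer sizes, the random toy data, and the assumption that label_vectors() returns the target matrix expected by train() are invented for illustration.

    # End-to-end usage sketch, assuming nnet.py imports as `nnet` and that
    # label_vectors() builds the target matrix expected by train().
    # The sizes and random toy data below are invented for illustration.
    import numpy as np
    import nnet

    rng = np.random.default_rng(0)
    n_in, n_hidden, n_classes, n_examples = 64, 16, 10, 200

    X = rng.random((n_in, n_examples))              # columns are examples
    labels = rng.integers(0, n_classes, n_examples)
    Y = nnet.label_vectors(labels, n_classes)

    # Each layer advertises its output shape via .shape, which feeds the next
    # layer and the assertions in forward()/accuracy().
    l1 = nnet.LinearLayer((n_in, n_examples), n_hidden, ini_type='xavier')
    s1 = nnet.SigmoidLayer(l1.shape)
    l2 = nnet.LinearLayer((n_hidden, n_examples), n_classes, ini_type='xavier')
    s2 = nnet.SigmoidLayer(l2.shape)
    layers = [l1, s1, l2, s2]

    for epoch in range(100):
        cost = nnet.train(layers, X, Y, learning_rate=0.1)

    print(f'final cost: {cost:.4f}')
    print(f'training accuracy: {nnet.accuracy(layers, X, labels):.1f}%')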