X-Git-Url: https://piware.de/gitweb/?p=handwriting-recognition.git;a=blobdiff_plain;f=nnet.py;fp=nnet.py;h=72d67836c4db59e99fbc7ed63015bfb5898fa042;hp=0000000000000000000000000000000000000000;hb=1de3cdb5ecba32a8a3b0a02bbf71e883383a689d;hpb=8dcd00e9f8bbfc569c9b29ac06d748320d8bf737

diff --git a/nnet.py b/nnet.py
new file mode 100644
index 0000000..72d6783
--- /dev/null
+++ b/nnet.py
@@ -0,0 +1,126 @@
+# based on https://www.kdnuggets.com/2019/08/numpy-neural-networks-computational-graphs.html
+import numpy as np
+
+# use a constant seed to keep things reproducible
+rg = np.random.default_rng(1)
+
+
+class LinearLayer:
+    '''
+    ini_type: initialization type for weight parameters: plain, xavier, or he
+    '''
+    def __init__(self, input_shape, n_out, ini_type="plain"):
+        self.m = input_shape[1]  # number of examples in training data
+
+        # initialize weights
+        n_in = input_shape[0]
+        if ini_type == 'plain':
+            self.W = rg.standard_normal(size=(n_out, n_in)) * 0.01  # set weights 'W' to small random gaussian
+        elif ini_type == 'xavier':
+            self.W = rg.standard_normal(size=(n_out, n_in)) / np.sqrt(n_in)  # set variance of W to 1/n
+        elif ini_type == 'he':
+            # Good when ReLU is used in hidden layers
+            # Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
+            # Kaiming He et al. (https://arxiv.org/abs/1502.01852)
+            # http://cs231n.github.io/neural-networks-2/#init
+            self.W = rg.standard_normal(size=(n_out, n_in)) * np.sqrt(2/n_in)  # set variance of W to 2/n
+
+        self.b = np.zeros((n_out, 1))
+        self.Z = np.zeros((self.W.shape[0], input_shape[1]))
+
+    def forward(self, A_prev):
+        self.A_prev = A_prev
+        self.Z = self.W @ self.A_prev + self.b
+        return self.Z
+
+    def backward(self, upstream_grad):
+        # derivative of Cost w.r.t W
+        self.dW = upstream_grad @ self.A_prev.T
+        # derivative of Cost w.r.t b, sum across rows
+        self.db = np.sum(upstream_grad, axis=1, keepdims=True)
+        # derivative of Cost w.r.t A_prev
+        self.dA_prev = self.W.T @ upstream_grad
+        return self.dA_prev
+
+    def update_params(self, learning_rate=0.1):
+        self.W -= learning_rate * self.dW
+        self.b -= learning_rate * self.db
+
+
+class SigmoidLayer:
+    def __init__(self, shape):
+        self.A = np.zeros(shape)
+
+    def forward(self, Z):
+        self.A = 1 / (1 + np.exp(-Z))  # compute activations
+        return self.A
+
+    def backward(self, upstream_grad):
+        # couple the upstream gradient with the local sigmoid gradient; the result is sent back to the linear layer
+        self.dZ = upstream_grad * self.A * (1 - self.A)
+        return self.dZ
+
+    def update_params(self, learning_rate=0.1):
+        pass
+
+
+def label_vectors(labels, n):
+    y = np.zeros((n, labels.size))
+    for i, l in enumerate(labels):
+        y[l, i] = 1.0
+    return y
+
+
+def forward(layers, X):
+    assert X.shape[1] == layers[0].m, f'number of examples {X.shape[1]} does not match first layer batch size {layers[0].m}'
+    cur = X
+    for layer in layers:
+        cur = layer.forward(cur)
+    return cur
+
+
+def classify(y):
+    # the recognized digit is the index of the highest-valued output neuron
+    return np.argmax(y, axis=0), np.max(y, axis=0)
+
+
+def accuracy(layers, X, labels):
+    '''Return the percentage of test inputs which are recognized correctly'''
+
+    assert X.shape[1] == layers[0].m, f'number of examples {X.shape[1]} does not match first layer batch size {layers[0].m}'
+    assert layers[0].m == labels.size, f'first layer batch size {layers[0].m} does not match number of labels {labels.size}'
+    output = forward(layers, X)
+    classes = classify(output)[0]
+    return 100 * (np.sum(classes == labels) / classes.size)
+
+
+def cost_sqe(Y, output):
+    '''
+    This function computes and returns the Cost and its derivative,
+    using the Squared Error Cost function -> (1/(2m)) * sum((Y - output)^2)
+    Args:
+        Y: label vectors of data
+        output: Predictions (activations) from the last (output) layer
+    Returns:
+        cost: The Squared Error Cost result
+        dOutput: gradient of Cost w.r.t the output
+    '''
+    m = Y.shape[1]
+
+    cost = (1 / (2 * m)) * np.sum(np.square(Y - output))
+    cost = np.squeeze(cost)  # remove extraneous dimensions to give just a scalar
+
+    dOutput = -1 / m * (Y - output)  # derivative of the squared error cost function
+    return cost, dOutput
+
+
+def train(layers, X, Y, learning_rate=0.1, cost_fn=cost_sqe):
+    output = forward(layers, X)
+    cost, dOutput = cost_fn(Y, output)
+
+    cur = dOutput
+    for layer in reversed(layers):
+        cur = layer.backward(cur)
+        layer.update_params(learning_rate)
+
+    return cost
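
Not part of the commit above: a minimal usage sketch of how these pieces could be wired together for 28x28 digit images. The random stand-in data, the 200-unit hidden layer, the 100 training rounds, and importing the file as a module named nnet are illustrative assumptions; only the classes and functions themselves come from nnet.py.

import numpy as np

import nnet

# stand-in data; replace with real (784, m) pixel columns and their digit labels
rng = np.random.default_rng(0)
m = 1000                               # number of examples; the layer shapes are tied to this batch size
X = rng.random((784, m))               # one flattened 28x28 image per column, values in [0, 1)
labels = rng.integers(0, 10, size=m)   # one digit label per column
Y = nnet.label_vectors(labels, 10)     # one-hot encode to shape (10, m)

# 784 inputs -> 200 hidden sigmoid units -> 10 output sigmoid units
layers = [
    nnet.LinearLayer(input_shape=X.shape, n_out=200, ini_type='xavier'),
    nnet.SigmoidLayer((200, m)),
    nnet.LinearLayer(input_shape=(200, m), n_out=10, ini_type='xavier'),
    nnet.SigmoidLayer((10, m)),
]

# full-batch gradient descent; the number of rounds is an arbitrary choice here
for i in range(100):
    cost = nnet.train(layers, X, Y)
    if i % 10 == 0:
        print(f'round {i}: cost {cost:.4f}')

# the shape asserts tie the batch size to m, so accuracy is measured on the training inputs here
print(f'training accuracy: {nnet.accuracy(layers, X, labels):.1f}%')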