From 579223dbae47c81cd315f5b575bfd9f6647890f5 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Sat, 29 Aug 2020 13:31:25 +0200 Subject: [PATCH] Add backpropagation and first round of learning --- README.md | 18 ++++++++++++++++++ train.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/README.md b/README.md index 96974c9..760002f 100644 --- a/README.md +++ b/README.md @@ -49,3 +49,21 @@ output vector of first image: [ 0. 52766.88424917 0. classification of first image: 1 with confidence 52766.88424917019; real label 5 correctly recognized images after initialization: 10.076666666666668% ``` + + - Add backpropagation algorithm and run a first training round. This is slow, as expected: + ``` + $ time ./train.py +output vector of first image: [ 0. 52766.88424917 0. 0. + 14840.28619491 14164.62850135 0. 7011.882333 + 0. 46979.62976127] +classification of first image: 1 with confidence 52766.88424917019; real label 5 +correctly recognized images after initialization: 10.076666666666668% +round #0 of learning... 
+./train.py:18: RuntimeWarning: overflow encountered in exp
+  return 1 / (1 + np.exp(-x))
+correctly recognized images: 14.211666666666666%
+
+real 0m37.927s
+user 1m19.103s
+sys 1m10.169s
+```
diff --git a/train.py b/train.py
index 366a5a0..fc9bb4b 100755
--- a/train.py
+++ b/train.py
@@ -63,6 +63,54 @@ def classify(y):
     return np.argmax(y), np.max(y)
 
 
+def cost_grad(x, target_y, transfer=sigmoid, transfer_prime=sigmoid_prime):
+    '''Return (∂C/∂w, ∂C/∂b) for a particular input and desired output vector
+
+    The last-layer error uses 2 * (a - target_y), the derivative of a
+    squared-error cost. Both results are lists with one entry per layer.
+    '''
+    # forward pass, remember all z vectors and activations for every layer
+    z_s, a_s = feed_forward(x, transfer)
+
+    # backward pass
+    deltas = [None] * len(weights)  # delta = dC/dz error for each layer
+    # insert the last layer error
+    deltas[-1] = transfer_prime(z_s[-1]) * 2 * (a_s[-1] - target_y)
+    for i in reversed(range(len(deltas) - 1)):
+        deltas[i] = (weights[i + 1].T @ deltas[i + 1]) * transfer_prime(z_s[i])
+
+    # dC/dw[i] is the outer product of layer i's error with that layer's
+    # *input* activation, so it has the same shape as weights[i]; assumes
+    # a_s[0] is x and a_s[i + 1] is layer i's output (TODO confirm)
+    dw = [np.outer(d, a_s[i]) for i, d in enumerate(deltas)]
+    db = deltas
+    return dw, db
+
+
+def label_vector(label):
+    '''Return a length-10 vector that is 1.0 at index label and 0 elsewhere'''
+    y = np.zeros(10)
+    y[label] = 1.0
+    return y
+
+
+def backpropagate(image_batch, label_batch, eta):
+    '''Update NN with gradient descent and backpropagation to a batch of inputs
+
+    image_batch holds one image per column, label_batch the label of each
+    column. eta is the learning rate.
+    '''
+    global weights, biases
+
+    num_images = image_batch.shape[1]
+    for i in range(num_images):
+        y = label_vector(label_batch[i])
+        dws, dbs = cost_grad(image_batch[:, i], y)
+        # step *against* the gradient, otherwise the cost grows
+        weights = [w - eta * dw for w, dw in zip(weights, dws)]
+        biases = [b - eta * db for b, db in zip(biases, dbs)]
+
+
 def test():
     """Count percentage of test inputs which are being recognized correctly"""
 
@@ -80,3 +128,11 @@ print(f'output vector of first image: {res[1][-1]}')
 digit, conf = classify(res[1][-1])
 print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
 print(f'correctly recognized images after initialization: {test()}%')
+
+for i in range(1):
+    print(f"round #{i} of learning...")
+    # NOTE(review): this trains on test_images, presumably the same data that
+    # test() scores below -- switch to the training set if one is loaded
+    backpropagate(test_images, test_labels, 1)
+
+print(f'correctly recognized images: {test()}%')
-- 
2.39.2