classification of first image: 1 with confidence 52766.88424917019; real label 5
correctly recognized images after initialization: 10.076666666666668%
```
+
+ - Add backpropagation algorithm and run a first training round. This is slow, as expected:
+ ```
+ $ time ./train.py
+output vector of first image: [ 0. 52766.88424917 0. 0.
+ 14840.28619491 14164.62850135 0. 7011.882333
+ 0. 46979.62976127]
+classification of first image: 1 with confidence 52766.88424917019; real label 5
+correctly recognized images after initialization: 10.076666666666668%
+round #0 of learning...
+./train.py:18: RuntimeWarning: overflow encountered in exp
+ return 1 / (1 + np.exp(-x))
+correctly recognized images: 14.211666666666666%
+
+real 0m37.927s
+user 1m19.103s
+sys 1m10.169s
+```
return np.argmax(y), np.max(y)
+def cost_grad(x, target_y, transfer=sigmoid, transfer_prime=sigmoid_prime):
+    '''Return (∂C/∂w, ∂C/∂b) for a particular input and desired output vector
+
+    The cost differentiated here is the squared error sum((a - target_y)**2)
+    of the last layer's activation a. Both results are lists with one entry
+    per layer, in the same order as the global `weights`; each ∂C/∂w entry
+    has the shape of the corresponding weight matrix.
+
+    NOTE(review): assumes x is a single 1-D input vector and that
+    feed_forward() returns a_s with the input vector at index 0, so that
+    a_s[i] is the activation fed into layer i -- confirm against
+    feed_forward().
+    '''
+
+    # forward pass, remember all z vectors and activations for every layer
+    z_s, a_s = feed_forward(x, transfer)
+
+    # backward pass
+    deltas = [None] * len(weights)  # delta = dC/dz error for each layer
+    # insert the last layer error
+    deltas[-1] = transfer_prime(z_s[-1]) * 2 * (a_s[-1] - target_y)
+    # propagate the error backwards, from the second-to-last layer to the first
+    for i in reversed(range(len(deltas) - 1)):
+        deltas[i] = (weights[i + 1].T @ deltas[i + 1]) * transfer_prime(z_s[i])
+
+    # dC/dw[j][k] = delta[j] * input[k], i.e. an outer product. A plain `@`
+    # between two 1-D vectors would collapse to one scalar per layer, which
+    # is not a per-weight gradient.
+    dw = [np.outer(d, a_s[i]) for i, d in enumerate(deltas)]
+    # dC/db equals dC/dz because z depends on b with derivative 1
+    db = deltas
+    return dw, db
+
+
+def label_vector(label):
+    '''Return a length-10 vector that is 1.0 at index `label` and 0.0 elsewhere'''
+    one_hot = np.zeros(10)
+    one_hot[label] = 1.0
+    return one_hot
+
+
+def backpropagate(image_batch, label_batch, eta):
+    '''Update NN with gradient descent and backpropagation to a batch of inputs
+
+    image_batch holds one input per column; label_batch[i] is the class label
+    of column i. The network is updated once per image (not once for the
+    whole batch) by rebinding the global `weights` and `biases`.
+
+    eta is the learning rate.
+    '''
+    global weights, biases
+
+    num_images = image_batch.shape[1]
+    for i in range(num_images):
+        y = label_vector(label_batch[i])
+        dws, dbs = cost_grad(image_batch[:, i], y)
+        # Gradient *descent*: step against the gradient so the cost shrinks.
+        # Adding the gradient would climb the cost surface instead.
+        weights = [w - eta * dw for w, dw in zip(weights, dws)]
+        biases = [b - eta * db for b, db in zip(biases, dbs)]
+
+
def test():
"""Count percentage of test inputs which are being recognized correctly"""
digit, conf = classify(res[1][-1])  # `res` is defined outside this excerpt; the result is unpacked as (digit, confidence)
print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
print(f'correctly recognized images after initialization: {test()}%')  # baseline before any training round
+
+for i in range(1):  # a single training round for now
+ print(f"round #{i} of learning...")
+ backpropagate(test_images, test_labels, 1)  # learning rate eta=1; NOTE(review): trains on the test_* data, which test() presumably also scores -- confirm whether a separate training set should be used
+
+print(f'correctly recognized images: {test()}%')  # same metric as the pre-training report, for comparison