piware.de Git - handwriting-recognition.git/commitdiff
Initial Neural network with forward feeding
author Martin Pitt <martin@piware.de>
Sat, 29 Aug 2020 10:48:59 +0000 (12:48 +0200)
committer Martin Pitt <martin@piware.de>
Sun, 30 Aug 2020 09:40:25 +0000 (11:40 +0200)
Two hidden layers with parametrizable sizes. Two possible transfer
functions, defaulting to reLU for now.

Initialize weights and biases randomly. This gives totally random
classifications of course, but at least makes sure that the data
structures and computations work.

Also add a function to recognize the test images and count the
correct ones. Without training, 10% of the samples are expected to be
right by pure chance.
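
For orientation, forward feeding just chains one affine map plus transfer
function per layer. A minimal self-contained sketch of the shapes involved,
mirroring the structure in train.py below (the random input is a stand-in
for a flattened 28x28 image, not real MNIST data):

```
import numpy as np

rg = np.random.default_rng(1)
sizes = (28 * 28, 20, 16, 10)
a = rg.random(sizes[0])                  # stand-in for a flattened image
for n_in, n_out in zip(sizes[:-1], sizes[1:]):
    w = rg.normal(size=(n_out, n_in))    # weight matrix of this layer
    b = rg.normal(size=n_out)            # bias vector of this layer
    a = np.maximum(w @ a + b, 0)         # reLU(w @ a + b)
print(a.shape)                           # (10,): one value per digit class
```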

README.md
train.py [new file with mode: 0755]

index fa1c48496f16f54aaba8b1d40f637dc5db10e0b4..96974c990119387ee722e8f5d809536d0885233c 100644 (file)
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@ Basics:
  - [MNIST database of handwritten digits](http://yann.lecun.com/exdb/mnist/)
  - [Neuron](https://en.wikipedia.org/wiki/Artificial_neuron)
  - [Perceptron](https://en.wikipedia.org/wiki/Perceptron)
+ - [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation)
  - [3Blue1Brown video series](https://www.youtube.com/playlist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi)
 
 Too high-level for first-time learning, but apparently very abstract and powerful for real-life:
@@ -37,3 +38,14 @@ plt.close()
  - Read the MNIST database into numpy arrays with `./read_display_mnist.py`. Plot the first ten images and show their labels, to make sure the data makes sense:
 
    ![visualize training data](screenshots/mnist-visualize-training-data.png)
+
+ - Define the structure of the neural network: two hidden layers with parametrizable sizes. Initialize weights and biases randomly. This gives totally random classifications of course, but at least makes sure that the data structures and computations work:
+
+```
+$ ./train.py
+output vector of first image: [    0.         52766.88424917     0.             0.
+ 14840.28619491 14164.62850135     0.          7011.882333
+     0.         46979.62976127]
+classification of first image: 1 with confidence 52766.88424917019; real label 5
+correctly recognized images after initialization: 10.076666666666668%
+```
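
For reference, the "classification" line is just the index and value of the
largest entry in the output vector; a tiny sketch recomputing it from the
numbers printed above:

```
import numpy as np

# values copied from the output vector printed above
y = np.array([0, 52766.88424917, 0, 0, 14840.28619491,
              14164.62850135, 0, 7011.882333, 0, 46979.62976127])
print(np.argmax(y), np.max(y))  # -> 1 52766.88424917 (recognized digit, confidence)
```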
diff --git a/train.py b/train.py
new file mode 100755 (executable)
index 0000000..8a6dc96
--- /dev/null
+++ b/train.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python3
+
+import numpy as np
+
+import mnist
+
+# use a constant seed to keep things reproducible
+rg = np.random.default_rng(1)
+
+# transfer functions
+
+# https://en.wikipedia.org/wiki/Sigmoid_function
+# classic, differentiable, apparently worse for training
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def sigmoid_prime(x):
+    return sigmoid(x) * (1 - sigmoid(x))
+
+
+# https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
+# mostly preferred these days, not differentiable at 0, but slope can be defined arbitrarily as 0 or 1 at 0
+def reLU(x):
+    return np.maximum(x, 0)
+
+
+def reLU_prime(x):
+    return np.heaviside(x, 1)
+
+
+train_images, train_labels, rows, cols = mnist.load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
+test_images, test_labels, rows2, cols2 = mnist.load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
+assert rows == rows2
+assert cols == cols2
+
+# neural network structure: two hidden layers, one output layer
+SIZES = (rows * cols, 20, 16, 10)
+NUM_LAYERS = len(SIZES)
+
+# initialize weight matrices and bias vectors with random numbers
+weights = []
+biases = []
+for i in range(1, NUM_LAYERS):
+    weights.append(rg.normal(size=(SIZES[i], SIZES[i-1])))
+    biases.append(rg.normal(scale=10, size=SIZES[i]))
+
+
+def feed_forward(x, transfer=reLU):
+    '''Compute all z and output vectors for a given input vector'''
+
+    a_s = [x]
+    z_s = []
+    for w, b in zip(weights, biases):
+        x = w @ x + b
+        z_s.append(x)
+        a_s.append(transfer(x))
+    return (z_s, a_s)
+
+
+def classify(y):
+    # the recognized digit is the index of the highest-valued output neuron
+    return np.argmax(y), np.max(y)
+
+
+def test():
+    """Count percentage of test inputs which are being recognized correctly"""
+
+    good = 0
+    num_images = test_images.shape[1]
+    for i in range(num_images):
+        # the recognized digit is the index of the highest-valued output neuron
+        y = classify(feed_forward(test_images[:, i])[1][-1])[0]
+        good += int(y == test_labels[i])
+    return 100 * (good / num_images)
+
+
+res = feed_forward(test_images[:, 0])
+print(f'output vector of first image: {res[1][-1]}')
+digit, conf = classify(res[1][-1])
+print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
+print(f'correctly recognized images after initialization: {test()}%')
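
The commit message mentions two possible transfer functions, but the demo
above only exercises the default reLU. As a hypothetical addition (not part
of this commit), the same accuracy check could be repeated with the sigmoid
transfer; with random weights it should likewise land near 10%:

```
# sketch: repeat the test() loop with transfer=sigmoid instead of the default
# reLU; np.exp may warn about overflow for strongly negative pre-activations,
# where sigmoid just saturates to 0, so the warning is harmless here
good = 0
num_images = test_images.shape[1]
for i in range(num_images):
    y = classify(feed_forward(test_images[:, i], transfer=sigmoid)[1][-1])[0]
    good += int(y == test_labels[i])
print(f'correctly recognized with sigmoid transfer: {100 * good / num_images}%')
```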