X-Git-Url: https://piware.de/gitweb/?p=handwriting-recognition.git;a=blobdiff_plain;f=train.py;fp=train.py;h=8a6dc96f2c1d40281d452fc3c894fffb79a73436;hp=0000000000000000000000000000000000000000;hb=0ea12b213873b4bef12e1f2b65eed64704ee040f;hpb=8af4223121b60d5d67b7121d87c5c6fed01b58e7

diff --git a/train.py b/train.py
new file mode 100755
index 0000000..8a6dc96
--- /dev/null
+++ b/train.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python3
+
+import numpy as np
+
+import mnist
+
+# use a constant seed to keep things reproducible
+rg = np.random.default_rng(1)
+
+# transfer functions
+
+# https://en.wikipedia.org/wiki/Sigmoid_function
+# classic and differentiable, but empirically trains worse than ReLU
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def sigmoid_prime(x):
+    return sigmoid(x) * (1 - sigmoid(x))
+
+
+# https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
+# mostly preferred these days; not differentiable at 0, but the slope there can be defined arbitrarily as 0 or 1
+def reLU(x):
+    return np.maximum(x, 0)
+
+
+def reLU_prime(x):
+    return np.heaviside(x, 1)
+
+
+train_images, train_labels, rows, cols = mnist.load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
+test_images, test_labels, rows2, cols2 = mnist.load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
+assert rows == rows2
+assert cols == cols2
+
+# neural network structure: two hidden layers, one output layer
+SIZES = (rows * cols, 20, 16, 10)
+NUM_LAYERS = len(SIZES)
+
+# initialize weight matrices and bias vectors with random numbers
+weights = []
+biases = []
+for i in range(1, NUM_LAYERS):
+    weights.append(rg.normal(size=(SIZES[i], SIZES[i-1])))
+    biases.append(rg.normal(scale=10, size=SIZES[i]))
+
+
+def feed_forward(x, transfer=reLU):
+    '''Compute all z vectors and activations for the given input vector'''
+
+    a_s = [x]
+    z_s = []
+    for w, b in zip(weights, biases):
+        x = w @ x + b
+        z_s.append(x)
+        a_s.append(transfer(x))
+    return (z_s, a_s)
+
+
+def classify(y):
+    # the recognized digit is the index of the highest-valued output neuron
+    return np.argmax(y), np.max(y)
+
+
+def test():
+    """Return the percentage of test inputs that are recognized correctly"""
+
+    good = 0
+    num_images = test_images.shape[1]
+    for i in range(num_images):
+        # classify() returns (digit, confidence); keep just the digit
+        y = classify(feed_forward(test_images[:, i])[1][-1])[0]
+        good += int(y == test_labels[i])
+    return 100 * (good / num_images)
+
+
+res = feed_forward(test_images[:, 0])
+print(f'output vector of first image: {res[1][-1]}')
+digit, conf = classify(res[1][-1])
+print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
+print(f'correctly recognized images after initialization: {test()}%')
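
A quick numerical sanity check for the derivative helpers — not part of the commit above, just a sketch reusing the same definitions: sigmoid_prime should agree with a central finite-difference approximation of sigmoid's slope.

#!/usr/bin/python3
# Sketch: verify sigmoid_prime against a finite-difference slope of sigmoid.
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

x = np.linspace(-5, 5, 11)
eps = 1e-6
# central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
approx = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)
assert np.allclose(sigmoid_prime(x), approx, atol=1e-8)
print('sigmoid_prime matches finite differences')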
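
And a self-contained sketch of the shapes flowing through the feed-forward pass, runnable without the MNIST data files — LAYER_SIZES here is a hypothetical stand-in for SIZES, mirroring the (rows * cols, 20, 16, 10) structure:

#!/usr/bin/python3
# Sketch: the feed-forward pass with made-up layer sizes instead of MNIST data.
import numpy as np

rg = np.random.default_rng(1)

LAYER_SIZES = (784, 20, 16, 10)  # hypothetical stand-in for SIZES above

# one weight matrix and bias vector per non-input layer, as in train.py
weights = [rg.normal(size=(LAYER_SIZES[i], LAYER_SIZES[i - 1]))
           for i in range(1, len(LAYER_SIZES))]
biases = [rg.normal(scale=10, size=LAYER_SIZES[i])
          for i in range(1, len(LAYER_SIZES))]

x = rg.random(LAYER_SIZES[0])  # stand-in for one flattened 28x28 image
for w, b in zip(weights, biases):
    x = np.maximum(w @ x + b, 0)  # the reLU transfer
print('output vector shape:', x.shape)  # -> (10,)
print('recognized digit:', np.argmax(x), 'confidence:', np.max(x))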