]> piware.de Git - handwriting-recognition.git/blob - train.py
366a5a06a2d7b7a9f436228fb4d4b873d0ed5495
[handwriting-recognition.git] / train.py
1 #!/usr/bin/python3
2
3 import numpy as np
4
5 import mnist
6
# Seed the random generator with a fixed value, so that every run draws the
# same sequence of random numbers and results stay comparable between runs
rg = np.random.default_rng(seed=1)
9
10 # transfer functions
11
12 # https://en.wikipedia.org/wiki/Sigmoid_function
13 # classic, differentiable, apparently worse for training
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The naive formula evaluates exp(-x), which overflows (and emits a
    RuntimeWarning) for strongly negative x. This version only ever
    exponentiates a non-positive number, so it cannot overflow:

        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))

    Both forms are mathematically identical; for x < 0 the result may differ
    from the naive formula in the last floating point digits.

    x: scalar or array-like of numbers
    Returns a scalar for scalar input, otherwise an array shaped like x.
    """
    x = np.asarray(x)
    # exp() of a non-positive value is within [0, 1] and never overflows
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # indexing with () turns a 0-d result back into a scalar and leaves real arrays unchanged
    return result[()]
16
17
def sigmoid_prime(x):
    """Return the derivative of the sigmoid function at x, element-wise.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    """
    # evaluate the sigmoid only once; it is the expensive part of the expression
    s = sigmoid(x)
    return s * (1 - s)
20
21
22 # https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
23 # mostly preferred these days, not differentiable at 0, but slope can be defined arbitrarily as 0 or 1 at 0
def reLU(x):
    """Rectified linear unit: element-wise maximum of x and 0."""
    lower_bound = 0
    return np.maximum(x, lower_bound)
26
27
def reLU_prime(x):
    """Slope of reLU: 0 for negative x, 1 for positive x.

    reLU has no derivative at exactly 0; the slope there is defined as 1.
    """
    slope_at_zero = 1
    return np.heaviside(x, slope_at_zero)
30
31
# Load the training and the test data set. mnist.load() yields the images, their
# labels, and two size values (presumably the pixel rows and columns per image).
train_images, train_labels, rows, cols = mnist.load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
test_images, test_labels, rows2, cols2 = mnist.load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
# Both data sets must agree on the image size, as rows * cols becomes the width of
# the network's input layer. Raise explicitly instead of using assert, which is
# skipped when Python runs with -O.
if (rows, cols) != (rows2, cols2):
    raise ValueError(f'image size mismatch: training set has {rows}x{cols}, test set has {rows2}x{cols2}')
36
# Layer widths, input layer first: rows * cols inputs, two hidden layers with
# 20 and 16 neurons, and an output layer with 10 neurons
SIZES = (rows * cols, 20, 16, 10)
NUM_LAYERS = len(SIZES)

# Start from random parameters. Every layer after the input layer gets a weight
# matrix with one row per neuron and one column per neuron of the previous layer,
# plus a bias vector with one entry per neuron. Weights and biases are drawn
# alternately, layer by layer, which fixes the order of draws from the seeded
# generator.
weights = []
biases = []
for i in range(1, NUM_LAYERS):
    layer_width, input_width = SIZES[i], SIZES[i - 1]
    weights.append(rg.normal(size=(layer_width, input_width)))
    biases.append(rg.normal(scale=10, size=layer_width))
47
48
def feed_forward(x, transfer=sigmoid):
    """Compute all z and output vectors for given input vector

    Every layer computes z = w @ a + b from the output vector a of the previous
    layer (the input vector x for the first layer), and then its own output
    vector transfer(z).

    x: input vector of the network
    transfer: transfer function applied to the z vector of every layer

    Returns a tuple (z_s, a_s): z_s is the list of z vectors of all layers, a_s
    is x followed by the output vectors of all layers, so a_s[-1] is the output
    of the whole network.
    """
    a = x
    a_s = [a]
    z_s = []
    for w, b in zip(weights, biases):
        # The next layer must see the previous layer's *output*, not its raw z
        # vector; otherwise the transfer function never influences the result.
        z = w @ a + b
        a = transfer(z)
        z_s.append(z)
        a_s.append(a)
    return (z_s, a_s)
59
60
def classify(y):
    """Interpret the output vector y of the network.

    Returns a tuple (digit, confidence): the index of the highest-valued output
    neuron, which is the recognized digit, and the value of that neuron.
    """
    digit = np.argmax(y)
    confidence = np.max(y)
    return digit, confidence
64
65
def test():
    """Return the percentage of test images which the network recognizes correctly"""

    total = test_images.shape[1]
    # every column of test_images is one image; its classification is the index
    # of the highest-valued neuron in the output vector of the last layer
    hits = sum(
        int(classify(feed_forward(test_images[:, col])[1][-1])[0] == test_labels[col])
        for col in range(total)
    )
    return 100 * (hits / total)
76
77
# Smoke test of the freshly initialized network: show the output vector and the
# classification of the first test image, then the overall recognition rate
first_image = test_images[:, 0]
res = feed_forward(first_image)
print(f'output vector of first image: {res[1][-1]}')
digit, conf = classify(res[1][-1])
print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
print(f'correctly recognized images after initialization: {test()}%')