Process many images in parallel
index 8a6dc96f2c1d40281d452fc3c894fffb79a73436..012e2c7f3527c6917d86fdfa2392b44c12f81d78 100755 (executable)
--- a/train.py
+++ b/train.py
@@ -1,82 +1,35 @@
 #!/usr/bin/python3
 
-import numpy as np
-
 import mnist
-
-# use a constant seed to keep things reproducible
-rg = np.random.default_rng(1)
-
-# transfer functions
-
-# https://en.wikipedia.org/wiki/Sigmoid_function
-# classic, differentiable, apparently worse for training
-def sigmoid(x):
-    return 1 / (1 + np.exp(-x))
-
-
-def sigmoid_prime(x):
-    return sigmoid(x) * (1 - sigmoid(x))
-
-
-# https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
-# mostly preferred these days, not differentiable at 0, but slope can be defined arbitrarily as 0 or 1 at 0
-def reLU(x):
-    return np.maximum(x, 0)
-
-
-def reLU_prime(x):
-    return np.heaviside(x, 1)
-
+import nnet
 
 train_images, train_labels, rows, cols = mnist.load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
 test_images, test_labels, rows2, cols2 = mnist.load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
 assert rows == rows2
 assert cols == cols2
+num_train = train_images.shape[1]  # images are stored as columns
+nnet_batch = 10000  # how many images to push through the net at once
 
 # neural network structure: two hidden layers, one output layer
-SIZES = (rows * cols, 20, 16, 10)
-NUM_LAYERS = len(SIZES)
-
-# initialize weight matrices and bias vectors with random numbers
-weights = []
-biases = []
-for i in range(1, NUM_LAYERS):
-    weights.append(rg.normal(size=(SIZES[i], SIZES[i-1])))
-    biases.append(rg.normal(scale=10, size=SIZES[i]))
-
-
-def feed_forward(x, transfer=reLU):
-    '''Compute all z and output vectors for given input vector'''
-
-    a_s = [x]
-    z_s = []
-    for w, b in zip(weights, biases):
-        x = w @ x + b
-        z_s.append(x)
-        a_s.append(transfer(x))
-    return (z_s, a_s)
-
-
-def classify(y):
-    # the recognized digit is the index of the highest-valued output neuron
-    return np.argmax(y), np.max(y)
-
-
-def test():
-    """Count percentage of test inputs which are being recognized correctly"""
-
-    good = 0
-    num_images = test_images.shape[1]
-    for i in range(num_images):
-        # the recognized digit is the index of the highest-valued output neuron
-        y = classify(feed_forward(test_images[:, i])[1][-1])[0]
-        good += int(y == test_labels[i])
-    return 100 * (good / num_images)
-
-
-res = feed_forward(test_images[:, 0])
-print(f'output vector of first image: {res[1][-1]}')
-digit, conf = classify(res[1][-1])
+#   (input) -> [Linear->Sigmoid] -> [Linear->Sigmoid] -> [Linear->Sigmoid] -> (output)
+# handle 10,000 vectors at a time
+Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=20)
+A1 = nnet.SigmoidLayer(Z1.Z.shape)
+Z2 = nnet.LinearLayer(input_shape=A1.A.shape, n_out=16)
+A2 = nnet.SigmoidLayer(Z2.Z.shape)
+ZO = nnet.LinearLayer(input_shape=A2.A.shape, n_out=10)
+AO = nnet.SigmoidLayer(ZO.Z.shape)
+net = (Z1, A1, Z2, A2, ZO, AO)
+
+res = nnet.forward(net, test_images[:, 0:10000])  # whole 10,000-image test set at once
+print(f'output vector of first image: {res[:, 0]}')
+digit, conf = nnet.classify(res[:, 0])
 print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
-print(f'correctly recognized images after initialization: {test()}%')
+print(f'correctly recognized images after initialization: {nnet.accuracy(net, test_images, test_labels)}%')
+
+train_y = nnet.label_vectors(train_labels, 10)
+for i in range(100):
+    for batch in range(0, num_train, nnet_batch):
+        cost = nnet.train(net, train_images[:, batch:(batch + nnet_batch)], train_y[:, batch:(batch + nnet_batch)])
+    print(f'cost of last batch after training round {i}: {cost}')
+print(f'correctly recognized images after training: {nnet.accuracy(net, test_images, test_labels)}%')
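
The nnet module that the new code imports is not part of this diff. To make the calls above concrete, here is a minimal sketch of what its layer classes and forward() helper might look like; all names, shapes and the initialization scheme below are assumptions, not the actual nnet.py:

    #!/usr/bin/python3
    # hypothetical sketch of nnet.py; the real module is not shown in this diff
    import numpy as np

    # constant seed to keep runs reproducible, as in the removed code
    rg = np.random.default_rng(1)


    class LinearLayer:
        '''Z = W @ A_in + b, evaluated for a whole batch of column vectors'''
        def __init__(self, input_shape, n_out):
            n_in, batch = input_shape
            self.W = rg.normal(size=(n_out, n_in))
            self.b = rg.normal(size=(n_out, 1))
            self.A_in = np.zeros(input_shape)   # remembered for backpropagation
            self.Z = np.zeros((n_out, batch))   # pre-activations of the last batch

        def forward(self, A_in):
            self.A_in = A_in
            self.Z = self.W @ A_in + self.b
            return self.Z


    class SigmoidLayer:
        '''elementwise A = 1 / (1 + exp(-Z))'''
        def __init__(self, shape):
            self.A = np.zeros(shape)

        def forward(self, Z):
            self.A = 1 / (1 + np.exp(-Z))
            return self.A


    def forward(net, X):
        '''chain a batch of input column vectors through all layers'''
        for layer in net:
            X = layer.forward(X)
        return X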
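The remaining helpers used above — classify(), accuracy() and label_vectors() — could plausibly be vectorized along these lines (again an assumed sketch, building on the forward() above):

    def classify(y):
        '''recognized digit = index of the highest-valued output neuron'''
        return np.argmax(y), np.max(y)


    def accuracy(net, images, labels):
        '''percentage of images whose strongest output matches the label'''
        outputs = forward(net, images)
        return 100 * np.mean(np.argmax(outputs, axis=0) == labels)


    def label_vectors(labels, n_out):
        '''one-hot encode integer labels into an (n_out, num_labels) matrix'''
        Y = np.zeros((n_out, len(labels)))
        Y[labels, np.arange(len(labels))] = 1   # Y[labels[i], i] = 1
        return Y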
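Finally, nnet.train() apparently performs one gradient-descent step per batch and returns that batch's cost. A sketch under the assumption of a quadratic cost and the layer classes above; the real module may differ in cost function, learning rate and update details:

    def train(net, X, Y, learning_rate=1.0):
        '''one gradient-descent step on the quadratic cost for one batch;
        returns the cost of this batch before the update'''
        A = forward(net, X)
        batch = X.shape[1]
        cost = np.sum((A - Y) ** 2) / (2 * batch)
        delta = (A - Y) / batch                           # dC/dA at the output
        for layer in reversed(net):
            if isinstance(layer, SigmoidLayer):
                delta = delta * layer.A * (1 - layer.A)   # sigmoid'(z) = a(1 - a)
            else:
                grad_W = delta @ layer.A_in.T
                grad_b = delta.sum(axis=1, keepdims=True)
                delta = layer.W.T @ delta                 # propagate before updating W
                layer.W -= learning_rate * grad_W
                layer.b -= learning_rate * grad_b
        return cost

With definitions like these, the training loop in the diff runs as written: 100 rounds over the 60,000 training images in batches of 10,000.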