diff --git a/train.py b/train.py
index 366a5a0..8510624 100755
--- a/train.py
+++ b/train.py
@@ -1,82 +1,38 @@
 #!/usr/bin/python3
 
-import numpy as np
-
 import mnist
-
-# use a constant seed to keep things reproducible
-rg = np.random.default_rng(1)
-
-# transfer functions
-
-# https://en.wikipedia.org/wiki/Sigmoid_function
-# classic, differentiable, apparently worse for training
-def sigmoid(x):
-    return 1 / (1 + np.exp(-x))
-
-
-def sigmoid_prime(x):
-    return sigmoid(x) * (1 - sigmoid(x))
-
-
-# https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
-# mostly preferred these days, not differentiable at 0, but slope can be defined arbitrarily as 0 or 1 at 0
-def reLU(x):
-    return np.maximum(x, 0)
-
-
-def reLU_prime(x):
-    return np.heaviside(x, 1)
-
+import nnet
 
 train_images, train_labels, rows, cols = mnist.load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
 test_images, test_labels, rows2, cols2 = mnist.load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
 assert rows == rows2
 assert cols == cols2
+num_train = train_images.shape[1]
+nnet_batch = 10000
 
 # neural network structure: two hidden layers, one output layer
-SIZES = (rows * cols, 20, 16, 10)
-NUM_LAYERS = len(SIZES)
-
-# initialize weight matrices and bias vectors with random numbers
-weights = []
-biases = []
-for i in range(1, NUM_LAYERS):
-    weights.append(rg.normal(size=(SIZES[i], SIZES[i-1])))
-    biases.append(rg.normal(scale=10, size=SIZES[i]))
-
-
-def feed_forward(x, transfer=sigmoid):
-    '''Compute all z and output vectors for given input vector'''
-
-    a_s = [x]
-    z_s = []
-    for w, b in zip(weights, biases):
-        x = w @ x + b
-        z_s.append(x)
-        a_s.append(transfer(x))
-    return (z_s, a_s)
-
-
-def classify(y):
-    # the recognized digit is the index of the highest-valued output neuron
-    return np.argmax(y), np.max(y)
-
-
-def test():
-    """Count percentage of test inputs which are being recognized correctly"""
-
-    good = 0
-    num_images = test_images.shape[1]
-    for i in range(num_images):
-        # the recognized digit is the index of the highest-valued output neuron
-        y = classify(feed_forward(test_images[:, i])[1][-1])[0]
-        good += int(y == test_labels[i])
-    return 100 * (good / num_images)
-
-
-res = feed_forward(test_images[:, 0])
-print(f'output vector of first image: {res[1][-1]}')
-digit, conf = classify(res[1][-1])
+# (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output)
+# handle 10,000 vectors at a time
+Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=80)
+A1 = nnet.SigmoidLayer(Z1.shape)
+ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10)
+AO = nnet.SigmoidLayer(ZO.shape)
+net = (Z1, A1, ZO, AO)
+
+res = nnet.forward(net, test_images[:, 0:10000])
+print(f'output vector of first image: {res[:, 0]}')
+digit, conf = nnet.classify(res[:, 0])
+print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
+print(f'correctly recognized images after initialization: {nnet.accuracy(net, test_images, test_labels)}%')
+
+train_y = nnet.label_vectors(train_labels, 10)
+for i in range(100):
+    for batch in range(0, num_train, nnet_batch):
+        cost = nnet.train(net, train_images[:, batch:(batch + nnet_batch)], train_y[:, batch:(batch + nnet_batch)], learning_rate=1)
+    print(f'cost after training round {i}: {cost}')
+print(f'correctly recognized images after training: {nnet.accuracy(net, test_images, test_labels)}%')
+
+res = nnet.forward(net, test_images[:, 0:10000])
+print(f'output vector of first image: {res[:, 0]}')
+digit, conf = nnet.classify(res[:, 0])
 print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
-print(f'correctly recognized images after initialization: {test()}%')
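
Note: the rewritten train.py imports an nnet module that is not part of this diff. For orientation, here is a minimal sketch of the interface the new code appears to assume (LinearLayer, SigmoidLayer, forward, classify, label_vectors, accuracy, train). The layer internals, the scaled-normal weight initialization, and the quadratic cost below are illustrative assumptions, not the repository's actual nnet.py:

# nnet_sketch.py -- minimal, assumed stand-in for the nnet module used above.
# Data is column-major: an image batch is a (rows*cols, batch) matrix, matching
# the test_images[:, 0:10000] slices in train.py.
import numpy as np

rg = np.random.default_rng(1)


class LinearLayer:
    '''z = W @ x + b, vectorized over a whole batch of column vectors'''
    def __init__(self, input_shape, n_out):
        n_in, batch = input_shape
        # assumed initialization: scale down by sqrt(n_in) to keep outputs tame
        self.W = rg.normal(size=(n_out, n_in)) / np.sqrt(n_in)
        self.b = np.zeros((n_out, 1))
        self.shape = (n_out, batch)

    def forward(self, x):
        self.x = x                     # remember input for the backward pass
        return self.W @ x + self.b     # b broadcasts across the batch

    def backward(self, grad, learning_rate):
        grad_in = self.W.T @ grad      # gradient wrt input, with the old W
        self.W -= learning_rate * (grad @ self.x.T) / self.x.shape[1]
        self.b -= learning_rate * grad.mean(axis=1, keepdims=True)
        return grad_in


class SigmoidLayer:
    def __init__(self, shape):
        self.shape = shape

    def forward(self, z):
        self.a = 1 / (1 + np.exp(-z))
        return self.a

    def backward(self, grad, learning_rate):
        # learning_rate unused; kept for a uniform layer interface
        return grad * self.a * (1 - self.a)


def forward(net, x):
    for layer in net:
        x = layer.forward(x)
    return x


def classify(y):
    # the recognized digit is the index of the highest-valued output neuron
    return np.argmax(y), np.max(y)


def label_vectors(labels, n_out):
    '''Turn an array of digit labels into one-hot column vectors'''
    y = np.zeros((n_out, labels.size))
    y[labels, np.arange(labels.size)] = 1
    return y


def accuracy(net, images, labels):
    '''Percentage of images whose highest output neuron matches the label'''
    guesses = np.argmax(forward(net, images), axis=0)
    return 100 * np.mean(guesses == labels)


def train(net, x, y, learning_rate):
    '''One forward/backward pass over a batch; returns the batch cost'''
    out = forward(net, x)
    cost = np.mean((out - y) ** 2) / 2   # assumed quadratic cost
    grad = out - y                       # its gradient wrt out, up to scaling
    for layer in reversed(net):
        grad = layer.backward(grad, learning_rate)
    return cost

Under these assumptions each training round in train.py walks over the 60,000 training images in 10,000-column slices, does one gradient step per slice, and prints the cost of the last slice; accuracy() then evaluates the whole test set in a single forward pass.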