X-Git-Url: https://piware.de/gitweb/?p=handwriting-recognition.git;a=blobdiff_plain;f=train.py;h=8510624665ba1ddc66300e00030624b40d7025ac;hp=b29effaa799aeb738f932c8e3260facdf6467a86;hb=0b40285a04bfbf2d73f7a7154eacb4613f08b350;hpb=8dcd00e9f8bbfc569c9b29ac06d748320d8bf737 diff --git a/train.py b/train.py index b29effa..8510624 100755 --- a/train.py +++ b/train.py @@ -1,141 +1,38 @@ #!/usr/bin/python3 -import numpy as np - import mnist - -# use a constant seed to keep things reproducible -rg = np.random.default_rng(1) - -# transfer functions - -# https://en.wikipedia.org/wiki/Sigmoid_function -# classic, differentiable, apparently worse for training -def sigmoid(x): - return 1 / (1 + np.exp(-x)) - - -def sigmoid_prime(x): - return sigmoid(x) * (1 - sigmoid(x)) - - -# https://en.wikipedia.org/wiki/Rectifier_(neural_networks) -# mostly preferred these days, not differentiable at 0, but slope can be defined arbitrarily as 0 or 1 at 0 -def reLU(x): - return np.maximum(x, 0) - - -def reLU_prime(x): - return np.heaviside(x, 1) - +import nnet train_images, train_labels, rows, cols = mnist.load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte') test_images, test_labels, rows2, cols2 = mnist.load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte') assert rows == rows2 assert cols == cols2 +num_train = train_images.shape[1] +nnet_batch = 10000 # neural network structure: two hidden layers, one output layer -SIZES = (rows * cols, 20, 16, 10) -NUM_LAYERS = len(SIZES) - -# initialize weight matrices and bias vectors with random numbers -weights = [] -biases = [] -for i in range(1, NUM_LAYERS): - weights.append(rg.normal(size=(SIZES[i], SIZES[i-1]))) - biases.append(rg.normal(scale=10, size=SIZES[i])) - - -def feed_forward(x, transfer=sigmoid): - '''Compute all z and output vectors for given input vector''' - - a_s = [x] - z_s = [] - for w, b in zip(weights, biases): - x = w @ x + b - z_s.append(x) - a_s.append(transfer(x)) - return (z_s, a_s) - - -def classify(y): - # the recognized digit is the index of the highest-valued output neuron - return np.argmax(y), np.max(y) - - -def cost_grad(x, target_y, transfer=sigmoid, transfer_prime=sigmoid_prime): - '''Return (∂C/∂w, ∂C/∂b) for a particular input and desired output vector''' - - # forward pass, remember all z vectors and activations for every layer - z_s, a_s = feed_forward(x, transfer) - - # backward pass - deltas = [None] * len(weights) # delta = dC/dz error for each layer - # insert the last layer error - deltas[-1] = transfer_prime(z_s[-1]) * 2 * (a_s[-1] - target_y) - for i in reversed(range(len(deltas) - 1)): - deltas[i] = (weights[i + 1].T @ deltas[i + 1]) * transfer_prime(z_s[i]) - - dw = [d @ a_s[i+1] for i, d in enumerate(deltas)] - db = deltas - return dw, db - - -def label_vector(label): - x = np.zeros(10) - x[label] = 1.0 - return x - - -def backpropagate(image_batch, label_batch, eta): - '''Update NN with gradient descent and backpropagation to a batch of inputs - - eta is the learning rate. - ''' - global weights, biases - - num_images = image_batch.shape[1] - for i in range(num_images): - y = label_vector(label_batch[i]) - dws, dbs = cost_grad(image_batch[:, i], y) - weights = [w + eta * dw for w, dw in zip(weights, dws)] - biases = [b + eta * db for b, db in zip(biases, dbs)] - - -def train(images, labels, eta, batch_size=100): - '''Do backpropagation for smaller batches - - This greatly speeds up the learning process, at the expense of finding a more erratic path to the local minimum. - ''' - num_images = images.shape[1] - offset = 0 - while offset < num_images: - images_batch = images[:, offset:offset + batch_size] - labels_batch = labels[offset:offset + batch_size] - backpropagate(images_batch, labels_batch, eta) - offset += batch_size - - -def test(): - """Count percentage of test inputs which are being recognized correctly""" - - good = 0 - num_images = test_images.shape[1] - for i in range(num_images): - # the recognized digit is the index of the highest-valued output neuron - y = classify(feed_forward(test_images[:, i])[1][-1])[0] - good += int(y == test_labels[i]) - return 100 * (good / num_images) - - -res = feed_forward(test_images[:, 0]) -print(f'output vector of first image: {res[1][-1]}') -digit, conf = classify(res[1][-1]) +# (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output) +# handle 10,000 vectors at a time +Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=80) +A1 = nnet.SigmoidLayer(Z1.shape) +ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10) +AO = nnet.SigmoidLayer(ZO.shape) +net = (Z1, A1, ZO, AO) + +res = nnet.forward(net, test_images[:, 0:10000]) +print(f'output vector of first image: {res[:, 0]}') +digit, conf = nnet.classify(res[:, 0]) +print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}') +print(f'correctly recognized images after initialization: {nnet.accuracy(net, test_images, test_labels)}%') + +train_y = nnet.label_vectors(train_labels, 10) +for i in range(100): + for batch in range(0, num_train, nnet_batch): + cost = nnet.train(net, train_images[:, batch:(batch + nnet_batch)], train_y[:, batch:(batch + nnet_batch)], learning_rate=1) + print(f'cost after training round {i}: {cost}') +print(f'correctly recognized images after training: {nnet.accuracy(net, test_images, test_labels)}%') + +res = nnet.forward(net, test_images[:, 0:10000]) +print(f'output vector of first image: {res[:, 0]}') +digit, conf = nnet.classify(res[:, 0]) print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}') -print(f'correctly recognized images after initialization: {test()}%') - -for i in range(1): - print(f"round #{i} of learning...") - train(test_images, test_labels, 1) - -print(f'correctly recognized images: {test()}%')