#!/usr/bin/python3
import numpy as np
import mnist
# seed the generator with a fixed value so every run draws the same random numbers
rg = np.random.default_rng(seed=1)
# transfer functions
# https://en.wikipedia.org/wiki/Sigmoid_function
# classic, differentiable, apparently worse for training
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The textbook formula computes exp(-x), which overflows for strongly
    negative x. Here only exp(-|x|) is evaluated, which always lies in
    (0, 1], and the algebraically equivalent form is chosen per sign:

        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))

    Accepts scalars and arrays (element-wise); scalar input yields a scalar.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # indexing with an empty tuple unwraps a 0-d result into a scalar
    # and leaves real arrays untouched
    return result[()]
def sigmoid_prime(x):
    """Derivative of the sigmoid function: s(x) * (1 - s(x)) with s = sigmoid.

    Accepts whatever sigmoid() accepts and returns a value of the same shape.
    """
    # evaluate the sigmoid once and reuse it for both factors
    s = sigmoid(x)
    return s * (1 - s)
# https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
# mostly preferred these days; not differentiable at 0, but the slope there can be defined arbitrarily as 0 or 1
def reLU(x):
    """Rectified linear unit: keep positive inputs, replace the rest with 0.

    Applied element-wise, so x may be a scalar or an array.
    """
    lower_bound = 0
    return np.maximum(x, lower_bound)
def reLU_prime(x):
    """Slope of reLU: 0 for negative inputs, 1 for positive inputs.

    At exactly 0 the slope is not defined mathematically; this
    implementation uses 1 there.
    """
    slope_at_zero = 1
    return np.heaviside(x, slope_at_zero)
# load the training and test sets; the last two values returned by mnist.load
# are used below as the image dimensions (rows, cols)
train_images, train_labels, rows, cols = mnist.load('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
test_images, test_labels, rows2, cols2 = mnist.load('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
# both sets must share the same image dimensions; raise explicitly instead of
# using assert, because assert statements are stripped under `python -O`
if rows != rows2 or cols != cols2:
    raise ValueError(f'training images are {rows}x{cols}, but test images are {rows2}x{cols2}')
# network layout, listed layer by layer: rows * cols inputs,
# two hidden layers (20 and 16 neurons), and 10 output neurons
SIZES = (rows * cols,) + (20, 16) + (10,)
NUM_LAYERS = len(SIZES)
# one weight matrix and one bias vector per pair of adjacent layers, filled with
# normally distributed random numbers (biases use a standard deviation of 10);
# the draw order -- weights, then biases, layer by layer -- determines which
# values the seeded generator hands out
weights = []
biases = []
for fan_in, fan_out in zip(SIZES, SIZES[1:]):
    weights.append(rg.normal(size=(fan_out, fan_in)))
    biases.append(rg.normal(scale=10, size=fan_out))
def feed_forward(x, transfer=sigmoid):
    """Propagate an input vector through the network, recording every layer.

    Parameters:
        x: input vector for the first layer.
        transfer: activation function applied to each layer's weighted input.

    Returns:
        A tuple (z_s, a_s). z_s holds the weighted input w @ a + b of every
        layer; a_s holds the activations, starting with x itself, so
        a_s[-1] is the network's output vector.
    """
    a_s = [x]
    z_s = []
    for w, b in zip(weights, biases):
        # each layer consumes the previous layer's *activation*; feeding the
        # raw weighted input forward would skip the transfer function
        # between layers and make a_s inconsistent with what was propagated
        z = w @ a_s[-1] + b
        z_s.append(z)
        a_s.append(transfer(z))
    return (z_s, a_s)
def classify(y):
    """Pick the winning output neuron.

    Returns a pair (digit, confidence): the index of the highest-valued
    entry of y, which is the recognized digit, and that highest value.
    """
    winner = np.argmax(y)
    strength = np.max(y)
    return winner, strength
def test():
    """Return the percentage of test images that the network classifies correctly.

    Each column of test_images is fed through the network; a prediction
    counts as correct when the classified digit equals the matching entry
    of test_labels.
    """
    total = test_images.shape[1]
    hits = 0
    for idx in range(total):
        _, activations = feed_forward(test_images[:, idx])
        # the last activation vector is the output layer
        predicted, _ = classify(activations[-1])
        if predicted == test_labels[idx]:
            hits += 1
    return 100 * (hits / total)
# sanity check of the freshly initialized network on the first test image
first_image = test_images[:, 0]
res = feed_forward(first_image)
print(f'output vector of first image: {res[1][-1]}')
# res[1] holds the activations; its last entry is the output layer
first_output = res[1][-1]
digit, conf = classify(first_output)
print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
# overall hit rate before any training has taken place
print(f'correctly recognized images after initialization: {test()}%')