-SIZES = (rows * cols, 20, 16, 10)
-NUM_LAYERS = len(SIZES)
-
-# initialize weight matrices and bias vectors with random numbers
-weights = []
-biases = []
-for i in range(1, NUM_LAYERS):
- weights.append(rg.normal(size=(SIZES[i], SIZES[i-1])))
- biases.append(rg.normal(scale=10, size=SIZES[i]))
-
-
-def feed_forward(x, transfer=sigmoid):
- '''Compute all z and output vectors for given input vector'''
-
- a_s = [x]
- z_s = []
- for w, b in zip(weights, biases):
- x = w @ x + b
- z_s.append(x)
- a_s.append(transfer(x))
- return (z_s, a_s)
-
-
-def classify(y):
- # the recognized digit is the index of the highest-valued output neuron
- return np.argmax(y), np.max(y)
-
-
-def cost_grad(x, target_y, transfer=sigmoid, transfer_prime=sigmoid_prime):
- '''Return (∂C/∂w, ∂C/∂b) for a particular input and desired output vector'''
-
- # forward pass, remember all z vectors and activations for every layer
- z_s, a_s = feed_forward(x, transfer)
-
- # backward pass
- deltas = [None] * len(weights) # delta = dC/dz error for each layer
- # insert the last layer error
- deltas[-1] = transfer_prime(z_s[-1]) * 2 * (a_s[-1] - target_y)
- for i in reversed(range(len(deltas) - 1)):
- deltas[i] = (weights[i + 1].T @ deltas[i + 1]) * transfer_prime(z_s[i])
-
- dw = [d @ a_s[i+1] for i, d in enumerate(deltas)]
- db = deltas
- return dw, db
-
-
-def label_vector(label):
- x = np.zeros(10)
- x[label] = 1.0
- return x
-
-
-def backpropagate(image_batch, label_batch, eta):
- '''Update NN with gradient descent and backpropagation to a batch of inputs
-
- eta is the learning rate.
- '''
- global weights, biases
-
- num_images = image_batch.shape[1]
- for i in range(num_images):
- y = label_vector(label_batch[i])
- dws, dbs = cost_grad(image_batch[:, i], y)
- weights = [w + eta * dw for w, dw in zip(weights, dws)]
- biases = [b + eta * db for b, db in zip(biases, dbs)]
-
-
-def test():
- """Count percentage of test inputs which are being recognized correctly"""
-
- good = 0
- num_images = test_images.shape[1]
- for i in range(num_images):
- # the recognized digit is the index of the highest-valued output neuron
- y = classify(feed_forward(test_images[:, i])[1][-1])[0]
- good += int(y == test_labels[i])
- return 100 * (good / num_images)
-
-
-res = feed_forward(test_images[:, 0])
-print(f'output vector of first image: {res[1][-1]}')
-digit, conf = classify(res[1][-1])
+# (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output)
+# handle nnet_batch vectors at a time, one per column (presumably 10,000 -- confirm against nnet_batch)
+Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=80)
+A1 = nnet.SigmoidLayer(Z1.shape)
+ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10)
+AO = nnet.SigmoidLayer(ZO.shape)
+net = (Z1, A1, ZO, AO)
+
+res = nnet.forward(net, test_images[:, 0:10000])
+print(f'output vector of first image: {res[:, 0]}')
+digit, conf = nnet.classify(res[:, 0])
+print(f'classification of first image: {digit} with confidence {conf}; real label {test_labels[0]}')
+print(f'correctly recognized images after initialization: {nnet.accuracy(net, test_images, test_labels)}%')
+
+train_y = nnet.label_vectors(train_labels, 10)
+for i in range(100):
+ for batch in range(0, num_train, nnet_batch):
+ cost = nnet.train(net, train_images[:, batch:(batch + nnet_batch)], train_y[:, batch:(batch + nnet_batch)], learning_rate=(100-i)/100)
+ print(f'cost after training round {i}: {cost}')
+print(f'correctly recognized images after training: {nnet.accuracy(net, test_images, test_labels)}%')
+
+res = nnet.forward(net, test_images[:, 0:10000])
+print(f'output vector of first image: {res[:, 0]}')
+digit, conf = nnet.classify(res[:, 0])