ini_type: initialization type for weight parameters: plain, xavier, or he
'''
def __init__(self, input_shape, n_out, ini_type="plain"):
- self.m = input_shape[1] # number of examples in training data
-
# initialize weights
n_in = input_shape[0]
if ini_type == 'plain':
self.W = rg.standard_normal(size=(n_out, n_in)) * np.sqrt(2/n_in) # scale the weights so their variance is 2/n_in
self.b = np.zeros((n_out, 1))
- self.Z = np.zeros((self.W.shape[0], input_shape[1]))
+ self.shape = (self.W.shape[0], input_shape[1]) # output shape: (n_out, examples per batch)
def forward(self, A_prev):
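# cache the input activations; backward() needs them to compute dW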
self.A_prev = A_prev
- self.Z = self.W @ self.A_prev + self.b
- return self.Z
+ return self.W @ self.A_prev + self.b
- def backward(self, upstream_grad):
+ def backward(self, upstream_grad, learning_rate=0.1):
# derivative of Cost w.r.t W
- self.dW = upstream_grad @ self.A_prev.T
+ dW = upstream_grad @ self.A_prev.T
# derivative of Cost w.r.t b; b is broadcast over every example in forward, so sum the upstream gradient over the columns
- self.db = np.sum(upstream_grad, axis=1, keepdims=True)
+ db = np.sum(upstream_grad, axis=1, keepdims=True)
# derivative of Cost w.r.t A_prev
- self.dA_prev = self.W.T @ upstream_grad
- return self.dA_prev
+ dA_prev = self.W.T @ upstream_grad
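+ # dA_prev is computed with W before the update below, which is the gradient the layer below needs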
+
+ # update parameters
+ self.W -= learning_rate * dW
+ self.b -= learning_rate * db
- def update_params(self, learning_rate=0.1):
- self.W -= learning_rate * self.dW
- self.b -= learning_rate * self.db
+ return dA_prev
class SigmoidLayer:
def __init__(self, shape):
- self.A = np.zeros(shape)
+ self.shape = shape
def forward(self, Z):
+ assert Z.shape == self.shape
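+ # the layer is built for a fixed batch shape; fail early on a mismatched input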
self.A = 1 / (1 + np.exp(-Z)) # compute activations
return self.A
- def backward(self, upstream_grad):
+ def backward(self, upstream_grad, learning_rate=0.1):
# combine the upstream gradient with the local gradient; the result is passed back to the preceding Linear layer
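# the local gradient of the sigmoid is A * (1 - A); e.g. where A = 0.5 it reaches its maximum of 0.25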
- self.dZ = upstream_grad * self.A * (1 - self.A)
- return self.dZ
-
- def update_params(self, learning_rate=0.1):
- pass
+ return upstream_grad * self.A * (1 - self.A)
def label_vectors(labels, n):
def forward(layers, X):
- assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
+ assert X.shape[1] == layers[0].shape[1], f'number of examples {X.shape[1]} does not match first layer batch size {layers[0].shape[1]}'
cur = X
for layer in layers:
cur = layer.forward(cur)
def accuracy(layers, X, labels):
'''Count percentage of test inputs which are being recognized correctly'''
- assert X.shape[1] == layers[0].m, f'input length {X.shape[1]} does not match first layer width {layers[0].m}'
- assert layers[0].m == labels.size, f'first layer width {layers[0].m} does not match number of labels {labels.size}'
+ assert X.shape[1] == layers[0].shape[1], f'number of examples {X.shape[1]} does not match first layer batch size {layers[0].shape[1]}'
+ assert layers[0].shape[1] == labels.size, f'first layer batch size {layers[0].shape[1]} does not match number of labels {labels.size}'
output = forward(layers, X)
classes = classify(output)[0]
return 100 * (np.sum(classes == labels) / classes.size)
cur = dOutput
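# walk the layers in reverse order, handing each one the gradient coming back from the layer above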
for layer in reversed(layers):
- cur = layer.backward(cur)
- layer.update_params(learning_rate)
+ cur = layer.backward(cur, learning_rate)
return cost
# (input)--> [Linear->Sigmoid] -> [Linear->Sigmoid] -->(output)
# handle 10,000 vectors at a time
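# each layer's output shape feeds the constructor of the layer that follows it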
Z1 = nnet.LinearLayer(input_shape=(rows * cols, nnet_batch), n_out=80)
-A1 = nnet.SigmoidLayer(Z1.Z.shape)
-ZO = nnet.LinearLayer(input_shape=A1.A.shape, n_out=10)
-AO = nnet.SigmoidLayer(ZO.Z.shape)
+A1 = nnet.SigmoidLayer(Z1.shape)
+ZO = nnet.LinearLayer(input_shape=A1.shape, n_out=10)
+AO = nnet.SigmoidLayer(ZO.shape)
net = (Z1, A1, ZO, AO)
res = nnet.forward(net, test_images[:, 0:10000])
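# res should be a (10, 10000) array of output activations, one column per test image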