From d986cacf7fc94fb78904f01e11128d666efff804 Mon Sep 17 00:00:00 2001
From: Martin Pitt
Date: Sun, 30 Aug 2020 10:20:14 +0200
Subject: [PATCH] Add reLU layer implementation

This requires normalizing the input data to [0, 1]; otherwise the values
get wildly out of range. But normalizing the input range makes Sigmoid
worse, so don't do this by default. Even with normalization, reLU still
performs slightly worse than Sigmoid.
---
 README.md |  9 +++++++++
 nnet.py   | 14 ++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/README.md b/README.md
index d48ebf3..b309254 100644
--- a/README.md
+++ b/README.md
@@ -112,3 +112,12 @@ output vector of first image: [1.11064478e-02 5.59058012e-03 5.40483856e-02 7.93
 real	4m10.904s
 user	11m21.203s
 ```
+
+- Replace the [Sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function) activation function with [reLU](https://en.wikipedia.org/wiki/Rectifier_%28neural_networks%29). This has some interesting effects: a learning rate of 1 leads to "overshooting", where the cost function actually _increases_ several times during the learning steps, and the overall result is worse. Letting the learning rate fall linearly over the training rounds helps, but in the end the result is still worse than with Sigmoid:
+```
+cost after training round 99: 0.07241763398153217
+correctly recognized images after training: 92.46%
+output vector of first image: [0. 0. 0. 0. 0. 0.
+ 0. 0.89541759 0. 0. ]
+classification of first image: 7 with confidence 0.8954175907939048; real label 7
+```
diff --git a/nnet.py b/nnet.py
index ddf8ba6..239c96b 100644
--- a/nnet.py
+++ b/nnet.py
@@ -59,6 +59,20 @@ class SigmoidLayer:
         return upstream_grad * self.A * (1 - self.A)
 
 
+class reLULayer:
+    def __init__(self, shape):
+        self.shape = shape
+
+    def forward(self, Z):
+        assert Z.shape == self.shape
+        self.A = np.maximum(Z, 0)
+        return self.A
+
+    def backward(self, upstream_grad, learning_rate=0.1):
+        # couple upstream gradient with the local reLU gradient (1 where A > 0, else 0); the result will be sent back to the Linear layer
+        return upstream_grad * np.heaviside(self.A, 0)
+
+
 def label_vectors(labels, n):
     y = np.zeros((n, labels.size))
     for i, l in enumerate(labels):
-- 
2.39.2
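
The commit message notes that reLU requires normalizing the input data to [0, 1]. The normalization itself is not part of this patch; the following is only a minimal sketch of the idea, assuming the raw inputs are 8-bit pixel values in the 0–255 range (the shape and divisor are assumptions, not taken from this repository):

```python
import numpy as np

# Hypothetical raw input: 8-bit pixel values (assumed range 0-255).
X_raw = np.random.randint(0, 256, size=(784, 100))

# Scale into [0, 1] so the reLU activations don't grow wildly out of range.
X = X_raw.astype(np.float64) / 255.0
```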
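
To illustrate how the new layer behaves, here is a hypothetical usage sketch; the shape and input values are made up, and it assumes `nnet.py` can be imported as a module:

```python
import numpy as np
from nnet import reLULayer  # assumes nnet.py is importable

relu = reLULayer((3, 1))

Z = np.array([[-2.0], [0.0], [3.0]])   # pre-activations from a Linear layer
A = relu.forward(Z)                     # [[0.], [0.], [3.]] -- negatives clipped to 0

grad = relu.backward(np.ones((3, 1)))   # [[0.], [0.], [1.]] -- gradient only flows where Z > 0
```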
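
The README note also mentions letting the learning rate fall linearly over the training rounds. The training loop is not part of this patch; the sketch below only shows the decay idea, with all names (`n_rounds`, `initial_rate`, `layers`) invented for illustration:

```python
n_rounds = 100
initial_rate = 1.0  # the constant rate that led to "overshooting"

for round_no in range(n_rounds):
    # fall linearly from initial_rate towards 0 over the training rounds
    learning_rate = initial_rate * (1 - round_no / n_rounds)
    # ... the backward passes would then use this rate, e.g.:
    # for layer in reversed(layers):
    #     grad = layer.backward(grad, learning_rate=learning_rate)
```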