Chapter 1: Foundations
The author explains how derivatives and the chain rule are used, working up from basic functions to functions involving matrix-matrix multiplication.
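For example, the chain rule says the derivative of a composition f2(f1(x)) is f2'(f1(x)) * f1'(x). A minimal numpy sketch of this idea (the helper names deriv and chain_deriv are illustrative, not the book's code):

import numpy as np

def deriv(func, x, delta=1e-3):
    # central-difference approximation of the elementwise derivative of func at x
    return (func(x + delta) - func(x - delta)) / (2 * delta)

def chain_deriv(f1, f2, x):
    # chain rule: d/dx f2(f1(x)) = f2'(f1(x)) * f1'(x)
    return deriv(f2, f1(x)) * deriv(f1, x)

x = np.array([0.5, 1.0, 2.0])
# derivative of sin(x**2), approximated numerically
print(chain_deriv(np.square, np.sin, x))
# analytic value cos(x**2) * 2x for comparison
print(np.cos(np.square(x)) * 2 * x)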
Chapter 2: Fundamentals
The author covers linear regression, then presents neural networks as stacks of linear regressions with non-linear activations in between.
Chapter 3: Deep Learning from Scratch
Build a neural network with autograd.
Operations
class Operation(object):
- forward(input):
- store the input in self.input_
- call self._output() to compute the result, store it in self.output, and return it
- backward(output_grad):
- check the shape of self.output and output_grad
- calculate the input_grad based on output_grad
- check the shape of input_grad and self.input
- _output(): abstract
- _input_grad(output_grad): abstract
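A minimal Python sketch of this base class, following the outline (numpy arrays throughout; assert_same_shape is a small helper assumed here for the shape checks):

import numpy as np
from numpy import ndarray

def assert_same_shape(a: ndarray, b: ndarray):
    assert a.shape == b.shape, f"shape mismatch: {a.shape} vs {b.shape}"

class Operation(object):
    def forward(self, input_: ndarray) -> ndarray:
        # store the input, then compute and store the output
        self.input_ = input_
        self.output = self._output()
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        # the incoming gradient must match the output's shape
        assert_same_shape(self.output, output_grad)
        input_grad = self._input_grad(output_grad)
        # the outgoing gradient must match the input's shape
        assert_same_shape(self.input_, input_grad)
        return input_grad

    def _output(self) -> ndarray:
        raise NotImplementedError

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        raise NotImplementedError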
class ParamOperation(Operation):
- init(param): store param in self.param
- backward(output_grad):
- check the shape of self.output and output_grad
- calculate the input_grad based on output_grad
- calculate the param_grad based on output_grad
- check the shape of param_grad and self.param
- check the shape of input_grad and self.input
- _param_grad(output_grad): abstract
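Continuing the same sketch, ParamOperation adds the parameter and its gradient:

class ParamOperation(Operation):
    def __init__(self, param: ndarray):
        super().__init__()
        self.param = param

    def backward(self, output_grad: ndarray) -> ndarray:
        assert_same_shape(self.output, output_grad)
        self.input_grad = self._input_grad(output_grad)
        self.param_grad = self._param_grad(output_grad)
        assert_same_shape(self.param, self.param_grad)
        assert_same_shape(self.input_, self.input_grad)
        return self.input_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        raise NotImplementedError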
class WeightMultiply(ParamOperation):
- init(W): super().init(W)
- _output(self): compute output
np.dot(self.input_, self.param)
- _input_grad(self, output_grad): compute input grad
np.dot(output_grad, np.transpose(self.param, (1, 0)))
- _param_grad(self, output_grad): compute param grad
np.dot(np.transpose(self.input_, (1, 0)), output_grad)
class BiasAdd(ParamOperation):
- init(B): super().init(B)
- _output(self): compute output
self.input_ + self.param
- _input_grad(self, output_grad): compute input grad
np.ones_like(self.input_) * output_grad
- _param_grad(self, output_grad): compute param grad
param_grad = np.ones_like(self.param) * output_grad
np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])
class Sigmoid(Operation):
- _output(self): compute output
1.0 / (1.0 + np.exp(-1.0 * self.input_))
- _input_grad(self, output_grad): compute input grad
sigmoid_bwd = self.output * (1.0 - self.output)
input_grad = sigmoid_bwd * output_grad
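Wrapping the numpy expressions above into classes gives the three concrete operations; a sketch that continues the Operation/ParamOperation code above:

class WeightMultiply(ParamOperation):
    def __init__(self, W: ndarray):
        super().__init__(W)

    def _output(self) -> ndarray:
        return np.dot(self.input_, self.param)

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        return np.dot(output_grad, np.transpose(self.param, (1, 0)))

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        return np.dot(np.transpose(self.input_, (1, 0)), output_grad)

class BiasAdd(ParamOperation):
    def __init__(self, B: ndarray):
        super().__init__(B)

    def _output(self) -> ndarray:
        return self.input_ + self.param

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        return np.ones_like(self.input_) * output_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        # sum the per-example gradients down to the (1, neurons) bias shape
        param_grad = np.ones_like(self.param) * output_grad
        return np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])

class Sigmoid(Operation):
    def _output(self) -> ndarray:
        return 1.0 / (1.0 + np.exp(-1.0 * self.input_))

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), reusing the stored output
        sigmoid_backward = self.output * (1.0 - self.output)
        return sigmoid_backward * output_grad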
Layers
class Layer(object):
init(self, neurons: int):
- self.neurons = neurons
- self.first = True
- self.params: List[ndarray] = []
- self.param_grads: List[ndarray] = []
- self.operations: List[Operation] = []
_setup_layer(input_): abstract - set up self.params and self.operations from the first input batch
forward(input_): forward propagation
- if self.first: self._setup_layer(input_); self.first = False
- self.input_ = input_
- for operation in self.operations: input_ = operation.forward(input_)
- self.output = input_; return self.output
backward(output_grad): backward propagation
- check the shape of self.output and output_grad
- for operation in reversed(self.operations): output_grad = operation.backward(output_grad)
- input_grad = output_grad
- self._param_grads()
- return input_grad
_param_grads(): collect param_grad from each operation
- self.param_grads = []
- for operation in self.operations:
- if issubclass(operation.__class__, ParamOperation): self.param_grads.append(operation.param_grad)
_params(): collect param from each operation
- self.params = []
- for operation in self.operations:
- if issubclass(operation.__class__, ParamOperation): self.params.append(operation.param)
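A sketch of the Layer base class as described above; _setup_layer receives the first input batch here, matching how forward calls it:

from typing import List

class Layer(object):
    def __init__(self, neurons: int):
        self.neurons = neurons
        self.first = True
        self.params: List[ndarray] = []
        self.param_grads: List[ndarray] = []
        self.operations: List[Operation] = []

    def _setup_layer(self, input_: ndarray):
        raise NotImplementedError

    def forward(self, input_: ndarray) -> ndarray:
        # lazily set up the operations on the first batch seen
        if self.first:
            self._setup_layer(input_)
            self.first = False
        self.input_ = input_
        for operation in self.operations:
            input_ = operation.forward(input_)
        self.output = input_
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        assert_same_shape(self.output, output_grad)
        # walk the operations in reverse, passing the gradient backwards
        for operation in reversed(self.operations):
            output_grad = operation.backward(output_grad)
        input_grad = output_grad
        self._param_grads()
        return input_grad

    def _param_grads(self):
        # collect param_grad from every parameterised operation
        self.param_grads = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.param_grads.append(operation.param_grad)

    def _params(self):
        # collect param from every parameterised operation
        self.params = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.params.append(operation.param)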
class Dense(Layer):
- init(neurons, activation: Operation = Sigmoid()):
- super().init(neurons)
- self.activation = activation
- _setup_layer(self, input_): define a fully connected layer
- if self.seed: np.random.seed(self.seed)
- self.params = [weights of shape (num_in, neurons), bias of shape (1, neurons)]
- self.operations = [WeightMultiply(self.params[0]), BiasAdd(self.params[1]), self.activation]
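A sketch of Dense. The parameter shapes, (num_in, neurons) weights and a (1, neurons) bias, and the standard-normal initialization are assumptions consistent with WeightMultiply and BiasAdd above; self.seed is expected to be set from outside (e.g. by NeuralNetwork), so it is read defensively here:

class Dense(Layer):
    def __init__(self, neurons: int, activation: Operation = Sigmoid()):
        super().__init__(neurons)
        self.activation = activation

    def _setup_layer(self, input_: ndarray):
        # seed is optionally set on the layer from outside (see NeuralNetwork)
        if getattr(self, "seed", None):
            np.random.seed(self.seed)
        num_in = input_.shape[1]
        self.params = []
        self.params.append(np.random.randn(num_in, self.neurons))  # weights
        self.params.append(np.random.randn(1, self.neurons))       # bias
        self.operations = [WeightMultiply(self.params[0]),
                           BiasAdd(self.params[1]),
                           self.activation]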
Loss
class Loss(object):
- forward(self, prediction, target):
- check shape prediction, target
- save prediction, target
- loss_value = self._output()
- return loss_value
- backward():
- self.input_grad = self._input_grad()
- check shape prediction, self.input_grad
- return self.input_grad
- _output(): abstract
- _input_grad(): abstract
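A sketch of the Loss base class following the outline:

class Loss(object):
    def forward(self, prediction: ndarray, target: ndarray) -> float:
        assert_same_shape(prediction, target)
        self.prediction = prediction
        self.target = target
        return self._output()

    def backward(self) -> ndarray:
        # gradient of the loss with respect to the prediction
        self.input_grad = self._input_grad()
        assert_same_shape(self.prediction, self.input_grad)
        return self.input_grad

    def _output(self) -> float:
        raise NotImplementedError

    def _input_grad(self) -> ndarray:
        raise NotImplementedError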
class MeanSquaredError(Loss):
- _output(self):
np.sum(np.power(self.prediction - self.target, 2)) / self.prediction.shape[0]
- _input_grad(self):
return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]
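And the mean squared error subclass as a runnable sketch (note np.power takes the exponent as its second argument):

class MeanSquaredError(Loss):
    def _output(self) -> float:
        # mean over the batch of the squared error
        return np.sum(np.power(self.prediction - self.target, 2)) / self.prediction.shape[0]

    def _input_grad(self) -> ndarray:
        return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]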
Neural Network
class NeuralNetwork(object):
- init(layers, loss, seed): store layers and loss; set seed on each layer
- forward(x_batch):
- x_out = x_batch
- for layer in self.layers:
- x_out = layer.forward(x_out)
- return x_out
- backward(loss_grad):
- grad = loss_grad
- for layer in reversed(self.layers):
- grad = layer.backward(grad)
- train_batch(x_batch, y_batch):
- prediction = self.forward(x_batch)
- loss = self.loss.forward(prediction, y_batch)
- self.backward(self.loss.backward())
- return loss
- params(): return each layer's params
- param_grads(): return each layer's param_grads
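Finally, a sketch of NeuralNetwork plus a minimal usage example with a hand-rolled SGD step. The layer sizes, learning rate, and random data are illustrative only, and propagating the seed to each layer is an assumption that matches Dense reading self.seed above:

class NeuralNetwork(object):
    def __init__(self, layers: List[Layer], loss: Loss, seed: int = 1):
        self.layers = layers
        self.loss = loss
        if seed:
            # assumption: push the seed down so each Dense can use it in _setup_layer
            for layer in self.layers:
                setattr(layer, "seed", seed)

    def forward(self, x_batch: ndarray) -> ndarray:
        x_out = x_batch
        for layer in self.layers:
            x_out = layer.forward(x_out)
        return x_out

    def backward(self, loss_grad: ndarray):
        grad = loss_grad
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

    def train_batch(self, x_batch: ndarray, y_batch: ndarray) -> float:
        prediction = self.forward(x_batch)
        loss = self.loss.forward(prediction, y_batch)
        self.backward(self.loss.backward())
        return loss

    def params(self):
        for layer in self.layers:
            yield from layer.params

    def param_grads(self):
        for layer in self.layers:
            yield from layer.param_grads

# usage sketch: one training step on random data, then a plain SGD update
nn = NeuralNetwork(layers=[Dense(13, activation=Sigmoid()),
                           Dense(1, activation=Sigmoid())],
                   loss=MeanSquaredError(),
                   seed=20190501)
x_batch = np.random.randn(32, 4)
y_batch = np.random.rand(32, 1)
loss = nn.train_batch(x_batch, y_batch)
for param, grad in zip(nn.params(), nn.param_grads()):
    param -= 0.01 * grad  # in-place update so the operations see the new values
print(loss)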