Deep Learning from Scratch - Seth Weidman

Chapter 1: Foundations

The author explains how derivatives and the chain rule are used, from basic functions up to matrix-matrix multiplication.
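
For example, the chain rule pushes a derivative backward through a composition of functions. A tiny NumPy sketch of this idea (the function names here are illustrative, not from the book):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def square(x):
    return np.power(x, 2)

def chain_deriv(x):
    # d/dx sigmoid(square(x)) = sigmoid'(square(x)) * square'(x)
    s = sigmoid(square(x))
    dsigmoid = s * (1.0 - s)   # derivative of sigmoid, evaluated at square(x)
    dsquare = 2.0 * x          # derivative of square, evaluated at x
    return dsigmoid * dsquare

print(chain_deriv(np.array([0.5, 1.0, 2.0])))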

Chapter 2: Fundamentals

The author explains linear regression, and presents neural networks as stacks of linear regressions with non-linear activations in between.
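
In that view, a one-hidden-layer network is just two "linear regressions" with a non-linearity between them; a rough sketch (shapes and initial values are illustrative):

import numpy as np

X = np.random.randn(32, 3)                            # batch of 32 examples, 3 features
W1, b1 = np.random.randn(3, 4), np.zeros((1, 4))      # first "linear regression"
W2, b2 = np.random.randn(4, 1), np.zeros((1, 1))      # second "linear regression"

hidden = 1.0 / (1.0 + np.exp(-(np.dot(X, W1) + b1)))  # non-linear activation
prediction = np.dot(hidden, W2) + b2                  # final linear output, shape (32, 1)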

Chapter 3: Deep Learning from Scratch

Build a neural network with autograd.

Operations

class Operation(object):

  • forward(input):
    • Call self._output() to calculate the result
    • store the input and output
  • backward(output_grad):
    • check the shape of self.output and output_grad
    • calculate the input_grad based on output_grad
    • check the shape of input_grad and self.input
  • _output(): abstract
  • _input_grad(output_grad): abstract
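
A minimal NumPy sketch of this base class, following the bullets above (the shape checks are written as plain asserts; the exact names are my reading of the notes, not necessarily the book's code):

import numpy as np
from numpy import ndarray

class Operation(object):
    """Base class for an operation in the network."""

    def forward(self, input_: ndarray) -> ndarray:
        # Store the input, compute and store the output via _output().
        self.input_ = input_
        self.output = self._output()
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        # Check shapes, compute the input gradient, check shapes again.
        assert self.output.shape == output_grad.shape
        self.input_grad = self._input_grad(output_grad)
        assert self.input_.shape == self.input_grad.shape
        return self.input_grad

    def _output(self) -> ndarray:
        raise NotImplementedError()

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        raise NotImplementedError()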

class ParamOperation(Operation):

  • init(param): store param to self.param
  • backward(output_grad):
    • check the shape of self.output and output_grad
    • calculate the input_grad based on output_grad
    • calculate the param_grad based on output_grad
    • check the shape of param_grad and self.param
    • check the shape of input_grad and self.input
  • _param_grad(output_grad): abstract
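
Building on the Operation sketch above:

class ParamOperation(Operation):
    """An Operation that also holds a parameter and its gradient."""

    def __init__(self, param: ndarray):
        super().__init__()
        self.param = param

    def backward(self, output_grad: ndarray) -> ndarray:
        # Compute gradients w.r.t. both the input and the parameter,
        # checking that each gradient matches the shape of its tensor.
        assert self.output.shape == output_grad.shape
        self.input_grad = self._input_grad(output_grad)
        self.param_grad = self._param_grad(output_grad)
        assert self.param.shape == self.param_grad.shape
        assert self.input_.shape == self.input_grad.shape
        return self.input_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        raise NotImplementedError()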

class WeightMultiply(ParamOperation):

  • init(W): super().init(W)
  • _output(self): compute output
    • np.dot(self.input_, self.param)
  • _input_grad(self, output_grad): compute input grad
    • np.dot(output_grad, np.transpose(self.param, (1, 0)))
  • _param_grad(self, output_grad): compute param grad
    • np.dot(np.transpose(self.input_, (1, 0)), output_grad)
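
Filled out as code (a sketch; input_ is assumed to have shape [batch_size, in_features] and W shape [in_features, out_features]):

class WeightMultiply(ParamOperation):
    """Matrix multiplication of the input by a weight matrix W."""

    def __init__(self, W: ndarray):
        super().__init__(W)

    def _output(self) -> ndarray:
        return np.dot(self.input_, self.param)

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        # dL/dX = dL/dY . W^T
        return np.dot(output_grad, np.transpose(self.param, (1, 0)))

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        # dL/dW = X^T . dL/dY
        return np.dot(np.transpose(self.input_, (1, 0)), output_grad)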

class BiasAdd(ParamOperation):

  • init(B): super().init(B)
  • _output(self): compute output
    • self.input_ + self.param
  • _input_grad(self, output_grad): compute input grad
    • np.ones_like(self.input_) * output_grad
  • _param_grad(self, output_grad): compute param grad
    • param_grad = np.ones_like(self.param) * output_grad
    • np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])
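
As code (the bias B is assumed to be a [1, out_features] row vector, so its gradient is summed over the batch dimension):

class BiasAdd(ParamOperation):
    """Add a bias row vector B to every row of the input."""

    def __init__(self, B: ndarray):
        assert B.shape[0] == 1
        super().__init__(B)

    def _output(self) -> ndarray:
        return self.input_ + self.param

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        # Addition passes the gradient straight through to the input.
        return np.ones_like(self.input_) * output_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        # Sum over the batch so the gradient matches B's shape.
        param_grad = np.ones_like(self.param) * output_grad
        return np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])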

class Sigmoid(Operation):

  • _output(self): compute output
    • 1.0 / (1.0 + np.exp(-1.0 * self.input_))
  • _input_grad(self, output_grad): compute input grad
    • sigmoid_bwd = self.output * (1.0 - self.output)
    • input_grad = sigmoid_bwd * output_grad
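
As code:

class Sigmoid(Operation):
    """Element-wise sigmoid activation."""

    def _output(self) -> ndarray:
        return 1.0 / (1.0 + np.exp(-1.0 * self.input_))

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        # d(sigmoid)/dx = sigmoid * (1 - sigmoid), reusing the stored output.
        sigmoid_backward = self.output * (1.0 - self.output)
        return sigmoid_backward * output_grad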

Layers

class Layer(object):

  • init(self, neurons: int):

    • self.neurons = neurons
    • self.first = True
    • self.params: List[ndarray] = []
    • self.param_grads: List[ndarray] = []
    • self.operations: List[Operation] = []
  • _setup_layer(num_in: int): abstract

  • forward(input_): forward propagation

    • if self.first: self._setup_layer(input_); self.first = False
    • self.input_ = input_
    • for operations: input_ = operation.forward(input_)
    • self.output = input_
  • backward(output_grad): backward propagation

    • check shape self.output & output_grad
    • for reversed(self.operations): output_grad = operation.backward(output_grad)
    • input_grad = output_grad
    • self._param_grads()
    • return input_grad
  • _param_grads(): extract param_grad from each ParamOperation

    • self.param_grads = []
    • for operations:
      • if issubclass(operation.__class__, ParamOperation):
        • self.param_grads.append(operation.param_grad)
  • _params(): extract param from each ParamOperation

    • self.params = []
    • for operations:
      • if issubclass(operation.__class__, ParamOperation):
        • self.params.append(operation.param)
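
A sketch of the Layer base class that follows these bullets, assuming the Operation classes above (the public method names forward/backward match how NeuralNetwork calls them below):

from typing import List

class Layer(object):
    """A sequence of Operations plus the parameters they use."""

    def __init__(self, neurons: int):
        self.neurons = neurons
        self.first = True                      # set up lazily on the first forward pass
        self.params: List[ndarray] = []
        self.param_grads: List[ndarray] = []
        self.operations: List[Operation] = []

    def _setup_layer(self, input_: ndarray) -> None:
        raise NotImplementedError()

    def forward(self, input_: ndarray) -> ndarray:
        # Set up the layer on the first call, then push the input
        # through each operation in order.
        if self.first:
            self._setup_layer(input_)
            self.first = False
        self.input_ = input_
        for operation in self.operations:
            input_ = operation.forward(input_)
        self.output = input_
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        # Push the gradient through the operations in reverse order,
        # then collect the parameter gradients.
        assert self.output.shape == output_grad.shape
        for operation in reversed(self.operations):
            output_grad = operation.backward(output_grad)
        input_grad = output_grad
        self._param_grads()
        return input_grad

    def _param_grads(self) -> None:
        # Gather param_grad from every ParamOperation in the layer.
        self.param_grads = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.param_grads.append(operation.param_grad)

    def _params(self) -> None:
        # Gather param from every ParamOperation in the layer.
        self.params = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.params.append(operation.param)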

class Dense(Layer):

  • init(neurons, activation: Operation = Sigmoid()):
    • super().init(neurons)
    • self.activation = activation
  • _setup_layer(self, input_): define a fully connected layer
    • if self.seed: np.random.seed(self.seed)
    • self.params = [weights of shape (num_in, neurons), bias of shape (1, neurons)]
    • self.operations = [WeightMultiply(self.params[0]), BiasAdd(self.params[1]), self.activation]
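
A sketch of Dense (the weight shape is inferred from the number of input features seen on the first forward pass; the seed attribute is assumed to be attached to the layer by NeuralNetwork, see below):

class Dense(Layer):
    """Fully connected layer: weight multiply, bias add, then an activation."""

    def __init__(self, neurons: int, activation: Operation = Sigmoid()):
        super().__init__(neurons)
        self.activation = activation

    def _setup_layer(self, input_: ndarray) -> None:
        # "seed" is expected to be set on the layer by NeuralNetwork (an assumption here).
        if getattr(self, "seed", None) is not None:
            np.random.seed(self.seed)
        # Weights: [num_input_features, neurons]; bias: [1, neurons].
        self.params = [np.random.randn(input_.shape[1], self.neurons),
                       np.random.randn(1, self.neurons)]
        self.operations = [WeightMultiply(self.params[0]),
                           BiasAdd(self.params[1]),
                           self.activation]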

Loss

class Loss(object):

  • forward(self, prediction, target):
    • check shape prediction, target
    • save prediction, target
    • loss_value = self._output()
    • return loss_value
  • backward():
    • self.input_grad = self._input_grad()
    • check shape prediction, self.input_grad
    • return self.input_grad
  • _output(): abstract
  • _input_grad(): abstract
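
As code:

class Loss(object):
    """Computes a scalar loss from a prediction and a target."""

    def forward(self, prediction: ndarray, target: ndarray) -> float:
        # Check shapes, store both tensors, and compute the scalar loss.
        assert prediction.shape == target.shape
        self.prediction = prediction
        self.target = target
        return self._output()

    def backward(self) -> ndarray:
        # Gradient of the loss with respect to the prediction.
        self.input_grad = self._input_grad()
        assert self.prediction.shape == self.input_grad.shape
        return self.input_grad

    def _output(self) -> float:
        raise NotImplementedError()

    def _input_grad(self) -> ndarray:
        raise NotImplementedError()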

class MeanSquaredError(Loss):

  • _output(self):
    • np.sum(np.power(self.prediction - self.target, 2)) / self.prediction.shape[0]
  • _input_grad(self):
    • return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]
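
As code:

class MeanSquaredError(Loss):
    """Mean squared error averaged over the batch."""

    def _output(self) -> float:
        return (np.sum(np.power(self.prediction - self.target, 2))
                / self.prediction.shape[0])

    def _input_grad(self) -> ndarray:
        return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]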

Neural Network

class NeuralNetwork(object):

  • init(layers, loss, seed): store layers, loss, and seed
  • forward(x_batch):
    • x_out = x_batch
    • for layers:
      • x_out = layer.forward(x_out)
    • return x_out
  • backward(loss_grad):
    • grad = loss_grad
    • for reversed(self.layers):
      • grad = layer.backward(grad)
  • train_batch(x_batch, y_batch):
    • prediction = self.forward(x_batch)
    • loss = self.loss.forward(prediction, y_batch)
    • self.backward(self.loss.backward())
    • return loss
  • params(): return each layer's params
  • param_grads(): return each layer's param_grads
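
A sketch of NeuralNetwork plus a small usage example, assuming the Layer and Loss sketches above (the seed is passed down to each layer so weight initialization is reproducible; the shapes in the example are arbitrary):

class NeuralNetwork(object):
    """A list of Layers plus a Loss."""

    def __init__(self, layers: List[Layer], loss: Loss, seed: int = 1):
        self.layers = layers
        self.loss = loss
        self.seed = seed
        if seed:
            # Attach the seed to each layer (used in Dense._setup_layer).
            for layer in self.layers:
                setattr(layer, "seed", self.seed)

    def forward(self, x_batch: ndarray) -> ndarray:
        x_out = x_batch
        for layer in self.layers:
            x_out = layer.forward(x_out)
        return x_out

    def backward(self, loss_grad: ndarray) -> None:
        grad = loss_grad
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

    def train_batch(self, x_batch: ndarray, y_batch: ndarray) -> float:
        # Forward pass, loss, then backward pass through loss and layers.
        prediction = self.forward(x_batch)
        loss = self.loss.forward(prediction, y_batch)
        self.backward(self.loss.backward())
        return loss

    def params(self):
        for layer in self.layers:
            yield from layer.params

    def param_grads(self):
        for layer in self.layers:
            yield from layer.param_grads


# Usage example: a small network trained on one random batch.
nn = NeuralNetwork(layers=[Dense(neurons=13, activation=Sigmoid()),
                           Dense(neurons=1, activation=Sigmoid())],
                   loss=MeanSquaredError(),
                   seed=1)
x_batch = np.random.randn(32, 10)
y_batch = np.random.randn(32, 1)
print(nn.train_batch(x_batch, y_batch))   # scalar loss for this batch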

Trainer and Optimizer
