Deep Learning from Scratch - Seth Weidman

Chapter 1: Foundations

The author explains how derivatives and the chain rule are used, from basic functions up to matrix-matrix multiplication.
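
For example, the chain rule pushes a derivative backward through a composition of functions. A tiny NumPy sketch of this idea (the function names here are illustrative, not from the book):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def square(x):
    return np.power(x, 2)

def chain_deriv(x):
    # d/dx sigmoid(square(x)) = sigmoid'(square(x)) * square'(x)
    s = sigmoid(square(x))
    dsigmoid = s * (1.0 - s)   # derivative of sigmoid, evaluated at square(x)
    dsquare = 2.0 * x          # derivative of square, evaluated at x
    return dsigmoid * dsquare

print(chain_deriv(np.array([0.5, 1.0, 2.0])))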

Chapter 2: Fundamentals

The author explains linear regression, and presents neural networks as stacks of linear regressions with non-linear activations in between.
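
In that view, a one-hidden-layer network is just two "linear regressions" with a non-linearity between them; a rough sketch (shapes and initial values are illustrative):

import numpy as np

X = np.random.randn(32, 3)                            # batch of 32 examples, 3 features
W1, b1 = np.random.randn(3, 4), np.zeros((1, 4))      # first "linear regression"
W2, b2 = np.random.randn(4, 1), np.zeros((1, 1))      # second "linear regression"

hidden = 1.0 / (1.0 + np.exp(-(np.dot(X, W1) + b1)))  # non-linear activation
prediction = np.dot(hidden, W2) + b2                  # final linear output, shape (32, 1)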

Chapter 3: Deep Learning from Scratch

Build a neural network with autograd.

Operations

class Operation(object):

  • forward(input):
    • Call self._output() to calculate the result
    • store the input and output
  • backward(output_grad):
    • check the shape of self.output and output_grad
    • calculate the input_grad based on output_grad
    • check the shape of input_grad and self.input
  • _output(): abstract
  • _input_grad(output_grad): abstract
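
A minimal NumPy sketch of this base class, following the bullets above (the shape checks are written as plain asserts; the exact names are my reading of the notes, not necessarily the book's code):

import numpy as np
from numpy import ndarray

class Operation(object):
    """Base class for an operation in the network."""

    def forward(self, input_: ndarray) -> ndarray:
        # Store the input, compute and store the output via _output().
        self.input_ = input_
        self.output = self._output()
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        # Check shapes, compute the input gradient, check shapes again.
        assert self.output.shape == output_grad.shape
        self.input_grad = self._input_grad(output_grad)
        assert self.input_.shape == self.input_grad.shape
        return self.input_grad

    def _output(self) -> ndarray:
        raise NotImplementedError()

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        raise NotImplementedError()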

class ParamOperation(Operation):

  • init(param): store param to self.param
  • backward(output_grad):
    • check the shape of self.output and output_grad
    • calculate the input_grad based on output_grad
    • calculate the param_grad based on output_grad
    • check the shape of param_grad and self.param
    • check the shape of input_grad and self.input
  • _param_grad(output_grad): abstract
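
Building on the Operation sketch above:

class ParamOperation(Operation):
    """An Operation that also holds a parameter and its gradient."""

    def __init__(self, param: ndarray):
        super().__init__()
        self.param = param

    def backward(self, output_grad: ndarray) -> ndarray:
        # Compute gradients w.r.t. both the input and the parameter,
        # checking that each gradient matches the shape of its tensor.
        assert self.output.shape == output_grad.shape
        self.input_grad = self._input_grad(output_grad)
        self.param_grad = self._param_grad(output_grad)
        assert self.param.shape == self.param_grad.shape
        assert self.input_.shape == self.input_grad.shape
        return self.input_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        raise NotImplementedError()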

class WeightMultiply(ParamOperation):

  • init(W): super().init(W)
  • _output(self): compute output
    • np.dot(self.input_, self.param)
  • _input_grad(self, output_grad): compute input grad
    • np.dot(output_grad, np.transpose(self.param, (1, 0)))
  • _param_grad(self, output_grad): compute param grad
    • np.dot(np.transpose(self.input_, (1, 0)), output_grad)
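
Filled out as code (a sketch; input_ is assumed to have shape [batch_size, in_features] and W shape [in_features, out_features]):

class WeightMultiply(ParamOperation):
    """Matrix multiplication of the input by a weight matrix W."""

    def __init__(self, W: ndarray):
        super().__init__(W)

    def _output(self) -> ndarray:
        return np.dot(self.input_, self.param)

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        # dL/dX = dL/dY . W^T
        return np.dot(output_grad, np.transpose(self.param, (1, 0)))

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        # dL/dW = X^T . dL/dY
        return np.dot(np.transpose(self.input_, (1, 0)), output_grad)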

class BiasAdd(ParamOperation):

  • init(B): super().init(B)
  • _output(self): compute output
    • self.input_ + self.param
  • _input_grad(self, output_grad): compute input grad
    • np.ones_like(self.input_) * output_grad
  • _param_grad(self, output_grad): compute param grad
    • param_grad = np.ones_like(self.param) * output_grad
    • np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])
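
As code (the bias B is assumed to be a [1, out_features] row vector, so its gradient is summed over the batch dimension):

class BiasAdd(ParamOperation):
    """Add a bias row vector B to every row of the input."""

    def __init__(self, B: ndarray):
        assert B.shape[0] == 1
        super().__init__(B)

    def _output(self) -> ndarray:
        return self.input_ + self.param

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        # Addition passes the gradient straight through to the input.
        return np.ones_like(self.input_) * output_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        # Sum over the batch so the gradient matches B's shape.
        param_grad = np.ones_like(self.param) * output_grad
        return np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])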

class Sigmoid(Operation):

  • _output(self): compute output
    • 1.0 / (1.0 + np.exp(-1.0 * self.input_))
  • _input_grad(self, output_grad): compute input grad
    • sigmoid_bwd = self.output * (1.0 - self.output)
    • input_grad = sigmoid_bwd * output_grad
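
As code:

class Sigmoid(Operation):
    """Element-wise sigmoid activation."""

    def _output(self) -> ndarray:
        return 1.0 / (1.0 + np.exp(-1.0 * self.input_))

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        # d(sigmoid)/dx = sigmoid * (1 - sigmoid), reusing the stored output.
        sigmoid_backward = self.output * (1.0 - self.output)
        return sigmoid_backward * output_grad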

Layers

class Layer(object):

  • init(self, neurons: int):

    • self.neurons = neurons
    • self.first = True
    • self.params: List[ndarray] = []
    • self.param_grads: List[ndarray] = []
    • self.operations: List[Operation] = []
  • _setup_layer(num_in: int): abstract

  • forward(input_): forward propagation

    • if self.first: self._setup_layer(input_); self.first = False
    • self.input_ = input_
    • for operations: input_ = operation.forward(input_)
    • self.output = input_
  • backward(output_grad): backward propagation

    • check shape self.output & output_grad
    • for reversed(self.operations): output_grad = operation.backward(output_grad)
    • input_grad = output_grad
    • self._param_grads()
    • return input_grad
  • _param_grads(): extract param_grad from each ParamOperation

    • self.param_grads = []
    • for operations:
      • if issubclass(operation.__class__, ParamOperation):
        • self.param_grads.append(operation.param_grad)
  • _params(): extract param from each ParamOperation

    • self.params = []
    • for operations:
      • if issubclass(operation.__class__, ParamOperation):
        • self.params.append(operation.param)
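
A sketch of the Layer base class that follows these bullets, assuming the Operation classes above (the public method names forward/backward match how NeuralNetwork calls them below):

from typing import List

class Layer(object):
    """A sequence of Operations plus the parameters they use."""

    def __init__(self, neurons: int):
        self.neurons = neurons
        self.first = True                      # set up lazily on the first forward pass
        self.params: List[ndarray] = []
        self.param_grads: List[ndarray] = []
        self.operations: List[Operation] = []

    def _setup_layer(self, input_: ndarray) -> None:
        raise NotImplementedError()

    def forward(self, input_: ndarray) -> ndarray:
        # Set up the layer on the first call, then push the input
        # through each operation in order.
        if self.first:
            self._setup_layer(input_)
            self.first = False
        self.input_ = input_
        for operation in self.operations:
            input_ = operation.forward(input_)
        self.output = input_
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        # Push the gradient through the operations in reverse order,
        # then collect the parameter gradients.
        assert self.output.shape == output_grad.shape
        for operation in reversed(self.operations):
            output_grad = operation.backward(output_grad)
        input_grad = output_grad
        self._param_grads()
        return input_grad

    def _param_grads(self) -> None:
        # Gather param_grad from every ParamOperation in the layer.
        self.param_grads = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.param_grads.append(operation.param_grad)

    def _params(self) -> None:
        # Gather param from every ParamOperation in the layer.
        self.params = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.params.append(operation.param)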

class Dense(Layer):

  • init(neurons, activation: Operation = Sigmoid()):
    • super().init(neurons)
    • self.activation = activation
  • _setup_layer(self, input_): define a fully connected layer
    • if self.seed: np.random.seed(self.seed)
    • self.params = [weights of shape (num_in, neurons), bias of shape (1, neurons)]
    • self.operations = [WeightMultiply(self.params[0]), BiasAdd(self.params[1]), self.activation]
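
A sketch of Dense (the weight shape is inferred from the number of input features seen on the first forward pass; the seed attribute is assumed to be attached to the layer by NeuralNetwork, see below):

class Dense(Layer):
    """Fully connected layer: weight multiply, bias add, then an activation."""

    def __init__(self, neurons: int, activation: Operation = Sigmoid()):
        super().__init__(neurons)
        self.activation = activation

    def _setup_layer(self, input_: ndarray) -> None:
        # "seed" is expected to be set on the layer by NeuralNetwork (an assumption here).
        if getattr(self, "seed", None) is not None:
            np.random.seed(self.seed)
        # Weights: [num_input_features, neurons]; bias: [1, neurons].
        self.params = [np.random.randn(input_.shape[1], self.neurons),
                       np.random.randn(1, self.neurons)]
        self.operations = [WeightMultiply(self.params[0]),
                           BiasAdd(self.params[1]),
                           self.activation]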

Loss

class Loss(object):

  • forward(self, prediction, target):
    • check shape prediction, target
    • save prediction, target
    • loss_value = self._output()
    • return loss_value
  • backward():
    • self.input_grad = self._input_grad()
    • check shape prediction, self.input_grad
    • return self.input_grad
  • _output(): abstract
  • _input_grad(): abstract
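
As code:

class Loss(object):
    """Computes a scalar loss from a prediction and a target."""

    def forward(self, prediction: ndarray, target: ndarray) -> float:
        # Check shapes, store both tensors, and compute the scalar loss.
        assert prediction.shape == target.shape
        self.prediction = prediction
        self.target = target
        return self._output()

    def backward(self) -> ndarray:
        # Gradient of the loss with respect to the prediction.
        self.input_grad = self._input_grad()
        assert self.prediction.shape == self.input_grad.shape
        return self.input_grad

    def _output(self) -> float:
        raise NotImplementedError()

    def _input_grad(self) -> ndarray:
        raise NotImplementedError()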

class MeanSquaredError(Loss):

  • _output(self):
    • np.sum(np.power(self.prediction - self.target, 2)) / self.prediction.shape[0]
  • _input_grad(self):
    • return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]
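
As code:

class MeanSquaredError(Loss):
    """Mean squared error averaged over the batch."""

    def _output(self) -> float:
        return (np.sum(np.power(self.prediction - self.target, 2))
                / self.prediction.shape[0])

    def _input_grad(self) -> ndarray:
        return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]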

Neural Network

class NeuralNetwork(object):

  • init(layers, loss, seed): store layers, loss, and seed
  • forward(x_batch):
    • x_out = x_batch
    • for layers:
      • x_out = layer.forward(x_out)
    • return x_out
  • backward(loss_grad):
    • grad = loss_grad
    • for reversed(self.layers):
      • grad = layer.backward(grad)
  • train_batch(x_batch, y_batch):
    • prediction = self.forward(x_batch)
    • loss = self.loss.forward(prediction, y_batch)
    • self.backward(self.loss.backward())
    • return loss
  • params(): return each layer's params
  • param_grads(): return each layer's param_grads
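
A sketch of NeuralNetwork plus a small usage example, assuming the Layer and Loss sketches above (the seed is passed down to each layer so weight initialization is reproducible; the shapes in the example are arbitrary):

class NeuralNetwork(object):
    """A list of Layers plus a Loss."""

    def __init__(self, layers: List[Layer], loss: Loss, seed: int = 1):
        self.layers = layers
        self.loss = loss
        self.seed = seed
        if seed:
            # Attach the seed to each layer (used in Dense._setup_layer).
            for layer in self.layers:
                setattr(layer, "seed", self.seed)

    def forward(self, x_batch: ndarray) -> ndarray:
        x_out = x_batch
        for layer in self.layers:
            x_out = layer.forward(x_out)
        return x_out

    def backward(self, loss_grad: ndarray) -> None:
        grad = loss_grad
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

    def train_batch(self, x_batch: ndarray, y_batch: ndarray) -> float:
        # Forward pass, loss, then backward pass through loss and layers.
        prediction = self.forward(x_batch)
        loss = self.loss.forward(prediction, y_batch)
        self.backward(self.loss.backward())
        return loss

    def params(self):
        for layer in self.layers:
            yield from layer.params

    def param_grads(self):
        for layer in self.layers:
            yield from layer.param_grads


# Usage example: a small network trained on one random batch.
nn = NeuralNetwork(layers=[Dense(neurons=13, activation=Sigmoid()),
                           Dense(neurons=1, activation=Sigmoid())],
                   loss=MeanSquaredError(),
                   seed=1)
x_batch = np.random.randn(32, 10)
y_batch = np.random.randn(32, 1)
print(nn.train_batch(x_batch, y_batch))   # scalar loss for this batch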

Trainer and Optimizer
