
I've created a binary classification model from scratch, just to understand the intuition behind it.

However, when I compare my implementation to a TensorFlow/PyTorch model with the same parameters and configuration, my model needs about 3,000 epochs to reach results that the TensorFlow/PyTorch model reaches in roughly 300 epochs.

I also noticed that my model computes a very small gradient in every epoch, whereas TensorFlow/PyTorch computes a much larger one. For reference, a sketch of the comparison model I used is below.
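(This is only a rough sketch of the Keras side of the comparison, written from memory; the layer sizes, plain SGD optimizer, and full-batch training mirror my NumPy code below, but treat the exact calls as illustrative rather than my actual script.)

import tensorflow as tf

# Roughly equivalent Keras model: 2 -> 8 -> 8 -> 1, plain SGD, binary cross-entropy
model = tf.keras.Sequential([
    tf.keras.Input(shape=(2,)),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# full-batch training to match my NumPy loop (one gradient step per epoch)
model.fit(x, y, epochs=300, batch_size=1000, verbose=0)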

  • Is there any way to optimize the gradient calculation in the backward function so the model learns faster?
  • Is there anything else that could be optimized or simplified, and how could it be implemented? (One candidate I have seen mentioned is sketched right after this list.)
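The simplification I have seen mentioned, but have not applied, is folding the sigmoid output activation and the binary cross-entropy derivative into a single term, so the output-layer error is just y_pred - y. A minimal sketch of what that would look like with my layer dictionaries (the name backward_fused is made up for this sketch):

def backward_fused(y: np.ndarray, y_pred: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ]) -> None:
    n = y.shape[0]
    # output layer: with a sigmoid output and BCE loss, dL/dz simplifies to (y_pred - y)
    dZ = y_pred - y
    for i, layer in enumerate(reversed(layers)):
        if i > 0:
            # hidden layers: usual chain rule, activation'(z) times the upstream gradient
            dZ = layer['prime'](layer['z']) * dA
        layer['db'] = dZ.sum(axis=0, keepdims=True) / n
        layer['dw'] = np.dot(layer['x'].T, dZ) / n
        dA = np.dot(dZ, layer['w'].T)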

Below is my backward function, responsible for calculating the gradient:

def backward(
              y: np.ndarray,
              y_pred: np.ndarray,
              layers: List[ Dict[ str, np.ndarray ] ]
            ) -> None:

    loss: np.ndarray = binary_cross_entropy_loss_prime(y, y_pred)

    for layer in reversed(layers):
        dZ: np.ndarray = layer['prime'](layer['z']) * loss
        layer['db'] = (dZ * np.ones_like(layer['b'])).sum(axis = 0, keepdims=True) / loss.shape[0]
        dU: np.ndarray = dZ * np.ones_like(layer['u'])
        layer['dw'] = np.dot(layer['x'].T, dU) / loss.shape[0]
        loss = np.dot(dU, layer['w'].T)
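For reference, this is the chain rule I believe the loop above implements for a single layer with pre-activation $z = xW + b$, activation $a = f(z)$, and batch size $N$:

$$
\frac{\partial L}{\partial z} = f'(z) \odot \frac{\partial L}{\partial a},\qquad
\frac{\partial L}{\partial W} = \frac{1}{N}\, x^{\mathsf T} \frac{\partial L}{\partial z},\qquad
\frac{\partial L}{\partial b} = \frac{1}{N} \sum_{i=1}^{N} \frac{\partial L}{\partial z_i},\qquad
\frac{\partial L}{\partial x} = \frac{\partial L}{\partial z}\, W^{\mathsf T}
$$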

and also the full code with data types for easier understanding:


"""# Dataset and libraries"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from typing import List, Dict

from sklearn.datasets import make_moons
x, y = make_moons(n_samples = 1000, noise = 0.2, random_state = 100)

# expand y second dim
# before expand_dims -> y.shape = (1000, )
# after expand_dims -> y.shape = (1000, 1)
y = np.expand_dims(y, 1)

# final shapes: X -> (1000, 2), Y -> (1000, 1)

"""# Activations functions"""

def sigma(x: np.ndarray) -> np.ndarray:
  return 1 / (1 + np.exp(-x))

def sigma_prime(x: np.ndarray) -> np.ndarray:
  e = np.exp(x)
  return e / (e + 1) ** 2

def relu(x: np.ndarray) -> np.ndarray:
  return np.maximum(0, x)

def relu_prime(x: np.ndarray) -> np.ndarray:
  return np.where(x <= 0, 0, 1)

"""# Dense layers"""

dense_layers = [
      { 'w': np.random.rand(2, 8) * 0.1, 'b': np.random.rand(1, 8) * 0.1, 'activ': relu, 'prime': relu_prime },
      { 'w': np.random.rand(8, 8) * 0.1, 'b': np.random.rand(1, 8) * 0.1, 'activ': relu, 'prime': relu_prime },
      { 'w': np.random.rand(8, 1) * 0.1, 'b': np.random.rand(1, 1) * 0.1, 'activ': sigma, 'prime': sigma_prime }
]

"""# Losses and metrics """

def binary_cross_entropy_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
  number_of_rows = y_true.shape[0] # 1000 rows
  number_of_cols = y_true.shape[1] # 1 cols
  return np.sum(-(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))) / (number_of_rows * number_of_cols)

def binary_cross_entropy_loss_prime(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
  return (1 - y_true) / (1 - y_pred) - y_true / y_pred

def accuracy(y_true: np.ndarray, y_pred: np.ndarray, threshold: float = 0.5) -> float:
  return (np.where(y_pred <= threshold, 0, 1) == y_true).mean()

"""# Forward propagation"""

def forward(x: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ]) -> np.ndarray:
  for layer in layers:
    layer['x'] = x
    layer['u'] = np.dot(x, layer['w'])
    layer['z'] = layer['u'] + layer['b']
    layer['a'] = layer['activ'](layer['z'])
    x = layer['a']
  return x

"""# Backward propagation"""

def backward(y: np.ndarray, y_pred: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ]) -> None:
    loss: np.ndarray = binary_cross_entropy_loss_prime(y, y_pred)

    for layer in reversed(layers):
        dZ: np.ndarray = layer['prime'](layer['z']) * loss
        layer['db'] = (dZ * np.ones_like(layer['b'])).sum(axis = 0, keepdims=True) / loss.shape[0]
        dU: np.ndarray = dZ * np.ones_like(layer['u'])
        layer['dw'] = np.dot(layer['x'].T, dU) / loss.shape[0]
        loss = np.dot(dU, layer['w'].T)

"""# Update weights and biases (SGD optimizer)"""

def update(layers: List[ Dict[ str, np.ndarray ] ], learning_rate: float) -> None:
    for layer in layers:
        layer['w'] -= learning_rate * layer['dw']
        layer['b'] -= learning_rate * layer['db']

"""# Train model"""

def train(x: np.ndarray, y: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ], epochs: int, learning_rate: float) -> None:
    
    for epoch in range(epochs):
    
        # Forward propagation
        y_hat = forward(x, layers)

        # Backward propagation
        backward(y, y_hat, layers)

        # Update layers
        update(layers, learning_rate)

        # show progress
        if epoch % 100 == 0:
            print('Iteration nr: ', epoch, ', loss: ', binary_cross_entropy_loss(y, y_hat), ', accuracy: ', accuracy(y, y_hat))


train(x, y, dense_layers, 3001, 0.01)
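
To check that the analytic gradients from backward are at least consistent, a finite-difference comparison like the one below can be run against any single weight (numerical_grad is a throwaway helper name for this sketch, not part of the model above):

def numerical_grad(x: np.ndarray, y: np.ndarray, layers: List[ Dict[ str, np.ndarray ] ],
                   layer_idx: int, i: int, j: int, eps: float = 1e-6) -> float:
    # central difference of the loss with respect to a single weight w[i, j]
    w = layers[layer_idx]['w']
    original = w[i, j]
    w[i, j] = original + eps
    loss_plus = binary_cross_entropy_loss(y, forward(x, layers))
    w[i, j] = original - eps
    loss_minus = binary_cross_entropy_loss(y, forward(x, layers))
    w[i, j] = original
    return (loss_plus - loss_minus) / (2 * eps)

y_hat = forward(x, dense_layers)
backward(y, y_hat, dense_layers)
print('analytic dw[0, 0] :', dense_layers[0]['dw'][0, 0])
print('numerical dw[0, 0]:', numerical_grad(x, y, dense_layers, 0, 0, 0))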