Source code for olpy.classifiers.ogd

import numpy as np
import math

from sklearn.metrics import (zero_one_loss, log_loss, mean_squared_error,
                             hinge_loss)
from . __base import OnlineLearningModel


class OGD(OnlineLearningModel):
    """Online Gradient Descent model.

    Zinkevich, M., Online convex programming and generalized infinitesimal
    gradient ascent, Proc. 20th Int. Conf. Machine Learning (ICML),
    928-936, 2003.

    Attributes:
        C (:obj:`float`, optional): OGD's parameter; the step size used at
            update ``t`` is ``C / sqrt(t)``. Defaults to 1.
        loss_function (callable, optional): Loss function used to evaluate
            the need to update the model. Defaults to
            sklearn.metrics.zero_one_loss
        num_iterations (:obj:`int`, optional): Number of iterations to run
            the training for. Defaults to 1.
        random_state (:obj:`int`, optional): The random seed to use with
            the pseudo-random generator. Defaults to `None`.
        positive_label (:obj:`int`, optional): The number in the output
            field that represents the positive label. The value passed
            should be different than -1. Defaults to 1.
        class_weight (:obj:`dict`, optional): Represents the relative
            weight of the labels in the data. Useful for imbalanced
            classification tasks.

    Raises:
        AssertionError: if `positive_label` is equal to -1.
            (NOTE(review): this check is not performed in this class;
            presumably it lives in the base class -- confirm.)
    """

    def __init__(
        self,
        C=1,
        loss_function=zero_one_loss,
        num_iterations=1,
        random_state=None,
        positive_label=1,
        class_weight=None
    ):
        super().__init__(
            num_iterations=num_iterations,
            random_state=random_state,
            positive_label=positive_label,
            class_weight=class_weight
        )
        self._C = C
        self._loss_function = loss_function
        # Start the step counter at 1 (the same value `_setup` assigns) so
        # that `C / sqrt(t)` in `_update` can never divide by zero, even if
        # `_update` is reached before `_setup`.
        self._t = 1

    @staticmethod
    def _logistic_factor(margin):
        """Return ``1 / (1 + exp(margin))`` without overflowing.

        ``math.exp`` raises ``OverflowError`` for large positive arguments,
        so for positive margins the algebraically identical form
        ``exp(-margin) / (1 + exp(-margin))`` is evaluated instead.
        """
        if margin > 0:
            decay = math.exp(-margin)
            return decay / (1.0 + decay)
        return 1.0 / (1.0 + math.exp(margin))

    def _update(self, x: np.ndarray, y: int):
        """Updates the weight vector in case a mistake occured.

        When presented with a data point, this method evaluates the loss
        and, when it is strictly positive, takes a gradient step with step
        size ``C / sqrt(t)``. The step counter ``t`` is incremented on every
        call, whether or not the weights changed.

        Args:
            x (:obj:`np.ndarray` or `list`): An array representing one
                single data point. ``self.weights.dot(x)`` has to produce a
                scalar for the update rules below to work.
                (NOTE(review): earlier documentation stated the array
                needs to be 2D -- confirm against the caller.)
            y (`int`): Output value for the data point. It is used as a
                signed multiplier and as a key into ``self.class_weight_``;
                expected to be 1 or -1.

        Returns:
            None
        """
        decision = self.weights.dot(x)
        prediction = np.sign(decision)
        step = self._C / math.sqrt(self._t)

        # Changed the parameters to call the loss function as it seems they
        # expect at least two values
        if self._loss_function is hinge_loss:
            loss = self._loss_function([y, -y], [decision, -decision])
        else:
            # NOTE(review): sklearn's log_loss may reject a single-sample
            # call holding only one label unless `labels` is supplied, and
            # it expects probabilities rather than sign predictions --
            # confirm that the log-loss path is usable as written.
            loss = self._loss_function([y], [prediction])

        if loss > 0:
            if self._loss_function is log_loss:
                self.weights = (self.weights + step * y * x
                                * self._logistic_factor(y * decision)
                                * self.class_weight_[y])
            elif self._loss_function is mean_squared_error:
                self.weights = (self.weights
                                - step * ((decision - y) * x
                                          * self.class_weight_[y]))
            else:
                # NOTE(review): unlike the two branches above, this update
                # does not apply `class_weight_` -- confirm this is intended.
                self.weights = self.weights + step * y * x

        self._t += 1

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        This function is for use with hyper-parameter tuning utilities such
        as `GridSearchCV`_.

        Args:
            deep(:obj:`bool`, optional): If True, will return the parameters
                for this estimator and contained sub-objects that are
                estimators. Defaults to True. The flag is accepted for API
                compatibility; it is not forwarded to the parent class.

        Returns:
            dict: The parent's parameters extended with ``'C'`` and
            ``'loss_function'``.

        .. _GridSearchCV: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
        """
        params = super().get_params()
        params['C'] = self._C
        params['loss_function'] = self._loss_function
        return params

    def _setup(self, X):
        """Initializes the values for the model's parameters.

        Resets the parameter `t` (the step counter used to decay the step
        size) to 1. The data in argument is not inspected.

        Args:
            X (:obj:`numpy.ndarray`): Input data with n rows and m columns

        Returns:
            None
        """
        self._t = 1