Source code for sklego.meta.thresholder

import numpy as np
from sklearn import clone
from sklearn.base import (
    BaseEstimator,
    ClassifierMixin,
)
from sklearn.utils.validation import (
    check_is_fitted
)
from sklearn.exceptions import NotFittedError

from sklego.base import ProbabilisticClassifier


class Thresholder(BaseEstimator, ClassifierMixin):
    """
    Takes a two class estimator and moves the threshold. This way you might
    design the algorithm to only accept a certain class if the probability
    for it is larger than, say, 90% instead of 50%.

    :param model: the model to threshold
    :param threshold: the actual threshold to use
    :param refit: if True, a clone of the model is always retrained on `fit`, even
        if the model is already fitted. If False, the given model object itself is
        used and it is only fitted when it isn't fitted yet.
    """

    def __init__(self, model, threshold: float, refit=False):
        self.model = model
        self.threshold = threshold
        self.refit = refit

    def _handle_refit(self, X, y, sample_weight=None):
        """Only refit when we need to, unless refit=True is present."""
        # Forward `sample_weight` only when the caller supplied it, so that
        # estimators whose `fit` has no `sample_weight` keyword still work
        # when no weights are requested.
        fit_kwargs = {} if sample_weight is None else {"sample_weight": sample_weight}
        if self.refit:
            self.estimator_ = clone(self.model)
            self.estimator_.fit(X, y, **fit_kwargs)
        else:
            try:
                # Probe with the first row; a NotFittedError here means the
                # wrapped model still has to be trained.
                _ = self.estimator_.predict(X[:1])
            except NotFittedError:
                self.estimator_.fit(X, y, **fit_kwargs)

    def fit(self, X, y, sample_weight=None):
        """
        Fit the data.

        :param X: array-like, shape=(n_samples, n_columns,) training data.
        :param y: array-like, shape=(n_samples,) training data.
        :param sample_weight: array-like, shape=(n_samples) Individual weights for each sample.
        :return: Returns an instance of self.
        :raises ValueError: if the model is not a ProbabilisticClassifier or if the
            fitted model does not have exactly two classes.
        """
        if not isinstance(self.model, ProbabilisticClassifier):
            raise ValueError(
                "The Thresholder meta model only works on classification models with .predict_proba."
            )
        self.estimator_ = self.model
        self._handle_refit(X, y, sample_weight)
        fitted_classes = self.estimator_.classes_
        if len(fitted_classes) != 2:
            raise ValueError(
                "The Thresholder meta model only works on models with two classes."
            )
        # Only expose `classes_` once the two-class requirement is verified, so
        # a rejected fit does not leave the wrapper looking fitted.
        self.classes_ = fitted_classes
        return self

    def predict(self, X):
        """
        Predict new data.

        A sample gets `classes_[1]` when the second column of the wrapped
        estimator's `predict_proba` output is strictly larger than the
        threshold, and `classes_[0]` otherwise.

        :param X: array-like, shape=(n_samples, n_columns,) data to predict.
        :return: array, shape=(n_samples,) the predicted data
        """
        check_is_fitted(self, ["classes_", "estimator_"])
        predicate = self.estimator_.predict_proba(X)[:, 1] > self.threshold
        return np.where(predicate, self.classes_[1], self.classes_[0])

    def predict_proba(self, X):
        """
        Return the output of the wrapped estimator's `predict_proba`, unchanged.

        :param X: array-like, shape=(n_samples, n_columns,) data to predict.
        :return: whatever the wrapped estimator's `predict_proba` returns for X
        """
        check_is_fitted(self, ["classes_", "estimator_"])
        return self.estimator_.predict_proba(X)

    def score(self, X, y):
        """
        Return the result of the wrapped estimator's `score` method.

        NOTE: this delegates to the wrapped estimator, so the threshold of this
        meta model is not applied here.

        :param X: array-like, shape=(n_samples, n_columns,) data to score on.
        :param y: array-like, shape=(n_samples,) true labels.
        :return: the value returned by the wrapped estimator's `score`
        """
        check_is_fitted(self, ["classes_", "estimator_"])
        return self.estimator_.score(X, y)