Source code for sklego.dummy

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils import check_X_y
from sklearn.utils.validation import (
    check_is_fitted,
    check_array,
    check_random_state,
    FLOAT_DTYPES,
)


class RandomRegressor(BaseEstimator, RegressorMixin):
    """
    A RandomRegressor makes random predictions only based on the "y"
    value that is seen. The goal is that such a regressor can be used
    for benchmarking. It should be easily beatable.

    :param str strategy: how we want to select random values, can be "uniform" or "normal"
    :param random_state: value passed to ``check_random_state`` on every
        ``predict`` call (``None``, an int seed or a ``RandomState``), default: None
    """

    # Kept on the class instead of being assigned in ``__init__``: scikit-learn
    # expects ``__init__`` to store only its own parameters. Reading
    # ``self.allowed_strategies`` on an instance keeps working as before.
    allowed_strategies = ("uniform", "normal")

    def __init__(self, strategy="uniform", random_state=None):
        self.random_state = random_state
        self.strategy = strategy

    def fit(self, X: np.ndarray, y: np.ndarray) -> "RandomRegressor":
        """
        Fit the model using X, y as training data.

        Only the number of columns of X and the minimum, maximum, mean and
        standard deviation of y are stored.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples,) training data.
        :return: Returns an instance of self.
        :raises ValueError: if ``strategy`` is not in ``allowed_strategies``.
        """
        if self.strategy not in self.allowed_strategies:
            raise ValueError(
                f"strategy {self.strategy} is not in {self.allowed_strategies}"
            )
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        # Remembered so predict can reject inputs with another column count.
        self.dim_ = X.shape[1]

        # Bounds for the "uniform" strategy.
        self.min_ = np.min(y)
        self.max_ = np.max(y)
        # Location and scale for the "normal" strategy.
        self.mu_ = np.mean(y)
        self.sigma_ = np.std(y)

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict new data by making random guesses.

        The values of X are ignored; only its number of rows decides how many
        values are drawn. The random state is rebuilt from ``random_state`` on
        every call.

        :param X: array-like, shape=(n_samples, n_columns) data to predict for.
        :return: array, shape=(n_samples,) the predicted data
        :raises ValueError: if X does not have the number of columns seen in
            ``fit``, or if ``strategy`` is not in ``allowed_strategies``.
        """
        check_is_fitted(self, ["dim_", "min_", "max_", "mu_", "sigma_"])
        rs = check_random_state(self.random_state)

        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        if X.shape[1] != self.dim_:
            raise ValueError(
                f"Unexpected input dimension {X.shape[1]}, expected {self.dim_}"
            )

        n_samples = X.shape[0]
        if self.strategy == "normal":
            return rs.normal(self.mu_, self.sigma_, n_samples)
        if self.strategy == "uniform":
            return rs.uniform(self.min_, self.max_, n_samples)

        # ``strategy`` can be reassigned after fitting; fail loudly instead of
        # silently returning None.
        raise ValueError(
            f"strategy {self.strategy} is not in {self.allowed_strategies}"
        )