Source code for sklego.dummy

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils import check_X_y
from sklearn.utils.validation import (
    check_is_fitted,
    check_array,
    check_random_state,
    FLOAT_DTYPES,
)


class RandomRegressor(BaseEstimator, RegressorMixin):
    """
    A RandomRegressor makes random predictions only based on the "y" value that is seen.
    The goal is that such a regressor can be used for benchmarking. It should be easily
    beatable.

    :param str strategy: how we want to select random values, can be "uniform" or "normal"
    :param random_state: seed (or random state object) handed to
        ``sklearn.utils.validation.check_random_state`` on every call to ``predict``,
        default: None

    After ``fit`` the following attributes are available:

    - ``dim_``: number of columns of the training ``X``
    - ``min_`` / ``max_``: smallest and largest value of the training ``y``
    - ``mu_`` / ``sigma_``: mean and standard deviation (``np.std``) of the training ``y``
    """

    # Kept at class level rather than assigned in ``__init__``: scikit-learn
    # expects ``__init__`` to store only its own parameters. Instances still
    # expose it as ``self.allowed_strategies``.
    allowed_strategies = ("uniform", "normal")

    def __init__(self, strategy="uniform", random_state=None):
        self.random_state = random_state
        self.strategy = strategy

    def fit(self, X: np.ndarray, y: np.ndarray) -> "RandomRegressor":
        """
        Fit the model using X, y as training data.

        Only summary statistics of ``y`` and the number of columns of ``X`` are stored.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples,) training data.
        :return: Returns an instance of self.
        :raises ValueError: if ``strategy`` is not one of ``allowed_strategies``.
        """
        if self.strategy not in self.allowed_strategies:
            raise ValueError(
                f"strategy {self.strategy} is not in {self.allowed_strategies}"
            )
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        # Remembered so that predict can reject inputs with a different width.
        self.dim_ = X.shape[1]

        # Bounds for the "uniform" strategy.
        self.min_ = np.min(y)
        self.max_ = np.max(y)
        # Location and scale for the "normal" strategy.
        self.mu_ = np.mean(y)
        self.sigma_ = np.std(y)

        return self

    def predict(self, X):
        """
        Predict new data by making random guesses.

        The values in ``X`` are ignored; only its number of rows and columns are used.

        :param X: array-like, shape=(n_samples, n_columns) data to predict for.
        :return: array, shape=(n_samples,) the predicted data
        :raises ValueError: if the number of columns of ``X`` differs from the one seen
            during ``fit``, or if ``strategy`` is not one of ``allowed_strategies``.
        """
        # The random state is rebuilt from ``random_state`` on every call.
        rs = check_random_state(self.random_state)
        check_is_fitted(self, ["dim_", "min_", "max_", "mu_", "sigma_"])

        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        if X.shape[1] != self.dim_:
            raise ValueError(
                f"Unexpected input dimension {X.shape[1]}, expected {self.dim_}"
            )

        n_samples = X.shape[0]
        if self.strategy == "normal":
            return rs.normal(self.mu_, self.sigma_, n_samples)
        if self.strategy == "uniform":
            return rs.uniform(self.min_, self.max_, n_samples)

        # ``strategy`` may have been changed after fitting; fail loudly instead
        # of implicitly returning None.
        raise ValueError(
            f"strategy {self.strategy} is not in {self.allowed_strategies}"
        )