Source code for sklego.preprocessing.dictmapper

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted


class DictMapper(TransformerMixin, BaseEstimator):
    """
    Map the values of columns according to the input dictionary, falling back
    to the default if the key is not present in the dictionary.

    The mapped result is built with an integer output type, so the values of
    ``mapper`` and ``default`` are expected to be convertible to integers.

    :param mapper: The dictionary containing the mapping of the values
    :param default: The value to fall back to if the value is not in the mapper
    """

    def __init__(self, mapper, default):
        self.mapper = mapper
        self.default = default

    def _validated(self, X):
        """
        Run the input validation shared by ``fit`` and ``transform``.

        :type X: pandas.DataFrame or numpy.ndarray
        :param X: The column(s) to validate.
        :rtype: numpy.ndarray
        :returns: The array returned by ``check_array`` for ``X``
        """
        return check_array(
            X,
            copy=True,
            estimator=self,
            force_all_finite=True,
            dtype=None,
            ensure_2d=True,
        )

    def fit(self, X, y=None):
        """
        Validates the input and records its number of columns in ``dim_``.

        :type X: pandas.DataFrame or numpy.ndarray
        :param X: The column(s) from which the mapping will be applied
        :param y: Ignored.
        :rtype: sklego.preprocessing.DictMapper
        :returns: The fitted object.
        """
        self.dim_ = self._validated(X).shape[1]
        return self

    def transform(self, X):
        """
        Performs the mapping on the column(s) of ``X``.

        Every element is looked up with ``mapper.get(element, default)``.

        :type X: pandas.DataFrame or numpy.ndarray
        :param X: The column(s) for which the mapping will be applied.
        :rtype: numpy.ndarray
        :returns: ``X`` values with the mapping applied
        :raises: ``ValueError`` if the number of columns from ``X`` differs
            from the number of columns when fitting
        """
        check_is_fitted(self, ["dim_"])
        checked = self._validated(X)

        n_columns = checked.shape[1]
        if n_columns != self.dim_:
            raise ValueError(
                f"number of columns {n_columns} does not match fit size {self.dim_}"
            )

        # otypes=[int] fixes the output type of the element-wise lookup.
        lookup = np.vectorize(self.mapper.get, otypes=[int])
        return lookup(checked, self.default)