Source code for sklego.preprocessing.patsytransformer

import numpy as np
from patsy import dmatrix, build_design_matrices, PatsyError
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted


class PatsyTransformer(TransformerMixin, BaseEstimator):
    """
    Select and transform columns of a dataframe with a patsy formula.

    The patsy transformer offers a method to select the right columns from a
    dataframe as well as a DSL for transformations. It is inspired by R
    formulas. This can be useful as a first step in a pipeline.

    :param formula: a patsy-compatible formula
    :param return_type: either "matrix" or "dataframe", passed on to patsy
    """

    def __init__(self, formula, return_type="matrix"):
        self.formula = formula
        self.return_type = return_type

    def fit(self, X, y=None):
        """
        Fit the estimator by evaluating the formula on ``X``.

        The design matrix is built once with ``patsy.dmatrix`` and its
        ``design_info`` is stored on ``self.design_info_`` so that
        ``transform`` can rebuild the same design for new data.

        :param X: data handed to ``patsy.dmatrix`` together with the formula
        :param y: not used by this method
        :return: ``self``
        :raises AssertionError: if the design matrix does not have the same
            number of rows as ``X``
        """
        X_ = dmatrix(self.formula, X, NA_action="raise", return_type=self.return_type)

        # Check the number of observations hasn't changed. This ought not to
        # be necessary given NA_action="raise" above, but just to be safe.
        # An explicit raise is used instead of an ``assert`` statement so the
        # check is not stripped when Python runs with -O; the exception type
        # is kept as AssertionError so existing callers see no difference.
        # np.shape reads the ``.shape`` attribute when there is one, which
        # avoids copying the data just to count rows.
        n_rows_out = np.shape(X_)[0]
        n_rows_in = np.shape(X)[0]
        if n_rows_out != n_rows_in:
            raise AssertionError(
                "The design matrix has {} rows but X has {} rows.".format(
                    n_rows_out, n_rows_in
                )
            )

        self.design_info_ = X_.design_info
        return self

    def transform(self, X):
        """
        Apply the formula to the matrix/dataframe ``X``.

        The design stored by ``fit`` in ``self.design_info_`` is reused, so
        ``fit`` has to be called first.

        :param X: data handed to ``patsy.build_design_matrices``
        :return: a ``patsy.DesignMatrix`` if ``return_type="matrix"`` (the
            default), a ``pandas.DataFrame`` if ``return_type="dataframe"``
        :raises RuntimeError: if patsy raises a ``PatsyError`` while building
            the design matrix; the original error is chained as the cause
        """
        check_is_fitted(self, "design_info_")
        try:
            # build_design_matrices takes and returns a list of designs;
            # only one design is passed in, so only the first is returned.
            return build_design_matrices(
                [self.design_info_], X, return_type=self.return_type
            )[0]
        except PatsyError as e:
            # Same exception type as before, now with a message so the
            # failure is understandable without inspecting __cause__.
            raise RuntimeError(
                f"patsy could not build the design matrix for X: {e}"
            ) from e