import numpy as np
from scipy.special import softmax
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.mixture import BayesianGaussianMixture
from sklearn.utils import check_X_y
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_is_fitted, check_array, FLOAT_DTYPES
[docs]class BayesianGMMClassifier(BaseEstimator, ClassifierMixin):
def __init__(
self,
n_components=1,
covariance_type="full",
tol=0.001,
reg_covar=1e-06,
max_iter=100,
n_init=1,
init_params="kmeans",
weight_concentration_prior_type="dirichlet_process",
weight_concentration_prior=None,
mean_precision_prior=None,
mean_prior=None,
degrees_of_freedom_prior=None,
covariance_prior=None,
random_state=None,
warm_start=False,
verbose=0,
verbose_interval=10,
):
"""
The BayesianGMMClassifier trains a Gaussian Mixture Model for each class in y on a dataset X. Once
a density is trained for each class we can evaluate the likelihood scores to see which class
is more likely. All parameters of the model are an exact copy of the parameters in scikit-learn.
"""
self.n_components = n_components
self.covariance_type = covariance_type
self.tol = tol
self.reg_covar = reg_covar
self.max_iter = max_iter
self.n_init = n_init
self.init_params = init_params
self.weight_concentration_prior_type = weight_concentration_prior_type
self.weight_concentration_prior = weight_concentration_prior
self.mean_precision_prior = mean_precision_prior
self.mean_prior = mean_prior
self.degrees_of_freedom_prior = degrees_of_freedom_prior
self.covariance_prior = covariance_prior
self.random_state = random_state
self.warm_start = warm_start
self.verbose = verbose
self.verbose_interval = verbose_interval
[docs] def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
"""
Fit the model using X, y as training data.
:param X: array-like, shape=(n_columns, n_samples, ) training data.
:param y: array-like, shape=(n_samples, ) training data.
:return: Returns an instance of self.
"""
X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
if X.ndim == 1:
X = np.expand_dims(X, 1)
self.gmms_ = {}
self.classes_ = unique_labels(y)
for c in self.classes_:
subset_x, subset_y = X[y == c], y[y == c]
mixture = BayesianGaussianMixture(
n_components=self.n_components,
covariance_type=self.covariance_type,
tol=self.tol,
reg_covar=self.reg_covar,
max_iter=self.max_iter,
n_init=self.n_init,
init_params=self.init_params,
weight_concentration_prior_type=self.weight_concentration_prior_type,
weight_concentration_prior=self.weight_concentration_prior,
mean_precision_prior=self.mean_precision_prior,
mean_prior=self.mean_prior,
degrees_of_freedom_prior=self.degrees_of_freedom_prior,
covariance_prior=self.covariance_prior,
random_state=self.random_state,
warm_start=self.warm_start,
verbose=self.verbose,
verbose_interval=self.verbose_interval,
)
self.gmms_[c] = mixture.fit(subset_x, subset_y)
return self
[docs] def predict(self, X):
check_is_fitted(self, ["gmms_", "classes_"])
X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
return self.classes_[self.predict_proba(X).argmax(axis=1)]
[docs] def predict_proba(self, X):
X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
check_is_fitted(self, ["gmms_", "classes_"])
res = np.zeros((X.shape[0], self.classes_.shape[0]))
for idx, c in enumerate(self.classes_):
res[:, idx] = self.gmms_[c].score_samples(X)
return softmax(res, axis=1)