Module timexseries_clustering.data_clustering.models.gmm_cluster
Expand source code
import itertools
import json
import pkgutil
import logging
import os
import numpy as np
import pandas as pd
from timexseries_clustering.data_clustering.models.predictor import ModelResult, SingleResult
from timexseries_clustering.data_clustering.validation_performances import ValidationPerformance
from timexseries_clustering.data_clustering import ClustersModel
from timexseries_clustering.data_clustering.transformation import transformation_factory
from pandas import DataFrame
from sklearn import mixture
logging.getLogger('GaussianMixtureModel').setLevel(logging.WARNING)
log = logging.getLogger(__name__)
def GaussianMixtureModel(ingested_data: DataFrame, clustering_approach: str, distance_metric: str,
param_config: dict, transformation: str = None, n_clusters: int = 3)->ModelResult:
"""
Gaussian Mixture Clustering Model
Parameters
----------
clustering_approach : str
Clustering approach, e.g. "observation_based"
param_config : dict
TIMEX configuration dictionary, to pass to the just created model.
distance_metric : str, e.g. **
Distance/similarity measure type, e.g. "euclidean, dtw, softdtw" **
transformation : str, optional, default None
Optional `transformation` parameter to pass to the just created model.
n_clusters : int, optional, default 3
Optional `number of clusters` parameter to pass to the just created model.
Returns
-------
ModelResult
Model Result of the class specified in `model_class`, it contains the
results of the best clustering with the index of the cluster that each
time series belongs to. Contains also the model characteristics and the
centers of each cluster.
"""
seed=0
model_centers = []
model_characteristics = {}
try:
gamma = param_config["model_parameters"]["gamma"]
except KeyError:
gamma = 0.01
try:
pre_transformation = param_config["model_parameters"]["pre_transformation"]
except KeyError:
pre_transformation = "none"
X = ingested_data.copy().transpose()
gmm = mixture.GaussianMixture(n_components=n_clusters, covariance_type='full', reg_covar=5e-05, verbose=False, random_state=seed)
best_clusters = gmm.fit_predict(X.values)
model_centers = gmm.means_
model_centers_dataframe = pd.DataFrame(model_centers).T
model_centers_dataframe = model_centers_dataframe.set_index(ingested_data.index.date)
inverse_pre_transf = transformation_factory(pre_transformation)
model_centers_dataframe = inverse_pre_transf.inverse(model_centers_dataframe.copy())
model_characteristics["clustering_approach"] = clustering_approach
model_characteristics["model"] = "Gaussian Mixture Model"
model_characteristics["distance_metric"] = "Log-likelihood"
model_characteristics["n_clusters"] = n_clusters
model_characteristics["feature_transformation"] = transformation
model_characteristics["pre_transformation"] = pre_transformation
performance = ValidationPerformance()
performance.set_performance_stats(X.values, best_clusters)
single_result = SingleResult(model_characteristics, performance)
return ModelResult(best_clustering=best_clusters, results=[single_result],characteristics=model_characteristics,
cluster_centers=model_centers_dataframe)
Functions
def GaussianMixtureModel(ingested_data: pandas.core.frame.DataFrame, clustering_approach: str, distance_metric: str, param_config: dict, transformation: str = None, n_clusters: int = 3) ‑> ModelResult
-
Gaussian Mixture Clustering Model
Parameters
clustering_approach
:str
- Clustering approach, e.g. "observation_based"
param_config
:dict
- TIMEX configuration dictionary, to pass to the just created model.
distance_metric
:str, e.g. **
- Distance/similarity measure type, e.g. "euclidean, dtw, softdtw" **
transformation
:str
, optional, defaultNone
- Optional
transformation
parameter to pass to the just created model. n_clusters
:int
, optional, default3
- Optional
number of clusters
parameter to pass to the just created model.
Returns
ModelResult
- Model Result of the class specified in
model_class
, it contains the results of the best clustering with the index of the cluster that each time series belongs to. Contains also the model characteristics and the centers of each cluster.
Expand source code
def GaussianMixtureModel(ingested_data: DataFrame, clustering_approach: str, distance_metric: str, param_config: dict, transformation: str = None, n_clusters: int = 3)->ModelResult: """ Gaussian Mixture Clustering Model Parameters ---------- clustering_approach : str Clustering approach, e.g. "observation_based" param_config : dict TIMEX configuration dictionary, to pass to the just created model. distance_metric : str, e.g. ** Distance/similarity measure type, e.g. "euclidean, dtw, softdtw" ** transformation : str, optional, default None Optional `transformation` parameter to pass to the just created model. n_clusters : int, optional, default 3 Optional `number of clusters` parameter to pass to the just created model. Returns ------- ModelResult Model Result of the class specified in `model_class`, it contains the results of the best clustering with the index of the cluster that each time series belongs to. Contains also the model characteristics and the centers of each cluster. """ seed=0 model_centers = [] model_characteristics = {} try: gamma = param_config["model_parameters"]["gamma"] except KeyError: gamma = 0.01 try: pre_transformation = param_config["model_parameters"]["pre_transformation"] except KeyError: pre_transformation = "none" X = ingested_data.copy().transpose() gmm = mixture.GaussianMixture(n_components=n_clusters, covariance_type='full', reg_covar=5e-05, verbose=False, random_state=seed) best_clusters = gmm.fit_predict(X.values) model_centers = gmm.means_ model_centers_dataframe = pd.DataFrame(model_centers).T model_centers_dataframe = model_centers_dataframe.set_index(ingested_data.index.date) inverse_pre_transf = transformation_factory(pre_transformation) model_centers_dataframe = inverse_pre_transf.inverse(model_centers_dataframe.copy()) model_characteristics["clustering_approach"] = clustering_approach model_characteristics["model"] = "Gaussian Mixture Model" model_characteristics["distance_metric"] = "Log-likelihood" model_characteristics["n_clusters"] = n_clusters model_characteristics["feature_transformation"] = transformation model_characteristics["pre_transformation"] = pre_transformation performance = ValidationPerformance() performance.set_performance_stats(X.values, best_clusters) single_result = SingleResult(model_characteristics, performance) return ModelResult(best_clustering=best_clusters, results=[single_result],characteristics=model_characteristics, cluster_centers=model_centers_dataframe)