Source code for gammapy.utils.cluster

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Utilities for hierarchical/agglomerative clustering."""
import numpy as np
import scipy.cluster.hierarchy as sch

# Public API of this module (the names exported by a star-import).
__all__ = ["standard_scaler", "hierarchical_clustering"]


def standard_scaler(features):
    r"""Compute standardized features by removing the mean and scaling to unit variance.

    Calculated through:

    .. math::
        f_\text{scaled} = \frac{f-\text{mean}(f)}{\text{std}(f)} .

    Columns named ``obs_id`` and ``dataset_name`` are treated as identifiers
    and are copied unchanged. A column whose values are all identical has no
    spread to normalise by; it is mapped to zeros rather than divided by a
    zero standard deviation (which would fill the column with NaN).

    Parameters
    ----------
    features : `~astropy.table.Table`
        Table containing the features. It is not modified.

    Returns
    -------
    scaled_features : `~astropy.table.Table`
        Table containing the scaled features (dimensionless).
    """
    scaled_features = features.copy()
    for col in scaled_features.columns:
        if col in ("obs_id", "dataset_name"):
            continue

        data = scaled_features[col].data
        centered = data - data.mean()

        # Detect constant columns via min == max instead of std == 0: rounding
        # in the mean can leave a tiny non-zero std for values such as 0.1,
        # which would turn pure round-off noise into +/-1 after the division.
        if data.size and data.min() == data.max():
            scaled_features[col] = np.zeros_like(centered, dtype=float)
        else:
            scaled_features[col] = centered / data.std()
    return scaled_features
[docs]def hierarchical_clustering(features, linkage_kwargs=None, fcluster_kwargs=None): """Hierarchical clustering using given features. Parameters ---------- features : `~astropy.table.Table` Table containing the features. linkage_kwargs : dict, optional Arguments forwarded to `scipy.cluster.hierarchy.linkage`. Default is None, which uses method="ward" and metric="euclidean". fcluster_kwargs : dict, optional Arguments forwarded to `scipy.cluster.hierarchy.fcluster`. Default is None, which uses criterion="maxclust" and t=3. Returns ------- features : `~astropy.table.Table` Table containing the features and an extra column for the groups labels. """ features = features.copy() features_array = np.array( [ features[col].data for col in features.columns if col not in ["obs_id", "dataset_name"] ] ).T default_linkage_kwargs = dict(method="ward", metric="euclidean") if linkage_kwargs is not None: default_linkage_kwargs.update(linkage_kwargs) pairwise_distances = sch.distance.pdist(features_array) linkage = sch.linkage(pairwise_distances, **default_linkage_kwargs) default_fcluster_kwargs = dict(criterion="maxclust", t=3) if fcluster_kwargs is not None: default_fcluster_kwargs.update(fcluster_kwargs) labels = sch.fcluster(linkage, **default_fcluster_kwargs) features["labels"] = labels return features