# Licensed under a 3-clause BSD style license - see LICENSE.rst"""Utilities for hierarchical/agglomerative clustering."""importnumpyasnpimportscipy.cluster.hierarchyassch__all__=["standard_scaler","hierarchical_clustering"]
def standard_scaler(features):
    r"""Compute standardized features by removing the mean and scaling to unit variance.

    Calculated through:

    .. math::
        f_\text{scaled} = \frac{f-\text{mean}(f)}{\text{std}(f)} .

    Columns named ``obs_id`` and ``dataset_name`` are skipped and left
    unchanged. A column whose standard deviation is exactly zero (all values
    identical) is only centred, so it ends up as all zeros instead of NaN.

    Parameters
    ----------
    features : `~astropy.table.Table`
        Table containing the features.

    Returns
    -------
    scaled_features : `~astropy.table.Table`
        Table containing the scaled features (dimensionless). The input table
        is copied, not modified.
    """
    scaled_features = features.copy()

    for col in scaled_features.columns:
        if col in ["obs_id", "dataset_name"]:
            continue

        data = scaled_features[col].data
        centered = data - data.mean()
        spread = data.std()

        # Dividing a constant column by its zero spread would give 0/0 = NaN,
        # which later makes the distance matrix non-finite. Keep it centred.
        scaled_features[col] = centered / spread if spread > 0 else centered

    return scaled_features
def hierarchical_clustering(features, linkage_kwargs=None, fcluster_kwargs=None):
    """Hierarchical clustering using given features.

    Columns named ``obs_id`` and ``dataset_name`` are not used as features.
    Pairwise distances between rows are computed with the requested metric
    and then passed to `scipy.cluster.hierarchy.linkage`.

    Parameters
    ----------
    features : `~astropy.table.Table`
        Table containing the features.
    linkage_kwargs : dict, optional
        Arguments forwarded to `scipy.cluster.hierarchy.linkage`.
        Default is None, which uses method="ward" and metric="euclidean".
        The ``metric`` entry is applied when computing the pairwise
        distances. Note that SciPy documents the "ward", "centroid" and
        "median" methods as correctly defined only for Euclidean distances.
    fcluster_kwargs : dict, optional
        Arguments forwarded to `scipy.cluster.hierarchy.fcluster`.
        Default is None, which uses criterion="maxclust" and t=3.

    Returns
    -------
    features : `~astropy.table.Table`
        Copy of the input table with an extra ``labels`` column holding the
        group label of each row.
    """
    features = features.copy()

    feature_columns = [
        features[col].data
        for col in features.columns
        if col not in ["obs_id", "dataset_name"]
    ]
    # Transpose so that each row is one observation and each column a feature.
    features_array = np.array(feature_columns).T

    default_linkage_kwargs = dict(method="ward", metric="euclidean")
    if linkage_kwargs is not None:
        default_linkage_kwargs.update(linkage_kwargs)

    # `linkage` ignores its `metric` argument when it receives a condensed
    # distance matrix, so the requested metric must be applied here.
    pairwise_distances = sch.distance.pdist(
        features_array, metric=default_linkage_kwargs["metric"]
    )
    linkage = sch.linkage(pairwise_distances, **default_linkage_kwargs)

    default_fcluster_kwargs = dict(criterion="maxclust", t=3)
    if fcluster_kwargs is not None:
        default_fcluster_kwargs.update(fcluster_kwargs)

    labels = sch.fcluster(linkage, **default_fcluster_kwargs)

    features["labels"] = labels
    return features