Source code for dmae.initializers

# -*- coding: utf-8 -*-
"""
The :mod:`dmae.initializers` module implements some initializers for DMAE.
"""
# Author: Juan S. Lara <julara@unal.edu.co>
# License: MIT

import numpy as _np
import tensorflow as _tf
from tensorflow.keras.initializers import Initializer as _Initializer
from dmae import dissimilarities as _dissimilarities

class InitPlusPlus(_Initializer):
    """
    A tf.keras initializer based on K-Means++ that allows dissimilarities.

    Parameters
    ----------
    X: array-like, shape=(n_samples, n_features)
        Input data.
    n_clusters: int
        Number of clusters.
    dissimilarity: function, default: :mod:`dmae.dissimilarities.euclidean`
        A tensorflow function that computes a pairwise dissimilarity between
        a batch of points and the cluster's parameters.
    iters: int, default: 100
        Number of iterations to run the K-means++ initialization.
    """

    def __init__(self, X, n_clusters,
                 dissimilarity=_dissimilarities.euclidean, iters=100):
        self.__X = X
        self.__n_clusters = n_clusters
        self.__dissimilarity = dissimilarity
        self.__iters = iters

    def __call__(self, shape, dtype):
        """
        Selects `n_clusters` initial centroids from `X`.

        Starts from a random subset of `X` and, for `iters` rounds, draws a
        random candidate point and swaps it into the centroid set when one of
        the two replacement rules below is satisfied.

        Parameters
        ----------
        shape : tuple
            Expected parameter shape. It is not used by this method; the
            output is built from `X` and `n_clusters`.
        dtype : str
            Parameter's type.

        Returns
        -------
        init_vals : array-like, shape=(n_clusters, n_features)
            Matrix with the initial weights.
        """
        n_samples = self.__X.shape[0]

        # Random starting centroids: the first n_clusters rows of a shuffled
        # index. Integer-array indexing yields a copy, so X is never modified.
        idx = _np.arange(n_samples)
        _np.random.shuffle(idx)
        init_vals = self.__X[idx[:self.__n_clusters]]

        for _ in range(self.__iters):
            # Pairwise dissimilarities between the current centroids; the
            # diagonal is masked so a centroid is never its own neighbour.
            clus_sim = self.__dissimilarity(init_vals, init_vals).numpy()
            _np.fill_diagonal(clus_sim, _np.inf)

            # Draw one random sample as a (1, n_features) candidate.
            candidate = self.__X[_np.random.randint(n_samples)].reshape(1, -1)
            candidate_sims = self.__dissimilarity(
                candidate, init_vals
            ).numpy().flatten()
            closest_sim = candidate_sims.min()
            closest = candidate_sims.argmin()

            if closest_sim > clus_sim.min():
                # Rule 1: the candidate is farther from every centroid than
                # the two closest centroids are from each other. Replace the
                # member of that closest pair that is nearest to the candidate.
                replace_candidates_idx = _np.array(
                    _np.unravel_index(clus_sim.argmin(), clus_sim.shape)
                )
                replace_candidates = init_vals[replace_candidates_idx, :]
                pair_sims = self.__dissimilarity(
                    candidate, replace_candidates
                ).numpy().flatten()
                replace = _np.argmin(pair_sims)
                init_vals[replace_candidates_idx[replace]] = candidate
            else:
                # Rule 2: mask the closest centroid to find the second
                # closest. If the candidate is farther from it than the
                # closest centroid is from its own nearest neighbour, the
                # candidate takes the closest centroid's place.
                candidate_sims[candidate_sims.argmin()] = _np.inf
                second_closest = candidate_sims.argmin()
                if candidate_sims[second_closest] > clus_sim[closest].min():
                    init_vals[closest] = candidate

        return _tf.cast(init_vals, dtype)
class InitKMeans(_Initializer):
    """
    A tf.keras initializer that takes its values from the centroids of a
    sklearn KMeans model.

    Parameters
    ----------
    kmeans_model: :mod:`sklearn.cluster.KMeans`
        Pretrained KMeans model to initialize DMAE.
    """

    def __init__(self, kmeans_model):
        self.__kmeans = kmeans_model

    def __call__(self, shape, dtype):
        """
        Returns the model's `cluster_centers_` cast to the requested type.

        Parameters
        ----------
        shape : tuple
            Expected parameter shape. It is not used by this method.
        dtype : str
            Parameter's type.

        Returns
        -------
        init_vals : array-like, shape=(n_clusters, n_features)
            Matrix with the initial weights.
        """
        centroids = self.__kmeans.cluster_centers_
        return _tf.cast(centroids, dtype)
class InitIdentityCov(_Initializer):
    """
    A tf.keras initializer to assign identity matrices to the covariance
    parameters.

    Parameters
    ----------
    X: array-like, shape=(n_samples, n_features)
        Input data. Only its second dimension is read, to size the matrices.
    n_clusters: int
        Number of clusters.
    """

    def __init__(self, X, n_clusters):
        self.__X = X
        self.__n_clusters = n_clusters

    def __call__(self, shape, dtype):
        """
        Generates one identity matrix per cluster in the requested type.

        Parameters
        ----------
        shape : tuple
            Expected parameter shape. It is not used by this method; the
            output is sized from `X` and `n_clusters`.
        dtype : str
            Parameter's type.

        Returns
        -------
        init_vals : array-like, shape=(n_clusters, n_features, n_features)
            Batch of identity matrices with the initial weights.
        """
        identities = _tf.eye(
            self.__X.shape[1], batch_shape=[self.__n_clusters]
        )
        # Honour the requested dtype, as the other initializers in this
        # module do, rather than always returning tf.eye's default type.
        return _tf.cast(identities, dtype)
class InitKMeansCov(_Initializer):
    """
    A tf.keras initializer to compute covariance matrices from K-means.

    Parameters
    ----------
    kmeans_model: :mod:`sklearn.cluster.KMeans`
        Pretrained KMeans model to initialize DMAE.
    X: array-like, shape=(n_samples, n_features)
        Input data.
    n_clusters: int
        Number of clusters.
    """

    def __init__(self, kmeans_model, X, n_clusters):
        self.__kmeans_model = kmeans_model
        self.__X = X
        self.__n_clusters = n_clusters

    def __call__(self, shape, dtype):
        """
        Computes, for every cluster, the Cholesky factor of the inverse
        covariance of the points that the KMeans model assigns to it.

        Parameters
        ----------
        shape : tuple
            Expected parameter shape. It is not used by this method.
        dtype : str
            Parameter's type.

        Returns
        -------
        init_vals : array-like, shape=(n_clusters, n_features, n_features)
            Stacked Cholesky factors with the initial weights.
        """
        # NOTE(review): presumably every cluster needs enough assigned points
        # for its covariance to be invertible; this is not checked here.
        res = []
        preds = self.__kmeans_model.predict(self.__X)
        for i in range(self.__n_clusters):
            clus_points = self.__X[preds == i]
            # np.cov squeezes its result, so single-feature data yields a
            # 0-d array that np.linalg.inv rejects; restore the 2-d shape.
            cov = _np.atleast_2d(_np.cov(clus_points.T))
            factor = _np.linalg.cholesky(_np.linalg.inv(cov))
            res.append(_np.expand_dims(factor, axis=0))
        return _tf.cast(_np.concatenate(res, axis=0), dtype)