Source code for node_analysis

# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.

import numpy as np

from sklearn.cluster import SpectralCoclustering

from sympathy.api import node
from sympathy.api.nodeconfig import Port
from sympathy.api.nodeconfig import Ports
from sympathy.api.nodeconfig import Tag
from sympathy.api.nodeconfig import Tags
from sympathy.api.exceptions import SyDataError
from sympathy.api import table

from sylib.machinelearning.utility import table_to_array


class SpectralCoClusteringNode(node.Node):
    """Sympathy node that reorders a square table by spectral co-clustering.

    The input table is converted to a 2-D array that must have as many
    columns as rows.  Rows are grouped with scikit-learn's
    ``SpectralCoclustering`` and the same permutation is applied to both the
    rows and the columns of the output, so members of one cluster end up
    next to each other.  One small table per cluster is appended to the
    ``clusters`` output, listing the member indices and column names.
    """

    author = 'Mathias Broxvall'
    icon = 'spectral_coclustering.svg'
    description = ('Rearranges rows/columns so that cluster intra-connections '
                   'become adjacent and move towards the diagonal.')
    name = 'Spectral co-clustering'
    tags = Tags(Tag.MachineLearning.Unsupervised)
    nodeid = 'com.sympathyfordata.advancedmachinelearning' \
             '.spectral_co_clustering'

    parameters = node.parameters()
    parameters.set_boolean(
        'name', value=False, label='Create name column',
        description='Creates an additional column first in the data showing '
                    'the original column name for each corresponding row')
    parameters.set_boolean(
        'clust_col', value=False, label='Create cluster column',
        description='Creates an additional column first in the data showing '
                    'cluster ID that each row belongs to')
    parameters.set_boolean(
        'sub_clustering', value=True, label='Sub clustering',
        description='Performs one level of additional clustering to order '
                    'data within a cluster')
    parameters.set_integer(
        'clusters', value=3, label='Clusters',
        description='Number of clusters to look for')

    inputs = Ports([
        Port.Table('data', name='data'),
        # Port.Table('weights', name='weights', n=(0, 1, 0)),
    ])
    outputs = Ports([
        Port.Table('Table with results', name='result'),
        Port.Tables('List of columns in each cluster', name='clusters',
                    n=(0, 1, 0)),
    ])

    @staticmethod
    def _sub_cluster(data, indices):
        """Order the members of one cluster with a second co-clustering pass.

        Parameters
        ----------
        data : numpy.ndarray
            The full square data array.
        indices : numpy.ndarray
            Integer row/column indices of the members of one cluster.

        Returns
        -------
        tuple
            ``(ordered, labels)`` where ``ordered`` is an integer array with
            the entries of ``indices`` grouped by sub-cluster and ``labels``
            is a list with the sub-cluster id of each entry of ``ordered``.
        """
        n_subclusters = max(2, len(indices) // 2)
        # Square sub-matrix restricted to this cluster's rows and columns.
        sub_data = data[np.ix_(indices, indices)]
        subclustering = SpectralCoclustering(n_clusters=n_subclusters)
        subclustering.fit(sub_data)

        ordered = []
        labels = []
        for sub_cluster in range(n_subclusters):
            members = indices[subclustering.rows_[sub_cluster, :]]
            ordered.append(members)
            labels += [sub_cluster] * len(members)
        return np.concatenate(ordered), labels

    def execute(self, node_context):
        """Cluster the input table and write the reordered result.

        Raises
        ------
        SyDataError
            If the input table has no rows, or if the converted array does
            not have exactly as many columns as rows.
        """
        data_tbl = node_context.input['data']
        output = node_context.output['result']
        # NOTE(review): the 'clusters' port is declared with n=(0, 1, 0);
        # confirm this lookup behaves as intended when the port is absent.
        out_clusters = node_context.output['clusters']

        params = node_context.parameters
        create_name = params['name'].value
        create_cluster_col = params['clust_col'].value
        n_clusters = params['clusters'].value
        sub_clustering = params['sub_clustering'].value

        if data_tbl.number_of_rows() == 0:
            raise SyDataError("Empty table")
        data = table_to_array(data_tbl)
        if data.shape[1] != data.shape[0]:
            raise SyDataError('Input table must have exactly as many columns '
                              'as rows')

        col_names = [col.name for col in data_tbl.cols()]
        col_name_arr = np.array(col_names)

        clustering = SpectralCoclustering(n_clusters=n_clusters)
        clustering.fit(data)

        order = []        # final row/column permutation
        cluster_ids = []  # cluster id for each entry of ``order``
        all_rows = np.arange(data.shape[0])
        for cluster in range(n_clusters):
            indices = all_rows[clustering.rows_[cluster, :]]
            sub_clusters = None
            if sub_clustering:
                if len(indices) > 2:
                    indices, sub_clusters = self._sub_cluster(data, indices)
                else:
                    # Too few members to split further: one sub-cluster.
                    sub_clusters = [0] * len(indices)

            order.extend(indices)
            cluster_ids += [cluster] * len(indices)

            tbl = table.File()
            tbl.set_column_from_array("idx", indices)
            tbl.set_column_from_array("name", col_name_arr[indices])
            if sub_clustering:
                tbl.set_column_from_array("subcluster",
                                          np.array(sub_clusters))
            out_clusters.append(tbl)

        if create_name:
            output.set_column_from_array("name", col_name_arr[order])
        # Honour the 'clust_col' option; the flag used to be shadowed by the
        # list of cluster ids, so the column was written unconditionally.
        if create_cluster_col:
            output.set_column_from_array("cluster", np.array(cluster_ids))
        # Apply the same permutation to the columns and to the rows.
        for col in order:
            output.set_column_from_array(col_names[col], data[:, col][order])