# Source code for node_covariance

# This file is part of Sympathy for Data.
# Copyright (c) 2018, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.

import numpy as np
from sklearn.covariance import EmpiricalCovariance

from sympathy.api import node
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags
from sympathy.api import exceptions


class NodeCovarianceMatrix(node.Node):
    """
    Maximum likelihood covariance estimator.

    Stacks the columns of the input table side by side into a matrix, fits
    scikit-learn's ``EmpiricalCovariance`` to it and writes the resulting
    ``covariance_`` matrix to the output table, one output column per input
    column (same names, same order). When the ``name`` parameter is enabled
    an extra column called ``name`` is written first, holding the input
    column names so that each row can be identified.
    """

    author = 'Mathias Broxvall'
    icon = 'covariance.svg'
    description = 'Computes the co-variance matrix for a dataset'
    name = 'Covariance Matrix'
    tags = Tags(Tag.Analysis.Features)
    nodeid = 'com.sympathyfordata.advancedmachinelearning.covariance_matrix'

    # Node configuration: two boolean switches.
    parameters = node.parameters()
    parameters.set_boolean(
        'name',
        value=True,
        label='Create name column',
        description=(
            'Creates an additional column first in the data showing the '
            'original column name for each corresponding row'))
    parameters.set_boolean(
        'assume_centered',
        value=False,
        label='Skip centering data',
        description=(
            'If True, data are not centered before computation. '
            'Useful when working with data whose mean is almost, but not '
            'exactly zero. If False (default), data are centered before '
            'computation'))

    inputs = Ports([
        Port.Table('data', name='data'),
    ])
    outputs = Ports([
        Port.Table('Table with results', name='result'),
    ])

    def execute(self, node_context):
        """
        Compute the covariance matrix of the input table.

        Reads the ``data`` input port and the ``name`` / ``assume_centered``
        parameters from ``node_context`` and fills the ``result`` output
        port.

        Raises ``exceptions.SyDataError`` when the input table has no rows.
        """
        source = node_context.input['data']
        if source.number_of_rows() == 0:
            raise exceptions.SyDataError("Empty table")

        result = node_context.output['result']
        settings = node_context.parameters
        with_labels = settings['name'].value
        skip_centering = settings['assume_centered'].value

        # Read the columns once; both the values and the names are needed.
        columns = list(source.cols())
        samples = np.column_stack([column.data for column in columns])
        labels = [column.name for column in columns]

        estimator = EmpiricalCovariance(assume_centered=skip_centering)
        estimator.fit(samples)
        covariance = estimator.covariance_

        if with_labels:
            result.set_column_from_array("name", np.array(labels))

        # Row i of the transpose is column i of the covariance matrix.
        for label, values in zip(labels, covariance.T):
            result.set_column_from_array(label, values)