Source code for node_decomposition

# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
import sklearn
import sklearn.decomposition
import sklearn.cross_decomposition

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.utility import names_from_x
from sylib.machinelearning.utility import names_from_y
from sylib.machinelearning.utility import names_from_prefix
from sylib.machinelearning.descriptors import Descriptor

from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import NoneType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib.machinelearning.descriptors import UnionType


# Sympathy node that builds a scikit-learn PCA estimator from the node's
# parameters and stores it (without fitting it here) in the 'model' output.
#
# The class docstring is generated below via ``__doc__ = ...`` from the
# description, the parameter info and the attribute list, so no literal
# docstring is written on the class.
class PrincipalComponentAnalysis(SyML_abstract, node.Node):
    name = 'Principal Component Analysis (PCA)'
    author = 'Mathias Broxvall'
    version = '0.1'
    icon = 'PCA.svg'
    description = (
        'Linear dimensionality reduction using Singular Value Decomposition '
        'of the data to project it to a lower dimensional space.')
    nodeid = 'org.sysess.sympathy.machinelearning.pca'
    tags = Tags(Tag.MachineLearning.DimensionalityReduction)

    descriptor = Descriptor()
    descriptor.name = name
    # Each entry describes one keyword argument forwarded to
    # sklearn.decomposition.PCA: 'name' is the keyword, 'dispname' the
    # human-readable label and 'type' the accepted value type/default.
    info = [
        {'name': 'n_components',
         'dispname': 'Number of components to keep',
         'type': UnionType([IntType(min_value=1),
                            FloatType(min_value=0, max_value=1),
                            StringSelectionType(['mle'])], default=1)},
        {'name': 'svd_solver',
         'dispname': 'Solver',
         'type': StringSelectionType(
             ['auto', 'full', 'arpack', 'randomized'], default='auto')},
        {'name': 'tol',
         'dispname': 'Tolerance for singular values',
         'type': FloatType(default=0.0)},
        {'name': 'iterated_power',
         # Label typo fixed ("iteratins" -> "iterations").
         'dispname': 'N. of iterations (for randomized solver)',
         'type': UnionType(
             [IntType(min_value=0), StringSelectionType(['auto'])],
             default='auto')},
        {'name': 'whiten',
         'dispname': 'Whiten',
         'type': BoolType(default=False)},
    ]
    descriptor.set_info(info, doc_class=sklearn.decomposition.PCA)

    # Estimator attributes registered on the descriptor; 'cnames' supplies
    # the helper used to name the columns of that attribute.
    descriptor.set_attributes([
        {'name': 'components_', 'cnames': names_from_x},
        {'name': 'explained_variance_'},
        {'name': 'explained_variance_ratio_'},
        {'name': 'mean_', 'cnames': names_from_x},
        {'name': 'n_components_'},
        {'name': 'noise_variance_'},
    ], doc_class=sklearn.decomposition.PCA)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the PCA estimator and save it to the 'model' output.

        :param node_context: execution context providing ``parameters`` and
            the ``output['model']`` port.
        """
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)
        # Translate the node parameters into PCA keyword arguments.
        kwargs = desc.get_parameters(node_context.parameters)
        skl = sklearn.decomposition.PCA(**kwargs)
        model.set_skl(skl)
        model.save()
# Sympathy node that builds a scikit-learn KernelPCA estimator from the
# node's parameters and stores it (without fitting it here) in the 'model'
# output.
#
# The class docstring is generated below via ``__doc__ = ...``, so no literal
# docstring is written on the class.
class KernelPCA(SyML_abstract, node.Node):
    name = 'Kernel Principal Component Analysis (KPCA)'
    author = 'Mathias Broxvall'
    version = '0.1'
    icon = 'PCA.svg'
    description = (
        'Non-linear dimensionality reduction through the use of kernels')
    nodeid = 'org.sysess.sympathy.machinelearning.kpca'
    tags = Tags(Tag.MachineLearning.DimensionalityReduction)

    descriptor = Descriptor()
    descriptor.name = name
    # Parameters are grouped: each inner list starts with the group title,
    # followed by one dict per keyword argument forwarded to
    # sklearn.decomposition.KernelPCA.
    info = [
        [
            "Model options",
            {'name': 'n_components',
             'dispname': 'Number of components',
             'type': UnionType(
                 [IntType(min_value=1), NoneType()], default=None)},
            {'name': 'kernel',
             'dispname': 'Kernel',
             'type': StringSelectionType(
                 ['linear', 'poly', 'rbf', 'sigmoid', 'cosine',
                  'precomputed'],
                 default='linear')},
            {'name': 'fit_inverse_transform',
             'dispname': 'Fit inverse-transform',
             'type': BoolType(default=False)},
        ],
        [
            "Advanced options",
            {'name': 'degree',
             'dispname': 'Poly kernel degree',
             'type': IntType(min_value=1, default=3)},
            {'name': 'gamma',
             'dispname': 'Kernel coefficient (poly, rbf, sigmoid)',
             'type': UnionType([
                 FloatType(min_value=0.0), NoneType()], default=None)},
            # NOTE(review): the lower bound of 0.0 on coef0 is stricter than
            # scikit-learn appears to require -- confirm it is intentional.
            {'name': 'coef0',
             'dispname': 'Independent term (poly, sigmoid)',
             'type': FloatType(min_value=0.0, default=1)},
            {'name': 'alpha',
             'dispname': 'Ridge regression hyperparameter',
             'type': FloatType(min_value=0.0, default=1)},
            {'name': 'remove_zero_eig',
             'dispname': 'Remove components with zero eigenvalue',
             'type': BoolType(default=False)},
        ],
        [
            "Solver",
            {'name': 'eigen_solver',
             'dispname': 'Eigensolver',
             'type': StringSelectionType([
                 'auto', 'dense', 'arpack'], default='auto')},
            {'name': 'tol',
             'dispname': 'Tolerance',
             'type': FloatType(default=0.0)},
            {'name': 'max_iter',
             # Label typo fixed ("iteratins" -> "iterations").
             'dispname': 'Max iterations',
             'type': UnionType([IntType(min_value=1), NoneType()],
                               default=None)},
            {'name': 'random_state',
             'dispname': 'Random seed',
             'type': UnionType([NoneType(), IntType()], default=None)},
            {'name': 'n_jobs',
             'dispname': 'number of jobs',
             'type': IntType(min_value=-1, default=1)},
        ]
    ]
    descriptor.set_info(info, doc_class=sklearn.decomposition.KernelPCA)

    # NOTE(review): newer scikit-learn releases renamed 'lambdas_' and
    # 'alphas_' to 'eigenvalues_' and 'eigenvectors_'; verify these names
    # against the scikit-learn version this library targets.
    descriptor.set_attributes([
        {'name': 'lambdas_'},
        {'name': 'alphas_'},
        {'name': 'dual_coef_', 'cnames': names_from_x},
        {'name': 'X_transformed_fit_'},
        {'name': 'X_fit_'},
    ], doc_class=sklearn.decomposition.KernelPCA)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the KernelPCA estimator and save it to the 'model' output.

        :param node_context: execution context providing ``parameters`` and
            the ``output['model']`` port.
        """
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)
        # Translate the node parameters into KernelPCA keyword arguments.
        kwargs = desc.get_parameters(node_context.parameters)
        # copy_X is not exposed as a node parameter; it is always True.
        kwargs['copy_X'] = True
        skl = sklearn.decomposition.KernelPCA(**kwargs)
        model.set_skl(skl)
        model.save()
# Sympathy node that builds a scikit-learn PLSRegression estimator from the
# node's parameters and stores it (without fitting it here) in the 'model'
# output.
#
# The class docstring is generated below via ``__doc__ = ...``, so no literal
# docstring is written on the class.
class PLSRegressionCrossDecomposition(SyML_abstract, node.Node):
    name = 'Partial Least Squares cross-decomposition (PLS regression)'
    author = 'Mathias Broxvall'
    version = '0.1'
    icon = 'PCA.svg'
    description = (
        'Finds the fundamental relations between two matrices X and Y, ie. '
        'it finds the (multidimensional) direction in X that best explains '
        'maximum multidimensional direction in Y. See also PCA-analysis')
    nodeid = 'org.sysess.sympathy.machinelearning.pls'
    tags = Tags(Tag.MachineLearning.DimensionalityReduction)

    descriptor = Descriptor()
    descriptor.name = name
    # Each entry describes one keyword argument forwarded to
    # sklearn.cross_decomposition.PLSRegression.
    info = [
        {'name': 'n_components',
         'dispname': 'Number of components to keep',
         'type': IntType(min_value=1, default=2)},
        {'name': 'scale',
         'dispname': 'Scale the data',
         'type': BoolType(default=True)},
        {'name': 'max_iter',
         'dispname': 'Max iterations',
         'type': IntType(min_value=1, default=500)},
        # Default follows scikit-learn's documented PLSRegression default
        # (1e-06). The previous default of 0.0 is a tolerance the iterative
        # solver generally cannot meet, so it would run until max_iter.
        {'name': 'tol',
         'dispname': 'Tolerance',
         'type': FloatType(default=1e-06)},
    ]
    descriptor.set_info(
        info, doc_class=sklearn.cross_decomposition.PLSRegression)

    # Estimator attributes registered on the descriptor; 'rnames'/'cnames'
    # supply the helpers used to name the rows/columns of each attribute.
    descriptor.set_attributes([
        {'name': 'x_weights_',
         'rnames': names_from_x,
         'cnames': names_from_prefix('component ')},
        {'name': 'y_weights_',
         'rnames': names_from_y,
         'cnames': names_from_prefix('component ')},
        {'name': 'x_loadings_',
         'rnames': names_from_x,
         'cnames': names_from_prefix('component ')},
        {'name': 'y_loadings_',
         'rnames': names_from_y,
         'cnames': names_from_prefix('component ')},
        {'name': 'x_scores_',
         'cnames': names_from_prefix('component ')},
        {'name': 'y_scores_',
         'cnames': names_from_prefix('component ')},
        {'name': 'x_rotations_',
         'rnames': names_from_x,
         'cnames': names_from_prefix('component ')},
        # NOTE(review): unlike x_rotations_, no 'cnames' is given here --
        # confirm whether that asymmetry is intended.
        {'name': 'y_rotations_',
         'rnames': names_from_y},
        {'name': 'coef_'},
        {'name': 'n_iter_'},
    ], doc_class=sklearn.cross_decomposition.PLSRegression)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the PLSRegression estimator and save it to 'model'.

        :param node_context: execution context providing ``parameters`` and
            the ``output['model']`` port.
        """
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)
        # Translate the node parameters into PLSRegression keyword arguments.
        kwargs = desc.get_parameters(node_context.parameters)
        skl = sklearn.cross_decomposition.PLSRegression(**kwargs)
        model.set_skl(skl)
        model.save()