# Source code for node_decomposition

# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import sklearn.decomposition
import sklearn.cross_decomposition

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.utility import names_from_x
from sylib.machinelearning.utility import names_from_y
from sylib.machinelearning.utility import names_from_prefix
from sylib.machinelearning.descriptors import Descriptor

from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import NoneType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib.machinelearning.descriptors import UnionType


class PrincipalComponentAnalysis(SyML_abstract, node.Node):
    """Principal Component Analysis (PCA) is used to decompose a
    multivariate dataset in a set of successive orthogonal components
    that explain a maximum amount of the variance. It is implemented as
    a transformer model that learns `n` components in its fit method,
    and can be used on new data to project it on these components.
    """

    name = 'Principal Component Analysis (PCA)'
    author = 'Mathias Broxvall'
    icon = 'PCA.svg'
    description = (
        'Linear dimensionality reduction using Singular Value Decomposition '
        'of the data to project it to a lower dimensional space.')
    nodeid = 'org.sysess.sympathy.machinelearning.pca'
    tags = Tags(Tag.MachineLearning.DimensionalityReduction)

    # Descriptor drives both the configuration GUI and the generated docs.
    descriptor = Descriptor()
    descriptor.name = name
    info = [
        {'name': 'n_components',
         'dispname': 'Number of components to keep',
         'type': UnionType([IntType(min_value=1),
                            FloatType(min_value=0, max_value=1),
                            StringSelectionType(['mle'])], default=1)},
        {'name': 'svd_solver',
         'dispname': 'Solver',
         'type': StringSelectionType(
             ['auto', 'full', 'arpack', 'randomized'], default='auto')},
        {'name': 'tol',
         'dispname': 'Tolerance for singular values',
         'type': FloatType(default=0.0)},
        {'name': 'iterated_power',
         # Fixed typo in the user-facing display name ("iteratins").
         'dispname': 'N. of iterations (for randomized solver)',
         'type': UnionType(
             [IntType(min_value=0), StringSelectionType(['auto'])],
             default='auto')},
        {'name': 'whiten',
         'dispname': 'Whiten',
         'type': BoolType(default=False)},
    ]
    descriptor.set_info(info, doc_class=sklearn.decomposition.PCA)

    descriptor.set_attributes([
        {'name': 'components_', 'cnames': names_from_x},
        {'name': 'explained_variance_'},
        {'name': 'explained_variance_ratio_'},
        {'name': 'mean_', 'cnames': names_from_x},
        {'name': 'n_components_'},
        {'name': 'noise_variance_'},
    ], doc_class=sklearn.decomposition.PCA)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ += SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create an unfitted sklearn PCA estimator configured from the
        node parameters and store it on the output model port."""
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        kwargs = self.__class__.descriptor.get_parameters(
            node_context.parameters)
        skl = sklearn.decomposition.PCA(**kwargs)

        model.set_skl(skl)
        model.save()
class KernelPCA(SyML_abstract, node.Node):
    """KernelPCA is an extension of PCA which achieves non-linear
    dimensionality reduction through the use of kernels. It has many
    applications including denoising, compression and structured
    prediction (kernel dependency estimation). KernelPCA supports both
    transform and inverse_transform.
    """

    name = 'Kernel Principal Component Analysis (KPCA)'
    author = 'Mathias Broxvall'
    icon = 'PCA.svg'
    description = (
        'Non-linear dimensionality reduction through the use of kernels')
    nodeid = 'org.sysess.sympathy.machinelearning.kpca'
    tags = Tags(Tag.MachineLearning.DimensionalityReduction)

    # Descriptor drives both the configuration GUI and the generated docs.
    # Parameters are grouped into named tabs (nested lists).
    descriptor = Descriptor()
    descriptor.name = name
    info = [
        [
            "Model options",
            {'name': 'n_components',
             'dispname': 'Number of components',
             'type': UnionType(
                 [IntType(min_value=1), NoneType()], default=None)},
            {'name': 'kernel',
             'dispname': 'Kernel',
             'type': StringSelectionType(
                 ['linear', 'poly', 'rbf', 'sigmoid', 'cosine',
                  'precomputed'],
                 default='linear')},
            {'name': 'fit_inverse_transform',
             'dispname': 'Fit inverse-transform',
             'type': BoolType(default=False)},
        ],
        [
            "Advanced options",
            {'name': 'degree',
             'dispname': 'Poly kernel degree',
             'type': IntType(min_value=1, default=3)},
            {'name': 'gamma',
             'dispname': 'Kernel coefficient (poly, rbf, sigmoid)',
             'type': UnionType([
                 FloatType(min_value=0.0), NoneType()], default=None)},
            {'name': 'coef0',
             'dispname': 'Independent term (poly, sigmoid)',
             'type': FloatType(min_value=0.0, default=1)},
            {'name': 'alpha',
             'dispname': 'Ridge regression hyperparameter',
             'type': FloatType(min_value=0.0, default=1)},
            {'name': 'remove_zero_eig',
             'dispname': 'Remove components with zero eigenvalue',
             'type': BoolType(default=False)},
        ],
        [
            "Solver",
            {'name': 'eigen_solver',
             'dispname': 'Eigensolver',
             'type': StringSelectionType([
                 'auto', 'dense', 'arpack'], default='auto')},
            {'name': 'tol',
             'dispname': 'Tolerance',
             'type': FloatType(default=0.0)},
            {'name': 'max_iter',
             # Fixed typo in the user-facing display name ("iteratins").
             'dispname': 'Max iterations',
             'type': UnionType(
                 [IntType(min_value=1), NoneType()], default=None)},
            {'name': 'random_state',
             'dispname': 'Random seed',
             'type': UnionType([NoneType(), IntType()], default=None)},
            {'name': 'n_jobs',
             # Capitalized for consistency with the other display names.
             'dispname': 'Number of jobs',
             'type': IntType(min_value=-1, default=1)},
        ]
    ]
    descriptor.set_info(info, doc_class=sklearn.decomposition.KernelPCA)

    descriptor.set_attributes([
        {'name': 'lambdas_'},
        {'name': 'alphas_'},
        {'name': 'dual_coef_', 'cnames': names_from_x},
        {'name': 'X_transformed_fit_'},
        {'name': 'X_fit_'},
    ], doc_class=sklearn.decomposition.KernelPCA)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ += SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create an unfitted sklearn KernelPCA estimator configured from
        the node parameters and store it on the output model port."""
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        kwargs = self.__class__.descriptor.get_parameters(
            node_context.parameters)
        # copy_X is not exposed as a user parameter; always keep a copy of
        # the training data on the fitted estimator.
        kwargs['copy_X'] = True
        skl = sklearn.decomposition.KernelPCA(**kwargs)

        model.set_skl(skl)
        model.save()
class PLSRegressionCrossDecomposition(SyML_abstract, node.Node):
    """Partial Least Squares (PLS) cross-decomposition is a statistical
    method used to find the fundamental relations between two matrices,
    typically predictors (X) and responses (Y). It projects both X and Y
    into a lower-dimensional subspace such that the covariance between
    transformed(X) and transformed(Y) is maximal.

    PLS draws similarities with Principal Component Regression (PCR),
    where the samples are first projected into a lower-dimensional
    subspace and the targets y are predicted using transformed(X). One
    issue with PCR is that the dimensionality reduction is unsupervised
    and may lose important variables: PCR keeps the features with the
    most variance, but a feature with small variance may still be
    relevant for predicting the target. In a way, PLS allows for the
    same kind of dimensionality reduction while taking the targets y
    into account.
    """

    name = 'Partial Least Squares cross-decomposition (PLS regression)'
    author = 'Mathias Broxvall'
    icon = 'PCA.svg'
    description = (
        'Finds the fundamental relations between two matrices X and Y, ie. '
        'it finds the (multidimensional) direction in X that best explains '
        'maximum multidimensional direction in Y. See also PCA-analysis')
    nodeid = 'org.sysess.sympathy.machinelearning.pls'
    tags = Tags(Tag.MachineLearning.DimensionalityReduction)

    # Descriptor drives both the configuration GUI and the generated docs.
    descriptor = Descriptor()
    descriptor.name = name
    info = [
        {'name': 'n_components',
         'dispname': 'Number of components to keep',
         'type': IntType(min_value=1, default=2)},
        {'name': 'scale',
         'dispname': 'Scale the data',
         'type': BoolType(default=True)},
        {'name': 'max_iter',
         'dispname': 'Max iterations',
         'type': IntType(min_value=1, default=500)},
        {'name': 'tol',
         'dispname': 'Tolerance',
         'type': FloatType(default=0.0)},
    ]
    descriptor.set_info(
        info, doc_class=sklearn.cross_decomposition.PLSRegression)

    # Row/column naming callbacks give the per-component attribute tables
    # readable headers in the attribute viewer.
    descriptor.set_attributes([
        {'name': 'x_weights_', 'rnames': names_from_x,
         'cnames': names_from_prefix('component ')},
        {'name': 'y_weights_', 'rnames': names_from_y,
         'cnames': names_from_prefix('component ')},
        {'name': 'x_loadings_', 'rnames': names_from_x,
         'cnames': names_from_prefix('component ')},
        {'name': 'y_loadings_', 'rnames': names_from_y,
         'cnames': names_from_prefix('component ')},
        {'name': 'x_scores_', 'cnames': names_from_prefix('component ')},
        {'name': 'y_scores_', 'cnames': names_from_prefix('component ')},
        {'name': 'x_rotations_', 'rnames': names_from_x,
         'cnames': names_from_prefix('component ')},
        {'name': 'y_rotations_', 'rnames': names_from_y},
        {'name': 'coef_'},
        {'name': 'n_iter_'},
    ], doc_class=sklearn.cross_decomposition.PLSRegression)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ += SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Attach this node's descriptor and a fresh (unfitted) sklearn
        PLSRegression estimator, built from the node parameters, to the
        output model port."""
        out_model = node_context.output['model']
        out_model.set_desc(self.__class__.descriptor)

        estimator_kwargs = self.__class__.descriptor.get_parameters(
            node_context.parameters)
        out_model.set_skl(
            sklearn.cross_decomposition.PLSRegression(**estimator_kwargs))
        out_model.save()