# Source code for node_regression

# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
import sklearn
import sklearn.kernel_ridge
import sklearn.linear_model
import sklearn.svm
from packaging import version

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags

from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import Descriptor
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import NoneType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib.machinelearning.descriptors import UnionType
from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.utility import names_from_x
from sylib.machinelearning.utility import names_from_y
# Parsed version of the scikit-learn installation that is actually imported.
sklearn_version = version.Version(sklearn.__version__)

# True when running on scikit-learn 1.0.0 or newer.  In this module the flag
# is used to mark the ``normalize`` parameter of LinearRegression as
# deprecated.
check_10 = version.Version('1.0.0') <= sklearn_version


class LinearRegression(SyML_abstract, node.Node):
    # Node that builds an unfitted scikit-learn LinearRegression estimator
    # from the node parameters and writes it to the 'model' output port.
    # The class docstring is generated further down via ``__doc__``.
    name = 'Linear Regression'
    author = 'Mathias Broxvall'
    version = '0.1'
    icon = 'linear_regression.svg'
    description = 'Ordinary linear regression'
    nodeid = 'org.sysess.sympathy.machinelearning.linearregression'
    tags = Tags(Tag.MachineLearning.Regression)

    # Parameter descriptions; each 'name' is passed as a keyword argument to
    # sklearn.linear_model.LinearRegression in execute().
    descriptor = Descriptor()
    descriptor.name = name
    info = [
        {
            'name': 'fit_intercept',
            'dispname': 'Fit intercept',
            'type': BoolType(default=True),
        },
        {
            'name': 'normalize',
            'dispname': 'Normalize regressors',
            'type': BoolType(default=False),
            # Flagged as deprecated when scikit-learn >= 1.0.0 is installed.
            # NOTE(review): how Descriptor treats 'deprecated' entries is not
            # visible here -- confirm the parameter is not forwarded on
            # scikit-learn releases that no longer accept it.
            'deprecated': bool(check_10),
        },
        {
            'name': 'n_jobs',
            'dispname': 'Number of jobs',
            'type': IntType(min_value=1, default=1),
        },
    ]
    descriptor.set_info(info, doc_class=sklearn.linear_model.LinearRegression)

    # Attributes of the estimator exposed through the descriptor.
    # NOTE(review): 'residues_' may not exist on current scikit-learn
    # estimators -- verify against the supported versions.
    descriptor.set_attributes(
        [
            {'name': 'coef_'},
            {'name': 'intercept_'},
            {'name': 'residues_'},
        ],
        doc_class=sklearn.linear_model.LinearRegression)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the unfitted estimator and save it on the 'model' port."""
        descriptor = type(self).descriptor
        out_model = node_context.output['model']
        out_model.set_desc(descriptor)

        # Translate the configured node parameters into constructor kwargs.
        params = descriptor.get_parameters(node_context.parameters)
        estimator = sklearn.linear_model.LinearRegression(**params)

        out_model.set_skl(estimator)
        out_model.save()
class LogisticRegression(SyML_abstract, node.Node):
    # Node that builds an unfitted scikit-learn LogisticRegression estimator
    # from the node parameters and writes it to the 'model' output port.
    # The class docstring is generated further down via ``__doc__``.
    name = 'Logistic Regression'
    author = 'Mathias Broxvall'
    version = '0.1'
    icon = 'logistic_regression.svg'
    description = 'Logistic regression of a categorical dependent variable'
    nodeid = 'org.sysess.sympathy.machinelearning.logisticregression'
    tags = Tags(Tag.MachineLearning.Supervised)

    # Parameter descriptions, organised in groups: the first element of each
    # inner list is the group title, the remaining elements are parameters.
    descriptor = Descriptor()
    descriptor.name = name
    info = [
        [
            'Options',
            {
                'name': 'penalty',
                'dispname': 'Penalty',
                'type': StringSelectionType(['l1', 'l2'], default='l2'),
            },
            {
                'name': 'dual',
                'dispname': 'Dual Formulation',
                'type': BoolType(default=False),
            },
            {
                'name': 'C',
                'dispname': 'C',
                'type': FloatType(min_value=0, default=1.0),
            },
            {
                'name': 'fit_intercept',
                'dispname': 'Fit intercept',
                'type': BoolType(default=True),
            },
            {
                'name': 'intercept_scaling',
                'dispname': 'Intercept scaling',
                'type': FloatType(default=1.0),
            },
            {
                'name': 'class_weight',
                'dispname': 'Class weights',
                'type': UnionType(
                    [NoneType(), StringSelectionType(['balanced'])],
                    default=None),
            },
            {
                'name': 'tol',
                'dispname': 'Tolerance',
                'type': FloatType(default=1e-4),
            },
            {
                'name': 'multi_class',
                'dispname': 'Multiclass',
                'type': StringSelectionType(
                    ['ovr', 'multinomial'], default='ovr'),
            },
        ],
        [
            'Solver',
            {
                'name': 'max_iter',
                'dispname': 'Maximum iterations',
                'type': IntType(min_value=0, default=100),
            },
            {
                'name': 'solver',
                'dispname': 'Solver',
                'type': StringSelectionType(
                    ['newton-cg', 'lbfgs', 'liblinear', 'sag'],
                    default='liblinear'),
            },
            {
                'name': 'n_jobs',
                'dispname': 'Number of jobs',
                # Explicit description text supplied for this parameter.
                'desc': (
                    'Number of CPU cores used when parallelizing over '
                    'classes if multi_class="ovr". Ignored when the solver '
                    'is set to "liblinear" regardless of multi_class. '
                    'If given -1 then all cores are used'),
                'type': IntType(min_value=-1, default=1),
            },
        ],
        [
            'Model state',
            {
                'name': 'random_state',
                'dispname': 'Random seed',
                'type': UnionType([NoneType(), IntType()], default=None),
            },
            {
                'name': 'warm_start',
                'dispname': 'Warm start',
                'type': BoolType(default=False),
            },
        ],
    ]
    descriptor.set_info(
        info, doc_class=sklearn.linear_model.LogisticRegression)

    # Attributes of the estimator exposed through the descriptor; 'cnames'
    # supplies the helper used to name the columns of 'coef_'.
    descriptor.set_attributes(
        [
            {'name': 'n_iter_'},
            {'name': 'coef_', 'cnames': names_from_x},
            {'name': 'intercept_'},
        ],
        doc_class=sklearn.linear_model.LogisticRegression)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the unfitted estimator and save it on the 'model' port."""
        descriptor = type(self).descriptor
        out_model = node_context.output['model']
        out_model.set_desc(descriptor)

        # Translate the configured node parameters into constructor kwargs.
        params = descriptor.get_parameters(node_context.parameters)
        estimator = sklearn.linear_model.LogisticRegression(**params)

        out_model.set_skl(estimator)
        out_model.save()
class KernelRidge(SyML_abstract, node.Node):
    # Node that builds an unfitted scikit-learn KernelRidge estimator from
    # the node parameters and writes it to the 'model' output port.
    # The class docstring is generated further down via ``__doc__``.
    name = 'Kernel Ridge Regression'
    author = 'Mathias Broxvall'
    version = '0.1'
    icon = 'kernel_ridge.svg'
    # Kernel ridge is a regression model (the node is tagged as Regression),
    # so the description calls it a regressor rather than a classifier.
    description = (
        'Kernel Ridge based regressor combining ridge regression '
        '(linear least-squares with L2-norm regularization) with the '
        'kernel trick')
    nodeid = 'org.sysess.sympathy.machinelearning.kernel_ridge'
    tags = Tags(Tag.MachineLearning.Regression)

    # Parameter descriptions, organised in groups: the first element of each
    # inner list is the group title, the remaining elements are parameters
    # passed as keyword arguments to sklearn.kernel_ridge.KernelRidge.
    descriptor = Descriptor()
    descriptor.name = name
    info = [
        [
            "Model",
            {
                'name': 'alpha',
                'dispname': 'Alpha',
                'type': FloatType(min_value=0, default=1.0),
            },
            {
                'name': 'kernel',
                'dispname': 'Kernel',
                'type': StringSelectionType(
                    ['linear', 'rbf', 'poly', 'sigmoid', 'cosine',
                     'laplacian', 'chi2'],
                    default='rbf'),
            },
        ],
        [
            "Advanced",
            {
                'name': 'gamma',
                'dispname': 'Gamma',
                'type': UnionType([NoneType(), FloatType()], default=None),
            },
            {
                'name': 'coef0',
                'dispname': 'Zero coefficient',
                'type': FloatType(default=1.0),
            },
            {
                'name': 'degree',
                'dispname': 'Degree',
                'type': IntType(min_value=1, default=3),
            },
        ],
    ]
    descriptor.set_info(info, doc_class=sklearn.kernel_ridge.KernelRidge)

    # Attributes of the estimator exposed through the descriptor; 'cnames'
    # supplies the helper used to name the columns of each attribute.
    descriptor.set_attributes(
        [
            {'name': 'dual_coef_', 'cnames': names_from_y},
            {'name': 'X_fit_', 'cnames': names_from_x},
        ],
        doc_class=sklearn.kernel_ridge.KernelRidge)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the unfitted estimator and save it on the 'model' port."""
        descriptor = type(self).descriptor
        out_model = node_context.output['model']
        out_model.set_desc(descriptor)

        # Translate the configured node parameters into constructor kwargs.
        params = descriptor.get_parameters(node_context.parameters)
        estimator = sklearn.kernel_ridge.KernelRidge(**params)

        out_model.set_skl(estimator)
        out_model.save()
class SupportVectorRegression(SyML_abstract, node.Node):
    # Node that builds an unfitted scikit-learn SVR estimator from the node
    # parameters and writes it to the 'model' output port.
    # The class docstring is generated further down via ``__doc__``.
    #
    # This class references ``sklearn.svm``; the submodule has to be imported
    # explicitly at module level (``import sklearn.svm``) rather than relying
    # on another scikit-learn submodule importing it as a side effect.
    name = 'Epsilon Support Vector Regression'
    author = 'Mathias Broxvall'
    version = '0.1'
    icon = 'svm.svg'
    description = 'Support vector machine based regressor (SVR)'
    nodeid = 'org.sysess.sympathy.machinelearning.svr'
    tags = Tags(Tag.MachineLearning.Regression)

    # Parameter descriptions, organised in groups: the first element of each
    # inner list is the group title, the remaining elements are parameters
    # passed as keyword arguments to sklearn.svm.SVR.
    descriptor = Descriptor()
    descriptor.name = name
    info = [
        [
            "Options",
            {
                'name': 'C',
                'dispname': 'Penalty',
                'type': FloatType(default=1.0),
            },
            {
                'name': 'kernel',
                'dispname': 'Kernel',
                'type': StringSelectionType(
                    ['linear', 'rbf', 'poly', 'sigmoid'], default='rbf'),
            },
            {
                'name': 'epsilon',
                'dispname': 'Epsilon',
                'type': FloatType(default=0.1),
            },
        ],
        [
            "Advanced",
            {
                'name': 'gamma',
                'dispname': 'Gamma',
                # 'scale' is offered next to 'auto' because scikit-learn's
                # SVR accepts both string modes; the default stays 'auto' so
                # existing configurations keep their behaviour.
                'type': UnionType(
                    [StringSelectionType(['auto', 'scale']), FloatType()],
                    default='auto'),
            },
            {
                'name': 'degree',
                'dispname': 'Polynomial Degree',
                'type': IntType(default=3),
            },
            {
                'name': 'coef0',
                'dispname': 'Independent term in kernel function',
                'type': FloatType(default=0.0),
            },
        ],
        [
            "Solver",
            {
                'name': 'max_iter',
                'dispname': 'Maximum iterations',
                'type': IntType(default=-1),
            },
            {
                'name': 'tol',
                'dispname': 'Tolerance',
                'type': FloatType(default=1e-3),
            },
            {
                'name': 'shrinking',
                'dispname': 'Shrinking',
                'type': BoolType(default=True),
            },
        ],
    ]
    descriptor.set_info(info, doc_class=sklearn.svm.SVR)

    # Attributes of the estimator exposed through the descriptor; 'cnames'
    # supplies the helper used to name the columns of an attribute.
    descriptor.set_attributes(
        [
            {'name': 'support_'},
            {'name': 'support_vectors_', 'cnames': names_from_x},
            {'name': 'dual_coef_'},
            {'name': 'intercept_'},
            {'name': 'coef_', 'cnames': names_from_x},
        ],
        doc_class=sklearn.svm.SVR)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the unfitted estimator and save it on the 'model' port."""
        descriptor = type(self).descriptor
        out_model = node_context.output['model']
        out_model.set_desc(descriptor)

        # Translate the configured node parameters into constructor kwargs.
        params = descriptor.get_parameters(node_context.parameters)
        estimator = sklearn.svm.SVR(**params)

        out_model.set_skl(estimator)
        out_model.save()