# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import sklearn.svm
from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.utility import names_from_x
from sylib.machinelearning.descriptors import Descriptor
from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import NoneType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib.machinelearning.descriptors import UnionType
[docs]
class SupportVectorClassifier(SyML_abstract, node.Node):
"""Support Vector Machines (SVMs) are powerful supervised learning models used for
classification and regression tasks. They work by finding the optimal hyperplane that
separates classes in the feature space with the maximum margin.
The advantages of support vector machines are:
- Effective in high dimensional spaces.
- Still effective in cases where number of dimensions is greater than the number of
samples.
- Uses a subset of training points in the decision function (called support vectors), so
it is also memory efficient.
- Versatile: different Kernel functions can be specified for the decision function.
The disadvantages of support vector machines include:
- If the number of features is much greater than the number of samples, avoid over-fitting
in choosing Kernel functions and regularization term is crucial.
- SVMs do not directly provide probability estimates, these are calculated using an
expensive five-fold cross-validation.
"""
name = 'Support Vector Classifier'
author = 'Mathias Broxvall'
icon = 'svm.svg'
description = 'Support vector machine (SVM) based classifier'
nodeid = 'org.sysess.sympathy.machinelearning.svc'
tags = Tags(Tag.MachineLearning.Supervised)
descriptor = Descriptor()
descriptor.name = name
info = [
[
"Model",
{'name': 'C',
'dispname': 'Penalty parameter C',
'type': FloatType(min_value=0.0, default=1.0)},
{'name': 'kernel',
'dispname': 'Kernel',
'type': StringSelectionType([
'rbf', 'linear', 'poly', 'sigmoid', 'precomputed'],
default='rbf')},
],
[
"Advanced",
{'name': 'degree',
'dispname': 'Polynomial kernel degree',
'type': IntType(min_value=1, default=3)},
{'name': 'gamma',
'dispname': 'Kernel coefficient',
'type': UnionType([
FloatType(), StringSelectionType(['auto'])],
default='auto')},
{'name': 'coef0',
'dispname': 'Independent kernel function term',
'type': FloatType(default=0.0)},
{'name': 'probability',
'dispname': 'Enable probability estimates',
'type': BoolType(default=False)},
{'name': 'shrinking',
'dispname': 'Use shrinking heuristic',
'type': BoolType(default=True)},
{'name': 'class_weight',
'dispname': 'Class weight',
'type': UnionType([NoneType(), StringSelectionType(['balanced'])],
default=None)},
],
[
"Solver",
{'name': 'tol',
'dispname': 'Tolerance',
'type': FloatType(default=1e-3)},
{'name': 'max_iter',
'dispname': 'Hard iteration limit',
'type': IntType(min_value=-1)},
{'name': 'random_state',
'dispname': 'Random seed',
'type': UnionType([IntType(), NoneType()], default=None)},
]
]
descriptor.set_info(info, doc_class=sklearn.svm.SVC)
descriptor.set_attributes([
{'name': 'support_'},
{'name': 'support_vectors_', 'cnames': names_from_x},
{'name': 'n_support_'},
{'name': 'dual_coef_'},
{'name': 'coef_', 'cnames': names_from_x},
{'name': 'intercept_'},
], doc_class=sklearn.svm.SVC)
parameters = node.parameters()
SyML_abstract.generate_parameters(parameters, descriptor)
inputs = Ports([])
outputs = Ports([ModelPort('Model', 'model')])
__doc__ += SyML_abstract.generate_docstring(
description, descriptor.info, descriptor.attributes, inputs, outputs)
def execute(self, node_context):
model = node_context.output['model']
desc = self.__class__.descriptor
model.set_desc(desc)
kwargs = self.__class__.descriptor.get_parameters(
node_context.parameters)
skl = sklearn.svm.SVC(**kwargs)
model.set_skl(skl)
model.save()
def _one_class_svm_info():
model_info = [
"Model",
{'name': 'kernel',
'dispname': 'Kernel',
'type': StringSelectionType(
['rbf', 'linear', 'poly', 'sigmoid', 'precomputed'],
default='rbf')},
{'name': 'nu',
'dispname': 'Upper/lower fraction bound',
'type': FloatType(min_value=0, max_value=1, default=0.5)},
]
advanced_info = [
"Advanced",
{'name': 'degree',
'dispname': 'Polynomial kernel degree',
'type': IntType(min_value=1, default=3)},
{'name': 'gamma',
'dispname': 'Kernel coefficient',
'type': UnionType([
FloatType(), StringSelectionType(['auto'])],
default='auto')},
{'name': 'coef0',
'dispname': 'Independent kernel function term',
'type': FloatType(default=0.0)},
{'name': 'shrinking',
'dispname': 'Use shrinking heuristic',
'type': BoolType(default=True)},
]
solver_info = [
"Solver",
{'name': 'tol',
'dispname': 'Tolerance',
'type': FloatType(default=1e-3)},
{'name': 'max_iter',
'dispname': 'Hard iteration limit',
'type': IntType(min_value=-1)},
]
# TODO: Older versions could create additional parameters. Consider
# to add migrations.
# if pversion.parse(sklearn.__version__) < pversion.Version('0.20'):
# solver_info.append({
# 'name': 'random_state',
# 'dispname': 'Random seed',
# 'type': UnionType([IntType(), NoneType()], default=None),
# })
return [model_info, advanced_info, solver_info]
[docs]
class OneClassSVM(SyML_abstract, node.Node):
name = 'One Class Support Vector Machines'
author = 'Mathias Broxvall'
icon = 'outliers.svg'
description = (
'Unsupervised outlier detection based on support vector machines'
)
nodeid = 'org.sysess.sympathy.machinelearning.one_class_svm'
tags = Tags(Tag.MachineLearning.Unsupervised)
descriptor = Descriptor()
descriptor.name = name
info = _one_class_svm_info()
descriptor.set_info(info, doc_class=sklearn.svm.OneClassSVM)
descriptor.set_attributes([
{'name': 'support_'},
{'name': 'support_vectors_', 'cnames': names_from_x},
{'name': 'dual_coef_'},
{'name': 'coef_', 'cnames': names_from_x},
{'name': 'intercept_'},
], doc_class=sklearn.svm.OneClassSVM)
parameters = node.parameters()
SyML_abstract.generate_parameters(parameters, descriptor)
inputs = Ports([])
outputs = Ports([ModelPort('Model', 'model')])
__doc__ = SyML_abstract.generate_docstring(
description, descriptor.info, descriptor.attributes, inputs, outputs)
def execute(self, node_context):
model = node_context.output['model']
desc = self.__class__.descriptor
model.set_desc(desc)
kwargs = self.__class__.descriptor.get_parameters(
node_context.parameters)
skl = sklearn.svm.OneClassSVM(**kwargs)
model.set_skl(skl)
model.save()