# This file is part of Sympathy for Data.
# Copyright (c) 2021 Combine Control Systems
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
from sympathy.utils.pip_util import import_optional
from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.neuralnetwork import (
MLPClassifierDescriptor,
SkorchDescriptor,
)
from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntListType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import StringSelectionType
_old_empty = 999
def _is_old_empty(value):
return value in [_old_empty, [_old_empty]]
def _torch():
    """Return the result of ``import_optional`` for the ``torch`` package.

    The lookup is deferred to call time instead of being a module-level
    import; callers in this file use the result as the ``torch`` module
    (``.optim``, ``.nn``).
    """
    torch_module = import_optional("torch", group="torch")
    return torch_module
def _skorch():
    """Return the result of ``import_optional`` for the ``skorch`` package.

    The lookup is deferred to call time instead of being a module-level
    import; callers in this file use the result as the ``skorch`` module
    (``.callbacks``, ``.dataset``, ``.NeuralNetClassifier``, ...). The
    package is requested under the optional group ``"torch"``.
    """
    skorch_module = import_optional("skorch", group="torch")
    return skorch_module
class BinaryImageClassifier(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetClassifier`` around the
    # project's ``Cnn`` module and writes it to the "model" output port.
    # The class docstring is generated from the descriptor (see ``__doc__``
    # below), so the class is documented with comments instead.
    name = "Image Classifier (Experimental)"
    nodeid = ("com.sympathyfordata.advancedmachinelearning."
              "binaryimageclassifier")
    author = "Jannes Germishuys"
    icon = "image_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)
    description = (
        "Skorch Binary Image Classifier "
        "https://skorch.readthedocs.io/en/stable/classifier.html"
    )

    descriptor = SkorchDescriptor()
    descriptor.name = name

    # A single parameter group ("Architecture"); each dict describes one
    # user-configurable hyperparameter that ``execute`` reads by "name".
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the "
                        "module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "batch_size",
                "dispname": "Batch size",
                "desc": "Mini-batch size. If batch size is -1, a single batch "
                        "with all the data will be used during training and "
                        "validation.",
                "type": IntType(default=64, min_value=-1),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
    ]
    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            {"name": "history_",
             "dispname": "Model training history",
             "desc": "Model training history"}
        ],
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    # No input ports; the configured model is the only output.
    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])

    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Create the skorch classifier from the node parameters and save it.

        The network is only constructed here (``fit`` is not called). It is
        attached to the "model" output together with the class descriptor,
        after which ``save()`` is called on the output.

        :param node_context: Execution context providing ``parameters`` and
            the ``"model"`` output port.
        """
        from sylib_aml.amlnets import Cnn

        model = node_context.output["model"]
        descriptor = self.__class__.descriptor
        model.set_desc(descriptor)

        torch = _torch()
        optimizers = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}
        params = descriptor.get_parameters(node_context.parameters)
        skorch = _skorch()

        use_cv = params["cross_validation"]

        # Early stopping monitors 'valid_loss', so it is only enabled
        # together with cross validation (without a validation split there
        # is presumably no validation loss to monitor).
        callbacks = None
        if params["early_stopping"] and use_cv:
            callbacks = [
                skorch.callbacks.EarlyStopping(
                    monitor="valid_loss", patience=5, threshold=0.0001)
            ]

        train_split = None
        if use_cv:
            # skorch renamed ``CVSplit`` to ``ValidSplit`` and later removed
            # the old name; prefer the new one and fall back for older
            # skorch releases.
            split_cls = getattr(skorch.dataset, "ValidSplit", None)
            if split_cls is None:
                split_cls = skorch.dataset.CVSplit
            train_split = split_cls(5)

        batch_size = params["batch_size"]
        net = skorch.NeuralNetClassifier(
            Cnn,
            callbacks=callbacks,
            max_epochs=params["max_epochs"],
            lr=params["lr"],
            train_split=train_split,
            optimizer=optimizers[params["optimizer"]],
            criterion=torch.nn.CrossEntropyLoss,
            batch_size=batch_size,
            iterator_train__batch_size=batch_size,
            iterator_valid__batch_size=batch_size,
            verbose=0,
        )
        model.set_skl(net)
        model.save()
class BinaryTabularClassifier(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetClassifier`` around the
    # project's ``TabNet`` module and writes it to the "model" output port.
    # The class docstring is generated from the descriptor (see ``__doc__``
    # below), so the class is documented with comments instead.
    name = "Tabular Classifier (Experimental)"
    nodeid = ("com.sympathyfordata.advancedmachinelearning."
              "binarytabularclassifier")
    author = "Jannes Germishuys"
    icon = "table_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)

    descriptor = SkorchDescriptor()
    description = (
        "Skorch Binary Table Classifier "
        "https://skorch.readthedocs.io/en/stable/classifier.html"
    )
    descriptor.name = name

    # Two parameter groups: training hyperparameters ("Architecture") and
    # the categorical-column settings forwarded to the module
    # ("Embedding setup").
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the "
                        "module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
        [
            "Embedding setup",
            {
                "name": "cat_idxs",
                "dispname": "Ids of categorical columns",
                "desc": "Ids of categorical columns",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_dims",
                "dispname": "Number of categories for each categorical "
                            "column",
                "desc": "Number of categories for each categorical column",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_emb_dim",
                "dispname": "Embedding dimension for each categorical "
                            "column",
                "desc": "Embedding dimension for each categorical column",
                "type": IntListType(default=[]),
            },
        ],
    ]
    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            {"name": "history_",
             "dispname": "Model training history",
             "desc": "Model training history"}
        ],
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    # No input ports; the configured model is the only output.
    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])

    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Create the skorch classifier from the node parameters and save it.

        The network is only constructed here (``fit`` is not called). It is
        attached to the "model" output together with the class descriptor,
        after which ``save()`` is called on the output.

        :param node_context: Execution context providing ``parameters`` and
            the ``"model"`` output port.
        """
        from sylib_aml.amlnets import TabularCVSplit
        from sylib_aml.tabular_net import TabNet

        model = node_context.output["model"]
        descriptor = self.__class__.descriptor
        model.set_desc(descriptor)

        torch = _torch()
        optimizers = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}
        params = descriptor.get_parameters(node_context.parameters)
        skorch = _skorch()

        def without_legacy_marker(key, fallback):
            # Swap the legacy "empty" sentinel for the given fallback.
            value = params[key]
            if _is_old_empty(value):
                return fallback
            return value

        use_cv = params["cross_validation"]

        if use_cv:
            train_split = TabularCVSplit(5)
        else:
            train_split = None

        # Early stopping monitors 'valid_loss', so it is only enabled
        # together with cross validation.
        if params["early_stopping"] and use_cv:
            callbacks = [
                skorch.callbacks.EarlyStopping(
                    monitor="valid_loss", patience=5, threshold=0.0001)
            ]
        else:
            callbacks = None

        net = skorch.NeuralNetClassifier(
            TabNet,
            max_epochs=params["max_epochs"],
            lr=params["lr"],
            train_split=train_split,
            callbacks=callbacks,
            module__mask_type="entmax",
            criterion=torch.nn.CrossEntropyLoss,
            batch_size=None,
            optimizer=optimizers[params["optimizer"]],
            module__cat_idxs=without_legacy_marker("cat_idxs", []),
            module__cat_dims=without_legacy_marker("cat_dims", []),
            # Unlike the two settings above, a legacy-empty embedding
            # dimension falls back to [1] rather than [].
            module__cat_emb_dim=without_legacy_marker("cat_emb_dim", [1]),
            module__output_dim=1,
            predict_nonlinearity=None,
            verbose=0,
        )
        model.set_skl(net)
        model.save()
class TabularRegressor(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetRegressor`` around the
    # project's ``TabNet`` module and writes it to the "model" output port.
    # The class docstring is generated from the descriptor (see ``__doc__``
    # below), so the class is documented with comments instead.
    name = "Tabular Regressor (Experimental)"
    nodeid = "com.sympathyfordata.advancedmachinelearning.tabularregressor"
    author = "Jannes Germishuys"
    icon = "table_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)

    # NOTE(review): this regressor uses MLPClassifierDescriptor while the
    # classifier nodes in this file use SkorchDescriptor -- confirm that
    # this is intended.
    descriptor = MLPClassifierDescriptor()
    description = (
        "Skorch Table Regressor "
        "https://skorch.readthedocs.io/en/stable/regressor.html"
    )
    descriptor.name = name

    # Two parameter groups: training hyperparameters ("Architecture") and
    # the categorical-column settings forwarded to the module
    # ("Embedding setup").
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the "
                        "module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
        [
            "Embedding setup",
            {
                "name": "cat_idxs",
                "dispname": "Ids of categorical columns",
                "desc": "Ids of categorical columns",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_dims",
                "dispname": "Number of categories for each categorical "
                            "column",
                "desc": "Number of categories for each categorical column",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_emb_dim",
                "dispname": "Embedding dimension for each categorical "
                            "column",
                "desc": "Embedding dimension for each categorical column",
                "type": IntListType(default=[]),
            },
        ],
    ]
    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            # "dispname" added so this attribute is declared the same way
            # as the identical attribute on the classifier nodes.
            {"name": "history_",
             "dispname": "Model training history",
             "desc": "Model training history"}
        ]
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    # No input ports; the configured model is the only output.
    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])

    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Create the skorch regressor from the node parameters and save it.

        The network is only constructed here (``fit`` is not called). It is
        attached to the "model" output together with the class descriptor,
        after which ``save()`` is called on the output.

        :param node_context: Execution context providing ``parameters`` and
            the ``"model"`` output port.
        """
        from sylib_aml.tabular_net import TabNet

        model = node_context.output["model"]
        descriptor = self.__class__.descriptor
        model.set_desc(descriptor)

        torch = _torch()
        optimizers = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}
        params = descriptor.get_parameters(node_context.parameters)
        skorch = _skorch()

        def without_legacy_marker(key):
            # Swap the legacy "empty" sentinel for an empty list.
            value = params[key]
            return [] if _is_old_empty(value) else value

        use_cv = params["cross_validation"]

        train_split = None
        if use_cv:
            # skorch renamed ``CVSplit`` to ``ValidSplit`` and later removed
            # the old name; prefer the new one and fall back for older
            # skorch releases.
            split_cls = getattr(skorch.dataset, "ValidSplit", None)
            if split_cls is None:
                split_cls = skorch.dataset.CVSplit
            train_split = split_cls(5)

        # Early stopping monitors 'valid_loss', so it is only enabled
        # together with cross validation (without a validation split there
        # is presumably no validation loss to monitor).
        callbacks = None
        if params["early_stopping"] and use_cv:
            callbacks = [
                skorch.callbacks.EarlyStopping(
                    monitor="valid_loss", patience=5, threshold=0.0001)
            ]

        net = skorch.NeuralNetRegressor(
            TabNet,
            max_epochs=params["max_epochs"],
            lr=params["lr"],
            train_split=train_split,
            callbacks=callbacks,
            criterion=torch.nn.MSELoss,
            optimizer=optimizers[params["optimizer"]],
            batch_size=None,
            module__cat_idxs=without_legacy_marker("cat_idxs"),
            module__cat_dims=without_legacy_marker("cat_dims"),
            module__cat_emb_dim=without_legacy_marker("cat_emb_dim"),
            module__output_dim=1,
            verbose=0,
        )
        model.set_skl(net)
        model.save()