# Source code for node_neuralnetwork

# This file is part of Sympathy for Data.
# Copyright (c) 2021 Combine Control Systems
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
from sympathy.utils.pip_util import import_optional

from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.neuralnetwork import (
    MLPClassifierDescriptor,
    SkorchDescriptor,
)

from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntListType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import StringSelectionType


_old_empty = 999


def _is_old_empty(value):
    return value in [_old_empty, [_old_empty]]


def _torch():
    """Resolve the optional ``torch`` package through ``import_optional``.

    The lookup happens on every call, so importing this module does not
    itself require ``torch``.
    """
    torch_module = import_optional("torch", group="torch")
    return torch_module


def _skorch():
    """Resolve the optional ``skorch`` package through ``import_optional``.

    ``skorch`` is requested as part of the ``"torch"`` optional group; the
    lookup happens on every call rather than at module import time.
    """
    skorch_module = import_optional("skorch", group="torch")
    return skorch_module


class BinaryImageClassifier(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetClassifier`` around the
    # ``Cnn`` module and writes it (not fitted here) to the "model" output.
    # The class docstring is generated from ``description`` and the
    # descriptor further down, hence ``__doc__`` is assigned explicitly.
    name = "Image Classifier (Experimental)"
    nodeid = ("com.sympathyfordata.advancedmachinelearning."
              "binaryimageclassifier")
    author = "Jannes Germishuys"
    icon = "image_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)
    # Implicit concatenation instead of a backslash continuation inside the
    # literal, so no stray backslash/indentation ends up in the text.
    description = (
        "Skorch Binary Image Classifier "
        "https://skorch.readthedocs.io/en/stable/classifier.html"
    )
    descriptor = SkorchDescriptor()
    descriptor.name = name

    # Parameter groups: [group title, parameter spec, parameter spec, ...].
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "batch_size",
                "dispname": "Batch size",
                "desc": "Mini-batch size. If batch size is -1, a single batch "
                        "with all the data will be used during training and "
                        "validation.",
                "type": IntType(default=64, min_value=-1),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
    ]
    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            {"name": "history_",
             "dispname": "Model training history",
             "desc": "Model training history"}
        ],
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Build the skorch classifier from the node parameters and save it.

        :param node_context: Execution context. Its ``parameters`` are read
            through the class descriptor, and its ``output["model"]`` port
            receives the descriptor and the configured estimator.
        """
        from sylib_aml.amlnets import Cnn

        model = node_context.output["model"]
        desc = self.__class__.descriptor
        model.set_desc(desc)

        # Resolve each optional dependency once instead of on every use.
        torch = _torch()
        optim_dict = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}
        kwargs = desc.get_parameters(node_context.parameters)
        skorch = _skorch()

        train_split = None
        callbacks = None
        if kwargs["cross_validation"]:
            dataset = skorch.dataset
            # Newer skorch releases expose the splitter as ``ValidSplit``;
            # ``CVSplit`` is the older, deprecated name kept as a fallback.
            split_cls = getattr(dataset, "ValidSplit", None) or dataset.CVSplit
            train_split = split_cls(5)
            # Early stopping monitors 'valid_loss', so it is only enabled
            # together with a validation split.
            if kwargs["early_stopping"]:
                callbacks = [
                    skorch.callbacks.EarlyStopping(
                        monitor='valid_loss', patience=5, threshold=0.0001)
                ]

        batch_size = kwargs["batch_size"]
        skl = skorch.NeuralNetClassifier(
            Cnn,
            callbacks=callbacks,
            max_epochs=kwargs["max_epochs"],
            lr=kwargs["lr"],
            train_split=train_split,
            optimizer=optim_dict[kwargs["optimizer"]],
            criterion=torch.nn.CrossEntropyLoss,
            batch_size=batch_size,
            iterator_train__batch_size=batch_size,
            iterator_valid__batch_size=batch_size,
            verbose=0,
        )

        model.set_skl(skl)
        model.save()
class BinaryTabularClassifier(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetClassifier`` around the
    # ``TabNet`` module and writes it (not fitted here) to the "model" output.
    # The class docstring is generated from ``description`` and the
    # descriptor further down, hence ``__doc__`` is assigned explicitly.
    name = "Tabular Classifier (Experimental)"
    nodeid = ("com.sympathyfordata.advancedmachinelearning."
              "binarytabularclassifier")
    author = "Jannes Germishuys"
    icon = "table_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)
    descriptor = SkorchDescriptor()
    # Implicit concatenation instead of a backslash continuation inside the
    # literal, so no stray backslash/indentation ends up in the text.
    description = (
        "Skorch Binary Table Classifier "
        "https://skorch.readthedocs.io/en/stable/classifier.html"
    )
    descriptor.name = name

    # Parameter groups: [group title, parameter spec, parameter spec, ...].
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
        [
            "Embedding setup",
            {
                "name": "cat_idxs",
                "dispname": "Ids of categorical columns",
                "desc": "Ids of categorical columns",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_dims",
                "dispname": "Number of categories for each categorical column",
                "desc": "Number of categories for each categorical column",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_emb_dim",
                "dispname": "Embedding dimension for each categorical column",
                "desc": "Embedding dimension for each categorical column",
                "type": IntListType(default=[]),
            },
        ],
    ]
    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            {"name": "history_",
             "dispname": "Model training history",
             "desc": "Model training history"}
        ],
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Build the skorch TabNet classifier from the parameters and save it.

        :param node_context: Execution context. Its ``parameters`` are read
            through the class descriptor, and its ``output["model"]`` port
            receives the descriptor and the configured estimator.
        """
        from sylib_aml.amlnets import TabularCVSplit
        from sylib_aml.tabular_net import TabNet

        model = node_context.output["model"]
        desc = self.__class__.descriptor
        model.set_desc(desc)

        # Resolve each optional dependency once instead of on every use.
        torch = _torch()
        optim_dict = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}
        kwargs = desc.get_parameters(node_context.parameters)
        skorch = _skorch()

        def embedding_setting(key, fallback):
            # Substitute ``fallback`` when the stored value is the legacy
            # sentinel recognised by ``_is_old_empty``.
            value = kwargs[key]
            return fallback if _is_old_empty(value) else value

        train_split = None
        callbacks = None
        if kwargs["cross_validation"]:
            train_split = TabularCVSplit(5)
            # Early stopping monitors 'valid_loss', so it is only enabled
            # together with a validation split.
            if kwargs["early_stopping"]:
                callbacks = [
                    skorch.callbacks.EarlyStopping(
                        monitor='valid_loss', patience=5, threshold=0.0001)
                ]

        # NOTE(review): ``module__output_dim=1`` is combined with
        # CrossEntropyLoss here -- confirm this matches TabNet's output layout.
        skl = skorch.NeuralNetClassifier(
            TabNet,
            max_epochs=kwargs["max_epochs"],
            lr=kwargs["lr"],
            train_split=train_split,
            callbacks=callbacks,
            module__mask_type="entmax",
            criterion=torch.nn.CrossEntropyLoss,
            batch_size=None,
            optimizer=optim_dict[kwargs["optimizer"]],
            module__cat_idxs=embedding_setting("cat_idxs", []),
            module__cat_dims=embedding_setting("cat_dims", []),
            module__cat_emb_dim=embedding_setting("cat_emb_dim", [1]),
            module__output_dim=1,
            predict_nonlinearity=None,
            verbose=0,
        )

        model.set_skl(skl)
        model.save()
class TabularRegressor(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetRegressor`` around the
    # ``TabNet`` module and writes it (not fitted here) to the "model" output.
    # The class docstring is generated from ``description`` and the
    # descriptor further down, hence ``__doc__`` is assigned explicitly.
    name = "Tabular Regressor (Experimental)"
    nodeid = "com.sympathyfordata.advancedmachinelearning.tabularregressor"
    author = "Jannes Germishuys"
    icon = "table_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)
    # NOTE(review): the sibling skorch nodes use ``SkorchDescriptor``;
    # confirm that ``MLPClassifierDescriptor`` is intended here.
    descriptor = MLPClassifierDescriptor()
    # Implicit concatenation instead of a backslash continuation inside the
    # literal, so no stray backslash/indentation ends up in the text.
    description = (
        "Skorch Table Regressor "
        "https://skorch.readthedocs.io/en/stable/regressor.html"
    )
    descriptor.name = name

    # Parameter groups: [group title, parameter spec, parameter spec, ...].
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
        [
            "Embedding setup",
            {
                "name": "cat_idxs",
                "dispname": "Ids of categorical columns",
                "desc": "Ids of categorical columns",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_dims",
                "dispname": "Number of categories for each categorical column",
                "desc": "Number of categories for each categorical column",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_emb_dim",
                "dispname": "Embedding dimension for each categorical column",
                "desc": "Embedding dimension for each categorical column",
                "type": IntListType(default=[]),
            },
        ],
    ]
    descriptor.set_info(info)
    # NOTE(review): unlike the sibling nodes, this attribute entry carries no
    # "dispname" key -- confirm whether that is intentional.
    descriptor.set_attributes(
        [
            {"name": "history_", "desc": "Model training history"}
        ]
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Build the skorch TabNet regressor from the parameters and save it.

        :param node_context: Execution context. Its ``parameters`` are read
            through the class descriptor, and its ``output["model"]`` port
            receives the descriptor and the configured estimator.
        """
        from sylib_aml.tabular_net import TabNet

        model = node_context.output["model"]
        desc = self.__class__.descriptor
        model.set_desc(desc)

        # Resolve each optional dependency once instead of on every use.
        torch = _torch()
        optim_dict = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}
        kwargs = desc.get_parameters(node_context.parameters)
        skorch = _skorch()

        def embedding_setting(key):
            # Substitute an empty list when the stored value is the legacy
            # sentinel recognised by ``_is_old_empty``.
            value = kwargs[key]
            return [] if _is_old_empty(value) else value

        train_split = None
        callbacks = None
        if kwargs["cross_validation"]:
            dataset = skorch.dataset
            # Newer skorch releases expose the splitter as ``ValidSplit``;
            # ``CVSplit`` is the older, deprecated name kept as a fallback.
            split_cls = getattr(dataset, "ValidSplit", None) or dataset.CVSplit
            train_split = split_cls(5)
            # Early stopping monitors 'valid_loss', so it is only enabled
            # together with a validation split.
            if kwargs["early_stopping"]:
                callbacks = [
                    skorch.callbacks.EarlyStopping(
                        monitor='valid_loss', patience=5, threshold=0.0001)
                ]

        skl = skorch.NeuralNetRegressor(
            TabNet,
            max_epochs=kwargs["max_epochs"],
            lr=kwargs["lr"],
            train_split=train_split,
            callbacks=callbacks,
            criterion=torch.nn.MSELoss,
            optimizer=optim_dict[kwargs["optimizer"]],
            batch_size=None,
            module__cat_idxs=embedding_setting("cat_idxs"),
            module__cat_dims=embedding_setting("cat_dims"),
            module__cat_emb_dim=embedding_setting("cat_emb_dim"),
            module__output_dim=1,
            verbose=0,
        )

        model.set_skl(skl)
        model.save()