Source code for node_neuralnetwork

# This file is part of Sympathy for Data.
# Copyright (c) 2021 Combine Control Systems
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
from sympathy.utils.pip_util import import_optional

from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.neuralnetwork import (
    MLPClassifierDescriptor,
    SkorchDescriptor,
)

from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntListType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import StringSelectionType


# Sentinel recognised by ``_is_old_empty``: a parameter equal to this number,
# or to a one-element list holding it, is swapped for a fallback list when the
# tabular nodes in this module build their models.
# NOTE(review): presumably the placeholder that older saved configurations
# stored for an "empty" integer list -- confirm.
_old_empty = 999


def _is_old_empty(value):
    """Tell whether ``value`` is one of the ``_old_empty`` sentinel forms.

    The recognised forms are the bare ``_old_empty`` number and a one-element
    list containing it.
    """
    sentinel_forms = (_old_empty, [_old_empty])
    # Identity-or-equality per candidate, i.e. the same rule ``in`` applies.
    return any(
        value is candidate or value == candidate
        for candidate in sentinel_forms
    )


def _torch():
    """Resolve ``torch`` through ``import_optional`` (group ``"torch"``).

    Callers in this module use the result as the torch package, e.g. for
    ``.optim`` and ``.nn``.
    """
    torch_module = import_optional("torch", group="torch")
    return torch_module


def _skorch():
    """Resolve ``skorch`` through ``import_optional`` (group ``"torch"``).

    Callers in this module use the result as the skorch package, e.g. for
    ``.NeuralNetClassifier``, ``.callbacks`` and ``.dataset``.
    """
    skorch_module = import_optional("skorch", group="torch")
    return skorch_module



[docs]
class BinaryImageClassifier(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetClassifier`` around the
    # project's ``Cnn`` module and stores it on the "model" output port.
    # ``execute`` only builds the estimator; it does not call ``fit``.
    # The class ``__doc__`` is generated further down from ``description`` and
    # the descriptor, so no literal docstring is written here.
    name = "Image Classifier (Experimental)"
    nodeid = ("com.sympathyfordata.advancedmachinelearning."
              "binaryimageclassifier")
    author = "Jannes Germishuys"
    icon = "image_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)
    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation embedded the next line's indentation in the text.
    description = (
        "Skorch Binary Image Classifier "
        "https://skorch.readthedocs.io/en/stable/classifier.html"
    )
    descriptor = SkorchDescriptor()
    descriptor.name = name

    # Parameter groups: first item is the group title, the rest describe one
    # parameter each (name, display name, description, type with default).
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "batch_size",
                "dispname": "Batch size",
                "desc": "Mini-batch size. If batch size is -1, a single batch "
                        "with all the data will be used during training and "
                        "validation.",
                "type": IntType(default=64, min_value=-1),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
    ]

    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            {"name": "history_",
             "dispname": "Model training history",
             "desc": "Model training history"}
        ],
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Build the skorch classifier and save it on the "model" output.

        Parameter values are read from ``node_context.parameters`` through
        the class descriptor and forwarded to ``NeuralNetClassifier``.
        """
        from sylib_aml.amlnets import Cnn

        m = node_context.output["model"]
        desc = self.__class__.descriptor
        m.set_desc(desc)

        torch = _torch()
        optim_dict = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}

        kwargs = desc.get_parameters(node_context.parameters)
        skorch = _skorch()

        train_split = None
        if kwargs["cross_validation"]:
            # Newer skorch names this class ValidSplit; CVSplit is the old
            # name. Prefer the new one and fall back for old installations.
            dataset = skorch.dataset
            split_cls = getattr(dataset, "ValidSplit", None) or dataset.CVSplit
            train_split = split_cls(5)

        # Early stopping monitors 'valid_loss' and is only installed together
        # with a validation split; without cross validation the option has no
        # effect (presumably because no validation loss is recorded then).
        callbacks = None
        if kwargs["early_stopping"] and kwargs["cross_validation"]:
            callbacks = [skorch.callbacks.EarlyStopping(
                monitor='valid_loss', patience=5,
                threshold=0.0001)]

        skl = skorch.NeuralNetClassifier(
            Cnn,
            callbacks=callbacks,
            max_epochs=kwargs["max_epochs"],
            lr=kwargs["lr"],
            train_split=train_split,
            optimizer=optim_dict[kwargs["optimizer"]],
            criterion=torch.nn.CrossEntropyLoss,
            batch_size=kwargs["batch_size"],
            iterator_train__batch_size=kwargs["batch_size"],
            iterator_valid__batch_size=kwargs["batch_size"],
            verbose=0,
        )

        m.set_skl(skl)
        m.save()




[docs]
class BinaryTabularClassifier(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetClassifier`` around the
    # project's ``TabNet`` module and stores it on the "model" output port.
    # ``execute`` only builds the estimator; it does not call ``fit``.
    # The class ``__doc__`` is generated further down from ``description`` and
    # the descriptor, so no literal docstring is written here.
    name = "Tabular Classifier (Experimental)"
    nodeid = ("com.sympathyfordata.advancedmachinelearning."
              "binarytabularclassifier")
    author = "Jannes Germishuys"
    icon = "table_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)
    descriptor = SkorchDescriptor()
    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation embedded the next line's indentation in the text.
    description = (
        "Skorch Binary Table Classifier "
        "https://skorch.readthedocs.io/en/stable/classifier.html"
    )
    descriptor.name = name

    # Parameter groups: first item is the group title, the rest describe one
    # parameter each (name, display name, description, type with default).
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
        [
            "Embedding setup",
            {
                "name": "cat_idxs",
                "dispname": "Ids of categorical columns",
                "desc": "Ids of categorical columns",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_dims",
                "dispname": "Number of categories for each categorical column",
                "desc": "Number of categories for each categorical column",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_emb_dim",
                "dispname": "Embedding dimension for each categorical column",
                "desc": "Embedding dimension for each categorical column",
                "type": IntListType(default=[]),
            },
        ],
    ]

    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            {"name": "history_",
             "dispname": "Model training history",
             "desc": "Model training history"}
        ],
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Build the skorch classifier and save it on the "model" output.

        Parameter values are read from ``node_context.parameters`` through
        the class descriptor and forwarded to ``NeuralNetClassifier``; the
        ``cat_*`` values are passed on to the ``TabNet`` module.
        """
        from sylib_aml.amlnets import TabularCVSplit
        from sylib_aml.tabular_net import TabNet

        m = node_context.output["model"]
        desc = self.__class__.descriptor
        m.set_desc(desc)

        torch = _torch()
        optim_dict = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}
        kwargs = desc.get_parameters(node_context.parameters)
        skorch = _skorch()

        train_split = None
        if kwargs["cross_validation"]:
            train_split = TabularCVSplit(5)

        # Early stopping monitors 'valid_loss' and is only installed together
        # with a validation split; without cross validation the option has no
        # effect (presumably because no validation loss is recorded then).
        callbacks = None
        if kwargs["early_stopping"] and kwargs["cross_validation"]:
            callbacks = [skorch.callbacks.EarlyStopping(
                monitor='valid_loss', patience=5,
                threshold=0.0001)]

        # Values matching the old "empty" sentinel are replaced by fallbacks.
        cat_idxs = kwargs["cat_idxs"]
        if _is_old_empty(cat_idxs):
            cat_idxs = []
        cat_dims = kwargs["cat_dims"]
        if _is_old_empty(cat_dims):
            cat_dims = []
        cat_emb_dim = kwargs["cat_emb_dim"]
        if _is_old_empty(cat_emb_dim):
            # NOTE(review): TabularRegressor falls back to [] here instead of
            # [1]; confirm which fallback is intended.
            cat_emb_dim = [1]

        skl = skorch.NeuralNetClassifier(
            TabNet,
            max_epochs=kwargs["max_epochs"],
            lr=kwargs["lr"],
            train_split=train_split,
            callbacks=callbacks,
            module__mask_type="entmax",
            criterion=torch.nn.CrossEntropyLoss,
            batch_size=None,
            optimizer=optim_dict[kwargs["optimizer"]],
            module__cat_idxs=cat_idxs,
            module__cat_dims=cat_dims,
            module__cat_emb_dim=cat_emb_dim,
            module__output_dim=1,
            predict_nonlinearity=None,
            verbose=0,
        )

        m.set_skl(skl)
        m.save()




[docs]
class TabularRegressor(SyML_abstract, node.Node):
    # Node that configures a skorch ``NeuralNetRegressor`` around the
    # project's ``TabNet`` module and stores it on the "model" output port.
    # ``execute`` only builds the estimator; it does not call ``fit``.
    # The class ``__doc__`` is generated further down from ``description`` and
    # the descriptor, so no literal docstring is written here.
    name = "Tabular Regressor (Experimental)"
    nodeid = "com.sympathyfordata.advancedmachinelearning.tabularregressor"
    author = "Jannes Germishuys"
    icon = "table_classifier.svg"
    tags = Tags(Tag.MachineLearning.Apply)
    # NOTE(review): the two classifier nodes in this module use
    # SkorchDescriptor; confirm MLPClassifierDescriptor is intended here.
    descriptor = MLPClassifierDescriptor()
    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation embedded the next line's indentation in the text.
    description = (
        "Skorch Table Regressor "
        "https://skorch.readthedocs.io/en/stable/regressor.html"
    )
    descriptor.name = name

    # Parameter groups: first item is the group title, the rest describe one
    # parameter each (name, display name, description, type with default).
    info = [
        [
            "Architecture",
            {
                "name": "max_epochs",
                "dispname": "Maximum number of epochs",
                "desc": "The number of epochs to train for each fit.",
                "type": IntType(default=20, min_value=0),
            },
            {
                "name": "optimizer",
                "dispname": "Optimizer",
                "desc": "The optimizer (update rule) used to optimize the module",
                "type": StringSelectionType(["sgd", "adam"], default="adam"),
            },
            {
                "name": "lr",
                "dispname": "Learning rate",
                "desc": "Learning rate",
                "type": FloatType(default=1e-6, min_value=0.0),
            },
            {
                "name": "cross_validation",
                "dispname": "Cross validation",
                "desc": "Cross validation",
                "type": BoolType(default=False),
            },
            {
                "name": "early_stopping",
                "dispname": "Early stopping",
                "desc": "Early stopping",
                "type": BoolType(default=False),
            },
        ],
        [
            "Embedding setup",
            {
                "name": "cat_idxs",
                "dispname": "Ids of categorical columns",
                "desc": "Ids of categorical columns",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_dims",
                "dispname": "Number of categories for each categorical column",
                "desc": "Number of categories for each categorical column",
                "type": IntListType(default=[]),
            },
            {
                "name": "cat_emb_dim",
                "dispname": "Embedding dimension for each categorical column",
                "desc": "Embedding dimension for each categorical column",
                "type": IntListType(default=[]),
            },
        ],
    ]

    descriptor.set_info(info)
    descriptor.set_attributes(
        [
            {"name": "history_", "desc": "Model training history"}
        ]
    )

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort("Model", "model")])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs
    )

    def execute(self, node_context):
        """Build the skorch regressor and save it on the "model" output.

        Parameter values are read from ``node_context.parameters`` through
        the class descriptor and forwarded to ``NeuralNetRegressor``; the
        ``cat_*`` values are passed on to the ``TabNet`` module.
        """
        from sylib_aml.tabular_net import TabNet

        m = node_context.output["model"]
        desc = self.__class__.descriptor
        m.set_desc(desc)

        torch = _torch()
        optim_dict = {"sgd": torch.optim.SGD, "adam": torch.optim.Adam}

        kwargs = desc.get_parameters(node_context.parameters)
        skorch = _skorch()

        train_split = None
        if kwargs["cross_validation"]:
            # Newer skorch names this class ValidSplit; CVSplit is the old
            # name. Prefer the new one and fall back for old installations.
            dataset = skorch.dataset
            split_cls = getattr(dataset, "ValidSplit", None) or dataset.CVSplit
            train_split = split_cls(5)

        # Early stopping monitors 'valid_loss' and is only installed together
        # with a validation split; without cross validation the option has no
        # effect (presumably because no validation loss is recorded then).
        callbacks = None
        if kwargs["early_stopping"] and kwargs["cross_validation"]:
            callbacks = [skorch.callbacks.EarlyStopping(
                monitor='valid_loss', patience=5,
                threshold=0.0001)]

        # Values matching the old "empty" sentinel are replaced by [].
        cat_idxs = kwargs["cat_idxs"]
        if _is_old_empty(cat_idxs):
            cat_idxs = []
        cat_dims = kwargs["cat_dims"]
        if _is_old_empty(cat_dims):
            cat_dims = []
        cat_emb_dim = kwargs["cat_emb_dim"]
        if _is_old_empty(cat_emb_dim):
            cat_emb_dim = []

        skl = skorch.NeuralNetRegressor(
            TabNet,
            max_epochs=kwargs["max_epochs"],
            lr=kwargs["lr"],
            train_split=train_split,
            callbacks=callbacks,
            criterion=torch.nn.MSELoss,
            optimizer=optim_dict[kwargs["optimizer"]],
            batch_size=None,
            module__cat_idxs=cat_idxs,
            module__cat_dims=cat_dims,
            module__cat_emb_dim=cat_emb_dim,
            module__output_dim=1,
            verbose=0,
        )

        m.set_skl(skl)
        m.save()
Source code for node_neuralnetwork

Sympathy for Data

Navigation

Related Topics