Source code for node_pipeline

# Copyright (c) 2017, System Engineering Software Society
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the System Engineering Software Society nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
# IN NO EVENT SHALL SYSTEM ENGINEERING SOFTWARE SOCIETY BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import (print_function, division, unicode_literals,
                        absolute_import)
import copy
import PySide.QtGui as QtGui
import sklearn, sklearn.base, sklearn.exceptions, sklearn.pipeline

from sympathy.api import node
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags
from sympathy.api.exceptions import SyNodeError, SyDataError, sywarn

from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.descriptors import *
from sylib.machinelearning.utility import *
from sylib.machinelearning.pipeline import PipelineDescriptor
from sylib.machinelearning.abstract_nodes import SyML_abstract

[docs]class Pipeline(node.Node): name = 'Pipeline' author = 'Mathias Broxvall' copyright = '(C) 2017 System Engineering Software Society' version = '0.1' icon = 'pipeline.svg' description = 'Applies one model on the output of another' nodeid = 'org.sysess.sympathy.machinelearning.pipeline' tags = Tags(Tag.MachineLearning.Apply) inputs = Ports([ModelPort('models','models', n=(2,)),]) outputs = Ports([ModelPort('Output model','out-model')]) descriptor = PipelineDescriptor() descriptor.name = name descriptor.set_info([]) parameters = node.parameters() parameters.set_string( 'names', value='', label='Model names: ', description=( 'Comma separated list of model names, eg. Rescale, SVC. ' 'If fewer names are given than models then default names will be used.')) parameters.set_boolean( 'flatten', value='True', label='Flatten: ', description=( 'Flattens multiple pipeline objects into a single pipeline ' 'containing all models')) __doc__ = SyML_abstract.generate_docstring2( description, parameters, inputs, outputs) def execute(self, node_context): out_model = node_context.output['out-model'] models = node_context.input.group('models') names_raw = node_context.parameters['names'].value flatten = node_context.parameters['flatten'].value name_list = [x.strip() for x in names_raw.split(',')] name_list = list(filter(lambda x: x != "", name_list)) if len(name_list) < len(models): for i in range(len(name_list), len(models)): model = models[i] model.load() desc = model.get_desc() name_list.append(desc.name) else: name_list = name_list[:len(models)] descs = [] skls = [] names = [] for i, model in enumerate(models): model.load() desc = model.get_desc() if flatten and isinstance(desc, PipelineDescriptor): names += [tpl[0] for tpl in desc.models] descs += [tpl[1] for tpl in desc.models] skls += [tpl[2] for tpl in desc.models] else: names.append(name_list[i]) descs.append(model.get_desc()) skls.append(model.get_skl()) desc = self.__class__.descriptor.new() out_model.set_desc(desc) out_model.set_skl(sklearn.pipeline.Pipeline(list(zip(names, skls)))) desc.models = list(zip(names, descs, skls)) out_model.save()
[docs]class SplitPipeline(node.Node): name = 'Pipeline decomposition' author = 'Mathias Broxvall' copyright = '(C) 2017 System Engineering Software Society' version = '0.1' icon = 'pipeline_split.svg' description = 'Pick out given model from a fitted pipeline' nodeid = 'org.sysess.sympathy.machinelearning.pipeline_split' tags = Tags(Tag.MachineLearning.Apply) inputs = Ports([ModelPort('model','model'),]) outputs = Ports([ModelPort('Output model','out-model')]) parameters = node.parameters() parameters.set_string( 'name', value='A', label='Model name or index: ', description='Index (0 to N) or name of model to pick out from pipeline') __doc__ = SyML_abstract.generate_docstring2( description, parameters, inputs, outputs) def execute(self, node_context): out_model = node_context.output['out-model'] model = node_context.input['model'] name = node_context.parameters['name'].value model.load() desc = model.get_desc() out_desc = None, None try: index = int(name) _, out_desc, out_skl = desc.models[index] except ValueError: for n, d, s in desc.models: if n == name: out_desc, out_skl = d, s break if out_desc is not None: out_model.set_desc(out_desc) out_model.set_skl(out_skl) out_model.save()