Source code for node_isolationforest

# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import warnings

import sklearn
# Ignore a warning from numpy>=1.15.2 when importing sklearn.ensemble
# See issue #2768 for details.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', DeprecationWarning)
    import sklearn.ensemble

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags

from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.decisiontrees import IsolationForestDescriptor

from sylib.machinelearning.descriptors import BoolType
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import NoneType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib.machinelearning.descriptors import UnionType



[docs]
class IsolationForest(SyML_abstract, node.Node):
    # Node that builds an (unfitted) sklearn.ensemble.IsolationForest from the
    # configured parameters and writes it to the 'model' output port.
    #
    # NOTE: no class docstring is written here on purpose; ``__doc__`` is
    # generated further down from the descriptor information.
    name = 'Isolation Forest'
    author = 'Mathias Broxvall'
    icon = 'isolation_forest.svg'
    description = (
        'Predicts outliers based on minimum path length of random trees with '
        'single nodes in the leafs.')
    nodeid = 'org.sysess.sympathy.machinelearning.isolation_forest'
    tags = Tags(Tag.MachineLearning.Unsupervised)

    descriptor = IsolationForestDescriptor()
    descriptor.name = name

    # Parameter definitions, grouped under the headings "Model" and "Solver".
    # Each entry's 'name' is presumably the keyword argument forwarded to
    # sklearn.ensemble.IsolationForest in execute() -- verify against
    # descriptor.get_parameters.
    info = [
        [
            "Model",
            # An ensemble needs at least one estimator; scikit-learn raises
            # an error at fit time for n_estimators <= 0, so the lower bound
            # is 1 rather than 0.
            {'name': 'n_estimators',
             'dispname': 'Number of estimators',
             'type': IntType(min_value=1, default=100)},
            {'name': 'max_samples',
             'dispname': 'Number of samples',
             'type': UnionType([
                 IntType(),
                 FloatType(),
                 StringSelectionType(['auto'])],
                               default='auto'),
             'desc': (
                 'The number of samples to draw from X to train each base '
                 'estimator  expressed as number of samples (int), or a '
                 'fraction of all samples (float). If "auto" then a maximum '
                 'of 256 samples will be used (less when fewer input samples '
                 'given)'
             )},
            {'name': 'contamination',
             'dispname': 'Contamination',
             'type': FloatType(min_value=0, max_value=0.5, default=0.1)},
            {'name': 'max_features',
             'dispname': 'Number of features',
             'type': UnionType([
                 IntType(min_value=1),
                 FloatType(min_value=0.0, max_value=1.0)],
                default=1.0)},
            {'name': 'bootstrap',
             'dispname': 'Bootstrap',
             'type': BoolType(default=False)},
        ],
        [
            "Solver",
            {'name': 'n_jobs',
             'dispname': 'Number of jobs',
             'type': IntType(min_value=-1, default=1)},
            {'name': 'random_state',
             'dispname': 'Random seed',
             'type': UnionType([
                 IntType(), NoneType()], default=None)},
        ]
    ]

    # Register the parameter info and the exposed attributes on the
    # descriptor, using the scikit-learn class as documentation source.
    descriptor.set_info(info, doc_class=sklearn.ensemble.IsolationForest)

    descriptor.set_attributes([
        {'name': 'estimators_samples_'},
        {'name': 'max_samples_'},
    ], doc_class=sklearn.ensemble.IsolationForest)

    # The node parameters are generated from the descriptor, so this must
    # run after set_info() above.
    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Build the IsolationForest estimator and save it to the output.

        The estimator is only constructed from the configured parameters;
        it is not fitted here.

        Parameters
        ----------
        node_context
            Execution context providing the ``parameters`` of the node and
            the ``'model'`` output port.
        """
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        # Translate the node parameters into constructor keyword arguments.
        kwargs = desc.get_parameters(node_context.parameters)

        skl = sklearn.ensemble.IsolationForest(**kwargs)

        model.set_skl(skl)
        model.save()
Source code for node_isolationforest

Sympathy for Data

Navigation

Related Topics