# Source code for node_DecisionTreeClassifier

# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import inspect
import sklearn
import sklearn.tree

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags

from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.decisiontrees import DecisionTreeDescriptor
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.utility import names_from_x

from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import NoneType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib.machinelearning.descriptors import UnionType



# [docs]
class DecisionTreeClassifier(SyML_abstract, node.Node):
    # Sympathy node that builds an (unfitted) scikit-learn
    # ``DecisionTreeClassifier`` from the configured parameters and writes it
    # to the 'model' output port.
    #
    # NOTE: no literal class docstring is given on purpose; ``__doc__`` is
    # generated further down from ``description`` and the descriptor so that
    # the node documentation always matches the declared parameters.
    name = 'Decision Tree Classifier'
    author = 'Mathias Broxvall'
    icon = 'tree.svg'
    # Adjacent string literals are concatenated verbatim, so every fragment
    # except the last must end with an explicit space.
    description = (
        'Decision Trees (DTs) are a non-parametric supervised learning '
        'method used for classification and regression. The goal is to '
        'create a model that predicts the value of a target variable by '
        'learning simple decision rules inferred from the data features.')
    nodeid = 'org.sysess.sympathy.machinelearning.decision_tree_classifier'
    tags = Tags(Tag.MachineLearning.Supervised)

    # Test for existence of 'min_impurity_decrease' parameter
    # (scikit-learn 0.19+)
    param_impurity_decrease = (
        'min_impurity_decrease' in inspect.signature(
            sklearn.tree.DecisionTreeClassifier.__init__).parameters
    )

    descriptor = DecisionTreeDescriptor()
    descriptor.name = name
    # Each inner list is one parameter group: a group title followed by one
    # dict per parameter ('name' is the scikit-learn keyword argument).
    info = [
        [
            "Tree options",
            {'name': 'max_depth',
             'dispname': 'Maximum tree depth',
             'type': UnionType([IntType(min_value=1), NoneType()], default=3)},
            {'name': 'criterion',
             'dispname': 'Split quality criterion',
             'type': StringSelectionType(['gini', 'entropy'])},
            {'name': 'max_features',
             'dispname': 'Number of features to consider',
             'type': UnionType([IntType(min_value=1),
                                FloatType(min_value=0, max_value=1),
                                NoneType(),
                                StringSelectionType(['auto', 'sqrt', 'log2'])],
                               default=None)},
            {'name': 'min_samples_split',
             'dispname': 'Minimum samples required to split',
             'type': UnionType([IntType(min_value=0), FloatType(
                 min_value=0, max_value=1)], default=2)},
            {'name': 'min_samples_leaf',
             'dispname': 'Minimum samples required for leaf node',
             'type': UnionType([IntType(min_value=0), FloatType(
                 min_value=0, max_value=1)], default=1)},
            {'name': 'max_leaf_nodes',
             'dispname': 'Maximum of leaf nodes',
             'type': UnionType([IntType(min_value=0), NoneType()],
                               default=None)},
        ],
        [
            "Advanced options",
            {'name': 'min_weight_fraction_leaf',
             'dispname': 'Min. weighted fraction of weights for leaf node',
             'type': FloatType(default=0.)},
            {'name': 'splitter',
             'dispname': 'Splitting strategy',
             'type': StringSelectionType(['best', 'random'])},
        ] + ([
            # Only offered when the installed scikit-learn accepts it;
            # otherwise the keyword would be rejected by the constructor
            # call in execute().
            {'name': 'min_impurity_decrease',
             'dispname': 'Node splitting threshold',
             'type': FloatType(default=0.)},
        ] if param_impurity_decrease else []),
        [
            "Model state",
            # {'name': 'presort',
            # 'dispname': 'Presort data',
            # 'type': BoolType(default=False)},
            {'name': 'random_state',
             'dispname': 'Random seed',
             'type': UnionType([NoneType(), IntType()], default=None)},
        ]
    ]

    descriptor.set_info(info, doc_class=sklearn.tree.DecisionTreeClassifier)

    # Attributes of the scikit-learn estimator that the descriptor exposes.
    descriptor.set_attributes([
        {'name': 'classes_'},
        {'name': 'feature_importances_', 'cnames': names_from_x},
        {'name': 'max_features_'},
        {'name': 'n_classes_'},
        {'name': 'n_features_'},
        {'name': 'n_outputs_'},
    ], doc_class=sklearn.tree.DecisionTreeClassifier)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    # The node takes no inputs; it only emits the configured model.
    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(description, descriptor.info,
                                               descriptor.attributes, inputs,
                                               outputs)

    def execute(self, node_context):
        """Create the configured classifier and save it to the output port.

        The keyword arguments for ``sklearn.tree.DecisionTreeClassifier`` are
        obtained from the class descriptor applied to
        ``node_context.parameters``. The estimator is attached to the 'model'
        output together with the descriptor and then saved; no fitting is
        performed here.
        """
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        kwargs = desc.get_parameters(node_context.parameters)
        skl = sklearn.tree.DecisionTreeClassifier(**kwargs)
        model.set_skl(skl)
        model.save()
# Source code for node_DecisionTreeClassifier
#
# Sympathy for Data
#
# Navigation
#
# Related Topics