# This file is part of Sympathy for Data.
# Copyright (c) 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import sklearn.neighbors
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.descriptors import Descriptor
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib.machinelearning.descriptors import StringType
from sylib.machinelearning.model import ModelPort
from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
class KNeighborsClassifier(SyML_abstract, node.Node):
    """Neighbors-based classification is a type of instance-based learning
    or non-generalizing learning: it does not attempt to construct a general
    internal model, but simply stores instances of the training data.
    Classification is computed from a simple majority vote of the nearest
    neighbors of each point: a query point is assigned the data class which
    has the most representatives within the nearest neighbors of the point.

    The learning is based on the `k` nearest neighbors of each query point,
    where `k` is an integer value specified by the user. The optimal choice
    of the value `k` is highly data-dependent: in general a larger `k`
    suppresses the effects of noise, but makes the classification boundaries
    less distinct.
    """

    # Node metadata.
    name = 'k-Nearest Neighbors Classifier'
    author = 'Alexander Aschikhin'
    icon = 'knn.svg'
    description = 'Classifier based on the k-nearest neighbors algorithm'
    nodeid = 'org.sysess.sympathy.machinelearning.knn'
    tags = Tags(Tag.MachineLearning.Supervised)

    # The node takes no input and produces a single model on 'out-model'.
    inputs = Ports([])
    outputs = Ports([ModelPort('Output model', name='out-model')])

    descriptor = Descriptor()
    descriptor.name = name

    # Parameter groups: each inner list starts with the group label and is
    # followed by one dict per parameter. The 'name' values are forwarded as
    # keyword arguments to sklearn.neighbors.KNeighborsClassifier in
    # ``execute``, so they must match that constructor's argument names.
    info = [
        [
            'Model',
            {'name': 'n_neighbors',
             'dispname': 'Number of neighbors',
             'type': IntType(min_value=1, default=5)},
            {'name': 'weights',
             'dispname': 'Weights',
             'type': StringSelectionType(
                 ['uniform', 'distance'], default='uniform')},
            {'name': 'algorithm',
             'dispname': 'Algorithm',
             'type': StringSelectionType(
                 ['ball_tree', 'kd_tree', 'brute', 'auto'],
                 default='auto')},
        ],
        [
            'Advanced options',
            {'name': 'leaf_size',
             'dispname': 'Leaf size (for ball_tree or kd_tree)',
             'type': IntType(min_value=1, default=30)},
            {'name': 'metric',
             'dispname': 'Metric',
             'type': StringType(default='minkowski')},
            # NOTE(review): no lower bound is declared for 'p' here, while
            # scikit-learn documents it as a positive power parameter --
            # confirm whether a min_value should be added.
            {'name': 'p',
             'dispname': 'Power parameter for the Minkowski metric',
             'type': IntType(default=2)},
        ],
        [
            'Solver',
            {'name': 'n_jobs',
             'dispname': 'Number of jobs',
             'type': IntType(min_value=-1, default=1)},
        ],
    ]

    # Register the parameters (and an empty attribute list) on the
    # descriptor, naming the scikit-learn estimator as the documentation
    # class.
    descriptor.set_info(
        info, doc_class=sklearn.neighbors.KNeighborsClassifier)
    descriptor.set_attributes(
        [], doc_class=sklearn.neighbors.KNeighborsClassifier)

    # Build the node's parameter structure from the descriptor.
    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    # Extend the hand-written class docstring with the generated text.
    __doc__ += SyML_abstract.generate_docstring(
        description, descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Construct the scikit-learn classifier and save it to the output.

        The estimator is created from the node's configured parameters and
        stored, together with the class descriptor, on the 'out-model'
        port. No fitting is performed here.

        Parameters
        ----------
        node_context
            Execution context providing ``output['out-model']`` and the
            configured ``parameters``.
        """
        model = node_context.output['out-model']
        desc = type(self).descriptor
        model.set_desc(desc)

        # Translate the configured node parameters into keyword arguments
        # for the scikit-learn constructor.
        kwargs = desc.get_parameters(node_context.parameters)
        skl = sklearn.neighbors.KNeighborsClassifier(**kwargs)

        model.set_skl(skl)
        model.save()