# This file is part of Sympathy for Data.
# Copyright (c) 2023, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
"""
Some of the docstrings for this module have been automatically
extracted from the `scikit-learn <http://scikit-learn.org/>`_ library
and are covered by their respective licenses.
"""
import sklearn
import sklearn.cluster
from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags
from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.descriptors import Descriptor
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import StringSelectionType
from sylib_aml.clustering2 import DBScanDescriptor
from packaging import version as pversion
sklearn_version = pversion.parse(sklearn.__version__)
# --- DBSCAN clustering node ---
class DBScan(SyML_abstract, node.Node):
    # Node that configures an sklearn.cluster.DBSCAN estimator from the node
    # parameters and stores it, unfitted, on the 'model' output port.
    #
    # The class docstring (__doc__) is generated further down from the
    # descriptor, so the documentation here is written as comments.
    name = 'Density based spatial clustering (DBSCAN)'
    author = 'Mathias Broxvall'
    icon = 'DBScan.svg'
    description = (
        'Finds core samples of high density and expands clusters from them. '
        'Good for data which contains clusters of similar density. '
        'This model can be given to predict directly without using fit.'
    )
    nodeid = 'com.sympathyfordata.advancedmachinelearning.dbscan'
    tags = Tags(Tag.MachineLearning.Unsupervised)

    descriptor = DBScanDescriptor()
    descriptor.name = name

    # Parameter layout: a list of groups, each group being a heading string
    # followed by the parameter specifications shown under that heading.
    info = [
        [
            "Model",
            # NOTE(review): min_value=0 lets the user select eps == 0;
            # confirm that the estimator accepts it.
            {'name': 'eps',
             'dispname': 'Max distance between two samples',
             'type': FloatType(min_value=0, default=0.5)},
            {'name': 'min_samples',
             'dispname': 'Min samples for core point',
             'type': IntType(default=5)},
            {'name': 'metric',
             'dispname': 'Metric',
             'type': StringSelectionType([
                 'euclidean', 'cityblock', 'cosine', 'l1', 'l2', 'manhattan',
                 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice',
                 'hamming', 'jaccard', 'mahalanobis',
                 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
                 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'],
                 default='euclidean')},
        ],
        [
            "Advanced",
            {'name': 'algorithm',
             'dispname': 'Algorithm',
             'type': StringSelectionType(
                 ['auto', 'ball_tree', 'kd_tree', 'brute'],
                 default='auto')},
            {'name': 'leaf_size',
             'dispname': 'Leaf size (for BallTree or cKDTree)',
             'type': IntType(default=30)},
            {'name': 'p',
             'dispname': 'Minkowski metric power',
             'type': FloatType(min_value=1.0, default=2.0)},
            {'name': 'n_jobs',
             'dispname': 'Number of jobs',
             'type': IntType(min_value=-1, default=1)},
        ]
    ]

    # Parameter and attribute documentation is looked up on the
    # scikit-learn class passed as doc_class.
    descriptor.set_info(info, doc_class=sklearn.cluster.DBSCAN)
    descriptor.set_attributes([
        {'name': attr_name} for attr_name in [
            'core_sample_indices_', 'components_', 'labels_'
        ]], doc_class=sklearn.cluster.DBSCAN)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])

    __doc__ = SyML_abstract.generate_docstring(
        '', descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the DBSCAN estimator and save it to the output model.

        The estimator is constructed from the configured node parameters
        and is not fitted here.
        """
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        # Translate the node parameters into DBSCAN keyword arguments.
        kwargs = desc.get_parameters(node_context.parameters)
        model.set_skl(sklearn.cluster.DBSCAN(**kwargs))
        model.save()
# --- Agglomerative clustering node ---
class AgglomerativeClustering(SyML_abstract, node.Node):
    # Node that configures an sklearn.cluster.AgglomerativeClustering
    # estimator from the node parameters and stores it, unfitted, on the
    # 'model' output port.
    #
    # The class docstring (__doc__) is generated further down from the
    # descriptor, so the documentation here is written as comments.
    name = 'Agglomerative Clustering'
    author = 'Emil Staf'
    icon = 'agglomerative_clustering.svg'
    description = (
        'Recursively merges the pair of clusters that minimally increases a '
        'given linkage distance.')
    nodeid = 'com.sympathyfordata.advancedmachinelearning' \
        '.agglomerativeclustering'
    tags = Tags(Tag.MachineLearning.Unsupervised)

    descriptor = Descriptor()
    descriptor.name = name

    info = [
        {'name': 'n_clusters',
         'dispname': 'Number of clusters',
         'type': IntType(default=2)},
        # Exposed as 'affinity'; execute() renames it to 'metric' for
        # scikit-learn >= 1.2.0.
        {'name': 'affinity',
         'dispname': 'Affinity',
         'type': StringSelectionType(
             ['euclidean', 'l1', 'l2', 'manhattan', 'cosine', 'precomputed'],
             default='euclidean')},
        # Not exposed (yet):
        # memory?
        # connectivity?
        # compute_full_tree (only useful when specifying a connectivity
        # matrix)
        {'name': 'linkage',
         'dispname': 'Linkage',
         'type': StringSelectionType(
             ['ward', 'complete', 'average', 'single'],
             default='ward')},
    ]

    # Parameter and attribute documentation is looked up on the
    # scikit-learn class passed as doc_class.
    descriptor.set_info(
        info, doc_class=sklearn.cluster.AgglomerativeClustering)
    descriptor.set_attributes([
        {'name': attr_name} for attr_name in [
            'labels_',
            'n_leaves_',
            # The estimator has no 'n_components' attribute. The fitted
            # attribute was called 'n_components_' until scikit-learn 0.21
            # renamed it to 'n_connected_components_'.
            ('n_connected_components_'
             if sklearn_version >= pversion.Version('0.21')
             else 'n_components_'),
            'children_',
        ]], doc_class=sklearn.cluster.AgglomerativeClustering)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])

    __doc__ = SyML_abstract.generate_docstring(
        '', descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        """Create the clustering estimator and save it to the output model.

        The estimator is constructed from the configured node parameters
        and is not fitted here.
        """
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        kwargs = desc.get_parameters(node_context.parameters)

        # From scikit-learn 1.2.0 the constructor argument is named
        # 'metric' instead of 'affinity'; pass the value under the new name.
        params_120 = sklearn_version >= pversion.Version('1.2.0')
        if params_120:
            kwargs['metric'] = kwargs.pop('affinity')

        model.set_skl(sklearn.cluster.AgglomerativeClustering(**kwargs))
        model.save()