Source code for node_clustering2

# This file is part of Sympathy for Data.
# Copyright (c) 2023, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
"""
Some of the docstrings for this module have been automatically
extracted from the `scikit-learn <http://scikit-learn.org/>`_ library
and are covered by their respective licenses.
"""

import sklearn
import sklearn.cluster

from sympathy.api import node
from sympathy.api.nodeconfig import Ports, Tag, Tags

from sylib.machinelearning.model import ModelPort
from sylib.machinelearning.abstract_nodes import SyML_abstract
from sylib.machinelearning.descriptors import Descriptor
from sylib.machinelearning.descriptors import FloatType
from sylib.machinelearning.descriptors import IntType
from sylib.machinelearning.descriptors import StringSelectionType

from sylib_aml.clustering2 import DBScanDescriptor

from packaging import version as pversion
sklearn_version = pversion.parse(sklearn.__version__)


class DBScan(SyML_abstract, node.Node):
    name = 'Density based spatial clustering (DBSCAN)'
    author = 'Mathias Broxvall'
    icon = 'DBScan.svg'
    description = (
        'Finds core samples of high density and expands clusters from them. '
        'Good for data which contains clusters of similar density. '
        'This model can be given to predict directly without using fit.'
    )
    nodeid = 'com.sympathyfordata.advancedmachinelearning.dbscan'
    tags = Tags(Tag.MachineLearning.Unsupervised)

    descriptor = DBScanDescriptor()
    descriptor.name = name
    info = [
        [
            "Model",
            {'name': 'eps',
             'dispname': 'Max distance between two samples',
             'type': FloatType(min_value=0, default=0.5)},
            {'name': 'min_samples',
             'dispname': 'Min samples for core point',
             'type': IntType(default=5)},
            {'name': 'metric',
             'dispname': 'Metric',
             'type': StringSelectionType([
                 'euclidean', 'cityblock', 'cosine', 'l1', 'l2', 'manhattan',
                 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice',
                 'hamming', 'jaccard', 'mahalanobis', 'minkowski',
                 'rogerstanimoto', 'russellrao', 'seuclidean',
                 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'],
                 default='euclidean')},
        ],
        [
            "Advanced",
            {'name': 'algorithm',
             'dispname': 'Algorithm',
             'type': StringSelectionType(
                 ['auto', 'ball_tree', 'kd_tree', 'brute'], default='auto')},
            {'name': 'leaf_size',
             'dispname': 'Leaf size (for BallTree or cKDTree)',
             'type': IntType(default=30)},
            {'name': 'p',
             'dispname': 'Minkowski metric power',
             'type': FloatType(min_value=1.0, default=2.0)},
            {'name': 'n_jobs',
             'dispname': 'Number of jobs',
             'type': IntType(min_value=-1, default=1)},
        ]
    ]

    descriptor.set_info(info, doc_class=sklearn.cluster.DBSCAN)

    descriptor.set_attributes([
        {'name': attr_name} for attr_name in [
            'core_sample_indices_', 'components_', 'labels_'
        ]], doc_class=sklearn.cluster.DBSCAN)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        '', descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        kwargs = self.__class__.descriptor.get_parameters(
            node_context.parameters)
        skl = sklearn.cluster.DBSCAN(**kwargs)

        model.set_skl(skl)
        model.save()
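
# Illustrative sketch, not part of the shipped node: how the sklearn
# estimator configured by DBScan above can be used on its own, with the
# default values declared in `info`. The helper name and the toy data are
# hypothetical and exist only for this example.
def _example_dbscan_usage():
    import numpy as np

    # Six nearby points form one dense cluster; the last point is an outlier.
    X = np.array([
        [1.0, 2.0], [1.1, 2.1], [0.9, 1.9],
        [1.05, 2.05], [0.95, 1.95], [1.0, 2.1],
        [8.0, 8.0]])
    skl = sklearn.cluster.DBSCAN(
        eps=0.5, min_samples=5, metric='euclidean',
        algorithm='auto', leaf_size=30, p=2.0, n_jobs=1)
    labels = skl.fit_predict(X)  # noise points are labelled -1
    # After fitting, the attributes exposed by the node are available:
    # skl.core_sample_indices_, skl.components_ and skl.labels_.
    return labels
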
class AgglomerativeClustering(SyML_abstract, node.Node):
    name = 'Agglomerative Clustering'
    author = 'Emil Staf'
    icon = 'agglomerative_clustering.svg'
    description = (
        'Recursively merges the pair of clusters that minimally increases a '
        'given linkage distance.')
    nodeid = 'com.sympathyfordata.advancedmachinelearning' \
             '.agglomerativeclustering'
    tags = Tags(Tag.MachineLearning.Unsupervised)

    descriptor = Descriptor()
    descriptor.name = name
    info = [
        {'name': 'n_clusters',
         'dispname': 'Number of clusters',
         'type': IntType(default=2)},
        {'name': 'affinity',
         'dispname': 'Affinity',
         'type': StringSelectionType(
             ['euclidean', 'l1', 'l2', 'manhattan', 'cosine', 'precomputed'],
             default='euclidean')},
        # memory?
        # connectivity?
        # compute_full_tree (only useful when specifying a connectivity
        # matrix)
        {'name': 'linkage',
         'type': StringSelectionType(
             ['ward', 'complete', 'average', 'single'], default='ward')},
    ]

    descriptor.set_info(
        info, doc_class=sklearn.cluster.AgglomerativeClustering)

    descriptor.set_attributes([
        {'name': attr_name} for attr_name in [
            'labels_', 'n_leaves_', 'n_components', 'children_'
        ]], doc_class=sklearn.cluster.AgglomerativeClustering)

    parameters = node.parameters()
    SyML_abstract.generate_parameters(parameters, descriptor)

    inputs = Ports([])
    outputs = Ports([ModelPort('Model', 'model')])
    __doc__ = SyML_abstract.generate_docstring(
        '', descriptor.info, descriptor.attributes, inputs, outputs)

    def execute(self, node_context):
        model = node_context.output['model']
        desc = self.__class__.descriptor
        model.set_desc(desc)

        kwargs = self.__class__.descriptor.get_parameters(
            node_context.parameters)

        params_120 = sklearn_version >= pversion.Version('1.2.0')
        # Change parameter name from affinity to metric
        if params_120:
            kwargs['metric'] = kwargs.pop('affinity')

        skl = sklearn.cluster.AgglomerativeClustering(**kwargs)

        model.set_skl(skl)
        model.save()
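
# Illustrative sketch, not part of the shipped node: the same affinity-to-
# metric rename that execute() performs for scikit-learn >= 1.2, shown
# against a plain sklearn estimator. The helper name and the toy data are
# hypothetical and exist only for this example.
def _example_agglomerative_usage():
    import numpy as np

    # Two well-separated pairs of points, so two clusters are expected.
    X = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.2, 4.9]])
    kwargs = {'n_clusters': 2, 'affinity': 'euclidean', 'linkage': 'ward'}
    if sklearn_version >= pversion.Version('1.2.0'):
        # scikit-learn >= 1.2 renamed the keyword argument to `metric`.
        kwargs['metric'] = kwargs.pop('affinity')
    skl = sklearn.cluster.AgglomerativeClustering(**kwargs)
    labels = skl.fit_predict(X)  # one cluster label per row of X
    return labels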