# Source code for node_match_tables

# This file is part of Sympathy for Data.
# Copyright (c) 2013, 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import numpy as np
from sympathy.api import table
from sympathy.api import node as synode
from sympathy.api import node_helper
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags


class MatchTablesBase:
    """Common settings and length-matching logic for the Match Table nodes.

    Declares the ``fill`` parameter and implements :meth:`_match_table`,
    which truncates or extends a table so that its number of rows equals
    that of a guide table.
    """

    author = 'Greger Cronquist'
    icon = 'match_tables.svg'
    tags = Tags(Tag.DataProcessing.TransformStructure)

    parameters = synode.parameters()
    # The three option names are unpacked from the very list handed to the
    # combo editor, so the comparisons in ``_fill_column`` use exactly the
    # strings that the parameter offers.
    (_fill_repeat_last,
     _fill_zero_or_empty,
     _fill_nan_or_empty) = _fill_options = [
         'Last value', '0.0 or empty string', 'np.NaN or empty string']
    parameters.set_list(
        'fill', value=[0], label='Extend values',
        description=(
            'Specify the values to use if the input has to be extended.'),
        plist=_fill_options,
        editor=synode.editors.combo_editor())

    def _fill_column(self, input_table, name, fill_method, table_length,
                     length_diff):
        """Build the values appended to column ``name`` when extending.

        Parameters
        ----------
        input_table
            Table that is being extended; provides the column type and,
            for the 'Last value' method, the value to repeat.
        name
            Name of the column to build fill values for.
        fill_method
            One of the strings in ``_fill_options``.
        table_length
            Number of rows in ``input_table``.
        length_diff
            Number of rows to create.

        Returns
        -------
        numpy array (masked for missing values) with ``length_diff`` items.

        Raises
        ------
        AssertionError
            If ``fill_method`` is not one of the known options.
        """
        dtype = input_table.column_type(name)
        is_text = dtype.kind in ('S', 'U')

        if fill_method == self._fill_repeat_last:
            if table_length == 0:
                # An empty column has no last value to repeat; asking for
                # index -1 would presumably fail (TODO confirm against the
                # table API). Mark the new rows as missing instead, which
                # is how a masked last value is treated below.
                return np.ma.masked_all(length_diff, dtype=dtype)

            last_value = input_table.get_column_to_array(
                name, table_length - 1)
            if np.ma.is_masked(last_value):
                # A masked last value is repeated as masked values.
                return np.ma.masked_all(length_diff, dtype=dtype)
            return np.full(length_diff, last_value, dtype=dtype)

        if fill_method == self._fill_zero_or_empty:
            # np.zeros on a one-character text dtype gives empty strings;
            # every other dtype keeps its type and gets its zero value.
            if is_text:
                dtype = dtype.kind + '1'
            return np.zeros(length_diff, dtype=dtype)

        if fill_method == self._fill_nan_or_empty:
            if is_text:
                # Text columns cannot hold NaN: use empty strings.
                return np.zeros(length_diff, dtype=dtype.kind + '1')
            # NOTE(review): 'f2' is the smallest float type; presumably
            # chosen so that the later vjoin decides the resulting column
            # type -- confirm.
            return np.full(length_diff, np.nan, dtype='f2')

        raise AssertionError("Unknown extension method.")

    def _match_table(self, parameters, guide, input_table):
        """Return ``input_table`` adjusted to the row count of ``guide``.

        Parameters
        ----------
        parameters
            Node parameters; ``parameters['fill'].selected`` names the fill
            method and is only read when the table has to be extended.
        guide
            Table whose number of rows is the target length.
        input_table
            Table to truncate or extend.

        Returns
        -------
        ``input_table`` itself when the lengths already agree, a row slice
        of it when it is too long (or when the guide is empty), otherwise
        a new table made of the input rows followed by fill rows, carrying
        the attributes and name of ``input_table``.

        Raises
        ------
        AssertionError
            If the table must be extended, has at least one column and the
            selected fill method is unknown.
        """
        guide_length = guide.number_of_rows()
        table_length = input_table.number_of_rows()

        if guide_length == 0:
            return input_table[:0]
        if guide_length == table_length:
            return input_table
        if guide_length < table_length:
            return input_table[:guide_length]

        # The guide is longer: create the missing rows column by column.
        fill_method = parameters['fill'].selected
        length_diff = guide_length - table_length

        fill_table = table.File()
        for name in input_table.column_names():
            fill_table[name] = self._fill_column(
                input_table, name, fill_method, table_length, length_diff)

        output_table = table.File()
        output_table.vjoin([input_table, fill_table])
        # The output is a fresh table, so attributes and name are copied
        # over from the input explicitly.
        output_table.set_attributes(input_table.get_attributes())
        output_table.set_name(input_table.get_name())
        return output_table


class MatchTwoTables(MatchTablesBase, synode.Node):
    """
    The Table connected to the upper input port is used as reference while
    the Table coming in through the lower port is the one that is going to
    be modified. The modification can either be a contraction or an
    extension of the Table depending if it is longer or shorter than the
    reference Table, respectively. The extension will be performed
    according to one of the following strategies:

    - Use last value
    - Fill with zeroes (or empty strings/dates or similar)
    - Fill with NaNs (or None or similar)
    """

    name = 'Match Table lengths'
    nodeid = 'org.sysess.sympathy.data.table.matchtwotables'
    description = ('Ensure that two Tables match (i.e. have the same number '
                   'of rows) by adding or removing rows in one of them.')

    # 'guide' supplies the reference length, 'input' is the table that is
    # truncated or extended.
    inputs = Ports([
        Port.Table('Guide', name='guide'),
        Port.Table('Input Table', name='input')])
    outputs = Ports([Port.Table('Length matched Table', name='output')])

    def __init__(self, *args, **kwargs):
        # Pass-through constructor; all arguments go to the base classes.
        super().__init__(*args, **kwargs)

    def execute(self, node_context):
        """Match the 'input' table to the 'guide' length and write it out.

        Reads the parameters and both input ports from ``node_context`` and
        updates the 'output' port with the length-matched table.
        """
        output = self._match_table(node_context.parameters,
                                   node_context.input['guide'],
                                   node_context.input['input'])
        node_context.output['output'].update(output)
# List variant of MatchTwoTables: the decorator is given the 'guide' and
# 'input' input ports and the 'output' output port by name.
@node_helper.list_node_decorator(['guide', 'input'], ['output'])
class MatchTwoTablesMultiple(MatchTwoTables):
    name = 'Match Tables lengths'
    nodeid = 'org.sysess.sympathy.data.table.matchtwotablesmultiple'