Source code for node_match_tables

# This file is part of Sympathy for Data.
# Copyright (c) 2013, 2017, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
import numpy as np
from sympathy.api import table
from sympathy.api import node as synode
from sympathy.api import node_helper
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags


class MatchTablesBase:
    """Shared parameters and row-matching logic for the Match Table nodes.

    The class defines the ``fill`` parameter (how a too-short Table is
    extended) and the ``_match_table`` routine that resizes one Table to the
    number of rows of another.
    """

    author = 'Greger Cronquist'
    version = '1.0'
    icon = 'match_tables.svg'
    tags = Tags(Tag.DataProcessing.TransformStructure)

    parameters = synode.parameters()
    # The option labels are unpacked, in order, into one class attribute per
    # fill strategy so that the selected label can be compared by name below.
    (_fill_repeat_last,
     _fill_zero_or_empty,
     _fill_nan_or_empty) = _fill_options = [
         'Last value', '0.0 or empty string', 'np.NaN or empty string']
    parameters.set_list(
        'fill', value=[0], label='Extend values',
        description=(
            'Specify the values to use if the input has to be extended.'),
        plist=_fill_options,
        editor=synode.editors.combo_editor())

    def _fill_column(self, fill_method, input_table, name, table_length,
                     length_diff):
        """Build the values appended to column ``name`` when extending.

        Parameters
        ----------
        fill_method
            The selected ``fill`` option label.
        input_table
            Table whose column ``name`` is being extended.
        name
            Name of the column to build fill values for.
        table_length
            Number of rows currently in ``input_table``.
        length_diff
            Number of rows to add.

        Returns
        -------
        numpy array (masked array when the last value is masked) holding
        ``length_diff`` fill values.

        Raises
        ------
        AssertionError
            If ``fill_method`` is not one of the known fill options.
        """
        dtype = input_table.column_type(name)
        is_text = dtype.kind in ('S', 'U')

        if fill_method == self._fill_repeat_last:
            # NOTE(review): for an input table without rows this requests
            # row -1, i.e. there is no last value to repeat -- confirm how
            # get_column_to_array behaves in that case.
            last_value = input_table.get_column_to_array(
                name, table_length - 1)
            if np.ma.is_masked(last_value):
                # A masked last value is repeated as masked entries.
                return np.ma.masked_all(length_diff, dtype=dtype)
            return np.full(length_diff, last_value, dtype=dtype)

        if fill_method == self._fill_zero_or_empty:
            # Zero-like values; for string columns a one-character string
            # dtype is used, whose zero value is the empty string.
            zero_dtype = (dtype.kind + '1') if is_text else dtype
            return np.zeros(length_diff, dtype=zero_dtype)

        if fill_method == self._fill_nan_or_empty:
            if is_text:
                return np.zeros(length_diff, dtype=dtype.kind + '1')
            # NOTE(review): 'f2' is presumably chosen as the smallest float
            # type so that the join decides the resulting column type --
            # confirm against table.File.vjoin.
            return np.full(length_diff, np.nan, dtype='f2')

        # Raised explicitly instead of ``assert False`` so that the check is
        # still performed when Python runs with assertions disabled (-O).
        raise AssertionError('Unknown extension method.')

    def _match_table(self, parameters, guide, input_table):
        """Return ``input_table`` resized to the number of rows of ``guide``.

        Parameters
        ----------
        parameters
            Node parameters; ``parameters['fill'].selected`` chooses how
            missing rows are filled.
        guide
            Table whose number of rows is the target length.
        input_table
            Table to truncate or extend.

        Returns
        -------
        ``input_table`` itself when the lengths already match, a row slice of
        it when it is too long (or the guide is empty), otherwise a new table
        made of ``input_table`` followed by fill rows, carrying the
        attributes and name of ``input_table``.

        Raises
        ------
        AssertionError
            If rows have to be added and the selected fill option is unknown.
        """
        guide_length = guide.number_of_rows()
        table_length = input_table.number_of_rows()

        if guide_length == 0:
            return input_table[:0]
        if guide_length == table_length:
            return input_table
        if guide_length < table_length:
            return input_table[:guide_length]

        # The guide is longer: build a table of fill rows and append it.
        fill_method = parameters['fill'].selected
        length_diff = guide_length - table_length

        fill_table = table.File()
        for name in input_table.column_names():
            fill_table[name] = self._fill_column(
                fill_method, input_table, name, table_length, length_diff)

        output_table = table.File()
        output_table.vjoin([input_table, fill_table])
        output_table.set_attributes(input_table.get_attributes())
        output_table.set_name(input_table.get_name())
        return output_table


class MatchTwoTables(MatchTablesBase, synode.Node):
    """
    To compare the number of rows in two :ref:`Tables` and resize one of them,
    in order to have two Tables with equal numbers of rows, is the
    functionality of the nodes in the considered category. For example, this
    may be helpful if one would like to horizontally join two Tables with
    different numbers of rows, which is not possible according to the
    definition of a Table, see :ref:`Tables` and :ref:`HJoin Table`.

    In the procedure of the node, the Table connected to the upper of the two
    inputs is used as reference while the Table coming in through the lower
    port is the one that is going to be modified. The modification can either
    be a contraction or an extension of the Table depending on whether it is
    longer or shorter than the reference Table, respectively. The extension
    will be performed according to one of the following strategies:

        - Use last value
        - Fill with zeroes (or empty strings/dates or similar)
        - Fill with NaNs (or None or similar)

    """

    name = 'Match Table lengths'
    nodeid = 'org.sysess.sympathy.data.table.matchtwotables'
    description = ('Ensure that two Tables match (i.e. have the same number '
                   'of rows) by adding or removing rows in one of them.')

    # 'guide' is the reference Table; 'input' is the Table that is resized.
    inputs = Ports([
        Port.Table('Guide', name='guide'),
        Port.Table('Input Table', name='input')])
    outputs = Ports([Port.Table('Length matched Table', name='output')])

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def execute(self, node_context):
        """Resize the 'input' Table to the length of 'guide' and output it."""
        output = self._match_table(
            node_context.parameters,
            node_context.input['guide'],
            node_context.input['input'])
        node_context.output['output'].update(output)
# Variant of MatchTwoTables produced by node_helper.list_node_decorator for
# the 'guide' and 'input' inputs and the 'output' output.
# NOTE(review): presumably the decorator turns the named ports into list ports
# and applies the node to each pair of Tables -- confirm against the
# node_helper documentation.
@node_helper.list_node_decorator(['guide', 'input'], ['output'])
class MatchTwoTablesMultiple(MatchTwoTables):
    name = 'Match Tables lengths'
    nodeid = 'org.sysess.sympathy.data.table.matchtwotablesmultiple'