Source code for node_hjoin_tables

# This file is part of Sympathy for Data.
# Copyright (c) 2013, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
import itertools
from sympathy.api import node as synode
from sympathy.api import node_helper
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags
from sympathy.api.exceptions import SyDataError


# Documentation text shared by every HJoin node in this module. It is
# concatenated with a port-specific section and TABLE_NAME_DOCS to build each
# node's ``__doc__``, so it is user-facing text.
#
# The description of 'Complement missing rows' follows the parameter
# definitions in this module, where ``mask`` is declared with ``value=True``
# (i.e. the option is checked by default).
COMMON_DOCS = """
The operation of horizontal join, or HJoin, stacks the columns in the incoming
Tables horizontally, next to each other. The columns in the output Table will
be the union of the columns from the input Tables.

If the option 'Complement missing rows' is checked (the default) then Tables
with fewer rows are padded with masked values. Otherwise, all the input Tables
must have the same number of rows.

"""

# Port-specific documentation section used in the ``__doc__`` of HJoinTable
# (the node with separate 'port1'/'port2' inputs). Describes which port wins
# when the same column name occurs in more than one input.
MULTI_PORT_DOCS = """
If a column name exists in more than one input Table the column from the lower
port will take precedence and the corresponding column from the upper port will
be lost.

The second port can be duplicated, making it possible to join several inputs
using one node. To join a dynamic number of tables use
:ref:`org.sysess.sympathy.data.table.hjointablessingle`.
"""

# Port-specific documentation section used in the ``__doc__`` of
# HJoinTablesSingle (the node taking one list of Tables). Describes which list
# element wins when the same column name occurs more than once.
SINGLE_PORT_DOCS = """
If a column name exists in more than one input Table such columns from Tables
later in the list will take precedence and the corresponding columns from Table
earlier in the list will be lost.

"""

# Trailing documentation section (reST heading + paragraph) appended to the
# ``__doc__`` of both HJoinTable and HJoinTablesSingle; explains how the name
# of the output table is chosen.
TABLE_NAME_DOCS = """
Table name
==========
The node always tries to give the output table a name, so if the chosen
port has a table without name, the other port will be used. This is
to preserve backwards compatibility.
"""


def _join_table(table1, table2, first_name, mask=False, rename=False):
    """
    Horizontally join ``table2`` into ``table1`` and return ``table1``.

    ``table1`` is modified in place: ``table2`` is joined onto it with
    ``table1.hjoin`` and its name is then overwritten with the selected name.

    Parameters
    ----------
    table1, table2
        Table objects; must provide ``number_of_rows``, ``number_of_columns``,
        ``get_name``, and (for ``table1``) ``hjoin`` and ``set_name``.
    first_name : bool
        When both tables have a name, use the name of ``table1`` if true and
        the name of ``table2`` otherwise. If only one table has a name, that
        name is used regardless of this flag.
    mask : bool
        Forwarded to ``hjoin``. When false, differing row counts are an error
        as long as both tables contain at least one column.
    rename : bool
        Forwarded to ``hjoin``.

    Raises
    ------
    SyDataError
        If the row counts differ, both tables have columns and ``mask`` is
        false.
    """
    rows1 = table1.number_of_rows()
    rows2 = table2.number_of_rows()
    if rows1 != rows2 and not mask:
        # A table without columns can be joined with anything, so the
        # mismatch only matters when both sides actually carry data.
        if table1.number_of_columns() and table2.number_of_columns():
            raise SyDataError(
                'Number of rows mismatch in tables ({} vs {})'.format(
                    rows1, rows2))

    own_name = table1.get_name()
    other_name = table2.get_name()
    if own_name and other_name:
        # Both candidates are usable: let the caller's preference decide.
        chosen_name = own_name if first_name else other_name
    else:
        # At most one usable name: fall back to whichever is set.
        chosen_name = own_name or other_name

    table1.hjoin(table2, mask=mask, rename=rename)
    table1.set_name(chosen_name)
    return table1


class HJoinTableSuper(synode.Node):
    # Common base for the HJoin nodes that take two input ports: holds the
    # shared metadata (author, icon, tags, related nodes) and the parameter
    # definitions ('mask', 'rename' and 'name').
    # Comments are used instead of a class docstring so that ``__doc__`` is
    # left exactly as before.

    author = 'Alexander Busck'
    icon = 'hjoin_table.svg'
    tags = Tags(Tag.DataProcessing.TransformStructure)
    related = [
        'org.sysess.sympathy.data.table.hjointable',
        'org.sysess.sympathy.data.table.hjointables',
        'org.sysess.sympathy.data.table.hjointablessingle',
        'org.sysess.sympathy.data.table.hsplittablenode',
    ]

    # Choices offered by the 'name' parameter; index 0 ('Lower') is the
    # default selection, see ``value=[0]`` below.
    _name_options = ['Lower', 'Upper']
    _name_lower, _name_upper = _name_options

    parameters = synode.parameters()

    # 'mask': pad shorter tables with masked values instead of requiring
    # equal row counts.
    parameters.set_boolean(
        'mask',
        value=True,
        label='Complement missing rows',
        description=(
            'Select if rows that are not represented in all '
            'Tables should be complemented with masked values'))

    # 'rename': rename clashing columns instead of overwriting them.
    parameters.set_boolean(
        'rename',
        value=False,
        label='Rename duplicate columns',
        description=(
            'If true columns with same name as an earlier column '
            'will be renamed and joined, otherwise columns will '
            'overwrite existing data'))

    # 'name': which input port provides the name of the joined table.
    parameters.set_list(
        'name',
        plist=_name_options,
        label='Input port name for joined table',
        description='Select which port decides the output table(s) names',
        value=[0],
        editor=synode.editors.combo_editor())


class HJoinTable(HJoinTableSuper):
    # Node that joins the single table on 'port1' with one or more tables on
    # 'port2' into one output table. The user-facing documentation is
    # assembled from the module-level documentation constants.
    __doc__ = COMMON_DOCS + MULTI_PORT_DOCS + TABLE_NAME_DOCS

    name = 'HJoin Table'
    nodeid = 'org.sysess.sympathy.data.table.hjointable'
    description = (
        'HJoin Tables into a single Table, '
        'stacking columns next to each other')

    # 'port1' takes exactly one table, 'port2' takes one or more.
    inputs = Ports([
        Port.Custom('table', 'Input Table', name='port1', n=(1, 1)),
        Port.Custom('table', 'Input Table', name='port2', n=(1, None)),
    ])
    outputs = Ports([
        Port.Table('Table with horizontally joined data', name='port1'),
    ])

    def execute(self, ctx):
        """
        Join all input tables and hand the result to output 'port1'.

        The first table of the 'port1' group is used as the accumulator:
        every remaining 'port1' table and then every 'port2' table is joined
        into it with ``_join_table``, which modifies the accumulator in place.
        """
        result_port = ctx.output['port1']
        params = ctx.parameters

        upper_tables = iter(ctx.input.group('port1'))
        joined = next(upper_tables)

        # Whatever is left of 'port1' comes first, followed by all of 'port2'.
        remaining = itertools.chain(upper_tables, ctx.input.group('port2'))
        for other in remaining:
            # True when the upper port ('port1') should name the result.
            prefer_upper_name = params['name'].selected == self._name_upper
            _join_table(
                joined,
                other,
                prefer_upper_name,
                mask=params['mask'].value,
                rename=params['rename'].value)

        result_port.source(joined)
# List variant of HJoinTable. The decorator is given the input port names
# ('port1', 'port2') and the output port name ('port1'); presumably it lifts
# the inherited ``execute`` to work element-wise on lists of tables -- confirm
# against the ``node_helper.list_node_decorator`` documentation.
# No class docstring is added here so that ``__doc__`` stays as it was.
@node_helper.list_node_decorator(['port1', 'port2'], ['port1'])
class HJoinTables(HJoinTable):
    name = 'HJoin Tables pairwise'
    nodeid = 'org.sysess.sympathy.data.table.hjointables'
    description = (
        'HJoin lists of Tables pairwise into a single list of Tables, '
        'stacking columns next to each other.')
class HJoinTablesSingle(synode.Node):
    # Node that joins every table of one input list into a single output
    # table. The user-facing documentation is assembled from the module-level
    # documentation constants.
    __doc__ = COMMON_DOCS + SINGLE_PORT_DOCS + TABLE_NAME_DOCS

    author = 'Greger Cronquist'
    icon = 'hjoin_table.svg'
    name = 'HJoin Tables'
    description = (
        'HJoin a list of Tables into a single Table, '
        'stacking columns next to each other')
    nodeid = 'org.sysess.sympathy.data.table.hjointablessingle'
    tags = Tags(Tag.DataProcessing.TransformStructure)
    related = [
        'org.sysess.sympathy.data.table.hjointable',
        'org.sysess.sympathy.data.table.hjointables',
        'org.sysess.sympathy.data.table.hjointablessingle',
        'org.sysess.sympathy.data.table.hsplittablenode',
    ]

    inputs = Ports([
        Port.Tables('Input Tables', name='port1'),
    ])
    outputs = Ports([
        Port.Table(
            'Table with horizontally joined data from the incoming '
            'list of Tables.',
            name='port1'),
    ])

    parameters = synode.parameters()

    # 'mask': pad shorter tables with masked values instead of requiring
    # equal row counts.
    parameters.set_boolean(
        'mask',
        value=True,
        label='Complement missing rows',
        description=(
            'Select if rows that are not represented in all '
            'Tables should be complemented with masked values'))

    # 'rename': rename clashing columns instead of overwriting them.
    parameters.set_boolean(
        'rename',
        value=False,
        label='Rename duplicate columns',
        description=(
            'If true columns with same name as an earlier column '
            'will be renamed and joined, otherwise columns will '
            'overwrite existing data'))

    def execute(self, ctx):
        """
        Join all tables from input 'port1' into the output table.

        Tables are joined in list order directly into the output, progress is
        reported before each join, and the output finally takes the name of
        the last input table. An empty input list leaves the output untouched.
        """
        tables = ctx.input['port1']
        joined = ctx.output['port1']
        params = ctx.parameters

        if not tables:
            return

        for index, table in enumerate(tables):
            # Percentage of tables handled so far (0 for the first table).
            self.set_progress((100.0 * index) / len(tables))
            joined.hjoin(
                table,
                mask=params['mask'].value,
                rename=params['rename'].value)

        joined.set_name(tables[-1].get_name())