Source code for node_merge_tables

# This file is part of Sympathy for Data.
# Copyright (c) 2013, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
"""
Merge two tables or two lists of tables (database style) using these nodes:
    - :ref:`Merge Table`
    - :ref:`Merge Tables`

Internally uses `pandas.DataFrame.merge <https://pandas.pydata.org/pandas-docs/
stable/generated/pandas.DataFrame.merge.html>`_. For more information, see that
documentation.

Essentially, this node calls:

.. code-block:: python

    pandas.merge(
        input_a, input_b, how=join_operation,
        on=index_column)

Values for Join Operation are:

    - Union, similar to SQL full outer join
    - Intersection, similar to SQL inner join
    - Index from A, similar to SQL left outer join
    - Index from B, similar to SQL right outer join

"""
from sympathy.api import node
from sympathy.api import node_helper
from sympathy.api import table
from sympathy.api import exceptions
from sympathy.api.nodeconfig import Tag, Tags, adjust, Ports, Port
import pandas
import itertools


# Maps each user-facing join operation label to the value passed as the
# ``how`` argument of ``pandas.merge``.
MERGE_OPERATIONS = {
    'Union': 'outer',
    'Intersection': 'inner',
    'Index from A': 'left',
    'Index from B': 'right',
}


class MergeTable(node.Node):
    """Merge two tables, database style, by matching an index column.

    Both inputs are converted to pandas data frames and combined with
    ``pandas.merge``. The selected 'Join operation' is translated to the
    ``how`` argument through ``MERGE_OPERATIONS`` and the selected
    'Index column' is passed as ``on``.
    """

    author = 'Greger Cronquist'
    version = '1.0'
    description = 'Merge Tables while matching an Index'
    tags = Tags(Tag.DataProcessing.TransformStructure)
    icon = 'merge.svg'
    name = 'Merge Table'
    nodeid = 'org.sysess.data.table.mergetable'

    inputs = Ports([
        Port.Table('Input A', name='Input A'),
        Port.Table('Input B', name='Input B'),
    ])
    outputs = Ports([
        Port.Table('Output', name='Output'),
    ])

    parameters = node.parameters()
    # NOTE(review): this call passes ``values=`` while the 'operation'
    # parameter below passes ``value=`` -- confirm which keyword set_list
    # expects; left unchanged here.
    parameters.set_list(
        'index', label='Index column',
        values=[0],
        description='Column with indices to match',
        editor=node.editors.combo_editor(edit=True))
    parameters.set_list(
        'operation', label='Join operation',
        description='Join operation to use when merging the two tables.',
        list=list(MERGE_OPERATIONS.keys()),
        value=[0],
        editor=node.editors.combo_editor())

    def adjust_parameters(self, ctx):
        """Adjust the 'index' parameter against the table on 'Input A'."""
        adjust(ctx.parameters['index'], ctx.input['Input A'])

    def execute(self, ctx):
        """Merge 'Input A' and 'Input B' into 'Output'.

        If exactly one input is empty, the other input is used as the
        output. If both are empty, nothing is written. Otherwise the two
        tables are merged with ``pandas.merge`` and the attributes of both
        inputs are combined, with values from 'Input B' taking precedence
        on key collisions.

        Raises:
            exceptions.SyDataError: if the merge fails and the dtype kinds
                of the index column differ between the two inputs.
            Exception: any other merge failure is re-raised unchanged.
        """
        index_param = ctx.parameters['index']
        index_column = index_param.selected
        operation = ctx.parameters['operation'].selected
        table_a = ctx.input['Input A']
        table_b = ctx.input['Input B']
        out_table = ctx.output['Output']

        if table_a.is_empty() and not table_b.is_empty():
            out_table.source(table_b)
        elif table_b.is_empty() and not table_a.is_empty():
            out_table.source(table_a)
        elif table_b.is_empty() and table_a.is_empty():
            return
        else:
            dataframe_a = table_a.to_dataframe()
            dataframe_b = table_b.to_dataframe()

            try:
                new_table = pandas.merge(
                    dataframe_a, dataframe_b,
                    how=MERGE_OPERATIONS[operation],
                    on=index_column)
            except Exception:
                # Inspect the index columns to give a clearer error when
                # the failure is likely caused by mismatching column types.
                col_a = table_a._require_column(index_param)
                col_b = table_b._require_column(index_param)

                if col_a.dtype.kind != col_b.dtype.kind:
                    # Assume problem due to unmatched types.
                    raise exceptions.SyDataError(
                        'Failed to merge, are the two index columns of the '
                        'compatible types?'
                    )
                # Not a type mismatch: propagate the original error.
                raise

            out_table.source(table.File.from_dataframe(new_table))

            # get_attributes() is indexed at positions 0 and 1 here;
            # presumably (table attributes, column attributes) -- TODO
            # confirm. Entries from B override entries from A.
            attributes_a = table_a.get_attributes()
            attributes_b = table_b.get_attributes()
            attributes_c = tuple(
                dict(itertools.chain(attributes_a[i].items(),
                                     attributes_b[i].items()))
                for i in range(2))
            out_table.set_attributes(attributes_c)
@node_helper.list_node_decorator(
    ['Input A', 'Input B'],
    ['Output'])
class MergeTables(MergeTable):
    """List variant of ``MergeTable``.

    Reuses the ``MergeTable`` implementation; the decorator is given the
    'Input A' and 'Input B' input ports and the 'Output' output port, which
    presumably are the ports turned into lists of tables.
    """

    name = 'Merge Tables'
    nodeid = 'org.sysess.data.table.mergetables'