# This file is part of Sympathy for Data.
# Copyright (c) 2013, 2017, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import re
from sympathy.api import node as synode
from sympathy.api import node_helper
from sympathy.api.nodeconfig import (Port, Ports, Tag, Tags,
adjust)
from sympathy.api.exceptions import sywarn
from sympathy.api import dtypes
# Introductory text that SelectColumnsTable assigns to its ``__doc__``.
COMMON_DOCS = """
There are many situations where you may want to throw away some of the columns
of a table. Perhaps the amount of data is large and you want to trim it to
increase performance, or perhaps some column was just needed as an intermediary
step in some analysis.
"""

# Node ids of the single-table column selection nodes in this module. The node
# classes below use this list to build their ``related`` attribute.
BASE_SELECT_NODEIDS = [
    'org.sysess.sympathy.data.table.selecttablecolumns',
    'org.sysess.sympathy.data.table.selecttablecolumnstype',
    'org.sysess.sympathy.data.table.selecttablecolumnsregex',
    'org.sysess.sympathy.data.table.selecttablecolumnsfromtable',
]
[docs]
class SelectColumnsTable(synode.Node):
    __doc__ = COMMON_DOCS

    # Node metadata.
    author = 'Alexander Busck & Erik der Hagopian'
    description = 'Select columns from input to propagate to output.'
    icon = 'select_table_columns.svg'
    name = 'Select columns in Table'
    nodeid = 'org.sysess.sympathy.data.table.selecttablecolumns'
    tags = Tags(Tag.DataProcessing.Select)
    related = (['org.sysess.sympathy.data.table.selecttablescolumns']
               + BASE_SELECT_NODEIDS)

    # Ports: one table in, one table out.
    inputs = Ports([Port.Table('Input')])
    outputs = Ports([Port.Custom('table', 'Output', preview=True)])

    # A single list parameter holds the column selection.
    parameters = synode.parameters()
    editor = synode.editors.multilist_editor(edit=True)
    parameters.set_list(
        'columns', label='Select columns', description='Select columns.',
        value=[], editor=editor)

    def adjust_parameters(self, node_context):
        """Adjust the 'columns' parameter against the first input."""
        adjust(node_context.parameters['columns'], node_context.input[0])

    def update_parameters(self, old_params):
        """
        Migrate parameters that still carry a separate 'complement' flag.

        Nothing is done when the editor of 'columns' already has a truthy
        'mode' entry. Otherwise the 'complement' parameter (if present) is
        removed and translated into a ``multiselect_mode`` on 'columns'.
        """
        column_param = old_params['columns']
        if column_param.editor.get('mode', False):
            return

        try:
            use_complement = old_params['complement'].value
            del old_params['complement']
        except KeyError:
            # No 'complement' parameter stored: behave as if it was False.
            use_complement = False

        column_param.multiselect_mode = (
            'unselected' if use_complement else 'selected_exists')

    def execute(self, node_context):
        """Copy the selected columns from the input to the output."""
        self.select_columns(
            node_context.input[0],
            node_context.output[0],
            node_context.parameters['columns'])

    @staticmethod
    def select_columns(input_table, output_table, parameter):
        """
        Copy name, table attributes and the selected columns to the output.

        The columns to copy are the ones ``parameter.selected_names`` returns
        for the column names of ``input_table``.
        """
        output_table.set_name(input_table.get_name())
        output_table.set_table_attributes(input_table.get_table_attributes())
        chosen = parameter.selected_names(input_table.column_names())
        for column in chosen:
            output_table.update_column(column, input_table, column)
[docs]
# Variant of SelectColumnsTable with its own name and node id; all other
# attributes and methods are inherited unchanged.
# NOTE(review): list_node_decorator([0], [0]) presumably turns input 0 and
# output 0 into lists of tables -- confirm against node_helper.
@node_helper.list_node_decorator([0], [0])
class SelectColumnsTables(SelectColumnsTable):
    name = 'Select columns in Tables'
    nodeid = 'org.sysess.sympathy.data.table.selecttablescolumns'
def select_columns(input_table, output_table, column_names, complement=False):
    """
    Copy a subset of the columns of ``input_table`` into ``output_table``.

    Columns are copied in the order given by ``input_table.column_names()``
    using ``output_table.update_column(name, input_table, name)``.

    Parameters
    ----------
    input_table
        Source table. Only ``column_names()`` is called on it here; the list
        it returns is never modified.
    output_table
        Destination table that receives the kept columns.
    column_names : iterable
        Names to keep, or to drop when ``complement`` is true. Entries that
        do not name a column of ``input_table`` are ignored, as are
        duplicate entries.
    complement : bool, optional
        When false (default) only the listed columns are copied. When true
        every column except the listed ones is copied.
    """
    listed = list(column_names)
    try:
        # A set gives constant-time membership tests in the loop below.
        lookup = set(listed)
    except TypeError:
        # Unhashable entries: fall back to plain list membership.
        lookup = listed

    drop_listed = bool(complement)
    for column in input_table.column_names():
        # Keep listed columns normally, unlisted columns when complementing.
        if (column in lookup) != drop_listed:
            output_table.update_column(column, input_table, column)
[docs]
class SelectTableColumnsFromTable(synode.Node):
    # Node metadata.
    name = 'Select columns in Table with Table'
    description = ('Select columns in Table by using column '
                   'in selection Table.')
    icon = 'select_table_columns.svg'
    nodeid = 'org.sysess.sympathy.data.table.selecttablecolumnsfromtable'
    author = 'Greger Cronquist'
    tags = Tags(Tag.DataProcessing.Select)
    related = (['org.sysess.sympathy.data.table.selecttablecolumnsfromtables']
               + BASE_SELECT_NODEIDS)

    # 'port1' carries the selection table, 'port2' the table to filter.
    inputs = Ports([
        Port.Table('Selection', name='port1'),
        Port.Table('Input Table', name='port2')])
    outputs = Ports([
        Port.Table('Table with columns in Selection removed', name='port1')])

    parameters = synode.parameters()
    parameters.set_boolean(
        'complement', value=False, label="Remove selected columns",
        description=(
            'When enabled, the selected columns will be removed. '
            'When disabled, the non-selected columns will be '
            'removed.'))
    parameters.set_list(
        'selection_column', label="Column with column names",
        description=('Select column in Selection Table '
                     'used for column name filtration.'),
        value=[0],
        editor=synode.editors.combo_editor(edit=True, filter=True))

    def adjust_parameters(self, node_context):
        """Adjust 'selection_column' against the selection table."""
        adjust(node_context.parameters['selection_column'],
               node_context.input['port1'])

    def execute(self, node_context):
        """
        Filter the columns of the input table by names in the selection table.

        The output always gets the name and table attributes of the input
        table. No columns are copied when the input table is empty.
        """
        params = node_context.parameters
        name_column = params['selection_column'].selected
        remove_listed = params['complement'].value

        selection = node_context.input['port1']
        source = node_context.input['port2']
        target = node_context.output['port1']

        target.set_name(source.get_name())
        target.set_table_attributes(source.get_table_attributes())
        if source.is_empty():
            return

        if name_column not in selection.column_names():
            # Configured column is missing: warn and use an empty name list.
            sywarn('The selected column does not seem to exist. '
                   'Assuming empty input.')
            names = []
        else:
            names = list(selection.get_column_to_array(name_column))

        select_columns(source, target, names, complement=remove_listed)
[docs]
# Variant of SelectTableColumnsFromTable with its own name and node id; all
# other attributes and methods are inherited unchanged.
# NOTE(review): list_node_decorator(['port2'], ['port1']) presumably turns
# input 'port2' and output 'port1' into lists of tables while the selection
# input stays a single table -- confirm against node_helper.
@node_helper.list_node_decorator(['port2'], ['port1'])
class SelectTableColumnsFromTables(SelectTableColumnsFromTable):
    name = 'Select columns in Tables with Table'
    nodeid = 'org.sysess.sympathy.data.table.selecttablecolumnsfromtables'
[docs]
class SelectTableColumnsRegex(synode.Node):
    """
    Select all columns whose names match a regular expressions (regex). For
    more information about how to write regex, see :ref:`appendix_regex`.
    """

    author = "Magnus Sandén"
    name = 'Select columns in Table with Regex'
    description = "Select all columns whose names match a regex."
    nodeid = 'org.sysess.sympathy.data.table.selecttablecolumnsregex'
    icon = 'select_table_columns.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = BASE_SELECT_NODEIDS

    inputs = Ports([Port.Table('Input Table', name='port1')])
    outputs = Ports([Port.Table(
        'Table with a subset of the incoming columns', name='port2')])

    parameters = synode.parameters()
    parameters.set_boolean(
        'complement', value=False,
        label="Remove matching columns",
        description=(
            'When enabled, matching columns will be removed. '
            'When disabled, non-matching columns will be removed.'))
    # Adjacent string literals are concatenated verbatim, so each fragment
    # of the description below must end with its own separating space.
    parameters.set_boolean(
        'full_match', value=False,  # TODO Migrate to drop the parameter as it
                                    # is only match at start
        label="Match at column name start",
        description=(
            'When enabled, only matching pattern at the start '
            'of column name will be found. '
            'When disabled, matching pattern in any part of '
            'column name will be found.'))
    parameters.set_string(
        'regex', label='Search',
        description=(
            'Regex search pattern for matching column names. Learn more about '
            'Regular expression syntax in the documentation appendix.'),
        value="")

    def execute(self, node_context):
        """
        Copy the columns selected by the configured regex to the output.

        The output gets the name and table attributes of the input table.
        A column is copied when its name matches and 'complement' is off, or
        when it does not match and 'complement' is on.

        Raises
        ------
        re.error
            If the 'regex' parameter is not a valid pattern.
        """
        parameters = node_context.parameters
        regex = re.compile(parameters['regex'].value)
        complement = parameters['complement'].value
        full_match = parameters['full_match'].value

        input_table = node_context.input['port1']
        output_table = node_context.output['port2']
        output_table.set_name(input_table.get_name())
        output_table.set_table_attributes(input_table.get_table_attributes())

        # ``match`` only matches at the start of the name, ``search`` matches
        # anywhere in it.
        regex_fn = regex.match if full_match else regex.search
        for column in input_table.column_names():
            if bool(regex_fn(column)) != complement:
                output_table.update_column(column, input_table, column)
class SelectColumnTypesSuper(synode.Node):
    """Select columns of specific type to propagate."""

    author = 'Andreas Tågerud'
    description = 'Select column types from input to propagate to output.'
    icon = 'select_table_columns.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = (['org.sysess.sympathy.data.table.selecttablescolumnstype']
               + BASE_SELECT_NODEIDS)

    # A single list parameter holds the selected type names.
    parameters = synode.parameters()
    editor = synode.editors.multilist_editor(mode=False, filter=False)
    parameters.set_list(
        'types', label='Select types', description='Select types',
        value=[], editor=editor)

    def adjust_parameters(self, node_context):
        """Adjust the 'types' parameter to the names in dtypes.typenames()."""
        node_context.parameters['types'].adjust(dtypes.typenames())

    def _execute(self, in_table, out_table, types):
        """
        Copy the columns of ``in_table`` whose dtype kind is selected.

        ``types`` holds type names; each is converted to a dtype kind before
        comparing. The output also gets the name and table attributes of
        ``in_table``.
        """
        wanted_kinds = [dtypes.dtype(type_name).kind for type_name in types]

        out_table.set_name(in_table.get_name())
        out_table.set_table_attributes(in_table.get_table_attributes())

        for column in in_table.column_names():
            kind = in_table.column_type(column).kind
            # Consider unsigned integers as integers.
            kind = 'i' if kind == 'u' else kind
            if kind in wanted_kinds:
                out_table.update_column(column, in_table)

    def _get_types(self, node_context):
        """Return the names selected in the 'types' parameter."""
        type_param = node_context.parameters['types']
        return type_param.selected_names(dtypes.typenames())
[docs]
class SelectColumnTypesTable(SelectColumnTypesSuper):
    # Single-table node: one table in, one table out.
    name = 'Select columns by type in Table'
    nodeid = 'org.sysess.sympathy.data.table.selecttablecolumnstype'

    inputs = Ports([Port.Table('Input')])
    outputs = Ports([Port.Table('Output')])

    def execute(self, node_context):
        """Run the inherited type filter from the first input to the first output."""
        self._execute(
            node_context.input[0],
            node_context.output[0],
            self._get_types(node_context))
[docs]
# Variant of SelectColumnTypesTable with its own name and node id; all other
# attributes and methods are inherited unchanged.
# NOTE(review): list_node_decorator([0], [0]) presumably turns input 0 and
# output 0 into lists of tables -- confirm against node_helper.
@node_helper.list_node_decorator([0], [0])
class SelectColumnTypesTables(SelectColumnTypesTable):
    name = 'Select columns by type in Tables'
    nodeid = 'org.sysess.sympathy.data.table.selecttablescolumnstype'