# This file is part of Sympathy for Data.
# Copyright (c) 2018 Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import numpy as np
from sympathy.api import node as synode
from sympathy.api import dtypes
from sympathy.api.nodeconfig import (Port, Ports, Tag, Tags,
adjust)
from sympathy.api import exceptions as syexc
from sympathy.api import masked
class ConvertError(Exception):
pass
MASKED_NODEIDS = [
'org.sysess.sympathy.table.fillmaskedvalues',
'org.sysess.sympathy.table.maskvalues',
'org.sysess.sympathy.table.dropmaskvalues',
]
_config_error_msg = (
'Failure in column {column}: could not convert {name}: '
'"{value}" to {type}.')
def _convert(value, dtype, name):
try:
return dtypes.numpy_value_from_dtype_str(dtype, value)
except ValueError as exc:
raise ConvertError(
dict(
name=name,
value=value,
type=dtypes.typename_from_kind(dtype.kind),
)
) from exc
def selected_columns_op(input_table, output_table, columns, set_progress,
update=True):
if update:
output_table.set_name(input_table.get_name())
output_table.set_table_attributes(input_table.get_table_attributes())
column_names = input_table.column_names()
selected_names = set(columns.selected_names(column_names))
n_column_names = len(column_names)
for i, name in enumerate(column_names):
set_progress(i * (100. / n_column_names))
if name in selected_names:
yield name
elif update:
output_table.update_column(name, input_table, name)
[docs]
class FillMaskedTable(synode.Node):
"""
Fill masked values in Table.
"""
author = 'Erik der Hagopian'
description = 'Fill masked values in Table.'
icon = 'select_table_columns.svg'
name = 'Fill masked values in Table'
nodeid = 'org.sysess.sympathy.table.fillmaskedvalues'
tags = Tags(Tag.DataProcessing.Select)
related = (MASKED_NODEIDS
+ ['org.sysess.sympathy.data.table.holdvaluetable'])
inputs = Ports([Port.Table('Input')])
outputs = Ports([Port.Table('Output')])
parameters = synode.parameters()
parameters.set_list(
'columns', label='Select columns', description='Select columns.',
value=[], editor=synode.editors.multilist_editor(edit=True))
parameters['columns']._passthrough = True
parameters.set_string(
'value', label='Value', description='Specify a fill value.',
value='')
def adjust_parameters(self, node_context):
adjust(node_context.parameters['columns'], node_context.input[0])
def execute(self, node_context):
in_table = node_context.input[0]
out_table = node_context.output[0]
self.fill_columns(
in_table, out_table, node_context.parameters['columns'],
self.set_progress, node_context.parameters['value'])
@staticmethod
def fill_columns(input_table, output_table, columns, set_progress, fill):
def fill_conv(column):
column_dtype = column.dtype
base_dtype = dtypes.numpy_dtype_factory_for_dtype(
column_dtype)
value = _convert(fill.value, base_dtype, 'Value')
value_dtype = value.dtype
new_dtype = dtypes.numpy_dtype_factory_for_dtypes(
column_dtype, value_dtype)
if new_dtype != column_dtype:
column = masked.astype(column, new_dtype)
return column.filled(value)
for name in selected_columns_op(input_table, output_table, columns,
set_progress):
array = input_table.get_column_to_array(name)
if isinstance(array, np.ma.MaskedArray):
try:
output_table.set_column_from_array(
name, fill_conv(array))
output_table.set_column_attributes(
name, input_table.get_column_attributes(name))
except ConvertError as ce:
raise syexc.SyConfigurationError(
_config_error_msg.format(column=name, **ce.args[0])
) from ce
else:
output_table.update_column(name, input_table, name)
[docs]
class MaskTable(synode.Node):
"""
Mask values in Table.
"""
author = 'Erik der Hagopian'
description = 'Mask values in Table.'
icon = 'select_table_columns.svg'
name = 'Mask values in Table'
nodeid = 'org.sysess.sympathy.table.maskvalues'
tags = Tags(Tag.DataProcessing.Select)
related = MASKED_NODEIDS
inputs = Ports([Port.Table('Input')])
outputs = Ports([Port.Table('Output')])
parameters = synode.parameters()
parameters.set_list(
'columns', label='Select columns', description='Select columns.',
value=[], editor=synode.editors.multilist_editor(edit=True))
parameters['columns']._passthrough = True
parameters.set_string(
'value', label='Value', description='Specify a value to mask.',
value='')
def adjust_parameters(self, node_context):
adjust(node_context.parameters['columns'], node_context.input[0])
def execute(self, node_context):
in_table = node_context.input[0]
out_table = node_context.output[0]
self.mask_columns(
in_table, out_table, node_context.parameters['columns'],
self.set_progress, node_context.parameters['value'])
@staticmethod
def mask_columns(input_table, output_table, columns, set_progress, fill):
def mask_conv(column):
dtype = dtypes.numpy_dtype_factory_for_dtype(
column.dtype)
value = _convert(fill.value, dtype, 'Value')
if dtype.kind == 'f' and np.isnan(value):
mask = np.isnan(column)
elif dtype.kind in ['m', 'M'] and np.isnat(value):
mask = np.isnat(column)
else:
mask = column == value
if isinstance(column, np.ma.MaskedArray):
mask |= column.mask
res = np.ma.MaskedArray(column.data, mask, dtype=dtype)
else:
res = np.ma.MaskedArray(column, mask, dtype=dtype)
return res
for name in selected_columns_op(input_table, output_table, columns,
set_progress):
try:
output_table.set_column_from_array(
name, mask_conv(input_table.get_column_to_array(name)))
output_table.set_column_attributes(
name, input_table.get_column_attributes(name))
except ConvertError as ce:
raise syexc.SyConfigurationError(
_config_error_msg.format(column=name, **ce.args[0])
) from ce
[docs]
class DropMaskTable(synode.Node):
author = 'Erik der Hagopian'
description = 'Drop either rows or columns with any masked values.'
icon = 'select_table_columns.svg'
name = 'Drop masked values in Table'
nodeid = 'org.sysess.sympathy.table.dropmaskvalues'
tags = Tags(Tag.DataProcessing.Select)
related = MASKED_NODEIDS + ['org.sysess.sympathy.data.table.dropnantable']
inputs = Ports([Port.Table('Input')])
outputs = Ports([Port.Table('Output')])
parameters = synode.parameters()
parameters.set_list(
'columns', label='Select columns', description='Select columns.',
value=[], editor=synode.editors.multilist_editor(edit=True))
parameters['columns']._passthrough = True
directions = ['Rows', 'Columns']
parameters.set_string(
'direction', label='Drop',
value=directions[0],
description='Select along which axis to drop values',
editor=synode.editors.combo_editor(options=directions))
def adjust_parameters(self, node_context):
adjust(node_context.parameters['columns'], node_context.input[0])
def execute(self, node_context):
in_table = node_context.input[0]
out_table = node_context.output[0]
self.drop_columns(
in_table, out_table, node_context.parameters['columns'],
self.set_progress, node_context.parameters['direction'])
@staticmethod
def drop_columns(input_table, output_table, columns, set_progress,
direction):
if direction.value == 'Columns':
for name in selected_columns_op(input_table, output_table, columns,
set_progress):
array = input_table.get_column_to_array(name)
if isinstance(array, np.ma.MaskedArray):
if not np.any(array.mask):
output_table.set_column_from_array(
name, array.data)
output_table.set_column_attributes(
name, input_table.get_column_attributes(name))
else:
output_table.update_column(name, input_table, name)
elif direction.value == 'Rows':
mask = np.zeros(input_table.number_of_rows(), dtype=bool)
for name in selected_columns_op(input_table, output_table, columns,
set_progress, update=False):
array = input_table.get_column_to_array(name)
if isinstance(array, np.ma.MaskedArray):
mask |= array.mask
if not np.any(mask):
output_table.update(input_table)
else:
for name in input_table.column_names():
array = input_table.get_column_to_array(name)
array = array[~mask]
if isinstance(array, np.ma.MaskedArray):
if not np.any(array.mask):
array = array.data
output_table.set_column_from_array(
name, array)
output_table.set_column_attributes(
name, input_table.get_column_attributes(name))