# Source code for node_select_columns_in_table_dataset
# This file is part of Sympathy for Data.
# Copyright (c) 2021 Combine Control Systems
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
"""
There are many situations where you may want to throw away some of the columns
of a table. Perhaps the amount of data is large and you want to trim it to
increase performance, or perhaps some column was just needed as an intermediary
step in some analysis.
"""
from sympathy.api import node as synode
from sympathy.api.nodeconfig import Ports, Tag, Tags, adjust
from sylib_aml.dataset import DatasetPort
# Node ids sharing the "org.sysess.sympathy.data.table." prefix.
# NOTE(review): presumably the standard table column-selection nodes; the
# list is not referenced in the visible part of this file -- confirm usage.
BASE_SELECT_NODEIDS = [
    f"org.sysess.sympathy.data.table.{suffix}"
    for suffix in (
        "selecttablecolumns",
        "selecttablecolumnstype",
        "selecttablecolumnsregex",
        "selecttablecolumnsfromtable",
    )
]
# [docs]
class SelectColumnsTableDataset(synode.Node):
    """
    Select columns to propagate.

    The node loads the dataset on its input port, removes entries from the
    dataset's ``column_config`` according to the ``columns`` parameter and
    saves the result on the output port.
    """

    name = "Select columns in Table Dataset (Experimental)"
    nodeid = (
        "com.sympathyfordata.advancedmachinelearning."
        "selectcolumnsintabledataset")
    author = "Jannes Germishuys"
    icon = "convert_column_types_table_ds.svg"
    description = "Select columns from input to propagate to output."
    tags = Tags(Tag.MachineLearning.Processing)

    inputs = Ports([DatasetPort("Dataset", "dataset")])
    outputs = Ports([DatasetPort("Dataset", "dataset")])

    # A single list parameter, edited with a multi-list editor that exposes
    # the selection mode.
    parameters = synode.parameters()
    editor = synode.editors.multilist_editor(edit=True, mode=True)
    parameters.set_list(
        "columns",
        label="Select columns",
        description="Select columns.",
        value=[],
        editor=editor,
    )

    def adjust_parameters(self, node_context):
        """Adjust the ``columns`` parameter against the first input port."""
        columns_param = node_context.parameters["columns"]
        dataset_port = node_context.input[0]
        adjust(columns_param, dataset_port)

    def update_parameters(self, old_params):
        """
        Translate a ``complement`` parameter into a selection mode.

        When the editor of ``columns`` has no truthy ``mode`` entry, the
        ``complement`` parameter (if present) is removed and the selection
        mode of ``columns`` is set to ``"unselected"`` for a truthy
        complement and to ``"selected"`` otherwise.
        """
        columns_param = old_params["columns"]
        if columns_param.editor.get("mode", False):
            # The editor already carries a mode; nothing to translate.
            return

        try:
            complement = old_params["complement"].value
            del old_params["complement"]
        except KeyError:
            # No complement parameter stored: treat as a plain selection.
            complement = False

        columns_param.multiselect_mode = (
            "unselected" if complement else "selected")

    def execute(self, node_context):
        """
        Drop the columns that are not selected and write the dataset.

        Raises:
            ValueError: If the selection mode of ``columns`` is neither
                ``"selected"`` nor ``"unselected"``.
        """
        source = node_context.input["dataset"]
        source.load()
        dataset = source.get_ds()
        column_config = dataset["column_config"]
        available = list(column_config.keys())

        columns_param = node_context.parameters["columns"]
        mode = columns_param.multiselect_mode
        if mode == "selected":
            retained = set(columns_param.value_names)
        elif mode == "unselected":
            retained = set(available) - set(columns_param.value_names)
        else:
            raise ValueError(
                "This method of selection is not currently supported for "
                "datasets. Please try selected or unselected.")

        # Iterate over the snapshot of names so that entries can be removed
        # from the mapping while looping.
        for column in available:
            if column not in retained:
                del column_config[column]

        target = node_context.output["dataset"]
        target.set_ds(dataset)
        target.save()