# This file is part of Sympathy for Data.
# Copyright (c) 2023, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
import sys
from sympathy.api import node, ParameterView
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags, join_doc
from sympathy.api.exceptions import SyUserCodeError, SyConfigurationError
from sympathy.utils.json_query import (
parse_query, Query, QueryParsingError, visit_matching, REMOVE,
is_multi_item_query)
from sympathy.utils.parameters import set_json_query
from sympathy.utils import preview
from sylib.util import compile_function
from PySide6 import QtWidgets
# Node ids of the Json selection nodes defined in this module. Each of those
# nodes uses this list as its ``related`` attribute.
JSON_SELECT_NODES = [
    'org.sysess.sympathy.selectjson',
    'org.sysess.sympathy.removejson',
    'org.sysess.sympathy.selectkeysinjson',
]
# reST fragment describing query matching. It is passed as the first piece to
# ``join_doc`` when building the ``__doc__`` of the node classes in this
# module.
JSON_QUERY_DOCS = """
For more information about how to write Json queries, see
:ref:`appendix_json_queries`.

If :guilabel:`Match anywhere` is not checked (the default) the query must
match starting at the root of the Json structure. E.g. the query
``articles`` only matches if the root entry is a dictionary containing the
key "articles". Conversely, if :guilabel:`Match anywhere` is checked the
query ``articles`` can match the key "articles" in any dictionary in the
whole structure. It can even match at several places despite the query just
being a simple key.
"""
# reST template describing predicate functions. The single ``{}`` placeholder
# is filled in with node-specific text via ``str.format`` by the nodes that
# support predicates. The code samples must stay indented and surrounded by
# blank lines so that they render as reST literal blocks.
PREDICATE_DOCS = """
Using predicates
----------------
A predicate is a function that takes a Json entry as its only argument and
returns a boolean. It can be entered either as a lambda expression or as a
function definition using ``def``, e.g.::

    lambda entry: entry['score'] >= 100

or::

    def filter_by_score(entry):
        return entry['score'] >= 100

{}

If evaluating the predicate raises an exception this is treated the same as
returning False.
"""
def parse_node_query(query: str) -> Query:
    """Parse *query*, reporting syntax errors as configuration errors.

    Returns whatever ``parse_query`` returns for the query string.

    Raises ``SyConfigurationError`` when ``parse_query`` raises a
    ``QueryParsingError``. The message repeats the query and places a
    ``^`` marker plus the parser's message under the reported offset
    (column 0 when no offset is available).
    """
    try:
        parsed = parse_query(query)
    except QueryParsingError as e:
        # Pad both the marker and the parser message to the error column.
        padding = " " * (e.offset or 0)
        lines = [
            "Couldn't parse query:",
            query,
            padding + "^",
            padding + e.msg,
        ]
        raise SyConfigurationError("\n".join(lines)) from e
    return parsed
class SuperNode(node.Node):
    # Shared base class for the Json query nodes in this module: it declares
    # one Json input port, one Json output port, and the helpers for the
    # common 'query' and 'anywhere' parameters. (Kept as a comment rather
    # than a docstring so that the class' ``__doc__`` is left unchanged.)
    author = 'Magnus Sandén'
    inputs = Ports([Port.Json('Input', name='input')])
    outputs = Ports([
        Port.Custom('json', 'Output', name='output', preview=True),
    ])

    def adjust_parameters(self, node_context):
        """Adjust the 'query' parameter to the input data, if it is valid."""
        port = node_context.input[0]
        if port is None or not port.is_valid():
            return
        node_context.parameters['query'].adjust(port.get())

    @staticmethod
    def base_parameters():
        """Return a fresh parameter structure with the shared parameters.

        ``set_json_query`` is applied first (presumably it adds the 'query'
        parameter read elsewhere in this class), followed by the boolean
        'anywhere' parameter.
        """
        params = node.parameters()
        set_json_query(params)
        params.set_boolean(
            'anywhere',
            value=False,
            label='Match anywhere',
            description=(
                'Match the query at any position in the Json structure.'))
        return params

    def visit_matching(self, node_context, callback, *, full=False):
        """Run ``json_query.visit_matching`` on the node's input data.

        The query text and the 'anywhere' flag are read from the node
        parameters; the query is parsed with ``parse_node_query`` so that
        syntax errors surface as ``SyConfigurationError``. ``callback`` and
        ``full`` are forwarded unchanged, and the result of the module-level
        ``visit_matching`` is returned.
        """
        source_port = node_context.input[0]
        params = node_context.parameters
        query_text = params['query'].value
        match_anywhere = params['anywhere'].value
        parsed_query = parse_node_query(query_text)
        # Inside the method body the bare name refers to the function
        # imported from json_query, not to this method.
        return visit_matching(
            source_port.get(),
            parsed_query,
            callback,
            full=full,
            match_anywhere=match_anywhere,
        )
def safe_predicate(predicate):
    """Wrap *predicate* so that calling it never raises.

    The returned one-argument function behaves as follows:

    - if *predicate* is None it always returns True;
    - otherwise it returns ``predicate(item)`` unchanged, or False when
      that call raises any ``Exception``.
    """
    def inner(item):
        if predicate is not None:
            try:
                outcome = predicate(item)
            except Exception:
                # A failing predicate counts as "does not match".
                outcome = False
            return outcome
        # No predicate configured: accept every item.
        return True

    return inner
[docs]
class SelectJson(SuperNode):
    __doc__ = join_doc(JSON_QUERY_DOCS, """
If :guilabel:`Only output matching` is checked, only output the parts that
actually match the query, discarding any containing lists or dictionaries.
If the query is capable of matching multiple entries (i.e. if it includes
slices or patterns or if "Match anywhere" is checked), all the matching
entries are placed in a list.
""", PREDICATE_DOCS.format("""
When predicate is used each selected entry in the Json structure is passed
to the predicate function which can then return True for entries that
should be added to the output and False for those that should not.
"""))

    name = 'Select Json'
    description = (
        'Filter a Json structure, keeping entries that match a query.')
    nodeid = 'org.sysess.sympathy.selectjson'
    icon = 'select_json.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = JSON_SELECT_NODES

    parameters = SuperNode.base_parameters()
    parameters.set_boolean(
        'only_matching', value=False, label='Only output matching',
        description='When checked, only output the parts that actually '
                    'match the query, discarding any parent containers. '
                    'If the query is capable of matching multiple entries '
                    '(i.e. if it includes slices or patterns or if "Match '
                    'anywhere" is checked), all the matching entries are '
                    'placed in a list.')
    parameters.set_boolean(
        'use_predicate', value=False, label='Filter by predicate',
        description='When checked, each entry in the data selected by '
                    'the query will be passed to the predicate function '
                    'and will only be included in the output if the '
                    'predicate function returns True.')
    parameters.set_string(
        'predicate', value="lambda entry: True", label='Predicate',
        description='The predicate can be entered as a python function '
                    '(def or lambda) which takes an entry in the Json '
                    'structure as its only argument and returns either '
                    'True if that entry should be included, or False if it '
                    'shouldn\'t',
        editor=node.editors.code_editor(language='python', single_line=True))

    # The predicate editor is only enabled while 'use_predicate' is checked.
    controllers = node.controller(
        when=node.field('use_predicate', 'checked'),
        action=node.field('predicate', 'enabled'))

    def execute(self, node_context):
        """Write the selected part of the input to the output port.

        Raises ``SyUserCodeError`` if the predicate source cannot be
        compiled. Exceptions raised while *evaluating* the predicate are
        treated as False by ``safe_predicate``.
        """
        outport = node_context.output[0]
        parameters = node_context.parameters
        query = parameters['query'].value
        anywhere = parameters['anywhere'].value
        only_matching = parameters['only_matching'].value

        # Without a predicate every matched entry is accepted.
        predicate = None
        if parameters['use_predicate'].value:
            try:
                predicate = compile_function(parameters['predicate'].value)
            except Exception as exc:
                raise SyUserCodeError(sys.exc_info()) from exc
        predicate = safe_predicate(predicate)

        if only_matching:
            # Collect the accepted entries themselves, without any of
            # their containers.
            res = []

            def append_if(item):
                if predicate(item):
                    res.append(item)

            self.visit_matching(node_context, append_if)

            if is_multi_item_query(query, anywhere):
                # Multi item query
                outport.set(res)
            elif res:
                # Single item query with a result
                outport.set(res[0])
            else:
                # Single item query with no result
                outport.set(None)
        else:
            # Output whatever visit_matching builds from the callback's
            # return values: the entry itself when accepted, the REMOVE
            # sentinel otherwise.
            def select_all(item):
                if predicate(item):
                    return item
                return REMOVE

            outport.set(self.visit_matching(node_context, select_all))
[docs]
class RemoveJson(SuperNode):
    __doc__ = join_doc(JSON_QUERY_DOCS, PREDICATE_DOCS.format("""
When predicate is used each selected entry in the Json structure is passed
to the predicate function which can then return True for entries that
should be removed from the output and False for those that should be kept.
"""))

    name = 'Remove Json'
    description = (
        'Filter a Json structure, removing entries that match a query.')
    nodeid = 'org.sysess.sympathy.removejson'
    icon = 'select_json.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = JSON_SELECT_NODES

    parameters = SuperNode.base_parameters()
    parameters.set_boolean(
        'use_predicate',
        value=False,
        label='Filter by predicate',
        description=(
            'When checked, each entry in the data selected by the query '
            'will be passed to the predicate function and will only be '
            'removed if the predicate function returns True.'))
    parameters.set_string(
        'predicate',
        value="lambda entry: True",
        label='Predicate',
        description=(
            'The predicate can be entered as a python function (def or '
            'lambda) which takes an entry in the Json structure as its '
            'only argument and returns either True if that entry should '
            "be removed, or False if it shouldn't"),
        editor=node.editors.code_editor(language='python', single_line=True))

    # The predicate editor is only enabled while 'use_predicate' is checked.
    controllers = node.controller(
        when=node.field('use_predicate', 'checked'),
        action=node.field('predicate', 'enabled'))

    def execute(self, node_context):
        """Write the input to the output port with matched entries removed.

        Raises ``SyUserCodeError`` if the predicate source cannot be
        compiled. Exceptions raised while evaluating the predicate count
        as False (see ``safe_predicate``), i.e. the entry is kept.
        """
        outport = node_context.output[0]
        params = node_context.parameters

        compiled = None
        if params['use_predicate'].value:
            try:
                compiled = compile_function(params['predicate'].value)
            except Exception as exc:
                raise SyUserCodeError(sys.exc_info()) from exc
        should_remove = safe_predicate(compiled)

        def remove(entry):
            # Hand back the REMOVE sentinel for entries to drop and the
            # unchanged entry otherwise.
            return REMOVE if should_remove(entry) else entry

        result = self.visit_matching(node_context, remove, full=True)
        outport.set(result)
[docs]
class SelectKeysInJson(SuperNode):
    __doc__ = join_doc(JSON_QUERY_DOCS, """
If the query matches anything other than a dictionary, that entry is kept
as is in the output.

Modes
-----
By using different modes for the :guilabel:`Keys` parameter you can achieve
different behaviors. With :guilabel:`Use selected` only selected keys are
kept in the output. With :guilabel:`Use and require selected` the same keys
are kept in the output, but it becomes an error if any selected key is
missing from any of the dictionaries. With :guilabel:`Use unselected` only
keys that are *not* selected are kept in the output. Finally, with
:guilabel:`Use all` all the keys are kept in the output.
""")

    name = 'Select keys in Json'
    description = (
        'Filter a Json structure, keeping/removing keys in selected '
        'dictionaries.')
    nodeid = 'org.sysess.sympathy.selectkeysinjson'
    icon = 'select_json.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = JSON_SELECT_NODES

    parameters = SuperNode.base_parameters()
    parameters.set_list(
        'keys', label='Select keys',
        description='Select keys to keep in the selected dictionaries.',
        editor=node.editors.multilist_editor(edit=True))

    def execute(self, node_context):
        """Filter the keys of every matched dictionary and write the result.

        Matched entries that are not dictionaries are passed through
        unchanged.
        """
        outport = node_context.output[0]
        keys_param = node_context.parameters['keys']

        def filter_keys(item):
            if not isinstance(item, dict):
                return item
            # The list parameter decides which of the available keys to
            # keep, according to its selection mode.
            keys = keys_param.selected_names(item.keys())
            return {key: item[key] for key in keys}

        outport.set(self.visit_matching(node_context, filter_keys))

    def exec_parameter_view(self, node_context):
        """Return the configuration widget: parameter GUI plus a preview.

        The input data is handed to the GUI when the input port is
        available and valid; otherwise ``None`` is passed.
        """
        indata = None
        input_port = node_context.input[0]
        if input_port is not None and input_port.is_valid():
            indata = input_port.get()

        params_widget = SelectKeysInJsonGui(node_context.parameters, indata)
        preview_widget = preview.PreviewWidget(
            self, node_context, node_context.parameters)
        widget = preview.ParameterPreviewWidget(
            params_widget, preview_widget)
        return widget
class SelectKeysInJsonGui(ParameterView):
    """Parameter view for the 'Select keys in Json' node.

    Stacks the 'query', 'anywhere' and 'keys' parameter widgets vertically
    and refreshes the items offered by the keys editor whenever the query
    or the 'anywhere' flag changes.
    """

    def __init__(self, parameters, indata):
        """Build the widgets.

        ``parameters`` must contain 'query', 'anywhere' and 'keys';
        ``indata`` is the Json input data, or None when unavailable.
        """
        super().__init__()
        self._parameters = parameters
        self._indata = indata

        query_widget = parameters['query'].gui()
        anywhere_widget = parameters['anywhere'].gui()
        keys_widget = parameters['keys'].gui()
        self._keys_editor = keys_widget.editor()

        column = QtWidgets.QVBoxLayout()
        for widget in (query_widget, anywhere_widget, keys_widget):
            column.addWidget(widget)
        self.setLayout(column)

        # Populate the editor once up front; update_keys ignores its
        # arguments and reads the current parameter values itself.
        self.update_keys(parameters['query'].value)
        for widget in (query_widget, anywhere_widget):
            widget.valueChanged.connect(self.update_keys)

    def update_keys(self, *_):
        """Collect the keys of all matched dictionaries into the editor.

        Positional arguments (e.g. the new value sent by ``valueChanged``)
        are ignored.
        """
        params = self._parameters
        query_text = params['query'].value
        match_anywhere = params['anywhere'].value
        found = set()

        def collect(entry):
            if isinstance(entry, dict):
                found.update(entry.keys())

        try:
            visit_matching(
                self._indata, query_text, collect,
                match_anywhere=match_anywhere)
        except Exception:
            # Deliberate best effort: any failure while traversing is
            # ignored and the editor shows whatever keys were collected
            # before the failure.
            pass
        self._keys_editor._set_items(found)
[docs]
class JsonCalculator(SuperNode):
    __doc__ = join_doc(JSON_QUERY_DOCS, """
The calculation
---------------
The supplied python function can be entered either as a lambda expression
or as a function definition using ``def``. It is called once for each
matching entry in the Json structure. The matching entry is passed to the
function and is replaced in the output by whatever is returned by the
function.
""")

    name = 'Json Calculator'
    description = (
        'Apply a user-defined function on Json entries matching a query.')
    nodeid = 'org.sysess.sympathy.jsoncalculator'
    icon = 'json_calculator.svg'
    tags = Tags(Tag.DataProcessing.Calculate)
    related = ['org.sysess.sympathy.create.createjson']

    parameters = SuperNode.base_parameters()
    parameters.set_string(
        'calculation', value='lambda entry: entry', label='Calculation',
        description='Calculation function (either a lambda or def) which '
                    'takes a Json entry as only argument and returns a '
                    'new Json entry.',
        editor=node.editors.code_editor(language='python'))

    def execute(self, node_context):
        """Apply the calculation to the matching entries and write the result.

        Raises ``SyUserCodeError`` if the calculation source cannot be
        compiled, in the same way as the predicate handling in the other
        Json nodes of this module. Exceptions raised while running the
        calculation are not wrapped here.
        """
        outport = node_context.output[0]
        try:
            calculation = compile_function(
                node_context.parameters['calculation'].value)
        except Exception as exc:
            # Report broken user code as a user code error instead of
            # letting the raw compile exception escape.
            raise SyUserCodeError(sys.exc_info()) from exc
        outport.set(self.visit_matching(node_context, calculation, full=True))