Source code for node_filterjson

# This file is part of Sympathy for Data.
# Copyright (c) 2023, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
import sys

from sympathy.api import node, ParameterView
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags, join_doc
from sympathy.api.exceptions import SyUserCodeError, SyConfigurationError
from sympathy.utils.json_query import (
    parse_query, Query, QueryParsingError, visit_matching, REMOVE,
    is_multi_item_query)
from sympathy.utils.parameters import set_json_query
from sympathy.utils import preview
from sylib.util import compile_function

from PySide6 import QtWidgets


# Node ids of the query-based filtering nodes defined in this module
# (Select Json, Remove Json, Select keys in Json). Each of those node classes
# assigns this list to its ``related`` attribute.
JSON_SELECT_NODES = [
    'org.sysess.sympathy.selectjson',
    'org.sysess.sympathy.removejson',
    'org.sysess.sympathy.selectkeysinjson',
]


# Documentation fragment (reStructuredText) shared by the query-based nodes in
# this module. It is passed as the first argument to join_doc() when each
# node class builds its __doc__.
JSON_QUERY_DOCS = """
For more information about how to write Json queries, see
:ref:`appendix_json_queries`.

If :guilabel:`Match anywhere` is not checked (the default) the query must
match starting at the root of the Json structure. E.g. the query
``articles`` only matches if the root entry is a dictionary containing the
key "articles". Conversely, if :guilabel:`Match anywhere` is checked the
query ``articles`` can match the key "articles" in any dictionary in the
whole structure. It can even match at several places despite the query just
being a simple key.
"""


# Documentation template (reStructuredText) describing predicate functions.
# The single ``{}`` placeholder is filled in with node-specific text through
# ``PREDICATE_DOCS.format(...)`` by the nodes that support predicates.
PREDICATE_DOCS = """
Using predicates
----------------
A predicate is a function that takes a Json entry as its only argument and
returns a boolean. It can be entered either as a lambda expression or as a
function definition using ``def``, e.g.::

    lambda entry: entry['score'] >= 100

or::

    def filter_by_score(entry):
        return entry['score'] >= 100

{}

If evaluating the predicate raises an exception this is treated the same as
returning False.
"""


def parse_node_query(query: str) -> Query:
    """Parse *query*, reporting parse failures as configuration errors.

    The query string is handed to ``parse_query``. If that raises a
    ``QueryParsingError`` it is converted into a ``SyConfigurationError``
    whose message shows the query text, a caret under the reported offset
    and the parser's own message.

    Parameters
    ----------
    query : str
        The Json query text to parse.

    Returns
    -------
    Query
        Whatever ``parse_query`` returns for *query*.

    Raises
    ------
    SyConfigurationError
        If ``parse_query`` raises ``QueryParsingError``. The original
        exception is attached as the explicit cause.
    """
    try:
        return parse_query(query)
    except QueryParsingError as e:
        # A missing offset (None/0) places the caret under the first column.
        arrow_pos = e.offset or 0
        indent = " " * arrow_pos

        msg = "\n".join([
            "Couldn't parse query:",
            query,
            indent + "^",
            indent + e.msg])
        # Chain explicitly so the parser error is kept as the direct cause.
        raise SyConfigurationError(msg) from e


class SuperNode(node.Node):
    # Shared base for the nodes in this module: one Json input, one Json
    # output, and a 'query' + 'anywhere' parameter pair that selects which
    # entries of the input the concrete node acts on.
    author = 'Magnus Sandén'

    inputs = Ports([Port.Json('Input', name='input')])
    outputs = Ports([
        Port.Custom('json', 'Output', name='output', preview=True)])

    def adjust_parameters(self, node_context):
        """Adjust the 'query' parameter to the input data, when available."""
        input_port = node_context.input[0]
        if input_port is None or not input_port.is_valid():
            # No usable input data; leave the parameter untouched.
            return
        node_context.parameters['query'].adjust(input_port.get())

    @staticmethod
    def base_parameters():
        """Return a new parameter group holding the query and the
        'Match anywhere' flag that every subclass starts from."""
        params = node.parameters()
        set_json_query(params)
        params.set_boolean(
            'anywhere',
            label='Match anywhere',
            value=False,
            description=(
                'Match the query at any position in the Json '
                'structure.'))
        return params

    def visit_matching(self, node_context, callback, *, full=False):
        """Run the module-level ``visit_matching`` on the node's input.

        The configured query is parsed with ``parse_node_query`` (so parse
        errors surface as configuration errors) and then passed on together
        with *callback*, *full* and the 'anywhere' setting. The return value
        of the module-level ``visit_matching`` is returned unchanged.
        """
        data_port = node_context.input[0]
        params = node_context.parameters
        query_text = params['query'].value
        match_anywhere = params['anywhere'].value
        parsed_query = parse_node_query(query_text)

        # Inside a method the bare name resolves to the imported function,
        # not to this method.
        return visit_matching(
            data_port.get(), parsed_query, callback,
            full=full, match_anywhere=match_anywhere)


def safe_predicate(predicate):
    """Wrap *predicate* so that calling it never raises.

    With ``predicate`` set to ``None`` the returned callable accepts every
    item (always returns True). Otherwise it returns whatever
    ``predicate(item)`` returns, or False if that call raises an
    ``Exception``.
    """
    if predicate is None:
        def inner(item):
            return True
        return inner

    def inner(item):
        try:
            verdict = predicate(item)
        except Exception:
            # A failing predicate counts as "no match".
            return False
        return verdict
    return inner


class SelectJson(SuperNode):
    # Node that keeps the entries of a Json structure selected by the query,
    # optionally narrowed down further by a user-supplied predicate.
    #
    # The documentation text below is kept flush-left so that it lines up
    # with the module-level templates it is combined with.
    __doc__ = join_doc(JSON_QUERY_DOCS, """
If :guilabel:`Only output matching` is checked, only output the parts that
actually match the query, discarding any containing lists or dictionaries.
If the query is capable of matching multiple entries (i.e. if it includes
slices or patterns or if "Match anywhere" is checked), all the matching
entries are placed in a list.
""", PREDICATE_DOCS.format("""
When predicate is used each selected entry in the Json structure is passed
to the predicate function which can then return True for entries that
should be added to the output and False for those that should not.
"""))

    name = 'Select Json'
    description = (
        'Filter a Json structure, keeping entries that match a query.')
    nodeid = 'org.sysess.sympathy.selectjson'
    icon = 'select_json.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = JSON_SELECT_NODES

    parameters = SuperNode.base_parameters()
    parameters.set_boolean(
        'only_matching', value=False, label='Only output matching',
        description='When checked, only output the parts that actually '
                    'match the query, discarding any parent containers. '
                    'If the query is capable of matching multiple entries '
                    '(i.e. if it includes slices or patterns or if "Match '
                    'anywhere" is checked), all the matching entries are '
                    'placed in a list.')
    parameters.set_boolean(
        'use_predicate', value=False, label='Filter by predicate',
        description='When checked, each entry in the data selected by '
                    'the query will be passed to the predicate function '
                    'and will only be included in the output if the '
                    'predicate function returns True.')
    parameters.set_string(
        'predicate', value="lambda entry: True", label='Predicate',
        description='The predicate can be entered as a python function '
                    '(def or lambda) which takes an entry in the Json '
                    'structure as its only argument and returns either '
                    'True if that entry should be included, or False if it '
                    'shouldn\'t',
        editor=node.editors.code_editor(language='python', single_line=True))

    # The predicate editor is only enabled while 'Filter by predicate' is
    # checked.
    controllers = node.controller(
        when=node.field('use_predicate', 'checked'),
        action=node.field('predicate', 'enabled'))

    def execute(self, node_context):
        """Write the selected (and predicate-approved) entries to the output.

        Raises
        ------
        SyUserCodeError
            If the predicate source cannot be compiled.
        """
        outport = node_context.output[0]
        parameters = node_context.parameters
        query = parameters['query'].value
        anywhere = parameters['anywhere'].value
        only_matching = parameters['only_matching'].value

        predicate = None
        if parameters['use_predicate'].value:
            try:
                predicate = compile_function(parameters['predicate'].value)
            except Exception:
                raise SyUserCodeError(sys.exc_info())
        # With predicate left as None the wrapper accepts every entry; a
        # predicate that raises is treated as returning False.
        predicate = safe_predicate(predicate)

        if only_matching:
            res = []

            def append_if(item):
                if predicate(item):
                    res.append(item)

            # Only the side effect on ``res`` is used here.
            self.visit_matching(node_context, append_if)

            # NOTE(review): the unparsed query string is passed here;
            # confirm that is_multi_item_query accepts it.
            if is_multi_item_query(query, anywhere):
                # Multi item query
                outport.set(res)
            elif res:
                # Single item query with a result
                outport.set(res[0])
            else:
                # Single item query with no result
                outport.set(None)
        else:
            def select_all(item):
                if predicate(item):
                    return item
                else:
                    return REMOVE

            outport.set(self.visit_matching(node_context, select_all))


class RemoveJson(SuperNode):
    # Node that drops the entries of a Json structure selected by the query,
    # optionally only those for which a user-supplied predicate is true.
    #
    # The documentation text below is kept flush-left so that it lines up
    # with the module-level templates it is combined with.
    __doc__ = join_doc(JSON_QUERY_DOCS, PREDICATE_DOCS.format("""
When predicate is used each selected entry in the Json structure is passed
to the predicate function which can then return True for entries that
should be removed from the output and False for those that should be kept.
"""))

    name = 'Remove Json'
    description = (
        'Filter a Json structure, removing entries that match a query.')
    nodeid = 'org.sysess.sympathy.removejson'
    icon = 'select_json.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = JSON_SELECT_NODES

    parameters = SuperNode.base_parameters()
    parameters.set_boolean(
        'use_predicate',
        value=False,
        label='Filter by predicate',
        description=(
            'When checked, each entry in the data selected by '
            'the query will be passed to the predicate function '
            'and will only be removed if the predicate function '
            'returns True.'))
    parameters.set_string(
        'predicate',
        value="lambda entry: True",
        label='Predicate',
        description=(
            'The predicate can be entered as a python function '
            '(def or lambda) which takes an entry in the Json '
            'structure as its only argument and returns either '
            'True if that entry should be removed, or False if '
            'it shouldn\'t'),
        editor=node.editors.code_editor(language='python', single_line=True))

    # The predicate editor is only enabled while 'Filter by predicate' is
    # checked.
    controllers = node.controller(
        when=node.field('use_predicate', 'checked'),
        action=node.field('predicate', 'enabled'))

    def execute(self, node_context):
        """Write the input with the selected entries removed to the output.

        Raises SyUserCodeError if the predicate source cannot be compiled.
        """
        outport = node_context.output[0]
        params = node_context.parameters

        user_predicate = None
        if params['use_predicate'].value:
            try:
                user_predicate = compile_function(params['predicate'].value)
            except Exception:
                raise SyUserCodeError(sys.exc_info())
        # Without a user predicate every selected entry is removed.
        should_remove = safe_predicate(user_predicate)

        def remove(item):
            return REMOVE if should_remove(item) else item

        filtered = self.visit_matching(node_context, remove, full=True)
        outport.set(filtered)


class SelectKeysInJson(SuperNode):
    # Node that filters the keys of the dictionaries selected by the query.
    #
    # The documentation text below is kept flush-left so that it lines up
    # with the module-level template it is combined with.
    __doc__ = join_doc(JSON_QUERY_DOCS, """
If the query matches anything other than a dictionary, that entry is kept
as is in the output.

Modes
-----
By using different modes for the :guilabel:`Keys` parameter you can achieve
different behaviors. With :guilabel:`Use selected` only selected keys are
kept in the output. With :guilabel:`Use and require selected` the same keys
are kept in the output, but it becomes an error if any selected key is
missing from any of the dictionaries. With :guilabel:`Use unselected` only
keys that are *not* selected are kept in the output. Finally, with
:guilabel:`Use all` all the keys are kept in the output.
""")

    name = 'Select keys in Json'
    description = (
        'Filter a Json structure, keeping/removing keys in selected '
        'dictionaries.')
    nodeid = 'org.sysess.sympathy.selectkeysinjson'
    icon = 'select_json.svg'
    tags = Tags(Tag.DataProcessing.Select)
    related = JSON_SELECT_NODES

    parameters = SuperNode.base_parameters()
    parameters.set_list(
        'keys', label='Select keys',
        description='Select keys to keep in the selected dictionaries.',
        editor=node.editors.multilist_editor(edit=True))

    def execute(self, node_context):
        """Write the input to the output with the matched dictionaries
        reduced to the keys chosen by the 'keys' parameter."""
        outport = node_context.output[0]
        keys_param = node_context.parameters['keys']

        def filter_keys(item):
            # Anything that is not a dictionary passes through unchanged.
            if not isinstance(item, dict):
                return item
            keys = keys_param.selected_names(item.keys())
            return {key: item[key] for key in keys}

        outport.set(self.visit_matching(node_context, filter_keys))

    def exec_parameter_view(self, node_context):
        """Build the configuration widget: the parameter GUI combined with
        a preview widget."""
        indata = None
        input_port = node_context.input[0]
        if input_port is not None and input_port.is_valid():
            indata = input_port.get()

        params_widget = SelectKeysInJsonGui(node_context.parameters, indata)
        preview_widget = preview.PreviewWidget(
            self, node_context, node_context.parameters)
        widget = preview.ParameterPreviewWidget(
            params_widget, preview_widget)
        return widget


class SelectKeysInJsonGui(ParameterView):
    """Parameter view for the 'Select keys in Json' node.

    Stacks the query, 'anywhere' and keys parameter GUIs vertically and
    refreshes the items offered by the keys editor whenever the query or
    the 'anywhere' setting changes.
    """

    def __init__(self, parameters, indata):
        super().__init__()
        self._indata = indata
        self._parameters = parameters

        query_gui = parameters['query'].gui()
        anywhere_gui = parameters['anywhere'].gui()
        keys_gui = parameters['keys'].gui()
        self._keys_editor = keys_gui.editor()

        layout = QtWidgets.QVBoxLayout()
        for child in (query_gui, anywhere_gui, keys_gui):
            layout.addWidget(child)
        self.setLayout(layout)

        # Populate once up front, then follow later parameter changes.
        self.update_keys(parameters['query'].value)
        for source in (query_gui, anywhere_gui):
            source.valueChanged.connect(self.update_keys)

    def update_keys(self, *_):
        """Collect the keys of all matched dictionaries in the input data
        and hand them to the keys editor.

        Any arguments (e.g. from the connected signals) are ignored; the
        current parameter values are read instead.
        """
        query = self._parameters['query'].value
        anywhere = self._parameters['anywhere'].value
        found_keys = set()

        def add_keys(item):
            if isinstance(item, dict):
                found_keys.update(item.keys())

        try:
            # NOTE(review): the unparsed query string is passed here;
            # confirm that visit_matching accepts it.
            visit_matching(
                self._indata, query, add_keys, match_anywhere=anywhere)
        except Exception:
            # Best effort: on any failure the editor gets whatever keys
            # were collected before the error (possibly none).
            pass

        self._keys_editor._set_items(found_keys)


class JsonCalculator(SuperNode):
    # Node that replaces every entry selected by the query with the result
    # of calling a user-supplied function on it.
    #
    # The documentation text below is kept flush-left so that it lines up
    # with the module-level template it is combined with.
    __doc__ = join_doc(JSON_QUERY_DOCS, """
The calculation
---------------
The supplied python function can be entered either as a lambda expression
or as a function definition using ``def``. It is called once for each
matching entry in the Json structure. The matching entry is passed to the
function and is replaced in the output by whatever is returned by the
function.
""")

    name = 'Json Calculator'
    description = (
        'Apply a user-defined function on Json entries matching a query.')
    nodeid = 'org.sysess.sympathy.jsoncalculator'
    icon = 'json_calculator.svg'
    tags = Tags(Tag.DataProcessing.Calculate)
    related = ['org.sysess.sympathy.create.createjson']

    parameters = SuperNode.base_parameters()
    parameters.set_string(
        'calculation', value='lambda entry: entry', label='Calculation',
        description='Calculation function (either a lambda or def) which '
                    'takes a Json entry as only argument and returns a '
                    'new Json entry.',
        editor=node.editors.code_editor(language='python'))

    def execute(self, node_context):
        """Apply the calculation to every matching entry and write the
        resulting structure to the output.

        Raises
        ------
        SyUserCodeError
            If the calculation source cannot be compiled.
        """
        outport = node_context.output[0]
        # Report compilation problems in the user's code the same way the
        # predicate-based nodes in this module do.
        try:
            calculation = compile_function(
                node_context.parameters['calculation'].value)
        except Exception:
            raise SyUserCodeError(sys.exc_info())
        outport.set(self.visit_matching(node_context, calculation, full=True))