Source code for node_json2table

# This file is part of Sympathy for Data.
# Copyright (c) 2018, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
from sympathy.api import table
from sympathy.api import node as synode
from sympathy.api import node_helper
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags, deprecated_node
from sympathy.api.exceptions import SyDataError

from sylib.json_table import (
    JsonTableWidget, JsonTable, add_jsontable_parameters, TABLE_KIND,
    json_to_table, FLATTEN, EMBED, SKIP, ERROR)


@deprecated_node('6.0.0', 'New Json to Table node')
class JsonToTable(synode.Node):
    """
    Convert a JSON file to a Table

    There are two kinds of tables that can be created:

        * Single row – where the JSON structure is simply flattened
        * Multiple rows - where the JSON structure is recursively expanded to
          create several rows

    If a single-row table is created, there is an option to minimize the
    column names to remove unnecessary path information from the JSON keys.

    For example from the JSON:

    .. code-block:: python

        {
            "version":"1.0",
            "software":"sfd",
            "items" : {
                "a":"1",
                "b":"2",
                 "c":"3"
            }
        }

    we can create the following single-row table

    .. code-block:: python

        version    software    items.a    items.b    items.c
        ----------------------------------------------------
        1.0        sfd         1          2          3

    and the column names can be *minimized* to

     .. code-block:: python

        version    software    a    b    c
        -------------------------------------
        1.0        sfd         1    2    3


    If a multiple rows-table is created, the recursive algorithm might identify
    keys and therefore columns that are lacking some values. One can choose to
    fill in the missing values with a **empty string**, a **nan** string or
    **mask** the value.

    For example from the JSON:

    .. code-block:: python

        {
            "version":"1.0",
            "software":"sfd",
            "items" : [
                {
                    "a":"1",
                    "b":"2",
                    "c":"3"
                },
                {
                    "a":"67",
                    "b":"77",
                    "d":"97"
                }
            ]
        }

    we can create the following multiple-rows table

    .. code-block:: python

        version    software    a    b    c    d
        -------------------------------------------
        1.0        sfd         1    2    3    ---
        1.0        sfd         67   77   ---  97

    where the ``c`` column is masked in the second row and the ``d``
    column is masked in the first row.

    If the algorithm that creates tnhe multi-row table fails to produce
    the desired table, it might be worth using other nodes to remove,
    select or split the JSON structure on some key.
    """
    name = 'JSON to Table (deprecated)'
    author = 'Samuel Genheden'
    version = '0.1'
    icon = 'json2table.svg'
    tags = Tags(Tag.DataProcessing.Convert)
    nodeid = 'org.sysess.sympathy.data.json.jsontotable'

    inputs = Ports([Port.Json('Input JSON object', name='input')])
    outputs = Ports([Port.Table('Output table', name='output')])
    parameters = synode.parameters()
    add_jsontable_parameters(parameters)

    def exec_parameter_view(self, node_context):
        return JsonTableWidget(node_context.parameters)

    def execute(self, node_context):
        table_kind = node_context.parameters['table_kind'].value
        min_col_names = node_context.parameters['minimize_col_names'].value
        nomask = node_context.parameters['nomask'].value

        tbl = JsonTable(node_context.input[0])
        if not tbl.is_group():
            value = node_context.input[0].get()
            raise SyDataError(
                f'Inputs must be list or dict, it is '
                f'{value} ({type(value).__name__}).')
        if table_kind == TABLE_KIND["SINGLE"]:
            node_context.output[0].source(
                tbl.create_single_row_table(min_col_names))
        else:
            node_context.output[0].source(
                tbl.create_multiple_rows_table(nomask))


@node_helper.list_node_decorator(['input'], ['output'])
class JsonsToTables(JsonToTable):
    name = "JSONs to Tables (deprecated)"
    nodeid = "org.sysess.sympathy.data.json.jsonstotables"


STRATEGIES = {
    FLATTEN: 'Flatten',
    EMBED: 'Embed json',
    SKIP: 'Skip',
    ERROR: 'Error',
 }


[docs]class JsonToTableNew(synode.Node): """ This node requires the input json to be a list of dicts. Each dict becomes one row in the output table with the keys as column names. For example: .. code-block:: json [{"a": 1, "b": 2}, {"a": 3, "b": 4}] = = a b = = 1 2 3 4 = = As a convenience a single dict is also accepted and will always produce a table with a single row. .. code-block:: json {"a": 1, "b": 2} = = a b = = 1 2 = = Column types ============ If a column would contain mixed data types, the values are converted to the first type of string, float, integer, and bool that is also present in the column. Missing values ============== If a row is missing a key that exist for some other rows, those missing values will be masked in the output Table. The same goes for None values in the input. If all values in a column would be masked (i.e. there were only None values for that key), then that column is skipped and not included in the output at all. Lists and dicts inside the row data =================================== All scalar json types (int, float, bool, string, and None) can be put in a table directly, but if the row data contains any lists or dictionaries, then those need to be handled somehow. This node offers a few different strategies for how to deal with such lists and dicts. If the input data contains no such lists or dicts, then this option has no effect. **Flatten** The default strategy is "Flatten" which looks into any lists and dicts in the row data and tries to "flatten" it into more columns. .. code-block:: json [{"userid": 1, "name": {"first": "Alice", "last": "Alison"}, "hobbies": ["Painting", "Stamp collecting"]}, {"userid": 2, "name": {"first": "Bob", "last": "McBobbin"}, "hobbies": ["Crocheting", "Yoga"]}] ====== ========== ========= ========== ================ userid name.first name.last hobbies[0] hobbies[1] ====== ========== ========= ========== ================ 1 Alice Alison Painting Stamp collecting 2 Bob McBobbin Crocheting Yoga ====== ========== ========= ========== ================ If two or more different paths in the json data would produce columns with the same name, the node gives a warning and skips that column entirely. In the following example there are name conflicts for "name.last" and "hobbies[1]": .. code-block:: json [{"userid": 1, "name": {"first": "Alice", "last": "Alison"}, "hobbies": ["Painting", "Stamp collecting"]}, {"userid": 2, "name": {"first" "Bob"}, "name.last": "McBobbin", "hobbies": ["Crocheting"], "hobbies[1]": "Yoga"}] ====== ========== ========== userid name.first hobbies[0] ====== ========== ========== 1 Alice Painting 2 Bob Crocheting ====== ========== ========== **Embed** Creates a json-encoded string for any list or dict in the row data. .. code-block:: json [{"userid": 1, "name": {"first": "Alice", "last": "Alison"}, "hobbies": ["Painting", "Stamp collecting"]}, {"userid": 2, "name": {"first": "Bob", "last": "McBobbin"}, "hobbies": ["Crocheting", "Yoga"]}] +--------+-----------------------+-----------------------+ | userid | name | hobbies | +========+=======================+=======================+ | 1 | | {"first": "Alice", | | ["Painting", | | | | "last": "Alison"} | | "Stamp collecting"] | +--------+-----------------------+-----------------------+ | 2 | | {"first": "Bob", | | ["Crocheting", | | | | "last": "McBobbin"} | | "Yoga"] | +--------+-----------------------+-----------------------+ **Skip** Skips any list or dict values in the row data. **Error** Gives an error if any row in the input contains a list or a dict. """ name = 'Json to Table' description = "Convert a list of similar Json dicts into rows in a Table." author = 'Magnus Sandén' icon = 'json_to_table.svg' tags = Tags(Tag.DataProcessing.Convert) nodeid = 'org.sysess.sympathy.data.json.jsontotablenew' related = [ 'org.sysess.sympathy.selectjson', 'org.sysess.sympathy.removejson', 'org.sysess.sympathy.selectkeysinjson', ] parameters = synode.parameters() parameters.set_string( 'list_dict_strategy', label='Strategy for dealing with lists and dicts', value=FLATTEN, description=( 'If there are lists or dicts inside the data for a row, those ' 'need to be handled somehow. See documentation for more ' 'information about each strategy.'), editor=synode.editors.combo_editor(options=STRATEGIES)) inputs = Ports([Port.Json('Input Json object', name='input')]) outputs = Ports([Port.Table('Output table', name='output')]) def execute(self, node_context): list_dict_strategy = ( node_context.parameters['list_dict_strategy'].value) indata = node_context.input['input'].get() keys, rows = json_to_table(indata, list_dict_strategy) output = table.File.from_rows(keys, rows) node_context.output['output'].source(output)