# This file is part of Sympathy for Data.
# Copyright (c) 2018, Combine Control Systems AB
#
# SYMPATHY FOR DATA COMMERCIAL LICENSE
# You should have received a link to the License with Sympathy for Data.
from sympathy.api import table
from sympathy.api import node as synode
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags
from sylib.json_table import (
json_to_table, FLATTEN, EMBED, SKIP, ERROR)
# Display labels for the available list/dict handling strategies, keyed by
# the strategy constants imported from sylib.json_table. Passed as the
# options of the 'list_dict_strategy' combo editor.
STRATEGIES = dict((
    (FLATTEN, 'Flatten'),
    (EMBED, 'Embed json'),
    (SKIP, 'Skip'),
    (ERROR, 'Error'),
))
[docs]
class JsonToTableNew(synode.Node):
    """
    This node accepts a list of dicts as the input json.
    Each dict becomes one row in the output table with the keys as column
    names. For example:

    .. code-block:: json

        [{"a": 1, "b": 2},
         {"a": 3, "b": 4}]

    =  =
    a  b
    =  =
    1  2
    3  4
    =  =

    As a convenience a list of lists is also accepted and will produce a
    table with index-based column names. For example:

    .. code-block:: json

        [["a", "b"],
         ["c", "d"]]

    =  =
    0  1
    =  =
    a  b
    c  d
    =  =

    Finally a single dict is also accepted and will always produce a
    table with a single row.

    .. code-block:: json

        {"a": 1, "b": 2}

    =  =
    a  b
    =  =
    1  2
    =  =

    Column types
    ============
    If a column would contain mixed data types, the values are converted to
    the first type of string, float, integer, and bool that is also present
    in the column.

    Missing values
    ==============
    If a row is missing a key that exists for some other rows, those missing
    values will be masked in the output Table. The same goes for None values
    in the input. If all values in a column would be masked (i.e. there were
    only None values for that key), then that column is skipped and not
    included in the output at all.

    Lists and dicts inside the row data
    ===================================
    All scalar json types (int, float, bool, string, and None) can be put in
    a table directly, but if the row data contains any lists or dictionaries,
    then those need to be handled somehow. This node offers a few different
    strategies for how to deal with such lists and dicts. If the input data
    contains no such lists or dicts, then this option has no effect.

    **Flatten**

    The default strategy is "Flatten" which looks into any lists and dicts
    in the row data and tries to "flatten" it into more columns.

    .. code-block:: json

        [{"userid": 1,
          "name": {"first": "Alice", "last": "Alison"},
          "hobbies": ["Painting", "Stamp collecting"]},
         {"userid": 2,
          "name": {"first": "Bob", "last": "McBobbin"},
          "hobbies": ["Crocheting", "Yoga"]}]

    ====== ========== ========= ========== ================
    userid name.first name.last hobbies[0] hobbies[1]
    ====== ========== ========= ========== ================
    1      Alice      Alison    Painting   Stamp collecting
    2      Bob        McBobbin  Crocheting Yoga
    ====== ========== ========= ========== ================

    If two or more different paths in the json data would produce columns
    with the same name, the node gives a warning and skips that column
    entirely. In the following example there are name conflicts for
    "name.last" and "hobbies[1]":

    .. code-block:: json

        [{"userid": 1,
          "name": {"first": "Alice", "last": "Alison"},
          "hobbies": ["Painting", "Stamp collecting"]},
         {"userid": 2,
          "name": {"first": "Bob"},
          "name.last": "McBobbin",
          "hobbies": ["Crocheting"],
          "hobbies[1]": "Yoga"}]

    ====== ========== ==========
    userid name.first hobbies[0]
    ====== ========== ==========
    1      Alice      Painting
    2      Bob        Crocheting
    ====== ========== ==========

    **Embed**

    Creates a json-encoded string for any list or dict in the row data.

    .. code-block:: json

        [{"userid": 1,
          "name": {"first": "Alice", "last": "Alison"},
          "hobbies": ["Painting", "Stamp collecting"]},
         {"userid": 2,
          "name": {"first": "Bob", "last": "McBobbin"},
          "hobbies": ["Crocheting", "Yoga"]}]

    +--------+-----------------------+-----------------------+
    | userid | name                  | hobbies               |
    +========+=======================+=======================+
    | 1      | | {"first": "Alice",  | | ["Painting",        |
    |        | | "last": "Alison"}   | | "Stamp collecting"] |
    +--------+-----------------------+-----------------------+
    | 2      | | {"first": "Bob",    | | ["Crocheting",      |
    |        | | "last": "McBobbin"} | | "Yoga"]             |
    +--------+-----------------------+-----------------------+

    **Skip**

    Skips any list or dict values in the row data.

    **Error**

    Gives an error if any row in the input contains a list or a dict.
    """

    # Node metadata.
    name = 'Json to Table'
    description = "Convert a list of similar Json dicts into rows in a Table."
    author = 'Magnus Sandén'
    icon = 'json_to_table.svg'
    tags = Tags(Tag.DataProcessing.Convert)
    nodeid = 'org.sysess.sympathy.data.json.jsontotablenew'
    related = [
        'org.sysess.sympathy.selectjson',
        'org.sysess.sympathy.removejson',
        'org.sysess.sympathy.selectkeysinjson',
    ]

    # One string parameter selects how nested lists/dicts are handled. It
    # defaults to FLATTEN and is edited through a combo box whose options
    # come from STRATEGIES.
    parameters = synode.parameters()
    parameters.set_string(
        'list_dict_strategy',
        label='Strategy for dealing with lists and dicts',
        value=FLATTEN,
        description=(
            'If there are lists or dicts inside the data for a row, '
            'those need to be handled somehow. '
            'See documentation for more information about each strategy.'),
        editor=synode.editors.combo_editor(options=STRATEGIES))

    # Ports: one Json input named 'input', one Table output named 'output'.
    inputs = Ports([Port.Json('Input Json object', name='input')])
    outputs = Ports([Port.Table('Output table', name='output')])

    def execute(self, node_context):
        """
        Convert the json on the 'input' port to a table on the 'output' port.

        The configured 'list_dict_strategy' value is passed to json_to_table
        together with the input json. The column keys and rows it returns
        are turned into a table with table.File.from_rows, and that table is
        set as the source of the 'output' port.
        """
        strategy = node_context.parameters['list_dict_strategy'].value
        json_data = node_context.input['input'].get()

        column_keys, row_values = json_to_table(json_data, strategy)
        result_table = table.File.from_rows(column_keys, row_values)

        node_context.output['output'].source(result_table)