# This file is part of Sympathy for Data.
# Copyright (c) 2016, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data. If not, see <http://www.gnu.org/licenses/>.
import numpy as np
from sympathy.api import node as synode
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags, adjust
from sympathy.api.exceptions import SyConfigurationError
[docs]class HistogramCalculation(synode.Node):
"""
This node takes a table and calculates a histogram from one of its columns.
The output consists of bin edges and bin values and can for instance be
used in a histogram plot in the node :ref:`Figure`.
Masked values in the data column are ignored. Masked values in the weights
column are treated as 1.
"""
author = 'Magnus Sandén'
version = '0.1'
icon = 'histogram_calculation.svg'
name = 'Histogram calculation'
description = 'Calculate the histogram of a given signal.'
nodeid = 'org.sysess.sympathy.dataanalysis.histogramcalc'
tags = Tags(Tag.Analysis.Statistic)
parameters = synode.parameters()
combo_editor = synode.editors.combo_editor(edit=True)
combo_editor_w_empty = synode.editors.combo_editor(
include_empty=True, edit=True)
parameters.set_list('data_column', label="Data column:",
description='Column to create histogram for.',
editor=combo_editor)
parameters.set_list('weights_column', label="Weights column:",
description=('If you choose a weights column, '
'each value in the data column only '
'contributes its associated weight '
'towards the bin count, instead of 1.'),
editor=combo_editor_w_empty)
parameters.set_integer('bins', label="Bins:", value=10,
description='Number of bins.')
parameters.set_boolean('auto_range', label="Auto range", value=True,
description=('When checked, use data range as '
'histogram range.'))
parameters.set_float('x_min', label="X min:", value=0.0,
description='Minimum x value.')
parameters.set_float('x_max', label="X max:", value=1.0,
description='Maximum x value.')
parameters.set_boolean('normed', label="Density",
description=('When checked, the result is the '
'value of the probability density '
'function at each bin, normalized '
'such that the integral of the '
'histogram is 1.'))
controllers = synode.controller(
when=synode.field('auto_range', 'checked'),
action=(synode.field('x_min', 'disabled'),
synode.field('x_max', 'disabled')))
inputs = Ports([Port.Table('Input data', name='in')])
outputs = Ports([Port.Table('Histogram data', name='out')])
def update_parameters(self, parameters):
parameters['weights_column'].editor['include_empty'] = True
def adjust_parameters(self, node_context):
adjust(node_context.parameters['data_column'],
node_context.input['in'])
adjust(node_context.parameters['weights_column'],
node_context.input['in'])
def execute(self, node_context):
parameters = node_context.parameters
bins = parameters['bins'].value
density = parameters['normed'].value
data_param = parameters['data_column']
auto_range = parameters['auto_range'].value
if auto_range:
range_ = None
else:
x_min = parameters['x_min'].value
x_max = parameters['x_max'].value
range_ = x_min, x_max
data = node_context.input['in']._require_column(data_param)
weights_param = parameters['weights_column']
if not weights_param.selected:
weights = None
else:
weights = node_context.input['in']._require_column(weights_param)
# Handle masked arrays
if isinstance(weights, np.ma.MaskedArray):
weights.fill(1)
if isinstance(data, np.ma.MaskedArray):
mask = data.mask
data = data.compressed()
if weights is not None:
weights = weights[np.logical_not(mask)]
# Handle NaNs
if data.dtype.kind == 'f':
nan_mask = np.isnan(data)
if weights is not None and weights.dtype.kind == 'f':
nan_mask |= np.isnan(weights)
data = data[~nan_mask]
if weights is not None:
weights = weights[~nan_mask]
# Handle datetimes
datetime_dtype = None
if data.dtype.kind in 'mM':
if density:
raise SyConfigurationError(
"Density can't be used with data of types datetime or "
"timedelta.")
datetime_dtype = data.dtype
data = data.astype('int64')
bin_values, bin_edges = np.histogram(
data, bins=bins, density=density, weights=weights, range=range_)
# Handle datetimes
if datetime_dtype is not None:
bin_edges = bin_edges.astype(datetime_dtype)
node_context.output['out'].set_column_from_array(
"Bin values", bin_values)
node_context.output['out'].set_column_from_array(
"Bin min edges", bin_edges[:-1])
node_context.output['out'].set_column_from_array(
"Bin max edges", bin_edges[1:])