# Source code for node_histogram_calculation

# This file is part of Sympathy for Data.
# Copyright (c) 2016, Combine Control Systems AB
#
# Sympathy for Data is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Sympathy for Data is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sympathy for Data.  If not, see <http://www.gnu.org/licenses/>.
import numpy as np

from sympathy.api import node as synode
from sympathy.api.nodeconfig import Port, Ports, Tag, Tags, adjust
from sympathy.api.exceptions import SyConfigurationError, SyDataError


class HistogramCalculation(synode.Node):
    """
    This node takes a table and calculates a histogram from one of its
    columns.

    The input Data column must be of numeric, datetime or timedelta type.
    Weights column (if selected) must be of numeric type.

    The output consists of bin edges and bin values and can for instance be
    used in a histogram plot in the node :ref:`Figure`.

    Masked values in the data column are ignored. Masked values in the weights
    column are treated as 1. Rows where the data value or the weight is NaN
    are ignored.
    """

    author = 'Magnus Sandén'
    version = '0.1'
    icon = 'histogram_calculation.svg'
    name = 'Histogram calculation'
    description = 'Calculate the histogram of a given signal.'
    nodeid = 'org.sysess.sympathy.dataanalysis.histogramcalc'
    tags = Tags(Tag.Analysis.Statistic)

    parameters = synode.parameters()
    combo_editor = synode.editors.combo_editor(edit=True)
    combo_editor_w_empty = synode.editors.combo_editor(
        include_empty=True, edit=True)
    parameters.set_list('data_column', label="Data column:",
                        description='Column to create histogram for. Requires '
                                    'numeric, datetime or timedelta type.',
                        editor=combo_editor)
    parameters.set_list('weights_column', label="Weights column:",
                        description=('If you choose a weights column, '
                                     'each value in the data column only '
                                     'contributes its associated weight '
                                     'towards the bin count, instead of 1. '
                                     'Requires numeric type.'),
                        editor=combo_editor_w_empty)
    parameters.set_integer('bins', label="Bins:", value=10,
                           description='Number of bins.')
    parameters.set_boolean('auto_range', label="Auto range", value=True,
                           description=('When checked, use data range as '
                                        'histogram range.'))
    parameters.set_float('x_min', label="X min:", value=0.0,
                         description='Minimum x value.')
    parameters.set_float('x_max', label="X max:", value=1.0,
                         description='Maximum x value.')
    parameters.set_boolean('normed', label="Density",
                           description=('When checked, the result is the '
                                        'value of the probability density '
                                        'function at each bin, normalized '
                                        'such that the integral of the '
                                        'histogram is 1.'))

    # The explicit x range only applies when auto range is switched off.
    controllers = synode.controller(
        when=synode.field('auto_range', 'checked'),
        action=(synode.field('x_min', 'disabled'),
                synode.field('x_max', 'disabled')))

    inputs = Ports([Port.Table('Input data', name='in')])
    outputs = Ports([Port.Table('Histogram data', name='out')])

    def update_parameters(self, parameters):
        """Make sure the weights editor offers an empty (no weights) choice."""
        parameters['weights_column'].editor['include_empty'] = True

    def adjust_parameters(self, node_context):
        """Adjust both column selectors against the input table."""
        adjust(node_context.parameters['data_column'],
               node_context.input['in'])
        adjust(node_context.parameters['weights_column'],
               node_context.input['in'])

    @staticmethod
    def _drop_masked_and_nan(data, weights):
        """Return ``(data, weights)`` as plain arrays without invalid rows.

        Masked weights are replaced by 1, rows with masked data are removed,
        and rows where the data value or the weight is NaN are removed.
        ``weights`` may be None, in which case None is returned for it.
        """
        if isinstance(weights, np.ma.MaskedArray):
            # filled() substitutes only the masked entries; ndarray.fill()
            # would overwrite every weight with 1.
            weights = weights.filled(1)

        if isinstance(data, np.ma.MaskedArray):
            # getmaskarray() always yields a full boolean array, even when
            # data.mask is the scalar np.ma.nomask.
            keep = np.logical_not(np.ma.getmaskarray(data))
            data = data.compressed()
            if weights is not None:
                weights = weights[keep]

        # NaN can only occur in floating point arrays; check whichever of
        # data and weights is floating point.
        nan_mask = None
        if data.dtype.kind == 'f':
            nan_mask = np.isnan(data)
        if weights is not None and weights.dtype.kind == 'f':
            weights_nan = np.isnan(weights)
            if nan_mask is None:
                nan_mask = weights_nan
            else:
                nan_mask = nan_mask | weights_nan
        if nan_mask is not None:
            valid = ~nan_mask
            data = data[valid]
            if weights is not None:
                weights = weights[valid]

        return data, weights

    def execute(self, node_context):
        """Calculate the histogram and write it to the output table.

        Writes the columns "Bin values", "Bin min edges" and "Bin max edges"
        and records the selected column names as table attributes.

        Raises SyDataError if the data column holds string/void data or the
        weights column is not numeric, and SyConfigurationError if density is
        requested for datetime or timedelta data.
        """
        parameters = node_context.parameters
        bins = parameters['bins'].value
        density = parameters['normed'].value
        data_param = parameters['data_column']
        weights_param = parameters['weights_column']

        if parameters['auto_range'].value:
            range_ = None
        else:
            range_ = parameters['x_min'].value, parameters['x_max'].value

        in_table = node_context.input['in']
        data = in_table._require_column(data_param)
        table_attributes = {'data': data_param.selected}

        if not weights_param.selected:
            weights = None
        else:
            weights = in_table._require_column(weights_param)
            table_attributes['weights'] = weights_param.selected

        # Handle non-numeric arrays
        if data.dtype.kind in 'SUV':
            raise SyDataError("Data column requires data of numeric, datetime "
                              "or timedelta type.")
        if weights is not None and weights.dtype.kind not in 'bcfiu':
            raise SyDataError("Weights column requires data of numeric type.")

        # Handle masked arrays and NaNs
        data, weights = self._drop_masked_and_nan(data, weights)

        # Handle datetimes: np.histogram works on the int64 representation,
        # the edges are converted back afterwards.
        datetime_dtype = None
        if data.dtype.kind in 'mM':
            if density:
                raise SyConfigurationError(
                    "Density can't be used with data of types datetime or "
                    "timedelta.")
            datetime_dtype = data.dtype
            data = data.astype('int64')

        bin_values, bin_edges = np.histogram(
            data, bins=bins, density=density, weights=weights, range=range_)

        if datetime_dtype is not None:
            bin_edges = bin_edges.astype(datetime_dtype)

        out_table = node_context.output['out']
        out_table.set_column_from_array("Bin values", bin_values)
        out_table.set_column_from_array("Bin min edges", bin_edges[:-1])
        out_table.set_column_from_array("Bin max edges", bin_edges[1:])
        out_table.set_table_attributes(table_attributes)