Source code for sdp_par_model.reports

"""
This file contains methods for generating reports for SKA SDP
parametric model data using especially matplotlib and Jupyter. Having
these functions separate allows us to keep notebooks free of clutter.
"""
from __future__ import print_function  # Makes Python-3 style print() function available in Python 2.x

import re
import warnings

import csv
from IPython.display import clear_output, display, HTML, FileLink
from ipywidgets import FloatProgress, ToggleButtons, Text, Layout
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import sympy
import subprocess
import os

try:
    import pymp
    HAVE_PYMP=True
except:
    HAVE_PYMP=False

from .parameters.definitions import HPSOs, Pipelines, Telescopes, Bands, Constants as c
from .parameters.container import ParameterContainer
from . import evaluate as imp
from .config import PipelineConfig

# Possible verbosity levels
verbose_display     = ['Overview', 'Details', 'Debug']

# Possible calculated results to display in the notebook
RESULT_MAP = [
    # Table Row Title              Unit          Default? Sum?   Expression
    ('-- Parameters --',           '',           True,    False, lambda tp: ''                    ),
    ('Telescope',                  '',           True,    False, lambda tp: tp.telescope          ),
    ('Band',                       '',           True,    False, lambda tp: str(tp.band) if tp.band is not None else ''),
    ('Frequency Min',              'GHz',        False,   False, lambda tp: tp.freq_min/c.giga    ),
    ('Frequency Max',              'GHz',        False,   False, lambda tp: tp.freq_max/c.giga    ),
    ('Pipeline',                   '',           True,    False, lambda tp: str(tp.pipeline)      ),
    ('Baseline coalescing',        '',           True,    False, lambda tp: tp.blcoal             ),
    ('On-the-fly kernels',         '',           True,    False, lambda tp: tp.on_the_fly         ),
    ('Scale predict by facet',     '',           True,    False, lambda tp: tp.scale_predict_by_facet),
    ('Max # of channels',          '',           True,    False, lambda tp: tp.Nf_max             ),
    ('Max Baseline',               'km',         True,    False, lambda tp: tp.Bmax / c.kilo      ),
    ('Dump time',                  's',          False,   False, lambda tp: tp.Tint_used          ),
    ('Observation Time',           's',          False,   False, lambda tp: tp.Tobs               ),
    ('Pointing Time',              's',          False,   False, lambda tp: tp.Tpoint             ),
    ('Total Time',                 's',          False,   False, lambda tp: tp.Texp               ),
    ('Snapshot Time',              's',          True,    False, lambda tp: tp.Tsnap              ),
    ('Facets',                     '',           True,    False, lambda tp: tp.Nfacet             ),
    ('w-stacking planes',          '',           True,    False, lambda tp: tp.Nwstack            ),
    ('w-stacking planes predict',  '',           True,    False, lambda tp: tp.Nwstack_predict    ),
    ('Stations/antennas',          '',           False,   False, lambda tp: tp.Na                 ),
    ('Max Baseline [per bin]',     'km',         False,   False, lambda tp: [ bin['b'] / c.kilo
                                                                              for  bin in tp.baseline_bins ]),
    ('Baseline fraction [per bin]','%',          False,   False, lambda tp: [ 100*bin['bfrac']
                                                                              for  bin in tp.baseline_bins ]),
    ('-- Image --',                '',           True,    False, lambda tp: ''                    ),
    ('Facet FoV size',             'deg',        False,   False, lambda tp: tp.Theta_fov/c.degree ),
    ('Total FoV size',             'deg',        False,   False, lambda tp: tp.Theta_fov_total/c.degree),
    ('Output FoV size',            'deg',        False,   False, lambda tp: tp.Theta_fov_out/c.degree),
    ('PSF size',                   'arcs',       False,   False, lambda tp: tp.Theta_beam/c.arcsecond,),
    ('Pixel size',                 'arcs',       False,   False, lambda tp: tp.Theta_pix/c.arcsecond),
    ('Facet side length',          'pixels',     True,    False, lambda tp: tp.Npix_linear        ),
    ('Image side length',          'pixels',     True,    False, lambda tp: tp.Npix_linear_fov_total),
    ('Output image side length',   'pixels',     True,    False, lambda tp: tp.Npix_linear_out    ),
    ('Grid side dimension',        'lambda',     True,    False, lambda tp: tp.Lambda_grid        ),
    ('Baselines dimension',        'lambda',     True,    False, lambda tp: tp.Lambda_bl          ),
    ('Epsilon (approx)',           '',           False,   False, lambda tp: tp.epsilon_f_approx   ),
    ('Qbw',                        '',           False,   False, lambda tp: tp.Qbw                ),
    ('Max subband ratio',          '',           False,   False, lambda tp: tp.max_subband_freq_ratio),
    ('Number subbands',            '',           False,   False, lambda tp: tp.Nsubbands          ),
    ('Station/antenna diameter',   '',           False,   False, lambda tp: tp.Ds                 ),

    ('-- Time Coalescing --',      '',           False,   False, lambda tp: ''                    ),
    ('Ionospheric timescale',      's',          False,   False, lambda tp: tp.Tion               ),
    ('Coalesce time full',         's',          False,   False, lambda tp: tp.Tcoal_skipper      ),
    ('Coalesce time pred',         's',          False,   False, lambda tp: tp.Tcoal_predict      ),
    ('Coalesce time bw',           's',          False,   False, lambda tp: tp.Tcoal_backward     ),
    ('Combined samples full',      '',           False,   False, lambda tp: tp.combine_time_samples),
    ('Combined samples facet',     '',           False,   False, lambda tp: tp.combine_time_samples_facet),
    ('Kernel time pred',           's',          False,   False, lambda tp: tp.Tkernel_predict    ),
    ('Kernel time backward',       's',          False,   False, lambda tp: tp.Tkernel_backward   ),
    ('Visibilities kernel pred',   '',           False,   False, lambda tp: tp.Nvis_gcf_predict   ),
    ('Visibilities kernel bw',     '',           False,   False, lambda tp: tp.Nvis_gcf_backward  ),
    ('Oversampling used pred',     '',           False,   False, lambda tp: tp.Ngcf_used_predict  ),
    ('Oversampling used bw',       '',           False,   False, lambda tp: tp.Ngcf_used_backward ),

    ('-- Channelization --',       '',           False,   False, lambda tp: ''                    ),
    ('Channels predict, no-smear', '',           False,   False, lambda tp: tp.Nf_no_smear_predict),
    ('Channels backward, no-smear','',           False,   False, lambda tp: tp.Nf_no_smear_backward),
    ('Frequencies predict ifft',   '',           False,   False, lambda tp: tp.Nf_FFT_predict     ),
    ('Frequencies predict kernels','',           False,   False, lambda tp: tp.Nf_gcf_predict     ),
    ('Frequencies predict de-grid','',           False,   False, lambda tp: tp.Nf_vis_predict     ),
    ('Frequencies total',          '',           False,   False, lambda tp: tp.Nf_vis             ),
    ('Frequencies backward kernels','',          False,   False, lambda tp: tp.Nf_gcf_backward    ),
    ('Frequencies backward grid',  '',           False,   False, lambda tp: tp.Nf_vis_backward    ),
    ('Frequencies backward fft',   '',           False,   False, lambda tp: tp.Nf_FFT_backward    ),
    ('Channels out',               '',           False,   False, lambda tp: tp.Nf_out             ),

    ('-- Visibility --',           '',           False,   False, lambda tp: ''                    ),
    ('Ingest rate',                'M/s',        False,   False, lambda tp: tp.Rvis_ingest / c.mega),
    ('Averaged rate',              'M/s',        False,   False, lambda tp: tp.Rvis / c.mega      ),
    ('Predict rate',               'M/s',        False,   False, lambda tp: tp.Rvis_predict  / c.mega),
    ('Backward rate',              'M/s',        False,   False, lambda tp: tp.Rvis_backward  / c.mega),

    ('-- Kernel Sizes --',         '',           False,   False, lambda tp: ''                    ),
    ('Delta W earth',              'lambda',     False,   False, lambda tp: tp.DeltaW_Earth       ),
    ('Delta W snapshot',           'lambda',     False,   False, lambda tp: tp.DeltaW_SShot       ),
    ('Delta W max',                'lambda',     False,   False, lambda tp: tp.DeltaW_max         ),
    ('Delta W stack',              'lambda',     False,   False, lambda tp: tp.DeltaW_stack       ),
    ('Delta W theory',             'lambda',     False,   False, lambda tp: 1/(1-max(0,1-2*tp.Theta_fov**2)**0.5)),

    ('-- Kernel Sizes --',         '',           False,   False, lambda tp: ''                    ),
    ('W kernel support pred',      'uv-pixels',  False,   False, lambda tp: tp.Ngw_predict        ),
    ('AW kernel support pred',     'uv-pixels',  False,   False, lambda tp: tp.Nkernel_AW_predict ),
    ('W kernel support bw',        'uv-pixels',  False,   False, lambda tp: tp.Ngw_backward       ),
    ('AW kernel support bw',       'uv-pixels',  False,   False, lambda tp: tp.Nkernel_AW_backward),

    ('-- Data --',                 '',           True,    False, lambda tp: ''                    ),
    ('Snapshot vis size',          'GB',         True,    True,  lambda tp: tp.Msnap / c.giga     ),
    ('Facet size',                 'GB',         True,    False, lambda tp: tp.Mfacet / c.giga    ),
    ('Image size',                 'GB',         True,    False, lambda tp: tp.Mimage / c.giga    ),
    ('Image cube size',            'GB',         True,    False, lambda tp: tp.Mimage_cube / c.giga),
    ('Calibration output',         'GB',         True,    False, lambda tp: tp.Mcal_out / c.giga  ),

    ('Calibration process interval','s',         True,    False, lambda tp: tp.Tsolve             ),
    ('Calibration process in',     'MB',         True,    False, lambda tp: tp.Mcal_solve_in / c.mega),
    ('Calibration process out',    'MB',         True,    False, lambda tp: tp.Mcal_solve_out / c.mega),

    ('Cleaning memory',            'PetaBytes',  True,    True,  lambda tp: tp.M_MSMFS/c.peta     ),
    ('Working (cache) memory',     'TeraBytes',  True,    True,  lambda tp: tp.Mw_cache/c.tera    ),
    ('-- I/O --',                  '',           True,    False, lambda tp: ''                    ),
    ('Input Buffer Size',          'PetaBytes',  True,    True,  lambda tp: tp.Minput/c.peta      ),
    ('Total buffer ingest rate',   'TeraBytes/s',True,    False, lambda tp: tp.Rvis_ingest*tp.Nbeam*tp.Npp*tp.Mvis/c.tera),
    #('Rosies buffer size',   'PetaBytes',       True,       False, lambda tp: tp.Tobs*tp.buffer_factor*tp.Rvis_ingest*tp.Nbeam*tp.Npp*tp.Mvis/c.peta),
    ('Output size',                'TB',         True,    True,  lambda tp: tp.Mout / c.tera      ),

    ('-> ',                        'TeraBytes',  True,    True,  lambda tp: tp.get_products('Mwcache', scale=c.tera)),
    ('Buffer Read Rate',           'TeraBytes/s',True,    True,  lambda tp: tp.Rio/c.tera         ),
    ('Facet visibility rate',      'TeraBytes/s',True,    False, lambda tp: tp.Rfacet_vis/c.tera  ),
    ('Image Write Rate',           'TeraBytes/s',True,    True,  lambda tp: tp.Rimage/c.tera      ),

    #('Inter-Facet I/O Rate',       'TeraBytes/s',True,    True,  lambda tp: tp.Rinterfacet/c.tera ),
    #('-> ',                        'TeraBytes/s',True,    True,  lambda tp: tp.get_products('Rinterfacet', scale=c.tera)),

    ('-- Compute --',              '',           True,    False, lambda tp: ''                    ),
    ('Total Compute Requirement',  'PetaFLOP/s', True,    True,  lambda tp: tp.Rflop/c.peta       ),
    ('-> ',                        'PetaFLOP/s', True,    True,  lambda tp: tp.get_products('Rflop', scale=c.peta)),
]


[docs]def toggles(opts, *args, **kwargs): """ Helper for creating toggle buttons from given options """ return ToggleButtons(options=opts, *args, **kwargs)
[docs]def adjusts(): """ Create widget for adjustments (with some suggestions) """ return Text(placeholder='e.g. blcoal=False Bmax=40*1000 Nsource=100', layout=Layout(width='95%'))
[docs]def make_band_toggles(): """Create connected telescope/band toggle widgets that only allow selection of valid combinations""" telescope_toggles = toggles(sorted(Telescopes.available_teles), description="telescope1") band_toggles = toggles(sorted(Bands.available_bands)) def _update_toggles(*_args): band_toggles.options = tuple(sorted(Bands.telescope_bands[telescope_toggles.value])) _update_toggles(); telescope_toggles.observe(_update_toggles, 'value') return telescope_toggles, band_toggles
[docs]def make_hpso_pipeline_toggles(): """Create connected HPSO/pipeline toggle widgets that only allow selection of valid combinations""" hpso_toggles = toggles(sorted(HPSOs.available_hpsos)) pipeline_toggles = toggles(sorted(Pipelines.available_pipelines)) def update_pipeline_toggles(*_args): pipeline_toggles.options = tuple(sorted(HPSOs.hpso_pipelines[hpso_toggles.value])) update_pipeline_toggles(); hpso_toggles.observe(update_pipeline_toggles, 'value') return hpso_toggles, pipeline_toggles
[docs]def get_result_sum(resultMap): """ Returns the corresponding entries of whether expressions should be summed or concatenated in a list. :param resultMap: :returns: """ return list(map(lambda row: row[3], resultMap))
[docs]def get_result_expressions(resultMap, tp): """ Returns the expression that needs to be evaluated :param resultMap: :param tp: :returns: """ def expr(row): try: return row[4](tp) except AttributeError: return "(undefined)" return list(map(expr, resultMap))
[docs]def mk_result_map_rows(verbosity = 'Overview'): """ Collects result map information for a given row set :param verbosity: Row set to show. If None, we will use default rows. :returns: A tuple of the result map, the sorted list of the row names and a list of the row units. """ if verbosity == 'Overview': result_map = list(filter(lambda row: row[2], RESULT_MAP)) else: result_map = RESULT_MAP return (result_map, list(map(lambda row: row[0], result_map)), list(map(lambda row: row[1], result_map)))
[docs]def default_rflop_plotting_colours(rows): """ Defines a default colour order used in plotting Rflop components :returns: List of HTML colour codes as string """ # Stolen from D3's category20 cat20 = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5'] return (cat20 + cat20)[:len(rows)]
[docs]def format_result(value): """ Format a result value for viewing. As we expect that most numbers should be in a "nice" range this means we truncate number accuracy by default. """ # Attempt to make proper floats out of sympy expressions if isinstance(value, sympy.Expr): try: value = float(value.evalf()) except e: pass # Floating point values up to 4 digits if isinstance(value, float): return min(['%.4g' % value, '%.5g' % value], key=len) # Lists: Apply formating recursively if isinstance(value, list): s = '[' for v in value: if len(s) > 1: s += ', ' s += format_result(v) return s + ']' # Otherwise: Trust default formatting return '%s' % value
[docs]def format_result_cell(val, color='black', colspan=1, typ='td'): """ Format a result value for inclusing in a table. """ return '<%s colspan="%d" style="text-align:left"><font color="%s">%s</font></%s>' % ( typ, colspan, color, format_result(val), typ)
[docs]def format_result_cells(val, color='black', max_cols=1): """ Format a result value for inclusing in a table. If the value is a list, we genrate multiple cells up to "max_cells". """ row_html = '' if type(val) == list and len(val) <= max_cols: for v in val: row_html += format_result_cell(v, color, 1) if len(val) < max_cols: row_html += format_result_cell('', color, max_cols-len(val)) else: row_html += format_result_cell(val, color, max_cols) return row_html
[docs]def show_table(title, labels, values, units, docs=None): """ Plots a table of label-value pairs :param title: string :param labels: string list / tuple :param values: string list / tuple :param units: string list / tuple :param docs: Optional documentation per row :returns: """ s = '<h3>%s:</h3><table>\n' % title assert len(labels) == len(values) assert len(labels) == len(units) max_cols = max([1] + [len(v) for v in values if type(v) == list]) extra_cols = (2 if docs is None else 3) for i in range(len(labels)): if labels[i].startswith('--'): s += '<tr>%s</tr>' % format_result_cell(labels[i], colspan=max_cols+extra_cols, typ='th') continue def row(label, val): row_html = '<tr><td>%s</td>' % label row_html += format_result_cells(val, color='blue', max_cols=max_cols) row_html += '<td style="text-align:left">%s</td>' % units[i] if docs is not None: row_html += '<td style="text-align:left">%s</td>' % docs[i] return row_html + '</tr>\n' if not isinstance(values[i], dict): s += row(labels[i], values[i]) else: for name in sorted(values[i].keys()): s += row(labels[i] + name, values[i][name]) s += '</table>' display(HTML(s))
[docs]def show_table_compare(title, labels, values_1, values_2, units): """ Plots a table that for a set of labels, compares each' value with the other :param title: :param labels: :param values_1: :param values_2: :param units: :returns: """ s = '<h4>%s:</h4><table>\n' % title assert len(labels) == len(values_1) assert len(labels) == len(values_2) assert len(labels) == len(units) max_cols = max([1] + [len(v) for v in values_1+values_2 if type(v) == list]) for i in range(len(labels)): if labels[i].startswith('--'): s += '<tr>%s</tr>' % format_result_cell(labels[i], colspan=2*max_cols+2, typ='th') continue def row(label, val1, val2): row_html = '<tr><td>%s</td>' % label row_html += format_result_cells(val1, color='darkcyan', max_cols=max_cols) row_html += format_result_cells(val2, color='blue', max_cols=max_cols) row_html += '<td style="text-align:left">%s</td></tr>\n' % units[i] return row_html if not isinstance(values_1[i], dict) and not isinstance(values_2[i], dict): s += row(labels[i], values_1[i], values_2[i]) else: for name in sorted(set(values_1[i]).union(values_2[i])): s += row(labels[i] + name, values_1[i].get(name, 0), values_2[i].get(name, 0)) s += '</table>' display(HTML(s))
[docs]def plot_line_datapoints(title, x_values, y_values, xlabel=None, ylabel=None): """ Plots a series of (x,y) values using a line and data-point visualization. :param title: :param x_values: :param y_values: :returns: """ pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session assert len(x_values) == len(y_values) plt.plot(x_values, y_values, 'ro', x_values, y_values, 'b') plt.title('%s\n' % title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.ylim((0, max(y_values))) plt.show()
[docs]def plot_2D_surface(title, x_values, y_values, z_values, contours=None, xlabel=None, ylabel=None, zlabel=None, nlevels=15): """ Plots a series of (x,y) values using a line and data-point visualization. :param title: The plot's title :param x_values: a 1D numpy array :param y_values: a 1D numpy array :param z_values: a 2D numpy array, indexed as (x,y) :param contours: optional array of values at which contours should be drawn :param zlabel: :param ylabel: :param xlabel: :returns: """ colourmap = 'coolwarm' # options include: 'afmhot', 'coolwarm' contour_colour = [(1., 0., 0., 1.)] # red pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session sizex = len(x_values) sizey = len(y_values) assert np.shape(z_values)[0] == sizey assert np.shape(z_values)[1] == sizex xx = np.tile(x_values, (sizey, 1)) yy = np.transpose(np.tile(y_values, (sizex, 1))) C = pylab.contourf(xx, yy, z_values, nlevels, alpha=.75, cmap=colourmap) pylab.colorbar(shrink=.92) if contours is not None: C = pylab.contour(xx, yy, z_values, levels = contours, colors=contour_colour, linewidths=[2], linestyles='dashed') plt.clabel(C, inline=1, fontsize=10) C.ax.set_xlabel(xlabel) C.ax.set_ylabel(ylabel) C.ax.set_title(title, fontsize=16) pylab.show()
[docs]def plot_3D_surface(title, x_values, y_values, z_values, contours=None, xlabel=None, ylabel=None, zlabel=None, nlevels=15): """ Plots a series of (x,y) values using a line and data-point visualization. :param title: The plot's title :param x_values: a 1D numpy array :param y_values: a 1D numpy array :param z_values: a 2D numpy array, indexed as (x,y) :param contours: optional array of values at which contours should be drawn :param zlabel: :param ylabel: :param xlabel: :returns: """ colourmap = cm.coolwarm # options include: 'afmhot', 'coolwarm' contour_colour = [(1., 0., 0., 1.)] # red pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session assert len(x_values) == len(y_values) sizex = len(x_values) sizey = len(y_values) assert np.shape(z_values)[0] == sizey assert np.shape(z_values)[1] == sizex xx = np.tile(x_values, (sizey, 1)) yy = np.transpose(np.tile(y_values, (sizex, 1))) fig = plt.figure() ax = fig.gca(projection='3d') surf = ax.plot_surface(xx, yy, z_values, rstride=1, cstride=1, cmap=colourmap, linewidth=0.2, alpha=0.6, antialiased=True, shade=True) fig.colorbar(surf, shrink=0.5, aspect=5) if contours is not None: cset = ax.contour(xx, yy, z_values, contours, zdir='z', linewidths = (2.0), colors=contour_colour) plt.clabel(cset, inline=1, fontsize=10) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_zlabel(zlabel) ax.set_title(title, fontsize=16) plt.show()
[docs]def plot_pie(title, labels, values, colours=None): """ Plots a pie chart :param title: :param labels: :param values: a numpy array :param colours: """ assert len(labels) == len(values) if colours is not None: assert len(colours) == len(values) nr_slices = len(values) # The values need to sum to one, for a pie plot. Let's enforce that. values_norm = values / np.linalg.norm(values) pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session # The slices will be ordered and plotted counter-clockwise. explode = np.ones(nr_slices) * 0.05 # The radial offset of the slices plt.pie(values_norm, explode=explode, labels=labels, colors=colours, autopct='%1.1f%%', shadow=True, startangle=90) # Set aspect ratio to be equal so that pie is drawn as a circle. plt.axis('equal') plt.title('%s\n' % title) plt.show()
[docs]def save_pie(title, labels, values, filename, colours=None): """ Works exactly same way as plot_pie(), but instead of plotting, saves a pie chart to SVG output file. Useful for exporting results to documents and such :param title: :param labels: :param values: a numpy array :param filename: :param colours: """ assert len(labels) == len(values) if colours is not None: assert len(colours) == len(values) nr_slices = len(values) # The values need to sum to one, for a pie plot. Let's enforce that. values_norm = values / np.linalg.norm(values) pylab.rcParams['figure.figsize'] = 8, 6 # that's default image size for this interactive session # The slices will be ordered and plotted counter-clockwise. explode = np.ones(nr_slices) * 0.05 # The radial offset of the slices plt.pie(values_norm, explode=explode, labels=labels, colors=colours, autopct='%1.1f%%', shadow=True, startangle=90) # Set aspect ratio to be equal so that pie is drawn as a circle. plt.axis('equal') plt.title('%s\n' % title) plt.savefig(filename, format='svg', dpi=1200)
[docs]def plot_stacked_bars(title, labels, value_labels, dictionary_of_value_arrays, colours=None, width=0.35, save=None, xticks_rot='horizontal'): """ Plots a stacked bar chart, with any number of columns and components per stack (must be equal for all bars) :param title: :param labels: The label belonging to each bar :param dictionary_of_value_arrays: A dictionary that maps each label to an array of values (to be stacked). :param colours: :returns: """ # Do some sanity checks number_of_elements = len(dictionary_of_value_arrays) if colours is not None: assert number_of_elements == len(colours) for key in dictionary_of_value_arrays: assert len(list(dictionary_of_value_arrays[key])) == len(list(labels)) #Plot a stacked bar chart nr_bars = len(labels) indices = np.arange(nr_bars) # The indices of the bars bottoms = {} # The height of each bar, by key # Collect bars to generate. We want the first bar to end up at # the top, therefore we determine their position starting from # the back. valueSum = np.zeros(nr_bars) for key in reversed(list(value_labels)): bottoms[key] = valueSum valueSum = valueSum + np.array(dictionary_of_value_arrays[key]) for index, key in enumerate(value_labels): values = np.array(dictionary_of_value_arrays[key]) if colours is not None: plt.bar(indices, values, width, color=colours[index], bottom=bottoms[key]) else: plt.bar(indices, values, width, bottom=bottom[key]) for x, v, b in zip(indices, values, bottoms[key]): if v >= np.amax(np.array(valueSum)) / 40: plt.text(x+width/2, b+v/2, "%.1f%%" % (100 * v / valueSum[x]), horizontalalignment='center', verticalalignment='center') plt.xticks(indices+width/2., labels, rotation=xticks_rot) plt.title(title) plt.legend(value_labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) #plt.legend(dictionary_of_value_arrays.keys(), loc=1) # loc=2 -> legend upper-left if not save is None: plt.savefig(save, format='pdf', dpi=1200, bbox_inches = 'tight') pylab.show()
[docs]def check_pipeline_config(cfg, pure_pipelines): """ Check pipeline configuration, displaying a message in the Notebook for every problem found. Returns whether the configuration is usable at all. """ (okay, messages) = cfg.is_valid(pure_pipelines=pure_pipelines) for msg in messages: display(HTML('<p><font color="red"><b>{0}</b></font></p>'.format(msg))) if not okay: display(HTML('<p><font color="red">Adjust to recompute.</font></p>')) return okay
[docs]def compare_telescopes_default(telescope_1, band_1, pipeline_1, adjusts_1, telescope_2, band_2, pipeline_2, adjusts_2, verbosity='Overview'): """ Evaluates two telescopes, both operating in a given band and pipeline, using their default parameters. A bit of an ugly bit of code, because it contains both computations and display code. But it does make for pretty interactive results. Plots the results side by side. :param telescope_1: :param telescope_2: :param band_1: :param band_2: :param pipeline_1: :param pipeline_2: :param adjusts_1: Configuration adjustments for telescope 1. See PipelineConfig. :param adjusts_2: Configuration adjustments for telescope 2. See PipelineConfig. :param verbosity: amount of output to generate """ # Make configurations and check cfg_1 = PipelineConfig(telescope=telescope_1, band=band_1, pipeline=pipeline_1, adjusts=adjusts_1) cfg_2 = PipelineConfig(telescope=telescope_2, band=band_2, pipeline=pipeline_2, adjusts=adjusts_2) if not check_pipeline_config(cfg_1, pure_pipelines=True) or \ not check_pipeline_config(cfg_2, pure_pipelines=True): return # Determine which rows to show (result_map, result_titles, result_units) = mk_result_map_rows(verbosity) # Loop through telescope configurations, collect results display(HTML('<font color="blue">Computing the result -- this may take several seconds.</font>')) detailed = (verbosity=='Debug') tels_result_values = [ _compute_results(cfg_1, result_map, detailed, detailed), _compute_results(cfg_2, result_map, detailed, detailed), ] display(HTML('<font color="blue">Done computing.</font>')) # Show comparison table show_table_compare('Computed Values', result_titles, tels_result_values[0], tels_result_values[1], result_units) # Show comparison stacked bars products_1 = tels_result_values[0][-1] products_2 = tels_result_values[1][-1] labels = sorted(set(products_1).union(products_2)) colours = default_rflop_plotting_colours(labels) telescope_labels = (cfg_1.describe(), cfg_2.describe()) values = { label: (products_1.get(label,0),products_2.get(label,0)) for label in labels } plot_stacked_bars('Computational Requirements (PetaFLOPS)', telescope_labels, labels, values, colours)
[docs]def evaluate_hpso_optimized(hpso, hpso_pipe, adjusts='', verbosity='Overview'): """ Evaluates a High Priority Science Objective by optimizing NFacet and Tsnap to minimize the total FLOP rate :param hpso: HPSO pipeline to evaluate :param verbosity: """ # Make and check pipeline configuration display(HTML('<font color="blue">Evaluating...</font>')) cfg = PipelineConfig(hpso=hpso, hpso_pipe=hpso_pipe, adjusts=adjusts) if not check_pipeline_config(cfg, pure_pipelines=True): return # Determine which rows to calculate & show (result_map, result_titles, result_units) = mk_result_map_rows(verbosity) # Compute detailed = (verbosity=='Debug') result_values = _compute_results(cfg, result_map, detailed, detailed) display(HTML('<font color="blue">Done computing. Results follow:</font>')) # Show table of results show_table('Computed Values', result_titles, result_values, result_units) # Show pie graph of FLOP counts values = result_values[-1] # the last value colours = default_rflop_plotting_colours(set(values)) plot_pie('FLOP breakdown for %s' % HPSOs.hpso_telescopes[hpso], values.keys(), list(values.values()), colours)
[docs]def evaluate_telescope_optimized(telescope, band, pipeline, max_baseline="default", Nf_max="default", blcoal=True, on_the_fly=False, scale_predict_by_facet=True, verbosity='Overview'): """ Evaluates a telescope with manually supplied parameters, but then automatically optimizes NFacet and Tsnap to minimize the total FLOP rate for the supplied parameters :param telescope: :param band: :param pipeline: :param max_baseline: :param Nf_max: :param blcoal: Baseline dependent coalescing (before gridding) :param on_the_fly: :param verbosity: """ # Make configuration cfg = PipelineConfig(telescope=telescope, pipeline=pipeline, band=band, Bmax=max_baseline, Nf_max=Nf_max, blcoal=blcoal, on_the_fly=on_the_fly, scale_predict_by_facet=scale_predict_by_facet) if not cfg.is_valid(pure_pipelines=True)[0]: return # Determine rows to show (result_map, result_titles, result_units) = mk_result_map_rows(verbosity) # Compute display(HTML('<font color="blue">Computing the result -- this may take several seconds.' '</font>')) detailed = (verbosity=='Debug') result_values = _compute_results(cfg, result_map, detailed, detailed) display(HTML('<font color="blue">Done computing. Results follow:</font>')) # Make table show_table('Computed Values', result_titles, result_values, result_units) # Make pie plot values = result_values[-1] # the last value colours = default_rflop_plotting_colours(set(values)) plot_pie('FLOP breakdown for %s' % telescope, values.keys(), list(values.values()), colours)
def _pipeline_configurations(telescopes, bands, pipelines, adjusts={}): """Make a list of all valid configuration combinations in the list.""" configs = [] for telescope in telescopes: for band in bands: for pipeline in pipelines: cfg = PipelineConfig(telescope=telescope, band=band, pipeline=pipeline, adjusts=adjusts) # Check whether the configuration is valid (okay, msgs) = cfg.is_valid() if okay: configs.append(cfg) return configs
[docs]def write_csv_pipelines(filename, telescopes, bands, pipelines, adjusts="", verbose=False, parallel=0): """ Evaluates all valid configurations of this telescope and dumps the result as a CSV file. """ # Make configuration list configs = _pipeline_configurations(telescopes, bands, pipelines, adjusts) # Calculate rows = RESULT_MAP # Everything - hardcoded for now results = _batch_compute_results(configs, rows, parallel, verbose, True) # Write CSV _write_csv(filename, results, rows)
[docs]def write_csv_hpsos(filename, hpsos, adjusts="", verbose=False, parallel=0): """ Evaluates all valid configurations of this telescope and dumps the result as a CSV file. """ # Make configuration list configs = [] for hpso in sorted(hpsos): for pipe in HPSOs.hpso_pipelines[hpso]: cfg = PipelineConfig(hpso=hpso, hpso_pipe=pipe, adjusts=adjusts) configs.append(cfg) # Calculate rows = RESULT_MAP # Everything - hardcoded for now results = _batch_compute_results(configs, rows, parallel, verbose, True) # Write CSV _write_csv(filename, results, rows)
def _compute_results(pipelineConfig, result_map, verbose=False, detailed=False, adjusts={}): """A private method for computing a set of results. :param pipelineConfig: Complete pipeline configuration :param result_map: results to produce :param verbose: Chattiness of parameter generation :param detailed: Produce detailed output results? :returns: result value array """ # Loop through pipeliness to collect result values result_value_array = [] # Calculate the telescope parameters tp = pipelineConfig.calc_tel_params(verbose=verbose, adjusts=adjusts) # Evaluate expressions from map result_expressions = get_result_expressions(result_map, tp) results_for_pipeline = imp.evaluate_expressions(result_expressions, tp) result_value_array.append(results_for_pipeline) # Now transpose, then sum up results from pipelines per row result_values = [] transposed_results = zip(*result_value_array) sum_results = get_result_sum(result_map) for (row_values, sum_it) in zip(transposed_results, sum_results): # Sum up baseline dependency unless in detailed mode if not detailed and all([isinstance(vals, list) for vals in row_values]): if sum_it: row_values = [ sum(vals) for vals in row_values ] else: row_values = [ [vals[0],"..",vals[-1]] for vals in row_values ] # Then also try to sum up pipeline results, if possible if sum_it: try: result_values.append(sum(row_values)) continue except TypeError: pass if len(row_values) == 1: result_values.append(row_values[0]) else: result_values.append(list(row_values)) return result_values def _batch_compute_results(configs, result_map, parallel=0, verbose=False, detailed=False, quiet=False): """Calculate a whole bunch of pipeline configurations. """ if not quiet: display(HTML('<font color="blue">Calculating %d configurations -- this may take quite a while.</font>' % len(configs))) # Parallelise if requested if parallel > 0 and HAVE_PYMP: configQueue = pymp.shared.list(configs) results = pymp.shared.dict() f = FloatProgress(min=0, max=len(configs)) display(f) with pymp.Parallel(parallel) as p: try: while True: if p.thread_num == 0: f.value = len(configs) - len(configQueue) f.description = "%d/%d" % (f.value, len(configs)) config = configQueue.pop() results[config.describe()] = _batch_compute_results( [config], result_map, verbose=verbose, detailed=detailed,quiet=True) except IndexError: pass return list([res for cfg in configs for res in results[cfg.describe()]]) results = [] for cfg in configs: # Check that the configuration is valid, skip if it isn't (okay, msgs) = cfg.is_valid() if not okay: display(HTML('<p>Skipping %s (%s)</p>' % (cfg.describe(), ", ".join(msgs)))) continue # Compute, add to results if verbose: display(HTML('<p>Calculating %s...</p>' % cfg.describe())) results.append((cfg, _compute_results(cfg, result_map, verbose=verbose, detailed=detailed))) return results def _write_csv(filename, results, rows): """ Writes pipeline calculation results as a CSV file """ with open(filename, 'w') as csvfile: w = csv.writer(csvfile) # Output row with configurations w.writerow([''] + list(map(lambda r: r[0].describe(), results))) # Output actual results for i, row in enumerate(rows): rowTitle = row[0] rowUnit = row[1] if rowUnit != '': rowUnit = ' [' + rowUnit + ']' # Convert lists to dictionaries resultRow = map(lambda r: r[1][i], results) resultRow = list(map(lambda r: dict(enumerate(r)) if isinstance(r,list) else r, resultRow)) # Dictionary? Expand dicts = list(filter(lambda r: isinstance(r, dict), resultRow)) if len(list(dicts)) > 0: # Collect labels labels = set() for d in dicts: labels = labels.union(d.keys()) # Show all of them, properly sorted. Non-dicts # (errors) are simply shoved into the first row. first = True for label in labels: def printRow(r): if isinstance(r, dict): return r.get(label, '') elif first: return r return '' w.writerow([rowTitle + str(label) + rowUnit] + list(map(printRow, resultRow))) first = False else: # Simple write out as-is w.writerow([rowTitle + rowUnit] + resultRow) display(HTML('<font color="blue">Results written to %s.</font>' % filename))
[docs]def read_csv(filename): """ Reads pipeline calculation results from a CSV file as written by _write_csv. """ display(HTML('<font color="blue">Reading %s...</font>' % filename)) with open(filename, 'r') as csvfile: r = csv.reader(csvfile) it = iter(r) # First row must be headings (i.e. the configurations) headings = next(it) results = [] # Data in the rest of them for row in it: resultRow = [] for h, v in zip(headings[1:], row[1:]): resultRow.append((h, v)) results.append((row[0], resultRow)) return results
# Strip modifiers from rows _convert_old = re.compile('(\\d\\d)(\\w)?(DPrep.|ICAL)') _convert_old_max = re.compile('max_([\\w_]+)_(spectral|continuum)') def _strip_modifiers(head, do_it=True): # Pipeline renamings head = head.replace('Fast_Img', Pipelines.FastImg) # Does it match the old HPSO naming scheme? Convert m = _convert_old.match(head) if m: hpso = m.group(1) if m.group(2) is not None: hpso += m.group(2).lower() head = "hpso%s (%s)" % (hpso, m.group(3)) m = _convert_old_max.match(head) if m: band = m.group(1) if band == 'Band5_MID': band = 'mid_band5a_1' pipeline = ('DPrepC' if m.group(2) == 'spectral' else 'DPrepA') head = 'max_%s (%s)' % (band, pipeline) head = head.lower() if do_it: return re.sub('\\[[^\\]]*\\]', '', head).strip(' ') return head
[docs]def lookup_csv(results, column_name, row_name, ignore_units=True, ignore_modifiers=True): """ Lookup a value in a CSV table :param results: CSV table as returned by read_csv :param column_name: Column (pipeline/HPSO name) to look up :param row_name: Row (parameter name) to look up :param ignore_units: Ignore units when matching rows (say, [s]) :param ignore_modifiers: Ignore modifiers when matching columns (say, [blcoal]) :returns: Value if found, None otherwise """ # Strip names row_name = _strip_modifiers(row_name, ignore_units) column_name = _strip_modifiers(column_name, ignore_modifiers) # Lookup table? Short-cut if isinstance(results, dict): row = results.get(row_name) if row is None: return None return row.get(column_name) for row_name2, row in results: # Right row? if row_name != _strip_modifiers(row_name2, ignore_units): continue # Find column for column_name2, val in row: if column_name != _strip_modifiers(column_name2, ignore_modifiers): continue # Found! return val return None
[docs]def strip_csv(csv, ignore_units=True, ignore_modifiers=True): return { _strip_modifiers(row_name, ignore_units) : { _strip_modifiers(column_name, ignore_modifiers) : v for column_name, v in cols } for row_name, cols in csv }
[docs]def compare_csv(result_file, ref_file, ignore_modifiers=True, ignore_units=True, row_threshold=0.01, export_html='', return_diffs=False): """ Read and compare two CSV files with telescope parameters :param result_file: CSV file with telescope parameters :param ref_file: CSV file with reference parameters :param ignore_modifiers: Ignore modifiers when matching columns (say, [blcoal]) :param ignore_units: Ignore units when matching rows (say, [s]) :param export_html: File to write table to. If empty, will be shown inline. :param return_diffs: Whether to also return differences as a dictionary :returns: Sum of differences found in specified rows, if requested """ # Read results and reference. Make lookup dictionary for the latter. results = read_csv(result_file) ref = dict(read_csv(ref_file)) ref = { _strip_modifiers(name, ignore_units): row for (name, row) in ref.items() } # Headings stbl = '<table><tr><th></th><th>Mean</th><th>Min</th><th>Max</th>' for head, _ in results[0][1]: stbl += '<th>%s</th>' % head stbl += '</tr>\n' # Loop through rows all_diffs = [] all_diff_sums = [] for row_name, row in results: # Heading? if row_name.startswith('--'): stbl += '<tr><th colspan="%d" style="text-align:left">%s</th></tr>' % (len(row)+4, row_name) continue else: shead = '<tr><td>%s</td>' % row_name # Accumulate difference? diffs = [] # Locate reference results refRow = ref.get(_strip_modifiers(row_name, ignore_units), []) refRow = map(lambda h_v: (_strip_modifiers(h_v[0], ignore_modifiers), h_v[1]), refRow) refRow = dict(refRow) # Loop through values s = "" for head, val in row: head = _strip_modifiers(head, ignore_modifiers) # Number? try: # Non-null value num = float(val) if num == 0: raise ValueError # Try to get reference as number, too ref_num = None if head in refRow: try: ref_num = float(refRow[head]) except ValueError: ref_num = None # Determine difference diff = None if not (ref_num is None or ref_num == 0): diff = 100*(num-ref_num)/ref_num # Relative difference - use number as # reference for negative changes, as -50% is # about as bad as +200%. diff_rel = max(diff, 100*(ref_num-num)/num) diffs.append(diff) # Output if not diff is None: s += '<td bgcolor="#%2x%2x00">%s&nbsp;<small>(%+d%%)</small></td>' % ( int(min(diff_rel/50*255, 255)), int(255-min(max(0, diff_rel-50)/50*255, 255)), format_result(num), diff) else: s += '<td>%s</td>' % format_result(num) except ValueError: # Get reference as string ref_str = refRow.get(head) # No number, output as is if not ref_str is None: if val == ref_str: if val == '': s += '<td></td>' else: s += '<td bgcolor="#00ff00">%s&nbsp;<small>(same)</small></td>' % val diffs.append(0) else: s += '<td bgcolor="#ffff00">%s&nbsp;<small>(!= %s)</small></td>' % (val, ref_str) diffs.append(100) else: s += '<td>%s</td>' % val all_diffs.append((row_name, diffs)) s += '</tr>\n' if len(diffs) != 0: sdiff = '<td>%+.3g%%</td><td>%+.3g%%</td><td>%+.3g%%</td>' % ( np.mean(diffs), np.min(diffs), np.max(diffs)) all_diff_sums.append((row_name, np.mean(diffs), np.min(diffs), np.max(diffs))) else: sdiff = '<td colspan=3></td>' if len(diffs) == 0 or np.max(np.abs(diffs)) >= row_threshold: stbl += shead + sdiff + s stbl += '</table>' # Write HTML report to file - or display if export_html != '': f = open(export_html, 'w') print("<!doctype html>", file=f) print("<html>", file=f) print(" <title>SDP Parametric Model Result Comparison</title>", file=f) print(" <body><p>Comparing %s against %s</p>" % (result_file, ref_file), file=f) print(stbl, file=f) print(" </body>", file=f) print("</html>", file=f) f.close() display(FileLink(export_html)) else: display(HTML('<h3>Comparison:</h3>')) display(HTML(stbl)) if return_diffs: return all_diff_sums
[docs]def find_csvs(csv_path = "../data/csv"): """ Returns a map of all CSV files currently checked into the Git repository. :returns: A dictionary of (rev, hpsos/pipelines) pairs to file names """ # Get all reference files from Git history refs = subprocess.check_output(["git", "log", "--pretty=format:", "--name-only", "--reverse", csv_path]).split() refs = list(map(lambda r: os.path.relpath(r.decode(), "notebooks"), reversed(refs))) csv_map = {} p = re.compile("[\\d\\-]*\\-([a-z0-9]+)_(pipelines|hpsos)\\.") for ref in refs: if not os.path.isfile(ref): continue m = p.match(os.path.basename(ref)) if m and m.group(1) != "costing": csv_map[(m.group(1), m.group(2))] = ref return csv_map
[docs]def newest_csv(csv_map, typ = 'hpsos', rev = 'HEAD', ignore_rev = False): """ Finds the CSV closest to the given revision according to the Git history :param csv_map: CSV map, see find_csvs :param typ: Type of CSV to look for (hpsos/pipelines) :param rev: Git revision to start from :param ignore_rev: Don't return "rev" itself, begin search with parents :return: CSV file name if found, None otherwise """ # Obtain list of all ancestors, sorted hierarchically and by date. # This means that we go by graph order for commits that appear in # the same history, but by date if commits appear in separate # branches. ancestors = subprocess.check_output( ["git", "log", "--date-order", "--format=%h", rev]).decode().split() # Find first ancestor that has a CSV for ancestor in (ancestors[1:] if ignore_rev else ancestors): if (ancestor, typ) in csv_map: return csv_map[(ancestor, typ)] return None
[docs]def stack_bars_pipelines(title, telescopes, bands, pipelines, adjusts={}, parallel=0, save=None): """ Evaluates all valid configurations of this telescope and shows results as stacked bars. """ # Make configurations configs = _pipeline_configurations(telescopes, bands, pipelines, adjusts) # Calculate rows = [RESULT_MAP[-1]] # Products only results = _batch_compute_results(configs, rows, parallel) products = list(map(lambda r: r[1][-1], results)) labels = sorted(set().union(*list(map(lambda p: p.keys(), products)))) colours = default_rflop_plotting_colours(labels) tel_labels = list(map(lambda cfg: cfg.describe().replace(" ", "\n"), configs)) values = { label: list(map(lambda p: p.get(label, 0), products)) for label in labels } # Show stacked bar graph plot_stacked_bars(title, tel_labels, labels, values, colours, width=0.7, save=save)
[docs]def stack_bars_hpsos(title, hpsos, adjusts={}, parallel=0, save=None): """ Evaluates all valid configurations of this telescope and dumps the result as a CSV file. """ # Make configuration list configs = [] for hpso in hpsos: for hpso_pipe in HPSOs.hpso_pipelines[hpso]: cfg = PipelineConfig(hpso=hpso, hpso_pipe=hpso_pipe, adjusts=adjusts) configs.append(cfg) rows = [RESULT_MAP[-1]] # Products only results = _batch_compute_results(configs, rows, parallel) products = list(map(lambda r: r[1][-1], results)) labels = sorted(set().union(*list(map(lambda p: p.keys(), products)))) colours = default_rflop_plotting_colours(labels) tel_labels = list(map(lambda cfg: cfg.describe(), configs)) values = { label: list(map(lambda p: p.get(label, 0), products)) for label in labels } # Show stacked bar graph plot_stacked_bars(title, tel_labels, labels, values, colours, width=0.7, xticks_rot='vertical', save=save)