SKA PST STAT Architecture

Classes

The following diagram shows the classes involved in the core software architecture of the SKA PST STAT component.

@startuml PST Stat Class Diagram
allow_mixing

package ska::pst::common
{
class ApplicationManager <<abstract>>
class FileSegmentProducer
}

package ska::pst::smrb
{
class SmrbSegmentProducer
}

class StatApplicationManager {
+ StatApplicationManager()
+ ~StatApplicationManager()
+ perform_configure_beam(config: AsciiHeader)
+ perform_configure_scan(config: AsciiHeader)
+ perform_start_scan()
+ perform_scan()
+ perform_end_scan()
+ perform_deconfigure_scan()
+ perform_deconfigure_beam()
+ set_timeout()
- processor: std::unique_ptr<StatProcessor>
- producer: std::shared_ptr<SmrbSegmentProducer>
}

class FileProcessor {
+ FileProcessor(config: AsciiHeader)
+ ~FileProcessor()
+ process()
- processor: std::shared_ptr<StatProcessor>
- reader: std::unique_ptr<FileSegmentProducer>
}

class StatProcessor {
+ StatProcessor(data_config: AsciiHeader, weights_config: AsciiHeader)
+ ~StatProcessor()
+ add_publisher(std::unique_ptr<StatPublisher>)
+ bool validate_segment(segment: SegmentProducer::Segment)
+ bool process(segment: SegmentProducer::Segment)
+ interrupt()
- storage: std::shared_ptr<StatStorage>
- computer: std::unique_ptr<StatComputer>
- publishers: std::vector<std::unique_ptr<StatPublisher>>
- config: AsciiHeader
}

class StatComputer {
+ StatComputer(data_config: AsciiHeader, weights_config: AsciiHeader, storage: StatStorage)
+ ~StatComputer()
+ bool compute(segment: SegmentProducer::Segment)
+ initialise()
+ interrupt()
- storage: std::shared_ptr<StatStorage>
- config: AsciiHeader
}

class StatStorage {
+ StatStorage(config: AsciiHeader)
+ resize(uint32_t ntime_bins, uint32_t nfreq_bins)
+ reset()
+ uint32_t get_npol() const
+ uint32_t get_ndim() const
+ uint32_t get_nchan() const
+ uint32_t get_nbin() const
+ uint32_t get_nrebin() const
+ uint32_t get_ntime_bins() const
+ uint32_t get_nfreq_bins() const
+ uint32_t get_ntime_vals() const
+ double get_total_sample_time() const
+ void set_total_sample_time(double)
+ bool is_storage_resized() const
+ bool is_storage_reset() const
+ mean_frequency_avg: std::vector<std::vector<float>>
+ mean_frequency_avg_rfi_excised: std::vector<std::vector<float>>
+ variance_frequency_avg: std::vector<std::vector<float>>
+ variance_frequency_avg_rfi_excised: std::vector<std::vector<float>>
+ mean_spectrum: std::vector<std::vector<std::vector<float>>>
+ variance_spectrum: std::vector<std::vector<std::vector<float>>>
+ mean_spectral_power: std::vector<std::vector<float>>
+ maximum_spectral_power: std::vector<std::vector<float>>
+ histogram_1d_freq_avg: std::vector<std::vector<std::vector<uint32_t>>>
+ histogram_1d_freq_avg_rfi_excised: std::vector<std::vector<std::vector<uint32_t>>>
+ rebinned_histogram_2d_freq_avg: std::vector<std::vector<std::vector<uint32_t>>>
+ rebinned_histogram_2d_freq_avg_rfi_excised: std::vector<std::vector<std::vector<uint32_t>>>
+ rebinned_histogram_1d_freq_avg: std::vector<std::vector<std::vector<uint32_t>>>
+ rebinned_histogram_1d_freq_avg_rfi_excised: std::vector<std::vector<std::vector<uint32_t>>>
+ num_clipped_samples_spectrum: std::vector<std::vector<std::vector<uint32_t>>>
+ num_clipped_samples: std::vector<std::vector<uint32_t>>
+ num_clipped_samples_rfi_excised: std::vector<std::vector<uint32_t>>
+ spectrogram: std::vector<std::vector<std::vector<float>>>
+ timeseries: std::vector<std::vector<std::vector<float>>>
+ timeseries_rfi_excised: std::vector<std::vector<std::vector<float>>>
+ rfi_mask_lut: std::vector<bool>
+ scalar_stats_t : struct
}

class StatPublisher <<abstract>> {
# config: AsciiHeader
+ StatPublisher(config: AsciiHeader)
+ ~StatPublisher()
{abstract} + publish(std::shared_ptr<StatStorage>)
}

class ScalarStatPublisher implements StatPublisher {
+ ScalarStatPublisher(config: AsciiHeader)
+ ~ScalarStatPublisher()
+ publish(std::shared_ptr<StatStorage>)
+ reset()
+ scalar_stats_t get_scalar_stats()
}

class StatHdf5FileWriter implements StatPublisher {
+ StatHdf5FileWriter(config: AsciiHeader)
+ ~StatHdf5FileWriter()
+ publish(std::shared_ptr<StatStorage>)
}

StatProcessor *-- StatComputer
StatProcessor *-- StatPublisher
StatProcessor o-- StatStorage
StatComputer o-- StatStorage
StatPublisher o-- StatStorage

ApplicationManager <|-- StatApplicationManager
StatApplicationManager o-- SmrbSegmentProducer
StatApplicationManager *-- StatProcessor

FileProcessor *-- StatProcessor
FileProcessor o- FileSegmentProducer

component ska_pst_stat_core
ska_pst_stat_core -- "uses" StatApplicationManager
component ska_pst_stat_file_processor
ska_pst_stat_file_processor -- "uses" FileProcessor

@enduml — Class diagram showing main classes involved

StatProcessor

This is the core class to handle the processing of voltage data. It has been designed to work on data that is either coming from shared memory ring buffers during a scan or via memory mapped (mmap) files.

Applications, such as ska_pst_stat_core or ska_pst_stat_file_processor, that perform statistical calculations will use this class directly rather than performing their own orchestration.

During instantiation, this class will create a StatStorage instance with the correct sizes based on configuration. It creates instances of StatComputer and StatHdf5FileWriter passing along a shared pointer to the StatStorage instance.

This class is not thread-safe; callers must ensure that calls to the process method are made in a thread-safe manner.

The StatProcessor asserts that the data is at least RESOLUTION bytes in length (i.e. NPOL * NDIM * NBITS * NCHAN * UDP_NSAMP / 8). If the length is not a whole multiple of RESOLUTION, it will only calculate the statistics of an integer multiple of RESOLUTION bytes.

StatComputer

This class is the main class for performing the statistical computations.

This class is designed to be re-used between different blocks of data; for each block it performs a calculation and updates the StatStorage struct.

See the StatHdf5FileWriter section for the output statistics that are calculated.

StatHdf5FileWriter

A utility class used for writing out the computed statistics to a file system. Instances of this class are passed a shared StatStorage and the output path to which the statistics are written. Every call to publish will serialise the StatStorage to a new HDF5 file.

HDF5 was chosen because it is an open standard, rather than creating a new structured file format.

HDF5 Data Structure

The output HDF5 file includes a HEADER section, and each computed data structure is stored as a separate HDF5 dataset.

The header of the HDF5 file includes the following fields:

EB_ID - the execution block that the output statistics file is for.

TELESCOPE - the telescope that was used to capture the data (i.e. SKALow or SKAMid)

SCAN_ID - the scan that the output statistics file is for.

BEAM_ID - the beam used to capture the voltage data used in computing the statistics.

UTC_START - the start time of the observation as an ISO 8601 string.

T_MIN - the fractional offset of a second from UTC_START.

T_MAX - the difference between T_MAX and T_MIN is the length of time, in seconds, of the voltage timeseries for which statistics are computed

FREQ - the centre frequency, in MHz, for the voltage data.

BW - the bandwidth of data, in MHz, for the voltage data.

START_CHAN - the starting channel number for the voltage data. This allows subbands of data to be processed.

NPOL - the number of polarisations of the voltage data. For SKA this is always 2.

NDIM - the number of dimensions of the voltage data. For SKA this is 2 as the system is recording complex voltage data.

NCHAN - the number of channels that is in the voltage data.

NCHAN_DS - the number of frequency bins used in the spectrogram output.

NDAT_DS - the number of temporal bins used in the spectrogram and timeseries outputs.

NBIN_HIST - the number of bins that are used in the 1-dimensional histogram.

NREBIN - the number of channel bins used in the re-binned histograms.

CHAN_FREQ - an array of the centre frequencies, one for each of the channels.

FREQUENCY_BINS - the centre frequency, in MHz, for each frequency bin used in the spectrogram.

TIMESERIES_BINS - the observation offset, measured in seconds, for each of the temporal bins used in timeseries and spectrograms

NUM_SAMPLES - number of samples used for calculating statistics

NUM_SAMPLES_RFI_EXCISED - number of samples used for calculating RFI excised statistics

NUM_SAMPLES_SPECTRUM - number of samples per channel for calculating channel statistics

NUM_INVALID_PACKETS - number of invalid packets received while calculating statistics

The output data of the HDF5 includes the following datasets:

FILE_FORMAT_VERSION - this is used to define the format of the file and is used within the Python data access library to be able to process a file even if there are future changes to the format

MEAN_FREQUENCY_AVG - the mean of the data for each polarisation and dimension, averaged over all channels.

MEAN_FREQUENCY_AVG_RFI_EXCISED - the mean of the data for each polarisation and dimension, averaged over all channels, except those flagged for RFI.

VARIANCE_FREQUENCY_AVG - the variance of the data for each polarisation and dimension, averaged over all channels.

VARIANCE_FREQUENCY_AVG_RFI_EXCISED - the variance of the data for each polarisation and dimension, averaged over all channels, except those flagged for RFI.

MEAN_SPECTRUM - the mean of the data for each polarisation, dimension and channel.

VARIANCE_SPECTRUM - the variance of the data for each polarisation, dimension and channel.

MEAN_SPECTRAL_POWER - mean power of the data for each polarisation and channel.

MAX_SPECTRAL_POWER - maximum power of the data for each polarisation and channel.

HISTOGRAM_1D_FREQ_AVG - histogram of the input data integer states for each polarisation and dimension, averaged over all channels.

HISTOGRAM_1D_FREQ_AVG_RFI_EXCISED - histogram of the input data integer states for each polarisation and dimension, averaged over all channels, except those flagged for RFI.

HISTOGRAM_REBINNED_2D_FREQ_AVG - rebinned 2D histogram of the input data integer states for each polarisation, averaged over all channels.

HISTOGRAM_REBINNED_2D_FREQ_AVG_RFI_EXCISED - rebinned 2D histogram of the input data integer states for each polarisation, averaged over all channels, except those flagged for RFI.

HISTOGRAM_REBINNED_1D_FREQ_AVG - rebinned histogram of the input data integer states for each polarisation and dimension, averaged over all channels

HISTOGRAM_REBINNED_1D_FREQ_AVG_RFI_EXCISED - rebinned histogram of the input data integer states for each polarisation and dimension, averaged over all channels, except those flagged for RFI.

NUM_CLIPPED_SAMPLES_SPECTRUM - number of clipped input samples (maximum level) for each polarisation, dimension and channel.

NUM_CLIPPED_SAMPLES - number of clipped input samples (maximum level) for each polarisation, dimension, averaged over all channels

NUM_CLIPPED_SAMPLES_RFI_EXCISED - number of clipped input samples (maximum level) for each polarisation, dimension, averaged over all channels, except those flagged for RFI.

SPECTROGRAM - spectrogram of the data for each polarisation, rebinned in frequency to NCHAN_DS bins and in time to NDAT_DS bins.

TIMESERIES - time series of the data for each polarisation, rebinned in time to NDAT_DS bins, averaged over all frequency channels. This includes max, min, and mean of the power in each bin.

TIMESERIES_RFI_EXCISED - time series of the data for each polarisation, rebinned in time to NDAT_DS bins, averaged over all frequency channels, except those flagged for RFI. This includes max, min, and mean of the power in each bin.

StatStorage

This class provides an abstraction to all of the storage required to hold the statistics products computed by the StatComputer. The class will be constructed with configuration parameters stored in a ska::pst::common::AsciiHeader with the following required parameters:

NPOL Number of polarisations in the input data stream (will always be 2).

NDIM Number of dimensions of each time sample (will always be 2).

NCHAN Number of channels in the input data stream.

NBIT Number of bits per sample in the input data stream.

NREBIN Number of bins in the re-binned input state histograms.

The class provides public methods to resize the storage and to reset all the values of the storage to zero. As documented in the StatStorage Class API, the class exposes all of the storage fields as 1-, 2- or 3-dimensional std::vector attributes of the appropriate types.

StatApplicationManager

This class is an implementation of the ska::pst::common::ApplicationManager class and is used by the ska_pst_stat_core process to manage the lifecycle of configuring the system and performing a scan.

When the application is in a ScanConfigured state this class will have created an instance of the StatProcessor class which will be used during a scan to perform the actual calculation of the statistics and writing the outputs to a file.

FileProcessor

This class is used by the ska_pst_stat_file_processor application to process a specific set of data and weights files. When the application runs it will read a config file into a ska::pst::common::AsciiHeader that is passed into the constructor of this class. When an instance of this class is created it will create an instance of a ska::pst::common::FileSegmentProducer, a StatProcessor, and a StatPublisher (specifically the StatHdf5FileWriter).

Sequences

Processing of a block of data

@startuml PST STAT Processor
actor Client
Client -> StatProcessor: processor = StatProcessor(config)
activate StatProcessor
activate StatComputer
StatProcessor -> StatStorage: storage = StatStorage()
activate StatStorage
StatProcessor -> StatComputer: computer = StatComputer(config, storage)
Client <-- StatProcessor
Client -> StatPublisher : publisher = StatPublisher(config)
activate StatPublisher
Client <-- StatPublisher
Client -> StatProcessor: add_publisher(publisher)
loop while has data
Client -> StatProcessor: process(segment)
StatProcessor -> StatStorage: reset()
StatProcessor -> StatComputer: initialise()
StatProcessor -> StatComputer: compute(segment)
StatComputer --> StatStorage: updates
StatProcessor <-- StatComputer
StatProcessor -> StatPublisher: publish(storage)
StatProcessor <-- StatPublisher
Client <-- StatProcessor
end
Client --> StatProcessor: drop
deactivate StatProcessor
deactivate StatComputer
deactivate StatPublisher
deactivate StatStorage
@enduml — Sequence diagram for processing statistics with the StatProcessor class, common to both StatApplicationManager and FileProcessor sequences

Processing data during a scan

@startuml PST STAT Client using SmrbSegmentProducer sequence
actor Client
Client -> StatApplicationManager: configure beam
activate StatApplicationManager
StatApplicationManager -> SmrbSegmentProducer: SmrbSegmentProducer(data_key, weights_key, viewer)
activate SmrbSegmentProducer
Client -> StatApplicationManager: configure scan
StatApplicationManager -> SmrbSegmentProducer: connect
StatApplicationManager <-- SmrbSegmentProducer: connected
StatApplicationManager -> StatProcessor: StatProcessor(config)
activate StatProcessor
StatApplicationManager <-- StatProcessor: configured
Client <-- StatApplicationManager: scan configured
group scan
Client -> StatApplicationManager: start scan
StatApplicationManager -> StatApplicationManager: start background processing
loop in background while scanning
StatApplicationManager -> SmrbSegmentProducer: next_segment()
StatApplicationManager <-- SmrbSegmentProducer: return segment
StatApplicationManager -> StatProcessor: process(segment)
StatApplicationManager <-- StatProcessor
StatApplicationManager -> StatApplicationManager: wait stat interval
end
Client -> StatApplicationManager: end scan
StatApplicationManager -> StatProcessor: interrupt
StatApplicationManager -> StatApplicationManager: stop background processing
end
Client -> StatApplicationManager: deconfigure scan
StatApplicationManager --> StatProcessor: drop
deactivate StatProcessor
Client <-- StatApplicationManager: scan deconfigured
Client -> StatApplicationManager: deconfigure beam
StatApplicationManager -> SmrbSegmentProducer: disconnect
deactivate SmrbSegmentProducer
Client <-- StatApplicationManager: beam deconfigured
@enduml — Sequence diagram for processing statistics during a scan with the StatApplicationManager class

Processing files after a scan

@startuml PST STAT DADA file processor
actor Client
Client -> FileProcessor: create(config)
activate FileProcessor
FileProcessor -> StatProcessor: StatProcessor(config)
activate StatProcessor
Client -> FileProcessor: process(data_file_path, weights_file_path)
FileProcessor -> FileSegmentProducer: FileSegmentProducer(file)
activate FileSegmentProducer
FileSegmentProducer -> "Data File": mmap
activate "Data File"
FileSegmentProducer -> "Weights File": mmap
activate "Weights File"

loop for configured number of blocks or EOF
FileProcessor -> FileSegmentProducer: next_segment()
FileSegmentProducer --> "Data File": next_block
FileSegmentProducer --> "Weights File": next_block
FileProcessor <-- FileSegmentProducer: return segment
FileProcessor -> StatProcessor: process(segment)
FileProcessor <-- StatProcessor
end

FileProcessor --> FileSegmentProducer: drop
FileSegmentProducer --> "Data File": release
deactivate "Data File"
FileSegmentProducer --> "Weights File": release
deactivate "Weights File"
deactivate FileSegmentProducer

' FileProcessor -> FileSegmentProducer: create
' activate FileSegmentProducer
' FileSegmentProducer -> File: mmap
' activate File
' loop while data present
' FileProcessor -> FileSegmentProducer: read next
' FileProcessor <-- FileSegmentProducer: return next block ptr or null
' alt data present
' FileProcessor -> StatProcessor: process(segment)
' else no data present
' Client <-- FileProcessor: process complete
' end
' end

@enduml — Sequence diagram for processing statistics from a file using the FileProcessor class