# -*- coding: utf-8 -*-
#
# This file is part of the SKA PST project
#
# Distributed under the terms of the BSD 3-clause new license.
# See LICENSE for more info.
"""This module defines the model classes when processing HDF5 STAT data."""
from __future__ import annotations
# Public API of this module, i.e. the names exported by a star-import.
# Both supported header dtype versions are exported so that callers can
# reach the v1.1.0 layout the same way as the v1.0.0 layout.
__all__ = [
    "StatisticsData",
    "StatisticsMetadata",
    "HDF5_HEADER_TYPE_V1_0_0",
    "HDF5_HEADER_TYPE_V1_1_0",
    "map_hdf5_key",
]
from dataclasses import dataclass
from typing import Dict, List, Literal
import h5py
import nptyping as npt
import numpy as np
from ska_pst.stat.hdf5.consts import (
FILE_FORMAT_VERSION_1_0_0,
FILE_FORMAT_VERSION_1_1_0,
HDF5_BEAM_ID,
HDF5_BW,
HDF5_CHAN_FREQ,
HDF5_EB_ID,
HDF5_FREQ,
HDF5_FREQUENCY_BINS,
HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG,
HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG_RFI_EXCISED,
HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG,
HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG_RFI_EXCISED,
HDF5_NBIN_HIST,
HDF5_NCHAN,
HDF5_NCHAN_DS,
HDF5_NDAT_DS,
HDF5_NDIM,
HDF5_NPOL,
HDF5_NREBIN,
HDF5_NUM_INVALID_PACKETS,
HDF5_NUM_SAMPLES,
HDF5_NUM_SAMPLES_RFI_EXCISED,
HDF5_NUM_SAMPLES_SPECTRUM,
HDF5_NUM_WEIGHT_SAMPLES,
HDF5_POLARISATIONS,
HDF5_SCAN_ID,
HDF5_START_CHAN,
HDF5_T_MAX,
HDF5_T_MIN,
HDF5_TELESCOPE,
HDF5_TIMESERIES_BINS,
HDF5_UTC_START,
VALID_FILE_FORMAT_VERSIONS,
Polarisation,
)
# Lookup table translating HDF5 attribute/dataset keys into the names of the
# matching model dataclass properties. Keys without an entry here fall back
# to their lower-cased form in ``map_hdf5_key``.
KEY_MAP: Dict[str, str] = {
    # Header keys whose property name is more descriptive than the raw key.
    HDF5_BW: "bandwidth_mhz",
    HDF5_FREQ: "frequency_mhz",
    HDF5_NBIN_HIST: "histogram_nbin",
    HDF5_CHAN_FREQ: "channel_freq_mhz",
    # Rebinned histogram keys, mapped onto the ``rebinned_histogram_*`` properties.
    HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG: "rebinned_histogram_1d_freq_avg",
    HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG_RFI_EXCISED: "rebinned_histogram_1d_freq_avg_rfi_excised",
    HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG: "rebinned_histogram_2d_freq_avg",
    HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG_RFI_EXCISED: "rebinned_histogram_2d_freq_avg_rfi_excised",
}
def map_hdf5_key(hdf5_key: str) -> str:
    """Map a key from a HDF5 attribute/dataset to a model dataclass property.

    Keys that have an explicit entry in ``KEY_MAP`` are translated to the
    mapped property name. Any other key is returned in lower case.

    :param hdf5_key: the key of the HDF5 attribute or dataset.
    :return: the name of the corresponding model dataclass property.
    """
    try:
        return KEY_MAP[hdf5_key]
    except KeyError:
        # No explicit mapping: the property name is the lower-cased key.
        return hdf5_key.lower()
# Scalar dtypes used when describing the HDF5 header.
string_dt = h5py.string_dtype(encoding="utf-8")
uint32_dt = np.uint32
uint64_dt = np.uint64
float_dt = np.float32
double_dt = np.float64

# Variable-length array dtypes built on top of the scalar dtypes above.
uint32_array_dt = h5py.vlen_dtype(uint32_dt)
double_array_dt = h5py.vlen_dtype(double_dt)

# (field name, dtype) pairs shared by every supported header version.
_BASE_HDF5_HEADER_TYPE = [
    (HDF5_EB_ID, string_dt),
    (HDF5_TELESCOPE, string_dt),
    (HDF5_SCAN_ID, uint64_dt),
    (HDF5_BEAM_ID, string_dt),
    (HDF5_UTC_START, string_dt),
    (HDF5_T_MIN, double_dt),
    (HDF5_T_MAX, double_dt),
    (HDF5_FREQ, double_dt),
    (HDF5_BW, double_dt),
    (HDF5_START_CHAN, uint32_dt),
    (HDF5_NPOL, uint32_dt),
    (HDF5_NDIM, uint32_dt),
    (HDF5_NCHAN, uint32_dt),
    (HDF5_NCHAN_DS, uint32_dt),
    (HDF5_NDAT_DS, uint32_dt),
    (HDF5_NBIN_HIST, uint32_dt),
    (HDF5_NREBIN, uint32_dt),
    (HDF5_CHAN_FREQ, double_array_dt),
    (HDF5_FREQUENCY_BINS, double_array_dt),
    (HDF5_TIMESERIES_BINS, double_array_dt),
    (HDF5_NUM_SAMPLES, uint32_dt),
    (HDF5_NUM_SAMPLES_RFI_EXCISED, uint32_dt),
    (HDF5_NUM_SAMPLES_SPECTRUM, uint32_array_dt),
    (HDF5_NUM_INVALID_PACKETS, uint32_dt),
]

# The v1.1.0 header is the base header followed by two additional fields.
_HDF5_HEADER_FIELDS_V1_1_0 = [
    *_BASE_HDF5_HEADER_TYPE,
    (HDF5_NUM_WEIGHT_SAMPLES, uint32_dt),
    (HDF5_POLARISATIONS, string_dt),
]

# Structured numpy dtypes describing the header for each file format version.
HDF5_HEADER_TYPE_V1_0_0 = np.dtype(_BASE_HDF5_HEADER_TYPE)
HDF5_HEADER_TYPE_V1_1_0 = np.dtype(_HDF5_HEADER_FIELDS_V1_1_0)
[docs]@dataclass(kw_only=True, frozen=True)
class StatisticsData:
"""A data class that represents the statistics loaded from the HDF5 file."""
# pylint: disable=line-too-long
mean_frequency_avg: npt.NDArray[Literal["NPol, NDim"], npt.Float32]
"The mean of the data for each polarisation and dimension, averaged over all channels."
mean_frequency_avg_rfi_excised: npt.NDArray[Literal["NPol, NDim"], npt.Float32]
"The mean of the data for each polarisation and dimension, averaged over all channels, expect those flagged for RFI." # noqa: E501
variance_frequency_avg: npt.NDArray[Literal["NPol, NDim"], npt.Float32]
"The variance of the data for each polarisation and dimension, averaged over all channels."
variance_frequency_avg_rfi_excised: npt.NDArray[Literal["NPol, NDim"], npt.Float32]
"The variance of the data for each polarisation and dimension, averaged over all channels, expect those flagged for RFI." # noqa: E501
mean_spectrum: npt.NDArray[Literal["NPol, NDim, NChan"], npt.Float32]
"The mean of the data for each polarisation, dimension and channel."
variance_spectrum: npt.NDArray[Literal["NPol, NDim, NChan"], npt.Float32]
"The variance of the data for each polarisation, dimension and channel."
mean_spectral_power: npt.NDArray[Literal["NPol, NChan"], npt.Float32]
"Mean power spectra of the data for each polarisation and channel."
max_spectral_power: npt.NDArray[Literal["NPol, NChan"], npt.Float32]
"Maximum power spectra of the data for each polarisation and channel."
histogram_1d_freq_avg: npt.NDArray[Literal["NPol, NDim, NBin"], npt.UInt32]
"Histogram of the input data integer states for each polarisation and dimension, averaged over all channels." # noqa: E501
histogram_1d_freq_avg_rfi_excised: npt.NDArray[Literal["NPol, NDim, NBin"], npt.UInt32]
"Histogram of the input data integer states for each polarisation and dimension, averaged over all channels, expect those flagged for RFI." # noqa: E501
rebinned_histogram_2d_freq_avg: npt.NDArray[Literal["NPol, NRebin, NRebin"], npt.UInt32]
"Rebinned 2D histogram of the input data integer states for each polarisation, averaged over all channels." # noqa: E501
rebinned_histogram_2d_freq_avg_rfi_excised: npt.NDArray[Literal["NPol, NRebin, NRebin"], npt.UInt32]
"Rebinned 2D histogram of the input data integer states for each polarisation, averaged over all channels, expect those flagged for RFI." # noqa: E501
rebinned_histogram_1d_freq_avg: npt.NDArray[Literal["NPol, NDim, NRebin"], npt.UInt32]
"Rebinned histogram of the input data integer states for each polarisation and dimension, averaged over all channels." # noqa: E501
rebinned_histogram_1d_freq_avg_rfi_excised: npt.NDArray[Literal["NPol, NDim, NRebin"], npt.UInt32]
"Rebinned histogram of the input data integer states for each polarisation and dimension, averaged over all channels, expect those flagged for RFI." # noqa: E501
num_clipped_samples_spectrum: npt.NDArray[Literal["NPol, NDim, NChan"], npt.UInt32]
"Number of clipped input samples (maximum level) for each polarisation, dimension and channel."
num_clipped_samples: npt.NDArray[Literal["NPol, NDim"], npt.UInt32]
"Number of clipped input samples (maximum level) for each polarisation, dimension, averaged over all channels." # noqa: E501
num_clipped_samples_rfi_excised: npt.NDArray[Literal["NPol, NDim"], npt.UInt32]
"Number of clipped input samples (maximum level) for each polarisation, dimension, averaged over all channels, except those flagged for RFI." # noqa: E501
spectrogram: npt.NDArray[Literal["NPol, NFreqBin, NTimeBin"], npt.Float32]
"Spectrogram of the data for each polarisation, averaged a configurable number of temporal and spectral bins (default ~1000)." # noqa: E501
timeseries: npt.NDArray[Literal["NPol, NTimeBin, 3"], npt.Float32]
"Time series of the data for each polarisation, rebinned in time to ntime_bins, averaged over all frequency channels." # noqa: E501
timeseries_rfi_excised: npt.NDArray[Literal["NPol, NTimeBin, 3"], npt.Float32]
"Time series of the data for each polarisation, re-binned in time."
min_weights: npt.NDArray[Literal["NChan"], npt.Float32]
"The minimum of the weights for each channel."
max_weights: npt.NDArray[Literal["NChan"], npt.Float32]
"The maximum of the weights for each channel."
mean_weights: npt.NDArray[Literal["NChan"], npt.Float32]
"The mean of the weights for each channel."
# pylint: enable=line-too-long