Source code for ska_pst.stat.stats

# -*- coding: utf-8 -*-
#
# This file is part of the SKA PST project
#
# Distributed under the terms of the BSD 3-clause new license.
# See LICENSE for more info.
"""This module provides the Statistics model class."""

from __future__ import annotations

import logging
import pathlib
from dataclasses import dataclass
from typing import List, Literal, cast

import h5py
import nptyping as npt
import numpy as np
import pandas as pd
from ska_pst.stat.hdf5 import (
    Dimension,
    Polarisation,
    StatisticsData,
    StatisticsMetadata,
    TimeseriesDimension,
    get_stat_file_format,
)
from ska_pst.stat.hdf5.consts import (
    HDF5_BEAM_ID,
    HDF5_BW,
    HDF5_CHAN_FREQ,
    HDF5_EB_ID,
    HDF5_FILE_FORMAT_VERSION,
    HDF5_FREQ,
    HDF5_FREQUENCY_BINS,
    HDF5_HEADER,
    HDF5_HISTOGRAM_1D_FREQ_AVG,
    HDF5_HISTOGRAM_1D_FREQ_AVG_RFI_EXCISED,
    HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG,
    HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG_RFI_EXCISED,
    HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG,
    HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG_RFI_EXCISED,
    HDF5_MAX_SPECTRAL_POWER,
    HDF5_MAX_WEIGHTS,
    HDF5_MEAN_FREQUENCY_AVG,
    HDF5_MEAN_FREQUENCY_AVG_RFI_EXCISED,
    HDF5_MEAN_SPECTRAL_POWER,
    HDF5_MEAN_SPECTRUM,
    HDF5_MEAN_WEIGHTS,
    HDF5_MIN_WEIGHTS,
    HDF5_NBIN_HIST,
    HDF5_NCHAN,
    HDF5_NCHAN_DS,
    HDF5_NDAT_DS,
    HDF5_NDIM,
    HDF5_NPOL,
    HDF5_NREBIN,
    HDF5_NUM_CLIPPED_SAMPLES,
    HDF5_NUM_CLIPPED_SAMPLES_RFI_EXCISED,
    HDF5_NUM_CLIPPED_SAMPLES_SPECTRUM,
    HDF5_NUM_INVALID_PACKETS,
    HDF5_NUM_SAMPLES,
    HDF5_NUM_SAMPLES_RFI_EXCISED,
    HDF5_NUM_SAMPLES_SPECTRUM,
    HDF5_NUM_WEIGHT_SAMPLES,
    HDF5_POLARISATIONS,
    HDF5_SCAN_ID,
    HDF5_SPECTROGRAM,
    HDF5_START_CHAN,
    HDF5_T_MAX,
    HDF5_T_MIN,
    HDF5_TELESCOPE,
    HDF5_TIMESERIES,
    HDF5_TIMESERIES_BINS,
    HDF5_TIMESERIES_RFI_EXCISED,
    HDF5_UTC_START,
    HDF5_VARIANCE_FREQUENCY_AVG,
    HDF5_VARIANCE_FREQUENCY_AVG_RFI_EXCISED,
    HDF5_VARIANCE_SPECTRUM,
)

# The following are used as headers within Pandas data frames.
# They are shared module constants so that every data frame built in this
# module labels its columns and index levels in the same way.
POLARISATION: str = "Polarisation"
DIMENSION: str = "Dimension"
CHANNEL: str = "Channel"
TEMPORAL_BIN: str = "Temporal bin"
BIN: str = "Bin"
BIN_COUNT: str = "Count"
RFI_EXCISED: str = "RFI Excised"
MEAN: str = "Mean"
MIN: str = "Min"
MAX: str = "Max"
VARIANCE: str = "Var."
CLIPPED: str = "Clipped"
CHANNEL_FREQ_MHZ: str = "Channel Freq (MHz)"
TIME_OFFSET: str = "Time offset"

# Module level logger, named after this module.
_logger: logging.Logger = logging.getLogger(__name__)


[docs]@dataclass(kw_only=True, frozen=True)
class Statistics:
    """
    Data class used to abstract over HDF5 file.

    Instances of this should be created by passing the location of a STAT
    file to the :py:meth:`load_from_file` method.
    """

    metadata: StatisticsMetadata
    data: StatisticsData

[docs]    @staticmethod
    def load_from_file(file_path: pathlib.Path | str) -> Statistics:
        """
        Load a HDF5 STAT file and return an instance of the Statistics class.

        :param file_path: the path to the file to load the statistics from
        :type file_path: pathlib.Path | str
        :return: the statistics from the HDF5 file as a Python class
        :rtype: Statistics
        """
        file_path = pathlib.Path(file_path)
        assert file_path.exists(), f"Expected {file_path} to exist."

        with h5py.File(file_path, "r") as f:
            # we only have a size of 1 for header
            file_format_version_bytes = cast(bytes, f[HDF5_FILE_FORMAT_VERSION][()])
            file_format_version: str = file_format_version_bytes.decode("utf-8")  # pylint: disable=E1101
            hdf5_header: h5py.Dataset = f[HDF5_HEADER][0]

            stat_file_format = get_stat_file_format(file_format_version)
            nchan: int = hdf5_header[HDF5_NCHAN]

            min_weights: np.ndarray = np.zeros(nchan, dtype=np.float32)
            max_weights: np.ndarray = np.zeros(nchan, dtype=np.float32)
            mean_weights: np.ndarray = np.zeros(nchan, dtype=np.float32)
            num_weight_samples: int = 0

            polarisations = "Both"
            if stat_file_format.has_polarisations:
                polarisations = hdf5_header[HDF5_POLARISATIONS].decode("utf-8")  # pylint: disable=E1101

            if stat_file_format.has_weights:
                min_weights = f[HDF5_MIN_WEIGHTS][...]
                max_weights = f[HDF5_MAX_WEIGHTS][...]
                mean_weights = f[HDF5_MEAN_WEIGHTS][...]
                num_weight_samples = hdf5_header[HDF5_NUM_WEIGHT_SAMPLES]

            metadata = StatisticsMetadata(
                file_format_version=file_format_version,  # pylint: disable=E1101
                eb_id=hdf5_header[HDF5_EB_ID].decode("utf-8"),  # pylint: disable=E1101
                telescope=hdf5_header[HDF5_TELESCOPE].decode("utf-8"),  # pylint: disable=E1101
                scan_id=hdf5_header[HDF5_SCAN_ID],
                beam_id=hdf5_header[HDF5_BEAM_ID].decode("utf-8"),  # pylint: disable=E1101
                utc_start=hdf5_header[HDF5_UTC_START].decode("utf-8"),  # pylint: disable=E1101
                t_min=hdf5_header[HDF5_T_MIN],
                t_max=hdf5_header[HDF5_T_MAX],
                frequency_mhz=hdf5_header[HDF5_FREQ],
                bandwidth_mhz=hdf5_header[HDF5_BW],
                start_chan=hdf5_header[HDF5_START_CHAN],
                npol=hdf5_header[HDF5_NPOL],
                ndim=hdf5_header[HDF5_NDIM],
                nchan=hdf5_header[HDF5_NCHAN],
                nchan_ds=hdf5_header[HDF5_NCHAN_DS],
                ndat_ds=hdf5_header[HDF5_NDAT_DS],
                histogram_nbin=hdf5_header[HDF5_NBIN_HIST],
                nrebin=hdf5_header[HDF5_NREBIN],
                channel_freq_mhz=hdf5_header[HDF5_CHAN_FREQ][...],
                timeseries_bins=hdf5_header[HDF5_TIMESERIES_BINS][...],
                frequency_bins=hdf5_header[HDF5_FREQUENCY_BINS][...],
                num_samples=hdf5_header[HDF5_NUM_SAMPLES],
                num_samples_rfi_excised=hdf5_header[HDF5_NUM_SAMPLES_RFI_EXCISED],
                num_samples_spectrum=hdf5_header[HDF5_NUM_SAMPLES_SPECTRUM][...],
                num_invalid_packets=hdf5_header[HDF5_NUM_INVALID_PACKETS],
                num_weight_samples=num_weight_samples,
                has_weights=stat_file_format.has_weights,
                polarisations=polarisations,
            )

            data = StatisticsData(
                mean_frequency_avg=f[HDF5_MEAN_FREQUENCY_AVG][...],
                mean_frequency_avg_rfi_excised=f[HDF5_MEAN_FREQUENCY_AVG_RFI_EXCISED][...],
                variance_frequency_avg=f[HDF5_VARIANCE_FREQUENCY_AVG][...],
                variance_frequency_avg_rfi_excised=f[HDF5_VARIANCE_FREQUENCY_AVG_RFI_EXCISED][...],
                mean_spectrum=f[HDF5_MEAN_SPECTRUM][...],
                variance_spectrum=f[HDF5_VARIANCE_SPECTRUM][...],
                mean_spectral_power=f[HDF5_MEAN_SPECTRAL_POWER][...],
                max_spectral_power=f[HDF5_MAX_SPECTRAL_POWER][...],
                histogram_1d_freq_avg=f[HDF5_HISTOGRAM_1D_FREQ_AVG][...],
                histogram_1d_freq_avg_rfi_excised=f[HDF5_HISTOGRAM_1D_FREQ_AVG_RFI_EXCISED][...],
                rebinned_histogram_2d_freq_avg=f[HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG][...],
                rebinned_histogram_2d_freq_avg_rfi_excised=f[HDF5_HISTOGRAM_REBINNED_2D_FREQ_AVG_RFI_EXCISED][
                    ...
                ],
                rebinned_histogram_1d_freq_avg=f[HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG][...],
                rebinned_histogram_1d_freq_avg_rfi_excised=f[HDF5_HISTOGRAM_REBINNED_1D_FREQ_AVG_RFI_EXCISED][
                    ...
                ],
                num_clipped_samples_spectrum=f[HDF5_NUM_CLIPPED_SAMPLES_SPECTRUM][...],
                num_clipped_samples=f[HDF5_NUM_CLIPPED_SAMPLES][...],
                num_clipped_samples_rfi_excised=f[HDF5_NUM_CLIPPED_SAMPLES_RFI_EXCISED][...],
                spectrogram=f[HDF5_SPECTROGRAM][...],
                timeseries=f[HDF5_TIMESERIES][...],
                timeseries_rfi_excised=f[HDF5_TIMESERIES_RFI_EXCISED][...],
                min_weights=min_weights,
                max_weights=max_weights,
                mean_weights=mean_weights,
            )

            return Statistics(metadata=metadata, data=data)

    @property
    def npol(self: Statistics) -> int:
        """Get the number of polarisations, as recorded in the metadata."""
        metadata = self.metadata
        return metadata.npol

    @property
    def polarisations(self: Statistics) -> List[Polarisation]:
        """
        Get a list of the polarisations in the statistics file.

        The list is parsed from the ``polarisations`` string of the metadata.
        For DSP.FT it is possible that only 1 polarisation has been selected,
        so this property should be used to select the correct polarisations.
        """
        polarisations_text = self.metadata.polarisations
        return Polarisation.from_string(polarisations_text)

    @property
    def ndim(self: Statistics) -> int:
        """
        Get the number of dimensions of voltage data, as recorded in the metadata.

        This value should be 2 as SKAO uses complex voltage data and the statistics has real and imaginary
        dimensions.
        """
        metadata = self.metadata
        return metadata.ndim

    @property
    def nchan(self: Statistics) -> int:
        """Get the number of channels for the voltage data, as recorded in the metadata."""
        metadata = self.metadata
        return metadata.nchan

    @property
    def channel_numbers(self: Statistics) -> npt.NDArray[Literal["NChan"], npt.Int]:
        """Get an array of channel numbers, from the start channel to the end channel inclusive."""
        first_channel = self.metadata.start_chan
        last_channel = self.metadata.end_chan
        # np.arange excludes the stop value, so stop one past the last channel
        return np.arange(first_channel, last_channel + 1)

    @property
    def frequency_bins(self: Statistics) -> npt.NDArray[Literal["NFreqBin"], npt.Float64]:
        """Get the frequency bins used in the spectrogram data, as stored in the metadata."""
        metadata = self.metadata
        return metadata.frequency_bins

    @property
    def timeseries_bins(self: Statistics) -> npt.NDArray[Literal["NTimeBin"], npt.Float64]:
        """Get the timeseries bins used in the spectrogram and timeseries data, as stored in the metadata."""
        metadata = self.metadata
        return metadata.timeseries_bins

    @property
    def header(self: Statistics) -> pd.DataFrame:
        """
        Get the header metadata for the data file.

        This returns a Pandas data frame, with a ``Key`` and a ``Value`` column, of the scalar
        header data from the HDF5 file. This allows the user of the API to see what is in the
        HEADER dataset without the need of using a HDF5 view tool.

        The header has the following fields, in this order. The ``Num. Weight Samples`` row is
        only present when the metadata reports that the file has weights.

        .. list-table::
            :header-rows: 1

            * - Key
              - Example
              - Description
            * - File Format Version
              - 1.1.0
              - the version of the SKA PST STAT file format that the file is from.
            * - Execution Block ID
              - eb-m001-20230921-245
              - the execution block ID of the generated data file
            * - Telescope
              - SKALow
              - the telescope used for the generated data file (i.e. SKALow or SKAMid)
            * - Scan ID
              - 42
              - the ID of the scan that the file was generated from.
            * - Beam ID
              - 1
              - the PST BEAM ID that was used for the scan
            * - UTC Start Time
              - 2023-10-23-11:00:00
              - an ISO formatted string of the UTC time at the start of the scan
            * - Start Scan Offset
              - 0.0
              - the time offset, in seconds, from the UTC start time to represent the time at the start of
                the data in the file.
            * - End Scan Offset
              - 0.106168
              - the time offset, in seconds, from the UTC start time to represent the time at the end of
                data in the file.
            * - Frequency (MHz)
              - 87.5
              - the centre frequency for the data as a whole
            * - Bandwidth (MHz)
              - 75.0
              - the bandwidth of data
            * - Start Channel Number
              - 0
              - the starting channel number
            * - End Channel Number
              - 431
              - the last channel that the data is for
            * - Num. Polarisations
              - 2
              - number of polarisations. For v1.0.0 this is always 2 but it can be 1 in later file formats
            * - Polarisations
              - A,B
              - the polarisations of the data. For v1.0.0 file format this will be A,B
            * - Num. Dimensions
              - 2
              - number of dimensions in the data (should be 2 for complex data)
            * - Num. Channels
              - 432
              - number of channels in the data
            * - Num. Frequency Bins
              - 36
              - the number of frequency bins in the spectrogram data
            * - Num. Temporal Bins
              - 32
              - the number of temporal bins in the spectrogram and timeseries data
            * - Num. Histogram Bins
              - 65536
              - the number of bins in the histogram data
            * - Num. Histogram Bins (Rebinned)
              - 256
              - number of bins to use in the rebinned histograms
            * - Num. Samples
              - 21012480
              - total number of samples used to calculate statistics
            * - Num. Samples (RFI Excised)
              - 19456000
              - total number of samples used to calculate statistics, excluding RFI excised data
            * - Num. Invalid Packets
              - 0
              - total number of invalid/dropped packets in the data used to calculate statistics.
            * - Num. Weight Samples
              - 656640
              - total number of samples used to calculate the weight statistics

        :return: a human readable version of the header scalar fields.
        :rtype: pd.DataFrame
        """
        meta = self.metadata

        # each entry pairs the human readable key with the metadata value shown for it
        rows = [
            ("File Format Version", meta.file_format_version),
            ("Execution Block ID", meta.eb_id),
            ("Telescope", meta.telescope),
            ("Scan ID", meta.scan_id),
            ("Beam ID", meta.beam_id),
            ("UTC Start Time", meta.utc_start),
            ("Start Scan Offset", meta.t_min),
            ("End Scan Offset", meta.t_max),
            ("Frequency (MHz)", meta.frequency_mhz),
            ("Bandwidth (MHz)", meta.bandwidth_mhz),
            ("Start Channel Number", meta.start_chan),
            ("End Channel Number", meta.end_chan),
            ("Num. Polarisations", meta.npol),
            ("Polarisations", meta.polarisations),
            ("Num. Dimensions", meta.ndim),
            ("Num. Channels", meta.nchan),
            ("Num. Frequency Bins", meta.nchan_ds),
            ("Num. Temporal Bins", meta.ndat_ds),
            ("Num. Histogram Bins", meta.histogram_nbin),
            ("Num. Histogram Bins (Rebinned)", meta.nrebin),
            ("Num. Samples", meta.num_samples),
            ("Num. Samples (RFI Excised)", meta.num_samples_rfi_excised),
            ("Num. Invalid Packets", meta.num_invalid_packets),
        ]

        if meta.has_weights:
            rows.append(("Num. Weight Samples", meta.num_weight_samples))

        return pd.DataFrame(
            data={
                "Key": [key for key, _ in rows],
                "Value": [value for _, value in rows],
            }
        )

[docs]    def get_frequency_averaged_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the frequency averaged statistics.

        This will return a data frame that includes statistics across all
        frequencies/channels as well as only the frequencies/channels
        that weren't marked as having RFI.

        While this method is a public method, it is recommended to use
        the following properties directly:

            * :py:attr:`frequency_averaged_stats`
            * :py:attr:`frequency_averaged_stats_rfi_excised`

        The data frame has the following columns:

            * Polarisation - which polarisation that the statistic value is for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * RFI Excised - a boolean value of whether the statistic after RFI
              had been excised.
            * Mean - the mean of the data for each polarisation and dimension, averaged
              over all channels.
            * Variance - the variance of the data for each polarisation and dimension,
              averaged over all channels.
            * Clipped - number of clipped input samples (maximum level) for each
              polarisation, dimension, averaged over all channels.

        The Pandas frame has a MultiIndex key using the ``Polarisation``, ``Dimension``,
        and ``RFI Excised`` columns.
        """
        # the 2 is for non_rfi_excised and rfi_excised
        shape = (self.npol, self.ndim, 2)

        rfi_excised = np.array([False, True]).repeat(self.npol * self.ndim)

        mean_freq_avg = np.zeros(shape=shape, dtype=np.float32)
        mean_freq_avg[:, :, 0] = self.data.mean_frequency_avg
        mean_freq_avg[:, :, 1] = self.data.mean_frequency_avg_rfi_excised

        variance_freq_avg = np.zeros(shape=shape, dtype=np.float32)
        variance_freq_avg[:, :, 0] = self.data.variance_frequency_avg
        variance_freq_avg[:, :, 1] = self.data.variance_frequency_avg_rfi_excised

        num_samples_clipped = np.zeros(shape=shape, dtype=np.uint32)
        num_samples_clipped[:, :, 0] = self.data.num_clipped_samples
        num_samples_clipped[:, :, 1] = self.data.num_clipped_samples_rfi_excised

        data = {
            POLARISATION: self._get_polarisation_column(shape=shape, axis=0),
            DIMENSION: self._get_dimension_column(shape=shape, axis=1),
            RFI_EXCISED: rfi_excised,
            MEAN: mean_freq_avg.flatten("F"),
            VARIANCE: variance_freq_avg.flatten("F"),
            CLIPPED: num_samples_clipped.flatten("F"),
        }

        df = pd.DataFrame(data=data)
        df.set_index([POLARISATION, DIMENSION, RFI_EXCISED], inplace=True)

        return df

    @property
    def frequency_averaged_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the frequency averaged statistics for all frequencies.

        This is the subset of :py:meth:`get_frequency_averaged_stats` where
        the ``RFI Excised`` index level is ``False``, i.e. the mean and variance
        of all the data across all frequencies, including frequencies marked as
        having RFI, separated for each polarisation and complex value dimension.
        The statistics also includes the number of samples clipped
        (i.e. the digital value was at the min or max value given the
        number of bits.)

        The data frame has the following columns:

            * Polarisation - which polarisation that the statistic value is for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * Mean - the mean of the data for each polarisation and dimension, averaged
              over all channels.
            * Var. - the variance of the data for each polarisation and dimension,
              averaged over all channels.
            * Clipped - number of clipped input samples (maximum level) for each
              polarisation, dimension, averaged over all channels.

        The Pandas frame has a MultiIndex key using the ``Polarisation``, and ``Dimension``
        columns.
        """
        all_stats = self.get_frequency_averaged_stats()
        # keep every polarisation and dimension but only the non RFI excised rows
        not_excised = all_stats.loc[:, :, False]  # type: ignore
        return not_excised

    @property
    def frequency_averaged_stats_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the frequency averaged statistics from all channels not flagged for RFI.

        This is the subset of :py:meth:`get_frequency_averaged_stats` where
        the ``RFI Excised`` index level is ``True``, i.e. the mean and variance
        of all the data across all channels, except those flagged for RFI,
        separated for each polarisation and complex value dimension.
        The statistics also includes the number of samples clipped
        (i.e. the digital value was at the min or max value given the
        number of bits.)

        The data frame has the following columns:

            * Polarisation - which polarisation that the statistic value is for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * Mean - the mean of the data for each polarisation and dimension, averaged
              over all channels.
            * Var. - the variance of the data for each polarisation and dimension,
              averaged over all channels.
            * Clipped - number of clipped input samples (maximum level) for each
              polarisation, dimension, averaged over all channels.

        The Pandas frame has a MultiIndex key using the ``Polarisation``, and ``Dimension``
        columns.
        """
        all_stats = self.get_frequency_averaged_stats()
        # keep every polarisation and dimension but only the RFI excised rows
        excised = all_stats.loc[:, :, True]  # type: ignore
        return excised

    def _get_polarisation_column(self: Statistics, shape: tuple[int, ...], axis: int = 0) -> np.ndarray:
        """
        Get a flat column of polarisation labels for data of the given shape.

        An object array of ``shape`` is filled so that every element is the
        ``text`` of the polarisation that its position along ``axis`` relates to.
        The array is then flattened in column-major (Fortran) order, to match
        data arrays that are flattened the same way.

        :param shape: the shape of the data that the labels are for.
        :type shape: tuple[int, ...]
        :param axis: the axis of ``shape`` that is the polarisation axis, defaults to 0
        :type axis: int
        :return: a 1D object array of polarisation labels.
        :rtype: np.ndarray
        :raises AssertionError: if the length of ``shape`` along ``axis`` is not the
            number of polarisations in the STAT file.
        """
        polarisation = np.empty(shape=shape, dtype=object)
        polarisations = self.polarisations

        # validate against the polarisation axis that was asked for, not always axis 0.
        # This is raised explicitly so the check is not removed when running with -O
        expected_npol = polarisation.shape[axis]
        if len(polarisations) != expected_npol:
            raise AssertionError(
                f"expected len({polarisations}) to have been {expected_npol} but "
                f" was {len(polarisations)}"
            )

        # In Numpy it is easier to work with the first dimension of an array.
        # so swap first axis (0) with provided axis, do the work and then swap back
        polarisation = np.swapaxes(polarisation, axis1=0, axis2=axis)
        for ipol, pol in enumerate(polarisations):
            polarisation[ipol] = pol.text
        polarisation = np.swapaxes(polarisation, axis1=0, axis2=axis)

        return polarisation.flatten(order="F")

    def _assert_polarisation_exists(self: Statistics, polarisation: Polarisation) -> None:
        """
        Assert that the given polarisation is in the STAT file.

        :param polarisation: the polarisation to check
        :type polarisation: Polarisation
        :raises AssertionError: if given polarisation is not in STAT file.
        """
        if polarisation not in self.polarisations:
            # raised explicitly, rather than via an ``assert`` statement, so that the
            # documented error is still raised when Python runs with the -O flag
            raise AssertionError(f"{polarisation.text} is not a valid polarisation for statistics")

    def _get_pol_idx(self: Statistics, polarisation: Polarisation) -> int:
        """
        Get the index value of the given polarisation.

        The polarisation is first checked to be in the STAT file. Then:

            * if the STAT file has 2 polarisations, the ``value`` of the given
              ``Polarisation`` is returned.
            * otherwise 0 is returned, as the only polarisation in the file is
              at the first index.

        :param polarisation: the polarisation to get the index value for.
        :type polarisation: Polarisation
        :return: the index value of the given polarisation.
        :rtype: int
        :raises AssertionError: when the polarisation is not in the STAT file.
        """
        self._assert_polarisation_exists(polarisation)

        if self.npol == 2:
            return polarisation.value

        return 0

    def _get_dimension_column(self: Statistics, shape: tuple[int, ...], axis: int = 1) -> np.ndarray:
        """
        Get a flat column of dimension labels for data of the given shape.

        An object array of ``shape`` is filled so that every element is the
        ``text`` of the ``Dimension`` that its position along ``axis`` relates to.
        The array is then flattened in column-major (Fortran) order.

        :param shape: the shape of the data that the labels are for.
        :type shape: tuple[int, ...]
        :param axis: the axis of ``shape`` that is the dimension axis, defaults to 1
        :type axis: int
        :return: a 1D object array of dimension labels.
        :rtype: np.ndarray
        """
        labels = np.empty(shape=shape, dtype=object)

        # Bring the dimension axis to the front, as indexing the first axis of a
        # Numpy array is the simplest way to fill the labels, and swap back afterwards
        dimension_first = np.swapaxes(labels, axis1=0, axis2=axis)
        for idim, dim in enumerate(Dimension):
            dimension_first[idim] = dim.text
        original_order = np.swapaxes(dimension_first, axis1=0, axis2=axis)

        return original_order.flatten(order="F")

[docs]    def get_channel_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the channel statistics.

        While this method is a public method, it is recommended to use
        the following properties as they provide more specific access to
        the data based on polarisation and specific dimension of the
        complex voltage data.

            * :py:attr:`pol_a_channel_stats`
            * :py:attr:`pol_b_channel_stats`
            * :py:attr:`pol_a_real_channel_stats`
            * :py:attr:`pol_a_imag_channel_stats`
            * :py:attr:`pol_b_real_channel_stats`
            * :py:attr:`pol_b_imag_channel_stats`

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Polarisation - which polarisation that the statistic value is for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * Channel Freq. (MHz) - the centre frequency for the channel.
            * Mean - the mean of the data for each polarisation and dimension, averaged
              over all channels.
            * Variance - the variance of the data for each polarisation and dimension,
              averaged over all channels.
            * Clipped - number of clipped input samples (maximum level) for each
              polarisation, dimension, averaged over all channels.

        The Pandas frame has a MultiIndex key using the ``Channel``, ``Polarisation``,
        and ``Dimension`` columns.

        :return: a data frame with statistics for each channel split by polarisation
            and complex voltage dimension.
        :rtype: pd.DataFrame
        """
        shape = self.data.mean_spectrum.shape

        channel_number_arange = self.channel_numbers
        channel_number = np.repeat(channel_number_arange, self.npol * self.ndim)
        channel_freq_mhz = np.repeat(self.metadata.channel_freq_mhz, self.npol * self.ndim)

        mean_data = self.data.mean_spectrum
        variance_data = self.data.variance_spectrum
        clipped_data = self.data.num_clipped_samples_spectrum

        data = {
            CHANNEL: channel_number,
            POLARISATION: self._get_polarisation_column(shape=shape, axis=0),
            DIMENSION: self._get_dimension_column(shape=shape, axis=1),
            CHANNEL_FREQ_MHZ: channel_freq_mhz,
            MEAN: mean_data.flatten(order="F"),
            VARIANCE: variance_data.flatten(order="F"),
            CLIPPED: clipped_data.flatten(order="F"),
        }

        df = pd.DataFrame(data=data)
        df.set_index([CHANNEL, POLARISATION, DIMENSION], inplace=True)
        return df

    @property
    def pol_a_channel_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the polarisation A channel statistics.

        This is the polarisation A subset of :py:meth:`get_channel_stats` and
        includes both the real and imaginary dimension of the data. The
        following utility properties are provided to get the statistics of
        each dimension directly:

            * :py:attr:`pol_a_real_channel_stats`
            * :py:attr:`pol_a_imag_channel_stats`

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * Channel Freq (MHz) - the centre frequency for the channel.
            * Mean - the mean of the data for the channel and dimension.
            * Var. - the variance of the data for the channel and dimension.
            * Clipped - number of clipped input samples (maximum level) for the
              channel and dimension.

        The Pandas frame has a MultiIndex key using the ``Channel``, and ``Dimension`` columns.

        :return: a data frame of polarisation A with statistics for each channel split by complex
            voltage dimension.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'A' is not in STAT file.
        """
        self._assert_polarisation_exists(Polarisation.POL_A)
        channel_stats = self.get_channel_stats()
        # keep every channel and dimension but only the polarisation A rows
        pol_a_stats = channel_stats.loc[:, Polarisation.POL_A.text, :]  # type: ignore
        return pol_a_stats

    @property
    def pol_a_real_channel_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the real valued, polarisation A channel statistics.

        This is the subset of :py:meth:`get_channel_stats` for the real
        dimension of polarisation A, with the index reset so that the
        channel number is a normal column.

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Channel Freq (MHz) - the centre frequency for the channel.
            * Mean - the mean of the data for the channel.
            * Var. - the variance of the data for the channel.
            * Clipped - number of clipped input samples (maximum level) for the channel.

        :return: a data frame of the real component of polarisation A with statistics for each channel.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'A' is not in STAT file.
        """
        self._assert_polarisation_exists(Polarisation.POL_A)
        channel_stats = self.get_channel_stats()
        pol_a_real = channel_stats.loc[:, Polarisation.POL_A.text, Dimension.REAL.text]  # type: ignore
        # move the remaining index back to being a column of the frame
        return pol_a_real.reset_index()

    @property
    def pol_a_imag_channel_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the imaginary valued, polarisation A channel statistics.

        This is the subset of :py:meth:`get_channel_stats` for the imaginary
        dimension of polarisation A, with the index reset so that the
        channel number is a normal column.

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Channel Freq (MHz) - the centre frequency for the channel.
            * Mean - the mean of the data for the channel.
            * Var. - the variance of the data for the channel.
            * Clipped - number of clipped input samples (maximum level) for the channel.

        :return: a data frame of the imaginary component of polarisation A with statistics for each channel.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'A' is not in STAT file.
        """
        self._assert_polarisation_exists(Polarisation.POL_A)
        channel_stats = self.get_channel_stats()
        pol_a_imag = channel_stats.loc[:, Polarisation.POL_A.text, Dimension.IMAG.text]  # type: ignore
        # move the remaining index back to being a column of the frame
        return pol_a_imag.reset_index()

    @property
    def pol_b_channel_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the polarisation B channel statistics.

        This is the polarisation B subset of :py:meth:`get_channel_stats` and
        includes both the real and imaginary dimension of the data. The
        following utility properties are provided to get the statistics of
        each dimension directly:

            * :py:attr:`pol_b_real_channel_stats`
            * :py:attr:`pol_b_imag_channel_stats`

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * Channel Freq (MHz) - the centre frequency for the channel.
            * Mean - the mean of the data for the channel and dimension.
            * Var. - the variance of the data for the channel and dimension.
            * Clipped - number of clipped input samples (maximum level) for the
              channel and dimension.

        The Pandas frame has a MultiIndex key using the ``Channel``, and ``Dimension`` columns.

        :return: a data frame of polarisation B with statistics for each channel split by complex
            voltage dimension.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'B' is not in STAT file.
        """
        self._assert_polarisation_exists(Polarisation.POL_B)
        channel_stats = self.get_channel_stats()
        # keep every channel and dimension but only the polarisation B rows
        pol_b_stats = channel_stats.loc[:, Polarisation.POL_B.text, :]  # type: ignore
        return pol_b_stats

    @property
    def pol_b_real_channel_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the per-channel statistics of the real component of polarisation B.

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Channel Freq. (MHz) - the centre frequency for the channel.
            * Mean - the mean of the data for each polarisation and dimension, averaged
              over all channels.
            * Variance - the variance of the data for each polarisation and dimension,
              averaged over all channels.
            * Clipped - number of clipped input samples (maximum level) for each
              polarisation, dimension, averaged over all channels.

        :return: a data frame of the real component of polarisation B with statistics for each channel.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        real_stats = self.get_channel_stats().loc[selector]  # type: ignore
        # move the remaining index level(s) back into ordinary columns
        return real_stats.reset_index()

    @property
    def pol_b_imag_channel_stats(self: Statistics) -> pd.DataFrame:
        """
        Get the per-channel statistics of the imaginary component of polarisation B.

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Channel Freq. (MHz) - the centre frequency for the channel.
            * Mean - the mean of the data for each polarisation and dimension, averaged
              over all channels.
            * Variance - the variance of the data for each polarisation and dimension,
              averaged over all channels.
            * Clipped - number of clipped input samples (maximum level) for each
              polarisation, dimension, averaged over all channels.

        :return: a data frame of the imaginary component of polarisation B with statistics for each channel.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        imag_stats = self.get_channel_stats().loc[selector]  # type: ignore
        # move the remaining index level(s) back into ordinary columns
        return imag_stats.reset_index()

[docs]    def get_spectral_power(self: Statistics) -> pd.DataFrame:
        """
        Get the mean and max spectral power values for each channel.

        This data frame includes both the mean and max of the spectral
        power for each channel for all polarisations.

        The following properties are provided for each polarisation:

            * :py:attr:`pol_a_spectral_power`
            * :py:attr:`pol_b_spectral_power`

        The data frame has the following columns:

            * Polarisation - which polarisation that the statistic value is for.
            * Channel - the channel number the statistics are for.
            * Mean - the mean of the spectral power for the current channel.
            * Max - the maximum of the spectral power for the current channel over
              the time sample of the statistics file.

        The Pandas frame has a MultiIndex key using the ``Polarisation``, and ``Channel`` columns.

        :return: the mean and max spectral power values for each channel.
        :rtype: pd.DataFrame
        """
        shape = self.data.mean_spectral_power.shape

        channels = np.repeat(self.channel_numbers, self.npol)
        mean_data = self.data.mean_spectral_power
        max_data = self.data.max_spectral_power
        data = {
            POLARISATION: self._get_polarisation_column(shape=shape),
            CHANNEL: channels,
            MEAN: mean_data.flatten(order="F"),
            MAX: max_data.flatten(order="F"),
        }

        df = pd.DataFrame(data=data)
        df.set_index([POLARISATION], inplace=True)
        return df

    @property
    def pol_a_spectral_power(self: Statistics) -> pd.DataFrame:
        """
        Get the mean and max spectral power values for each channel for polarisation A.

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Mean - the mean of the spectral power for the current channel.
            * Max - the maximum of the spectral power for the current channel over
              the time sample of the statistics file.

        :return: the mean and max spectral power values for each channel for polarisation A.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        # Select with a list of labels so that ``.loc`` always yields a DataFrame. A scalar
        # label collapses the result to a Series when only one row (a single channel) matches.
        pol_power = self.get_spectral_power().loc[[pol.text]]

        # the polarisation index is constant after the selection, so it is discarded
        return pol_power.reset_index(drop=True)

    @property
    def pol_b_spectral_power(self: Statistics) -> pd.DataFrame:
        """
        Get the mean and max spectral power values for each channel for polarisation B.

        The data frame has the following columns:

            * Channel - the channel number the statistics are for.
            * Mean - the mean of the spectral power for the current channel.
            * Max - the maximum of the spectral power for the current channel over
              the time sample of the statistics file.

        :return: the mean and max spectral power values for each channel for polarisation B.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        # Select with a list of labels so that ``.loc`` always yields a DataFrame. A scalar
        # label collapses the result to a Series when only one row (a single channel) matches.
        pol_power = self.get_spectral_power().loc[[pol.text]]

        # the polarisation index is constant after the selection, so it is discarded
        return pol_power.reset_index(drop=True)

[docs]    def get_histogram_data(self: Statistics, rfi_excised: bool) -> pd.DataFrame:
        """
        Get the histogram of the input data integer states for each polarisation and dimension.

        While this method is a public method, it is recommended to use one of the
        following 8 properties as they provide the data in a more usable format:

            * :py:attr:`pol_a_real_histogram`
            * :py:attr:`pol_a_imag_histogram`
            * :py:attr:`pol_b_real_histogram`
            * :py:attr:`pol_b_imag_histogram`
            * :py:attr:`pol_a_real_histogram_rfi_excised`
            * :py:attr:`pol_a_imag_histogram_rfi_excised`
            * :py:attr:`pol_b_real_histogram_rfi_excised`
            * :py:attr:`pol_b_imag_histogram_rfi_excised`

        The number of bins in the histogram is 2^(number of bits). For 8 bit
        data this is 256 bins and for 16 bit data this is 65536 bins.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Polarisation - which polarisation that the statistic value is for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * Count - the number/count for the bin.

        The Pandas frame has a MultiIndex key using the ``Bin``, ``Polarisation``,
        and ``Dimension`` columns.

        :param rfi_excised: a bool value to report on all (False) or RFI excised
            (True) data
        :type rfi_excised: True
        :return: a data frame for histogram data split polarisation
            and complex voltage dimension.
        :rtype: pd.DataFrame
        """
        if rfi_excised:
            histogram_data = self.data.histogram_1d_freq_avg_rfi_excised
        else:
            histogram_data = self.data.histogram_1d_freq_avg

        shape = histogram_data.shape

        # This is already flatten in column order
        bins = np.arange(self.metadata.histogram_nbin).repeat(self.npol * self.ndim)

        data = {
            BIN: bins,
            POLARISATION: self._get_polarisation_column(shape=shape, axis=0),
            DIMENSION: self._get_dimension_column(shape=shape, axis=1),
            BIN_COUNT: histogram_data.flatten(order="F"),
        }

        df = pd.DataFrame(data=data)
        df.set_index([BIN, POLARISATION, DIMENSION], inplace=True)
        return df

    @property
    def pol_a_real_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the real valued, polarisation A, input data integer states.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for real valued, polarisation A, voltage data.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

    @property
    def pol_a_imag_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the imaginary valued, polarisation A, input data integer states.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for imaginary valued, polarisation A, voltage data.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

    @property
    def pol_b_real_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the real valued, polarisation B, input data integer states.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for real valued, polarisation B, voltage data.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

    @property
    def pol_b_imag_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the imaginary valued, polarisation B, input data integer states.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for imaginary valued, polarisation B, voltage data.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

    @property
    def pol_a_real_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the real valued, pol A, input data from all channels not flagged for RFI.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for real valued, polarisation A, voltage data
             from all channels not flagged for RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

    @property
    def pol_a_imag_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the imag valued, pol A, input data from all channels not flagged for RFI.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for imaginary valued, polarisation A, voltage data
             from all channels not flagged for RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

    @property
    def pol_b_real_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the real valued, pol B, input data from all channels not flagged for RFI.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for real valued, polarisation B, voltage data
             from all channels not flagged for RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

    @property
    def pol_b_imag_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the histogram of the imag valued, pol B, input data from all channels not flagged for RFI.

        There are 2^(number of bits) bins in the histogram: 256 bins for 8 bit
        data and 65536 bins for 16 bit data.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for histogram data for imaginary valued, polarisation B, voltage data
             from all channels not flagged for RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # keep only the counts and expose the bin index as a regular column
        return selected[BIN_COUNT].to_frame().reset_index()

[docs]    def get_rebinned_histogram_data(self: Statistics, rfi_excised: bool) -> pd.DataFrame:
        """
        Get rebinned histogram data.

        While this method is a public method, it is recommended to use one of the
        following 8 properties as they provide the data in a more usable format.

            * :py:attr:`pol_a_real_rebinned_histogram`
            * :py:attr:`pol_a_imag_rebinned_histogram`
            * :py:attr:`pol_b_real_rebinned_histogram`
            * :py:attr:`pol_b_imag_rebinned_histogram`
            * :py:attr:`pol_a_real_rebinned_histogram_rfi_excised`
            * :py:attr:`pol_a_imag_rebinned_histogram_rfi_excised`
            * :py:attr:`pol_b_real_rebinned_histogram_rfi_excised`
            * :py:attr:`pol_b_imag_rebinned_histogram_rfi_excised`

        The number of bins that the data has been rebinned to is
        ``Num. Histogram Bins (Rebinned)`` value found in the :py:attr:`header`.

        The data frame has the following columns:

            * Bin - the bin for the histogram count.
            * Polarisation - which polarisation that the statistic value is for.
            * Dimension - which complex dimension/component (i.e. real or imag)
              that the statistic is for.
            * Count - the number/count for the bin

        The Pandas frame has a MultiIndex key using the ``Bin``, ``Polarisation``,
        and ``Dimension`` columns.

        :param rfi_excised: a bool value to report on all (False) or RFI excised
            (True) data
        :type rfi_excised: True
        :return: a data frame for the rebinned histogram data split polarisation
            and complex voltage dimension.
        :rtype: pd.DataFrame
        """
        if rfi_excised:
            histogram_data = self.data.rebinned_histogram_1d_freq_avg_rfi_excised
        else:
            histogram_data = self.data.rebinned_histogram_1d_freq_avg

        shape = histogram_data.shape

        # This is already flatten in column order
        bins = np.arange(self.metadata.nrebin).repeat(self.npol * self.ndim)

        data = {
            BIN: bins,
            POLARISATION: self._get_polarisation_column(shape=shape, axis=0),
            DIMENSION: self._get_dimension_column(shape=shape, axis=1),
            BIN_COUNT: histogram_data.flatten(order="F"),
        }

        df = pd.DataFrame(data=data)
        df.set_index([BIN, POLARISATION, DIMENSION], inplace=True)
        return df

    @property
    def pol_a_real_rebinned_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the real valued, pol A.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for real valued, polarisation A.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

    @property
    def pol_a_imag_rebinned_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the imaginary valued, pol A.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for imaginary valued, polarisation A.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

    @property
    def pol_b_real_rebinned_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the real valued, pol B.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for real valued, polarisation B.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

    @property
    def pol_b_imag_rebinned_histogram(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the imaginary valued, pol B.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for imaginary valued, polarisation B.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=False).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

    @property
    def pol_a_real_rebinned_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the real valued, pol A except those flagged with RFI.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for real valued, polarisation A
            except those flagged with RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

    @property
    def pol_a_imag_rebinned_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the imag valued, pol A except those flagged with RFI.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for imaginary valued, polarisation A
            except those flagged with RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

    @property
    def pol_b_real_rebinned_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the real valued, pol B except those flagged with RFI.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for real valued, polarisation B
            except those flagged with RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.REAL.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

    @property
    def pol_b_imag_rebinned_histogram_rfi_excised(self: Statistics) -> pd.DataFrame:
        """
        Get the rebinned histogram of the imag valued, pol B except those flagged with RFI.

        The data frame is keyed by the histogram bin and has one column:

            * Bin (index) - the bin for the histogram count.
            * Count - the number/count for the bin

        :return: a data frame for rebinned histogram data for imaginary valued, polarisation B
            except those flagged with RFI.
        :rtype: pd.DataFrame
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        selector = (slice(None), pol.text, Dimension.IMAG.text)
        selected = self.get_rebinned_histogram_data(rfi_excised=True).loc[selector]  # type: ignore
        # the index is deliberately left as-is here (not reset into a column)
        counts = selected[BIN_COUNT]
        return counts.to_frame()

[docs]    def get_rebinned_histogram2d_data(
        self: Statistics, rfi_excised: bool, polarisation: Polarisation
    ) -> npt.NDArray[Literal["NRebin, NRebin"], npt.UInt32]:
        """
        Get the 2D histogram data.

        This returns a Numpy array rather than a Pandas Dataframe.

        While this is a public method the following properties should be used
        as they provide a more user friendly API.

            * :py:attr:`pol_a_rebinned_histogram2d`
            * :py:attr:`pol_b_rebinned_histogram2d`
            * :py:attr:`pol_a_rebinned_histogram2d_rfi_excised`
            * :py:attr:`pol_b_rebinned_histogram2d_rfi_excised`

        :param rfi_excised: use the RFI excised data (True) or all data (False)
        :type rfi_excised: bool
        :param polarisation: which polarisation of the data to use.
        :type polarisation: Polarisation
        :raises AssertionError: if given polarisation is not in STAT file.
        """
        self._assert_polarisation_exists(polarisation)
        pol_idx = self._get_pol_idx(polarisation)

        if rfi_excised:
            return self.data.rebinned_histogram_2d_freq_avg_rfi_excised[pol_idx]
        else:
            return self.data.rebinned_histogram_2d_freq_avg[pol_idx]

    @property
    def pol_a_rebinned_histogram2d(self: Statistics) -> npt.NDArray[Literal["NRebin, NRebin"], npt.UInt32]:
        """
        Get the rebinned 2D histogram data for polarisation A.

        The result is a Numpy array with data for all frequencies, where:

            * the first array dimension is the real valued data.
            * the second array dimension is the imaginary valued data.

        :return: the rebinned 2D histogram data for polarisation A.
        :rtype: np.ndarray
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)
        return self.get_rebinned_histogram2d_data(polarisation=pol, rfi_excised=False)

    @property
    def pol_b_rebinned_histogram2d(self: Statistics) -> npt.NDArray[Literal["NRebin, NRebin"], npt.UInt32]:
        """
        Get the rebinned 2D histogram data for polarisation B.

        The result is a Numpy array with data for all frequencies, where:

            * the first array dimension is the real valued data.
            * the second array dimension is the imaginary valued data.

        :return: the rebinned 2D histogram data for polarisation B.
        :rtype: np.ndarray
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)
        return self.get_rebinned_histogram2d_data(polarisation=pol, rfi_excised=False)

    @property
    def pol_a_rebinned_histogram2d_rfi_excised(
        self: Statistics,
    ) -> npt.NDArray[Literal["NRebin, NRebin"], npt.UInt32]:
        """
        Get the rebinned 2D histogram data for polarisation A except frequencies flagged with RFI.

        The result is a Numpy array with data for frequencies that aren't RFI excised, where:

            * the first array dimension is the real valued data.
            * the second array dimension is the imaginary valued data.

        :return: the rebinned 2D histogram data for polarisation A.
        :rtype: np.ndarray
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)
        return self.get_rebinned_histogram2d_data(polarisation=pol, rfi_excised=True)

    @property
    def pol_b_rebinned_histogram2d_rfi_excised(
        self: Statistics,
    ) -> npt.NDArray[Literal["NRebin, NRebin"], npt.UInt32]:
        """
        Get the rebinned 2D histogram data for polarisation B except frequencies flagged with RFI.

        The result is a Numpy array with data for frequencies that aren't RFI excised, where:

            * the first array dimension is the real valued data.
            * the second array dimension is the imaginary valued data.

        :return: the rebinned 2D histogram data for polarisation B.
        :rtype: np.ndarray
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)
        return self.get_rebinned_histogram2d_data(polarisation=pol, rfi_excised=True)

    @property
    def pol_a_spectrogram(
        self: Statistics,
    ) -> npt.NDArray[Literal["NFreqBin, NTimeBin"], npt.Float32]:
        """
        Get the spectrogram data for polarisation A.

        The returned Numpy array can be used with Matplotlib to plot a
        Spectrogram. The data in the spectrogram is binned by channel and
        within time (see ``Num. Frequency Bins``, ``Num. Temporal Bins`` in
        :py:attr:`header` for more details.)

        :return: the spectrogram data for polarisation A.
        :rtype: np.ndarray
        :raises AssertionError: if polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        spectrogram = self.data.spectrogram
        pol_idx = self._get_pol_idx(pol)
        return spectrogram[pol_idx]

    @property
    def pol_b_spectrogram(
        self: Statistics,
    ) -> npt.NDArray[Literal["NFreqBin, NTimeBin"], npt.Float32]:
        """
        Get the spectrogram data for polarisation B.

        The returned Numpy array can be used with Matplotlib to plot a
        Spectrogram. The data in the spectrogram is binned by channel and
        within time (see ``Num. Frequency Bins``, ``Num. Temporal Bins`` in
        :py:attr:`header` for more details.)

        :return: the spectrogram data for polarisation B.
        :rtype: np.ndarray
        :raises AssertionError: if polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        spectrogram = self.data.spectrogram
        pol_idx = self._get_pol_idx(pol)
        return spectrogram[pol_idx]

[docs]    def get_timeseries_data(self: Statistics, rfi_excised: bool) -> pd.DataFrame:
        """
        Get the timeseries data.

        While this is a public method, the following properties should be
        used as they provide a more user friendly access to the data.

            * :py:attr:`pol_a_timeseries`
            * :py:attr:`pol_b_timeseries`
            * :py:attr:`pol_a_timeseries_rfi_excised`
            * :py:attr:`pol_b_timeseries_rfi_excised`

        The timeseries is binned in time (see ``Num. Temporal Bins`` in :py:attr:`header`)
        and is summed over all frequencies. If `rfi_excised` is True
        then the summing happens over the frequency that are not RFI excised.

        The data frame has the following columns:

            * Polarisation - which polarisation that the statistic value is for.
            * Temporal Bin - the time bin.
            * Time Offset - the offset, in seconds, for the current temporal bin.
            * Max - the maximum power recorded in the temporal bin.
            * Min - the minimum power recorded in the temporal bin.
            * Mean - the mean power recorded in the temporal bin.

        The Pandas frame has a MultiIndex key using the ``Polarisation``,
        and `Temporal Bin` columns.

        :param rfi_excised: whether to use all frequencies (False) or those that
            are not marked as having RFI.
        :type rfi_excised: bool
        :return: a data frame with the timeseries statistics.
        :rtype: pd.DataFrame
        """
        if rfi_excised:
            timeseries_data = self.data.timeseries_rfi_excised
        else:
            timeseries_data = self.data.timeseries

        shape = timeseries_data.shape[:-1]

        # these 2 columns will be in column major format [0, 0, 1, 1, ...] where
        # the repeat is need if npol = 2
        temporal_bin = np.arange(self.metadata.ndat_ds).repeat(self.npol)
        timeseries_bins = np.repeat(self.metadata.timeseries_bins, self.npol)

        max_data = timeseries_data[:, :, TimeseriesDimension.MAX]
        min_data = timeseries_data[:, :, TimeseriesDimension.MIN]
        mean_data = timeseries_data[:, :, TimeseriesDimension.MEAN]

        data = {
            TEMPORAL_BIN: temporal_bin,
            POLARISATION: self._get_polarisation_column(shape=shape),
            TIME_OFFSET: timeseries_bins,
            MAX: max_data.flatten(order="F"),
            MIN: min_data.flatten(order="F"),
            MEAN: mean_data.flatten(order="F"),
        }

        df = pd.DataFrame(data=data)
        df.set_index([POLARISATION, TEMPORAL_BIN], inplace=True)
        return df

    @property
    def pol_a_timeseries(self: Statistics) -> pd.DataFrame:
        """
        Get the timeseries data for polarisation A for all frequencies.

        The timeseries is binned in time (see ``Num. Temporal Bins`` in :py:attr:`header`)
        and is summed over all frequencies.

        The data frame provides the following values:

            * Temporal Bin - the time bin.
            * Time Offset - the offset, in seconds, for the current temporal bin.
            * Max - the maximum power recorded in the temporal bin.
            * Min - the minimum power recorded in the temporal bin.
            * Mean - the mean power recorded in the temporal bin.

        :return: a data frame with the timeseries statistics for polarisation A.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        # select the rows of the requested polarisation from the combined frame
        all_pols = self.get_timeseries_data(rfi_excised=False)
        return all_pols.loc[pol.text]  # type: ignore

    @property
    def pol_b_timeseries(self: Statistics) -> pd.DataFrame:
        """
        Get the timeseries data for polarisation B for all frequencies.

        The timeseries is binned in time (see ``Num. Temporal Bins`` in :py:attr:`header`)
        and is summed over all frequencies.

        The data frame provides the following values:

            * Temporal Bin - the time bin.
            * Time Offset - the offset, in seconds, for the current temporal bin.
            * Max - the maximum power recorded in the temporal bin.
            * Min - the minimum power recorded in the temporal bin.
            * Mean - the mean power recorded in the temporal bin.

        :return: a data frame with the timeseries statistics for polarisation B.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        # select the rows of the requested polarisation from the combined frame
        all_pols = self.get_timeseries_data(rfi_excised=False)
        return all_pols.loc[pol.text]  # type: ignore

    @property
    def pol_a_timeseries_rfi_excised(
        self: Statistics,
    ) -> pd.DataFrame:
        """
        Get the timeseries data for polarisation A for all frequencies except for RFI excised frequencies.

        The timeseries is binned in time (see ``Num. Temporal Bins`` in :py:attr:`header`)
        and is summed over the frequencies that are not RFI excised.

        The data frame provides the following values:

            * Temporal Bin - the time bin.
            * Time Offset - the offset, in seconds, for the current temporal bin.
            * Max - the maximum power recorded in the temporal bin.
            * Min - the minimum power recorded in the temporal bin.
            * Mean - the mean power recorded in the temporal bin.

        :return: a data frame with the timeseries statistics for polarisation A except for frequencies that
            have been RFI excised.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'A' is not in STAT file.
        """
        pol = Polarisation.POL_A
        self._assert_polarisation_exists(pol)

        # select the rows of the requested polarisation from the combined frame
        all_pols = self.get_timeseries_data(rfi_excised=True)
        return all_pols.loc[pol.text]  # type: ignore

    @property
    def pol_b_timeseries_rfi_excised(
        self: Statistics,
    ) -> pd.DataFrame:
        """
        Get the timeseries data for polarisation B for all frequencies except for RFI excised frequencies.

        The timeseries is binned in time (see ``Num. Temporal Bins`` in :py:attr:`header`)
        and is summed over the frequencies that are not RFI excised.

        The data frame provides the following values:

            * Temporal Bin - the time bin.
            * Time Offset - the offset, in seconds, for the current temporal bin.
            * Max - the maximum power recorded in the temporal bin.
            * Min - the minimum power recorded in the temporal bin.
            * Mean - the mean power recorded in the temporal bin.

        :return: a data frame with the timeseries statistics for polarisation B except for frequencies that
            have been RFI excised.
        :rtype: pd.DataFrame
        :raises AssertionError: if given polarisation 'B' is not in STAT file.
        """
        pol = Polarisation.POL_B
        self._assert_polarisation_exists(pol)

        # select the rows of the requested polarisation from the combined frame
        all_pols = self.get_timeseries_data(rfi_excised=True)
        return all_pols.loc[pol.text]  # type: ignore

    @property
    def channel_weights_stats(
        self: Statistics,
    ) -> pd.DataFrame:
        """
        Get the channel weights statistics.

        The returned data frame is indexed by channel number and holds the
        minimum, maximum and mean weights for each channel.

        Note that if the file format version is not at least 1.1.0 then this
        will raise an AssertionError.

        :return: the channel weights statistics.
        :rtype: pd.DataFrame
        :raises AssertionError: if file format version is not at least 1.1.0
        """
        # the AssertionError is part of the documented contract of this property
        assert (
            self.metadata.has_weights
        ), f"File format version '{self.metadata.file_format_version}' doesn't support channel weights."

        stats = self.data
        weights_frame = pd.DataFrame(
            data={
                CHANNEL: self.channel_numbers,
                MIN: stats.min_weights,
                MAX: stats.max_weights,
                MEAN: stats.mean_weights,
            }
        )
        return weights_frame.set_index([CHANNEL])