Source code for ska_pst.common.normal_dist_stats

# -*- coding: utf-8 -*-
#
# This file is part of the SKA PST project
#
# Distributed under the terms of the BSD 3-clause new license.
# See LICENSE for more info.
"""Classes used for working with normal distribution statistics."""

from __future__ import annotations

import dataclasses
from typing import Any, Callable, TypeVar

import numpy as np

MAD_SCALE_FACTOR: float = 1.4826
"""The scale factor used to convert between standard deviation and MAD."""

Wrapped = TypeVar("Wrapped", bound=Callable[..., Any])
"""A ``TypeVar`` used by the ``needs_ndat`` decorator is only used on callables."""


[docs]@dataclasses.dataclass(kw_only=True)
class NormalSampleStats:
    """
    A data class used to calculate sample statistics of a normal distribution.

    This class can be used to get the mean, standard deviation / variance, median
    and median absolute deviation (MAD) of a known sample from a given population
    which has given mean and variance.
    """

    mean: float = 0.0
    """
    The population mean.

    Defaults to 0.0.
    """

    variance: float = 1.0
    """
    The population variance.

    Defaults to 1.0
    """

    ndat: int | None = None
    """
    The number of samples in the sample distribution.

    Defaults to None to allow delaying or reusing the stats to finally get the
    sample values. This must be set before calling any of the ``sample_*``
    properties off this statistics class.
    """

    @property
    def population_stddev(self: NormalSampleStats) -> float:
        """
        Get the population standard deviation.

        This returns ``sqrt(self.variance)``.

        :return: the population standard deviation.
        :rtype: float
        """
        return np.sqrt(self.variance)

    @population_stddev.setter
    def population_stddev(self: NormalSampleStats, stddev: float) -> None:
        """
        Set the population standard deviation.

        This can be used if the stddev is known and not having to calculate
        the variance before setting it on the class.  This will update
        the variance to being ``stddev^2``.

        :param stddev: the population standard deviation.
        :type stddev: float
        """
        self.variance = stddev**2

    @property
    def population_mad(self: NormalSampleStats) -> float:
        """
        Get the theoretical median absolute deviation (MAD) of the population.

        This is calculated from the ``stddev`` by a scalar factor of around
        ``1.4826``.  See
        [https://en.wikipedia.org/wiki/Median_absolute_deviation#Relation_to_standard_deviation]
        for more details.

        :return: the theoretical median absolute deviation (MAD) of the population.
        :rtype: float
        """
        return self.population_stddev / MAD_SCALE_FACTOR

    @property
    def sample_mean_stddev(self: NormalSampleStats) -> float:
        """
        Get the standard deviation of the sample mean.

        This is defined as ``sqrt(variance / ndat)``.

        :return: the standard deviation of the sample mean.
        :rtype: float
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        return np.sqrt(self.variance / self.ndat)

    @property
    def sample_variance_stddev(self: NormalSampleStats) -> float:
        """
        Get the standard deviation of the sample variance.

        This is defined as ``sqrt(2.0 * (ndat - 1)) * variance / ndat``

        :return: the standard deviation of the sample variance.
        :rtype: float
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        return np.sqrt(2.0 / (self.ndat - 1.0)) * self.variance

    @property
    def sample_mean_estimate(self: NormalSampleStats) -> ValueErrorEstimate:
        """
        Get the sample's mean error estimate.

        :return: the sample's mean error estimate.
        :rtype:  ValueErrorEstimate
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        estimate = ValueErrorEstimate(val=self.mean)
        estimate.error = self.sample_mean_stddev

        return estimate

    @property
    def sample_variance_estimate(self: NormalSampleStats) -> ValueErrorEstimate:
        """
        Get the sample's variance error estimate.

        :return: the sample's variance error estimate.
        :rtype:  ValueErrorEstimate
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        estimate = ValueErrorEstimate(val=self.variance)
        estimate.error = self.sample_variance_stddev

        return estimate

    @property
    def sample_mad_estimate(self: NormalSampleStats) -> ValueErrorEstimate:
        """
        Get the sample's MAD error estimate.

        :return: the sample's MAD error estimate.
        :rtype:  ValueErrorEstimate
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        estimate = ValueErrorEstimate(val=self.population_mad)
        estimate.error = self.sample_mad_stddev

        return estimate

    @property
    def sample_median_estimate(self: NormalSampleStats) -> ValueErrorEstimate:
        """
        Get the sample's median error estimate.

        :return: the sample's median error estimate.
        :rtype:  ValueErrorEstimate
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        estimate = ValueErrorEstimate(val=self.mean)
        estimate.error = self.sample_median_stddev

        return estimate

    @property
    def sample_median_stddev(self: NormalSampleStats) -> float:
        """
        Get the standard deviation of the sample median.

        This is defined as ``sqrt(pi/2) * (sample's mean stddev)``

        :return: the standard deviation of the sample median.
        :rtype: float
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        return np.sqrt(np.pi / 2) * self.sample_mean_stddev

    @property
    def sample_mad_stddev(self: NormalSampleStats) -> float:
        """
        Get the standard deviation of the sample MAD.

        This is defined as ``sqrt(pi/2) * pop_stddev / sqrt(ndat)``

        :return: the standard deviation of the sample MAD.
        :rtype:  float
        """
        assert self.ndat is not None, "ndat must be provided before getting sample statistics"
        return np.sqrt(np.pi / 2) * self.sample_mean_stddev

    def __copy__(self: NormalSampleStats) -> NormalSampleStats:
        """
        Copy the statistics to a instance.

        :return: a copy of this instance.
        :rtype: NormalSampleStats
        """
        return NormalSampleStats(mean=self.mean, variance=self.variance, ndat=self.ndat)


[docs]@dataclasses.dataclass(kw_only=True)
class ValueErrorEstimate:
    """
    A data class used to get the error of a value with a known value and variance.

    This class implements some Python magic dunder methods to make it easier to
    do manipulation and propagation of the errors, such as ``**`` (power), ``/``
    (division), ``*`` (multiplication).
    """

    val: float = 0.0
    """The value that has an associated variance in it's error."""

    variance: float = 1.0
    """The variance in the value."""

    @property
    def error(self: ValueErrorEstimate) -> float:
        """
        Get the error of the value.

        This is the standard deviation in the error and is ``sqrt(variance)``.

        :return: the error of the value.
        :rtype: float
        """
        return np.sqrt(self.variance, dtype=float)

    @error.setter
    def error(self: ValueErrorEstimate, error: float) -> None:
        """
        Set the error of the value.

        This is a utility property setter to allow setting the known error
        rather than calculating the variance first.  As the ``error`` is
        the standard deviation this sets the ``variance`` to being ``error ** 2``.

        :param error: the error of the value.
        :type error: float
        """
        self.variance = error**2

[docs]    def inverse(self: ValueErrorEstimate) -> ValueErrorEstimate:
        """
        Get the inverse of the value and variance.

        This is the programmatic way of doing ``1/val``. This
        will return a new value and it's error estimate.

        :return: the inverse of the value and variance.
        :rtype: ValueErrorEstimate
        """
        val = 1.0 / self.val
        variance = self.variance * np.power(val, 4)
        return ValueErrorEstimate(val=val, variance=variance)

[docs]    def sqrt(self: ValueErrorEstimate) -> ValueErrorEstimate:
        """
        Get the square root of the value and the variance.

        :return: the square root of the value and the variance.
        :rtype: ValueErrorEstimate
        """
        val = np.sqrt(self.val)
        variance = self.variance / 4.0 / self.val

        return ValueErrorEstimate(val=val, variance=variance)

    def __mul__(self: ValueErrorEstimate, val: Any) -> ValueErrorEstimate:
        """
        Get the multiplication of this value error estimate with another value.

        This is specifically the **left** multiplication but the operation is
        currently commutative and is equivalent to the **right** multiplication.

        This method only supports that multiplication where the ``val`` is an scalar
        ``int``, ``float`` or ``ValueErrorEstimate``.

        :param val: a scalar ``int``, ``float`` or ``ValueErrorEstimate``.
        :type val: Any
        :return: the multiplication of this value error estimate with another value.
        :rtype: ValueErrorEstimate
        """
        if isinstance(val, (int, float)):
            return ValueErrorEstimate(val=val * self.val, variance=val * val * self.variance)
        if isinstance(val, ValueErrorEstimate):
            if val == self:
                return self.__pow__(2)
            else:
                variance = (self.val**2) * val.variance + (val.val**2) * self.variance
                return ValueErrorEstimate(val=self.val * val.val, variance=variance)

        return NotImplemented

    def __rmul__(self: ValueErrorEstimate, val: Any) -> ValueErrorEstimate:
        """
        Get the multiplication of this value error estimate with another value.

        This is specifically the **right** multiplication but the operation is
        currently commutative and is equivalent to the **left** multiplication.
        As such the implementation delegates to the **left** multiplication.

        This method only supports that multiplication where the ``val`` is an scalar
        ``int``, ``float`` or ``ValueErrorEstimate``.

        :param val: a scalar ``int``, ``float`` or ``ValueErrorEstimate``.
        :type val: Any
        :return: the multiplication of this value error estimate with another value.
        :rtype: ValueErrorEstimate
        """
        return self * val

    def __truediv__(self: ValueErrorEstimate, val: Any) -> ValueErrorEstimate:
        """
        Get the result of dividing the current value estimate by another value.

        This performs the **left** division where this value is the numerator
        and the ``val`` parameter is the denominator.

        This only supports the case where the ``val`` (i.e. denominator) is a scalar
        ``int`` or ``float``.  If you need to perform division of two instances
        you should do ``A * B.inverse()`` but that assumes that there is no
        correlation between the values.

        :param val: the denominator of the division and is a scale ``int`` or ``float``.
        :type val: Any
        :return: the result of the current value estimate by the value.
        :rtype: ValueErrorEstimate
        """
        if isinstance(val, (int, float)):
            return ValueErrorEstimate(val=self.val / val, variance=self.variance / (val**2))
        return NotImplemented

    def __rtruediv__(self: ValueErrorEstimate, val: Any) -> ValueErrorEstimate:
        """
        Get the value divided by the current value error estimate.

        This performs the **right** division where this instance is in the
        denominator.

        :param val: the value to divide by the current value error estimate.
        :type val: Any
        :return: value divided by the current value error estimate.
        :rtype: ValueErrorEstimate
        """
        this = self.inverse()
        return val * this

    def __pow__(self: ValueErrorEstimate, pow: Any) -> ValueErrorEstimate:
        """
        Get the value raised to a power along with its error estimate.

        :param pow: the power to raise the value to.
        :type pow: Any, must be an int or float
        :return: the value raised to a power along with its error estimate.
        :rtype: ValueErrorEstimate
        """
        if isinstance(pow, (int, float)):
            new_val = self.val**pow
            new_variance = self.variance * (pow * new_val / self.val) ** 2.0
            return ValueErrorEstimate(val=new_val, variance=new_variance)
        return NotImplemented

    def __copy__(self: ValueErrorEstimate) -> ValueErrorEstimate:
        """
        Get a copy of the current instance.

        :return: a copy of the current instance.
        :rtype: ValueErrorEstimate
        """
        return ValueErrorEstimate(val=self.val, variance=self.variance)