Source code for pabutools.utils

"""
Collection of util functions.
"""

from __future__ import annotations

from collections.abc import Iterable, Generator
from enum import Enum
from itertools import combinations, chain

from typing import Union

from gmpy2 import mpq

from pabutools.fractions import frac

Numeric = Union[int, float, mpq]
"""
Type for numeric values. Is the union of int, float and mpq fractions (from the gmpy2 package).
"""



[docs]
def mean_generator(
    generator: Iterable[Numeric] | Iterable[tuple[Numeric, int]]
) -> Numeric:
    """
    Computes the mean of a stream of numbers in a single pass. Each item is either a plain number, or a tuple whose
    first element is the value and whose second element is the number of times that value occurs.

    Parameters
    ----------
        generator: Iterable[Numeric] | Iterable[tuple[Numeric, int]]
            The numbers (or `(value, multiplicity)` tuples) to average.

    Returns
    -------
        Numeric
            The mean of the values. It is 0 if no value is counted.
    """
    running_mean: Numeric = 0
    count: int = 0
    for item in generator:
        if isinstance(item, tuple):
            value, multiplicity = item[0], item[1]
        else:
            value, multiplicity = item, 1
        # Incremental update, applied once per occurrence of the value:
        # new_mean = old_mean + (value - old_mean) / count.
        for _ in range(multiplicity):
            count += 1
            running_mean += frac(value - running_mean, count)
    return running_mean




[docs]
def powerset(iterable: Iterable) -> Generator:
    """
    Enumerates every subset of the elements of an iterable, from the empty subset up to the full collection. Subsets
    are yielded as tuples, grouped by increasing size.

    Parameters
    ----------
        iterable: Iterable
            The elements to build subsets from.

    Returns
    -------
        Generator
            A lazy iterator over all the subsets of the iterable.
    """
    # The input is materialised immediately so that it can be traversed once per subset size.
    pool = list(iterable)
    subset_sizes = range(len(pool) + 1)
    subsets_by_size = (combinations(pool, size) for size in subset_sizes)
    return chain.from_iterable(subsets_by_size)




[docs]
def gini_coefficient(values: Iterable[Numeric]) -> Numeric:
    """
    Returns the Gini coefficient of the vector of values given as argument.

    The input is read exactly once, so one-shot iterables such as generators are supported.

    Parameters
    ----------
        values: Iterable[Numeric]
            A vector of non-negative values.

    Returns
    -------
        Numeric
            The Gini coefficient. It is 0 if the vector is empty or only contains zeros.

    Raises
    ------
        ValueError
            If one of the values is negative.

    """
    # Materialise the input once: it is needed for validation, sorting and summing, and an iterator
    # would be exhausted after the first of these passes.
    collected: list[Numeric] = list(values)
    if any(v < 0 for v in collected):
        raise ValueError(
            "Negative values not supported by gini coefficient implementation."
        )
    # Covers both the empty vector and the all-zero vector, for which the total is 0 and the
    # ratio below would be undefined.
    if not any(v > 0 for v in collected):
        return 0
    num_values: int = len(collected)
    sorted_values: list[Numeric] = sorted(collected)
    # Weighted sum in which the smallest value gets weight n and the largest gets weight 1.
    total_cum_sum: Numeric = sum(
        v * (num_values - i) for i, v in enumerate(sorted_values)
    )
    return frac(
        num_values + 1 - frac(2 * total_cum_sum, sum(collected)), num_values
    )




[docs]
def round_cmp(a: Numeric, b: Numeric, precision: int = 6) -> int:
    """
    Rounds both numbers to the given number of decimal places and returns the difference of the rounded values, to
    be used as a comparison result.

    Parameters
    ----------
        a : Numeric
            Left-hand side of the comparison.
        b : Numeric
            Right-hand side of the comparison.
        precision : int, optional
            Number of decimal places kept when rounding both numbers.
            Defaults to 6.

    Returns
    -------
        int
            The rounded value of 'a' minus the rounded value of 'b': negative if 'a' is smaller after rounding,
            0 if both round to the same value, positive if 'a' is larger after rounding.

    """
    rounded_a = round(a, precision)
    rounded_b = round(b, precision)
    return rounded_a - rounded_b




[docs]
class DocEnum(Enum):
    """
    Enumeration whose members can each carry their own docstring. A member is declared as `NAME = value, "doc"`;
    the documentation part is optional. Taken directly from
    `stack overflow <https://stackoverflow.com/questions/50473951/how-can-i-attach-documentation-to-members-of-a-python-enum/50473952#50473952>`_.
    """

    def __new__(cls, value, doc=None):
        member = object.__new__(cls)
        # Set the value explicitly so that it is `value` alone rather than the `(value, doc)` pair.
        member._value_ = value
        if doc is not None:
            member.__doc__ = doc
        return member




[docs]
def format_table(headers: list[str], rows: list[tuple[str, ...]]) -> str:
    """
    Format a table of rows with headers into a clean aligned string.

    Every cell is converted with `str()` before being measured and printed, so non-string cells
    (numbers, `None`, ...) are accepted. Cells are left-aligned and padded to the width of the widest
    entry of their column. Columns are separated by `" | "` and the header is underlined by a
    `"-+-"`-joined rule.

    Contributed by Vivian Umansky.

    Parameters
    ----------
    headers : list of str
        Column titles. Their number defines the number of columns.
    rows : list of tuples of str
        Each tuple corresponds to a row. Entries beyond the number of headers are ignored.

    Returns
    -------
    str
        A formatted string representing the table.

    Raises
    ------
    IndexError
        If a row has fewer entries than there are headers.
    """
    num_cols = len(headers)
    # Stringify once so that the width computation and the rendering agree on the text of each cell.
    # Indexing (rather than zipping) keeps the IndexError for rows that are too short.
    header_cells = [str(title) for title in headers]
    body_cells = [[str(row[i]) for i in range(num_cols)] for row in rows]

    col_widths = [
        max(len(cells[i]) for cells in [header_cells] + body_cells)
        for i in range(num_cols)
    ]

    def render(cells: list[str]) -> str:
        return " | ".join(
            f"{cell:<{width}}" for cell, width in zip(cells, col_widths)
        )

    separator = "-+-".join("-" * width for width in col_widths)
    lines = [render(header_cells), separator]
    lines.extend(render(cells) for cells in body_cells)
    return "\n".join(lines)