Source code for prefsampling.ordinal.mallows

"""
Mallows's model is a sampling model parameterised by a central ranking. The probability of
generating a given ranking is then exponential in the distance between the ranking and the central
ranking.
"""

from __future__ import annotations

from collections.abc import Iterable

import numpy as np

from prefsampling.combinatorics import kendall_tau_distance, all_rankings
from prefsampling.inputvalidators import validate_num_voters_candidates, validate_int
from prefsampling.ordinal import impartial


[docs] @validate_num_voters_candidates def mallows( num_voters: int, num_candidates: int, phi: float, normalise_phi: bool = False, central_vote: np.ndarray = None, impartial_central_vote: bool = False, seed: int = None, ) -> list[list[int]]: """ Generates votes according to Mallows' model (`Mallows, 1957 <https://www.jstor.org/stable/2333244>`_). This model is parameterised by a central vote. The probability of generating a given decreases exponentially with the distance between the vote and the central vote. Specifically, the probability of generating a vote is proportional to `phi ** distance` where `phi` is a dispersion coefficient (in [0, 1]) and `distance` is the Kendall-Tau distance between the central vote and the vote under consideration. A set of `num_voters` vote is generated independently and identically following this process. The `phi` coefficient controls the dispersion of the votes: values close to 0 render votes that are far away from the central vote unlikely to be generated; and the opposite for values close to 1. Depending on the application, it can be advised to normalise the value of `phi` (especially when comparing different values for `phi`), see `Boehmer, Faliszewski and Kraiczy (2023) <https://proceedings.mlr.press/v202/boehmer23b.html>`_ for more details. Use :code:`normalise_phi = True` to do so. For an analogous sampler generating approval ballots, see :py:func:`~prefsampling.approval.noise.noise`. Parameters ---------- num_voters : int Number of Voters. num_candidates : int Number of Candidates. phi : float The dispersion coefficient. normalise_phi : bool, default: :code:`False` Indicates whether phi should be normalised (see `Boehmer, Faliszewski and Kraiczy (2023) <https://proceedings.mlr.press/v202/boehmer23b.html>`_) central_vote : np.ndarray, default: :code:`np.arrange(num_candidates)` The central vote. Ignored if :code:`impartial_central_vote = True`. impartial_central_vote: bool, default: :code:`False` If true, the central vote is sampled from :py:func:`~prefsampling.ordinal.impartial`. seed : int, default: :code:`None` Seed for numpy random number generator. Returns ------- list[list[int]] Ordinal votes. Examples -------- .. testcode:: from prefsampling.ordinal import mallows # Sample from a Mallows' model with 2 voters and 3 candidates, the parameter phi is 0.6 mallows(2, 3, 0.6) # For reproducibility, you can set the seed. mallows(2, 3, 1, seed=1002) # Parameter phi should be in [0, 1] try: mallows(2, 3, -0.5) except ValueError: pass try: mallows(2, 3, 1.2) except ValueError: pass Validation ---------- The probability distribution derived from Mallows' model is well known. Specifically, given :math:`n` agents and :math:`m` candidates, a parameter :math:`\\phi` and a central ranking :math:`\\succ_c`, the probability of generating a ranking :math:`\\succ` is equal to: .. math:: \\phi^{d(\\succ, \\succ_c)} \\times \\frac{1}{\\prod_{j=1}^m \\frac{1 - \\phi^j}{1 - \\phi}} where :math:`d(\\succ, \\succ_c)` is the kendall-tau distance between the ranking and the central ranking. We test that the observed frequencies of rankings aligns with the theoretical probability distribution. The fact that the normalisation of phi does not seem to impact the figure is due to the small number of candidates that reduces the distance between phi and its normalised value. .. image:: ../validation_plots/ordinal/mallows_0_1.png :width: 800 :alt: Observed versus theoretical frequencies for a Mallows model with phi=0.1 .. image:: ../validation_plots/ordinal/mallows_0_5.png :width: 800 :alt: Observed versus theoretical frequencies for a Mallows model with phi=0.5 .. image:: ../validation_plots/ordinal/mallows_0_8.png :width: 800 :alt: Observed versus theoretical frequencies for a Mallows model with phi=0.8 When :code:`phi` is equal to 1, we are supposed to observe a uniform distribution over all rankings. .. image:: ../validation_plots/ordinal/mallows_1_0.png :width: 800 :alt: Observed versus theoretical frequencies for a Mallows model with phi=1.0 References ---------- `Non-null ranking models <https://www.jstor.org/stable/2333244>`_, *Colin Lingwood Mallows*, Biometrica, 44:114–130, 1957. `Properties of the Mallows model depending on the number of alternatives: A warning for an experimentalist. <https://proceedings.mlr.press/v202/boehmer23b/boehmer23b.pdf>`_, *Niclas Boehmer, Piotr Faliszewski and Sonja Kraiczy*, Proceedings of the International Conference on Machine Learning, 2023. """ if phi < 0 or 1 < phi: raise ValueError(f"Incorrect value of phi: {phi}. Value should be in [0, 1]") if normalise_phi: phi = phi_from_norm_phi(num_candidates, phi) rng = np.random.default_rng(seed) if impartial_central_vote: central_vote = impartial(1, num_candidates, seed=seed)[0] insert_distributions = [ _insert_prob_distr(i, phi) for i in range(1, num_candidates) ] votes = [] for i in range(num_voters): vote = _mallows_vote(num_candidates, insert_distributions, rng=rng) if central_vote is not None: vote = [central_vote[i] for i in vote] votes.append(vote) return votes
[docs] @validate_num_voters_candidates def norm_mallows( num_voters: int, num_candidates: int, norm_phi: float, central_vote: np.ndarray = None, impartial_central_vote: bool = False, seed: int = None, ) -> list[list[int]]: """ Shortcut for the function :py:func:`~prefsampling.ordinal.mallows` with :code:`normalise_phi = True`. """ if norm_phi < 0 or 1 < norm_phi: raise ValueError( f"Incorrect value of normphi: {norm_phi}. Value should be in [0,1]" ) return mallows( num_voters, num_candidates, norm_phi, normalise_phi=True, central_vote=central_vote, impartial_central_vote=impartial_central_vote, seed=seed, )
def _insert_prob_distr(position: int, phi: float) -> np.ndarray: """ Computes the insertion probability distribution for a given position and a given dispersion coefficient. Parameters ---------- position: int The position in the ranking phi: float The dispersion parameter Returns ------- np.ndarray The probability distribution. """ distribution = np.zeros(position + 1) for j in range(position + 1): distribution[j] = phi ** (position - j) return distribution / distribution.sum() def _mallows_vote( num_candidates: int, insert_distributions: list[np.ndarray], rng: np.random.Generator, ) -> list[int]: """ Samples a vote according to Mallows' model. Parameters ---------- num_candidates: int Number of candidates insert_distributions: list[np.ndarray] A list of np.ndarray representing the insert probability distributions rng: np.random.Generator The numpy random generator to use for randomness. Returns ------- np.ndarray The vote. """ vote = [0] for j in range(1, num_candidates): insert_distribution = insert_distributions[j - 1] index = rng.choice(range(len(insert_distribution)), p=insert_distribution) vote.insert(index, j) return vote def _calculate_expected_number_swaps(num_candidates: int, phi: float) -> float: """ Computes the expected number of swaps in a vote sampled from Mallows' model. Parameters ---------- num_candidates: int The number of candidates phi: float The dispersion coefficient of the Mallows' model Returns ------- float The expected number of swaps """ res = phi * num_candidates / (1 - phi) for j in range(1, num_candidates + 1): res += (j * (phi**j)) / ((phi**j) - 1) return res
[docs] def phi_from_norm_phi(num_candidates: int, norm_phi: float) -> float: """ Computes an approximation of the dispersion coefficient of a Mallows' model based on its normalised coefficient (`norm_phi`). Parameters ---------- num_candidates: int The number of candidates norm_phi: float The normalised dispersion coefficient of the Mallows' model Returns ------- float The (non-normalised) dispersion coefficient of the Mallows' model """ if norm_phi == 1: return 1 if norm_phi > 2 or norm_phi < 0: raise ValueError( f"The value of norm_phi should be between in (0, 2) (it is now {norm_phi})." ) if norm_phi > 1: return 2 - norm_phi exp_abs = norm_phi * (num_candidates * (num_candidates - 1)) / 4 low = 0 high = 1 while low <= high: mid = (high + low) / 2 cur = _calculate_expected_number_swaps(num_candidates, mid) if abs(cur - exp_abs) < 1e-5: return mid # If x is greater, ignore left half if cur < exp_abs: low = mid # If x is smaller, ignore right half elif cur > exp_abs: high = mid raise ValueError( "Something went wrong when computing phi, we should not have ended up here." )
def theoretical_distribution( num_candidates: int, phi: float, normalise_phi: bool = False, rankings: Iterable[tuple[int]] = None, ) -> dict: validate_int(num_candidates, lower_bound=0) if rankings is None: rankings = all_rankings(num_candidates) distribution = {} if normalise_phi: phi = phi_from_norm_phi(num_candidates, phi) central_ranking = tuple(range(num_candidates)) for ranking in rankings: distribution[ranking] = phi ** kendall_tau_distance(central_ranking, ranking) normaliser = sum(distribution.values()) for r in distribution: distribution[r] /= normaliser return distribution