# Source code for prefsampling.ordinal.mallows

```
"""
Mallows's model is a sampling model parameterised by a central ranking. The probability of
generating a given ranking is then exponential in the distance between the ranking and the central
ranking.
"""
from __future__ import annotations
from collections.abc import Iterable
import numpy as np
from prefsampling.combinatorics import kendall_tau_distance, all_rankings
from prefsampling.inputvalidators import validate_num_voters_candidates, validate_int
from prefsampling.ordinal import impartial
[docs]
@validate_num_voters_candidates
def mallows(
    num_voters: int,
    num_candidates: int,
    phi: float,
    normalise_phi: bool = False,
    central_vote: np.ndarray = None,
    impartial_central_vote: bool = False,
    seed: int = None,
) -> list[list[int]]:
    """
    Generates votes according to Mallows' model (`Mallows, 1957
    <https://www.jstor.org/stable/2333244>`_). This model is parameterised by a central vote. The
    probability of generating a given vote decreases exponentially with the distance between the
    vote and the central vote.

    Specifically, the probability of generating a vote is proportional to `phi ** distance` where
    `phi` is a dispersion coefficient (in [0, 1]) and `distance` is the Kendall-tau distance
    between the central vote and the vote under consideration. A set of `num_voters` votes is
    generated independently and identically following this process.

    The `phi` coefficient controls the dispersion of the votes: values close to 0 render votes that
    are far away from the central vote unlikely to be generated; and the opposite for values close
    to 1. Depending on the application, it can be advised to normalise the value of `phi`
    (especially when comparing different values for `phi`), see `Boehmer, Faliszewski and Kraiczy
    (2023) <https://proceedings.mlr.press/v202/boehmer23b.html>`_ for more details. Use
    :code:`normalise_phi = True` to do so.

    For an analogous sampler generating approval ballots, see
    :py:func:`~prefsampling.approval.noise.noise`.

    Parameters
    ----------
        num_voters : int
            Number of Voters.
        num_candidates : int
            Number of Candidates.
        phi : float
            The dispersion coefficient.
        normalise_phi : bool, default: :code:`False`
            Indicates whether phi should be normalised (see `Boehmer, Faliszewski and Kraiczy
            (2023) <https://proceedings.mlr.press/v202/boehmer23b.html>`_)
        central_vote : np.ndarray, default: :code:`np.arange(num_candidates)`
            The central vote. Ignored if :code:`impartial_central_vote = True`.
        impartial_central_vote: bool, default: :code:`False`
            If true, the central vote is sampled from :py:func:`~prefsampling.ordinal.impartial`.
        seed : int, default: :code:`None`
            Seed for numpy random number generator.

    Returns
    -------
        list[list[int]]
            Ordinal votes.

    Raises
    ------
        ValueError
            If `phi` is smaller than 0 or larger than 1.

    Examples
    --------

        .. testcode::

            from prefsampling.ordinal import mallows

            # Sample from a Mallows' model with 2 voters and 3 candidates, the parameter phi is 0.6
            mallows(2, 3, 0.6)

            # For reproducibility, you can set the seed.
            mallows(2, 3, 1, seed=1002)

            # Parameter phi should be in [0, 1]
            try:
                mallows(2, 3, -0.5)
            except ValueError:
                pass
            try:
                mallows(2, 3, 1.2)
            except ValueError:
                pass

    Validation
    ----------

        The probability distribution derived from Mallows' model is well known.
        Specifically, given :math:`n` agents and :math:`m` candidates, a parameter :math:`\\phi`
        and a central ranking :math:`\\succ_c`, the probability of generating a ranking
        :math:`\\succ` is equal to:

        .. math::

            \\phi^{d(\\succ, \\succ_c)} \\times
            \\frac{1}{\\prod_{j=1}^m \\frac{1 - \\phi^j}{1 - \\phi}}

        where :math:`d(\\succ, \\succ_c)` is the Kendall-tau distance between the ranking and the
        central ranking.

        We test that the observed frequencies of rankings align with the theoretical probability
        distribution. The fact that the normalisation of phi does not seem to impact the figure
        is due to the small number of candidates that reduces the distance between phi and its
        normalised value.

        .. image:: ../validation_plots/ordinal/mallows_0_1.png
            :width: 800
            :alt: Observed versus theoretical frequencies for a Mallows model with phi=0.1

        .. image:: ../validation_plots/ordinal/mallows_0_5.png
            :width: 800
            :alt: Observed versus theoretical frequencies for a Mallows model with phi=0.5

        .. image:: ../validation_plots/ordinal/mallows_0_8.png
            :width: 800
            :alt: Observed versus theoretical frequencies for a Mallows model with phi=0.8

        When :code:`phi` is equal to 1, we are supposed to observe a uniform distribution over all
        rankings.

        .. image:: ../validation_plots/ordinal/mallows_1_0.png
            :width: 800
            :alt: Observed versus theoretical frequencies for a Mallows model with phi=1.0

    References
    ----------
        `Non-null ranking models
        <https://www.jstor.org/stable/2333244>`_,
        *Colin Lingwood Mallows*,
        Biometrika, 44:114–130, 1957.

        `Properties of the Mallows model depending on the number of alternatives: A warning for an
        experimentalist.
        <https://proceedings.mlr.press/v202/boehmer23b/boehmer23b.pdf>`_,
        *Niclas Boehmer, Piotr Faliszewski and Sonja Kraiczy*,
        Proceedings of the International Conference on Machine Learning, 2023.
    """
    # Written as two plain comparisons on purpose: a NaN phi passes this check.
    if phi < 0 or phi > 1:
        raise ValueError(f"Incorrect value of phi: {phi}. Value should be in [0, 1]")

    dispersion = phi_from_norm_phi(num_candidates, phi) if normalise_phi else phi

    rng = np.random.default_rng(seed)

    # An impartially drawn central vote takes precedence over the one passed in.
    reference_vote = central_vote
    if impartial_central_vote:
        reference_vote = impartial(1, num_candidates, seed=seed)[0]

    # One insertion distribution per candidate 1, ..., num_candidates - 1, shared by all voters.
    insert_distributions = [
        _insert_prob_distr(position, dispersion)
        for position in range(1, num_candidates)
    ]

    # Votes are sampled lazily, one after the other, so the generator state advances in voter
    # order.
    sampled_votes = (
        _mallows_vote(num_candidates, insert_distributions, rng=rng)
        for _ in range(num_voters)
    )
    if reference_vote is None:
        # No central vote: samples are already expressed relative to 0, 1, ..., m - 1.
        return list(sampled_votes)
    # Relabel each sampled position through the central vote.
    return [[reference_vote[rank] for rank in vote] for vote in sampled_votes]
[docs]
@validate_num_voters_candidates
def norm_mallows(
    num_voters: int,
    num_candidates: int,
    norm_phi: float,
    central_vote: np.ndarray = None,
    impartial_central_vote: bool = False,
    seed: int = None,
) -> list[list[int]]:
    """
    Shortcut for the function :py:func:`~prefsampling.ordinal.mallows` with
    :code:`normalise_phi = True`.

    The value of `norm_phi` is checked here first so that the error message refers to the
    normalised coefficient; all other arguments are forwarded unchanged.

    Raises
    ------
        ValueError
            If `norm_phi` is smaller than 0 or larger than 1.
    """
    outside_unit_interval = norm_phi < 0 or norm_phi > 1
    if outside_unit_interval:
        raise ValueError(
            f"Incorrect value of normphi: {norm_phi}. Value should be in [0,1]"
        )

    forwarded_options = {
        "normalise_phi": True,
        "central_vote": central_vote,
        "impartial_central_vote": impartial_central_vote,
        "seed": seed,
    }
    return mallows(num_voters, num_candidates, norm_phi, **forwarded_options)
def _insert_prob_distr(position: int, phi: float) -> np.ndarray:
"""
Computes the insertion probability distribution for a given position and a given dispersion
coefficient.
Parameters
----------
position: int
The position in the ranking
phi: float
The dispersion parameter
Returns
-------
np.ndarray
The probability distribution.
"""
distribution = np.zeros(position + 1)
for j in range(position + 1):
distribution[j] = phi ** (position - j)
return distribution / distribution.sum()
def _mallows_vote(
num_candidates: int,
insert_distributions: list[np.ndarray],
rng: np.random.Generator,
) -> list[int]:
"""
Samples a vote according to Mallows' model.
Parameters
----------
num_candidates: int
Number of candidates
insert_distributions: list[np.ndarray]
A list of np.ndarray representing the insert probability distributions
rng: np.random.Generator
The numpy random generator to use for randomness.
Returns
-------
np.ndarray
The vote.
"""
vote = [0]
for j in range(1, num_candidates):
insert_distribution = insert_distributions[j - 1]
index = rng.choice(range(len(insert_distribution)), p=insert_distribution)
vote.insert(index, j)
return vote
def _calculate_expected_number_swaps(num_candidates: int, phi: float) -> float:
"""
Computes the expected number of swaps in a vote sampled from Mallows' model.
Parameters
----------
num_candidates: int
The number of candidates
phi: float
The dispersion coefficient of the Mallows' model
Returns
-------
float
The expected number of swaps
"""
res = phi * num_candidates / (1 - phi)
for j in range(1, num_candidates + 1):
res += (j * (phi**j)) / ((phi**j) - 1)
return res
[docs]
def phi_from_norm_phi(num_candidates: int, norm_phi: float) -> float:
    """
    Computes an approximation of the dispersion coefficient of a Mallows' model based on its
    normalised coefficient (`norm_phi`).

    For `norm_phi` in [0, 1), a bisection over [0, 1] looks for the value of phi whose expected
    number of swaps equals `norm_phi * m * (m - 1) / 4`, where `m = num_candidates`. The search
    stops as soon as the expected number of swaps is within `1e-5` of the target, or when the
    bracketing interval can no longer be narrowed in floating-point arithmetic (in which case the
    closest value found is returned, instead of looping forever).

    Values of `norm_phi` in (1, 2] are mapped to `2 - norm_phi` and `norm_phi = 1` is mapped to 1,
    without any search.

    Parameters
    ----------
        num_candidates: int
            The number of candidates
        norm_phi: float
            The normalised dispersion coefficient of the Mallows' model

    Returns
    -------
        float
            The (non-normalised) dispersion coefficient of the Mallows' model

    Raises
    ------
        ValueError
            If `norm_phi` is not a number between 0 and 2.
    """
    if norm_phi == 1:
        return 1
    # Phrased as a chained comparison so that NaN is rejected too; it would otherwise make the
    # bisection below meaningless.
    if not 0 <= norm_phi <= 2:
        raise ValueError(
            f"The value of norm_phi should be between in (0, 2) (it is now {norm_phi})."
        )
    if norm_phi > 1:
        return 2 - norm_phi
    exp_abs = norm_phi * (num_candidates * (num_candidates - 1)) / 4
    low = 0
    high = 1
    while True:
        mid = (high + low) / 2
        cur = _calculate_expected_number_swaps(num_candidates, mid)
        if abs(cur - exp_abs) < 1e-5:
            return mid
        if mid == low or mid == high:
            # The midpoint coincides with a bound: the interval cannot shrink any further, so the
            # tolerance is unreachable in floating point. Return the best value available.
            return mid
        if cur < exp_abs:
            # Too few swaps: the sought phi lies in the upper half.
            low = mid
        else:
            # Too many swaps: the sought phi lies in the lower half.
            high = mid
def theoretical_distribution(
    num_candidates: int,
    phi: float,
    normalise_phi: bool = False,
    rankings: Iterable[tuple[int]] = None,
) -> dict:
    """
    Computes the probability of rankings under Mallows' model with central ranking
    `(0, 1, ..., num_candidates - 1)`.

    Each ranking is given a weight of `phi ** distance`, where `distance` is the value returned
    by `kendall_tau_distance` for the central ranking and that ranking. Weights are then divided
    by their sum over the rankings considered.

    Parameters
    ----------
        num_candidates: int
            The number of candidates
        phi: float
            The dispersion coefficient
        normalise_phi: bool, default: :code:`False`
            If true, `phi` is first converted via :code:`phi_from_norm_phi`.
        rankings: Iterable[tuple[int]], default: :code:`None`
            The rankings to consider. If :code:`None`, the output of
            :code:`all_rankings(num_candidates)` is used.

    Returns
    -------
        dict
            A mapping from each ranking to its probability.
    """
    validate_int(num_candidates, lower_bound=0)
    if rankings is None:
        rankings = all_rankings(num_candidates)

    dispersion = phi_from_norm_phi(num_candidates, phi) if normalise_phi else phi

    identity_ranking = tuple(range(num_candidates))
    weights = {
        ranking: dispersion ** kendall_tau_distance(identity_ranking, ranking)
        for ranking in rankings
    }

    # Normalise over the rankings actually supplied.
    total_weight = sum(weights.values())
    return {ranking: weight / total_weight for ranking, weight in weights.items()}
```