Source code for prefsampling.approval.noise

"""
Noise models are sampling procedures parameterised by a central vote and in which the probability
of generating a given vote is dependent on its distance to the central vote.
"""

from __future__ import annotations

from collections.abc import Iterable
from enum import Enum

import numpy as np

from prefsampling.approval.utils import validate_or_generate_central_vote
from prefsampling.inputvalidators import validate_num_voters_candidates
from prefsampling.combinatorics import comb, powerset


[docs] class SetDistance(Enum): """ Constants representing the different types of noise that can be applied to the noise sampler. """ HAMMING = "Hamming distance" """ Hamming distance between s1 and s2, i.e., the number of elements that are either only in s1 or only in s2. """ JACCARD = "Jaccard distance" """ Jaccard distance between s1 and s2, i.e., the Hamming distance divided by the size of the union of s1 and s2. """ ZELINKA = "Zelinka distance" """ Zelinka distance between s1 and s2, i.e., size of the largest set between s1 and s2 minus the size of the intersection. """ BUNKE_SHEARER = "Bunke-Shearer distance" """ Bunke-Shearer distance between s1 and s2, i.e., the Zelinka distance divided by size of the largest set between s1 and s2. """
class DistanceInfiniteError(ValueError): """ Exception thrown when the distance between two points is infinite., typically when dividing by 0 for the Bunke-Shearer or the Jaccard distances. """ pass def _compute_distance( distance: SetDistance, size_1: int, size_2: int, size_intersection: int ): if distance == SetDistance.HAMMING: return size_1 + size_2 - 2 * size_intersection if distance == SetDistance.JACCARD: size_union = size_1 + size_2 - size_intersection if size_union == 0: raise DistanceInfiniteError return 1 - size_intersection / size_union if distance == SetDistance.ZELINKA: return max(size_1, size_2) - size_intersection if distance == SetDistance.BUNKE_SHEARER: largest_size = max(size_1, size_2) if largest_size == 0: raise DistanceInfiniteError else: return 1 - size_intersection / largest_size raise ValueError( "The `distance` argument needs to be one of the constant defined in the " "approval.SetDistance enumeration. Choices are: " + ", ".join(str(s) for s in SetDistance) )
[docs] @validate_num_voters_candidates def noise( num_voters: int, num_candidates: int, phi: float, rel_size_central_vote: float, distance: SetDistance = SetDistance.HAMMING, central_vote: set = None, impartial_central_vote: bool = False, seed: int = None, ) -> list[set]: """ Generates approval votes under the noise model. This model is parameterised by a central vote. Approval ballots are then generated based on their distance to the central vote. Specifically, a vote is generated with probability :code:`phi` to the power distance between the vote and the central vote. A collection of `num_voters` vote is generated independently and identically following the process described above. For an analogous sampler generating ordinal ballots, see :py:func:`~prefsampling.ordinal.mallows.mallows`. Parameters ---------- num_voters : int Number of Voters. num_candidates : int Number of Candidates. phi : float Noise model parameter, denoting the noise. rel_size_central_vote: float The relative size of the central vote, if no central vote is provided, the central vote is selected uniformly at random among all subsets of candidates of size `⌊rel_size_central_vote * num_candidates⌋`. distance : :py:class:`~prefsampling.approval.noise.SetDistance`, default: :py:const:`~prefsampling.approval.noise.SetDistance.HAMMING` Distance used to compare a given vote to the central vote. central_vote : set The central vote. Ignored if :code:`impartial_central_vote = True`. impartial_central_vote: bool, default: :code:`False` If true, the central vote is sampled from :py:func:`~prefsampling.approval.impartial` with the same value for the parameter :code:`p` as passed to this sampler. seed : int, default: :code:`None` Seed for numpy random number generator. Returns ------- list[set] Approval votes. Examples -------- .. testcode:: from prefsampling.approval import noise, SetDistance # Sample a profile from the noise model with 2 voters and 3 candidates and parameters # phi = 0.5, p = 0.2, default distance is SetDistance.HAMMING noise(2, 3, 0.5, 0.2) # You can give a specific distance noise(2, 3, 0.5, 0.2, distance=SetDistance.JACCARD) # For reproducibility, you can set the seed. noise(2, 3, 0.5, 0.2, seed=157) # Parameter phi needs to be in [0, 1] try: noise(2, 3, 1.2, 0.2) except ValueError: pass try: noise(2, 3, -0.2, 0.2) except ValueError: pass # Parameter p needs to be in [0, 1] try: noise(2, 3, 0.5, 1.2) except ValueError: pass try: noise(2, 3, 0.5, -0.2) except ValueError: pass Validation ---------- .. image:: ../validation_plots/approval/noise_0_25.png :width: 800 :alt: Observed versus theoretical frequencies for a noise model with phi=0.25 .. image:: ../validation_plots/approval/noise_0_5.png :width: 800 :alt: Observed versus theoretical frequencies for a noise model with phi=0.5 .. image:: ../validation_plots/approval/noise_0_75.png :width: 800 :alt: Observed versus theoretical frequencies for a noise model with phi=0.75 When :code:`phi` is equal to 0, then a single approval ballot should receive all the probability mass. .. image:: ../validation_plots/approval/noise_0_0.png :width: 800 :alt: Observed versus theoretical frequencies for a noise model with phi=0 When :code:`phi` is equal to 1, then we are supposed to obtain a uniform distribution over all approval ballots. .. image:: ../validation_plots/approval/noise_1_0.png :width: 800 :alt: Observed versus theoretical frequencies for a noise model with phi=1 References ---------- `Evaluating Approval-Based Multiwinner Voting in Terms of Robustness to Noise <https://www.ijcai.org/proceedings/2020/11>`_, *Ioannis Caragiannis, Christos Kaklamanis, Nikos Karanikolas and George A. Krimpas*, Proceedings of the International Joint Conference on Artificial Intelligence, 2020. `How to Sample Approval Elections? <https://www.ijcai.org/proceedings/2022/71>`_, *Stanisław Szufa, Piotr Faliszewski, Łukasz Janeczko, Martin Lackner, Arkadii Slinko, Krzysztof Sornat and Nimrod Talmon*, Proceedings of the International Joint Conference on Artificial Intelligence, 2022. """ if phi < 0 or 1 < phi: raise ValueError(f"Incorrect value of phi: {phi}. Value should be in [0,1]") if rel_size_central_vote < 0 or 1 < rel_size_central_vote: raise ValueError( f"Incorrect value of p: {rel_size_central_vote}. Value should be in [0,1]" ) if isinstance(distance, Enum): distance = SetDistance(distance.value) else: distance = SetDistance(distance) rng = np.random.default_rng(seed) central_vote = tuple( validate_or_generate_central_vote( num_candidates, rel_size_central_vote, central_vote, impartial_central_vote, seed, ) ) size_central_vote = len(central_vote) central_non_vote = tuple(j for j in range(num_candidates) if j not in central_vote) size_central_non_vote = len(central_non_vote) choices = [] probabilities = [] # Prepare buckets for num_central in range(size_central_vote + 1): num_options_in = comb(size_central_vote, num_central) for num_non_central in range(size_central_non_vote + 1): num_options_out = comb(size_central_non_vote, num_non_central) try: exponent = _compute_distance( distance, size_central_vote, num_central + num_non_central, num_central, ) factor = phi**exponent except DistanceInfiniteError: factor = int(phi == 0) num_options = num_options_in * num_options_out * factor choices.append((num_central, num_non_central)) probabilities.append(num_options) denominator = sum(probabilities) probabilities = [p / denominator for p in probabilities] # Sample Votes votes = [] for _ in range(num_voters): num_central, num_non_central = rng.choice(choices, p=probabilities) vote = set(rng.choice(central_vote, num_central, replace=False)) vote.update(set(rng.choice(central_non_vote, num_non_central, replace=False))) votes.append(vote) return votes
def theoretical_distribution( num_candidates: int, phi: float, distance: SetDistance, rel_size_central_vote: float, central_vote: set = None, subsets: Iterable[set[int]] = None, ) -> dict: if subsets is None: subsets = powerset(range(num_candidates)) central_vote = validate_or_generate_central_vote( num_candidates, rel_size_central_vote, central_vote, False ) res = { o: phi ** _compute_distance( distance, len(central_vote), len(o), len(central_vote.intersection(o)) ) for o in subsets } denominator = sum(res.values()) for o in res: res[o] /= denominator return res