Source code for quara.qcircuit.data_generator

import numpy as np
from typing import List, Tuple

from quara.settings import Settings


def _random_number_to_data(probdist: np.ndarray, random_number: np.float64) -> int:
    """maps a random number onto an outcome index of ``probdist``.

    accumulates the entries of ``probdist`` from index 0 and returns the first
    index at which the running total becomes strictly greater than
    ``random_number``.

    Parameters
    ----------
    probdist : np.ndarray
        probabilities of the outcomes, indexed by outcome.
    random_number : np.float64
        the number to locate within the running total of ``probdist``.

    Returns
    -------
    int
        the selected outcome index. when the running total never exceeds
        ``random_number``, the last index ``len(probdist) - 1`` is returned.
    """
    last_outcome = len(probdist) - 1
    running_total = 0.0
    for outcome in range(last_outcome + 1):
        running_total = running_total + probdist[outcome]
        if running_total > random_number:
            return outcome
    # no running total exceeded random_number: fall back to the last index.
    return last_outcome


def generate_data_from_prob_dist(
    prob_dist: np.ndarray, data_num: int, seed: int = None, atol: float = None
) -> List[int]:
    """generates random data from a probability distribution.

    the data is a sequence (list) of measurement outcomes.
    measurement outcomes are integers with
    ``0 <= each measurement outcome < len(prob_dist)``.
    length of the data equals ``data_num``.

    Parameters
    ----------
    prob_dist : np.ndarray
        a probability distribution used to generate random data.
    data_num : int
        length of the data.
    seed : int, optional
        a seed used to generate random data, by default None.
        when given, NumPy's global random state is re-seeded with it.
    atol : float, optional
        the absolute tolerance parameter, uses
        :func:`~quara.settings.Settings.get_atol` when None.
        an explicit ``0.0`` is honoured and demands an exact sum.
        checks ``absolute(the sum of probabilities - 1) <= atol`` in this function.

    Returns
    -------
    List[int]
        generated data.

    Raises
    ------
    ValueError
        some probability is negative.
    ValueError
        the sum of probabilities does not equal 1.
    """
    probs = np.asarray(prob_dist)

    # whether each probability is non-negative; report the first offending entry.
    negative_probs = probs[probs < 0]
    if negative_probs.size > 0:
        raise ValueError(
            f"each probability must be a positive number. there is {negative_probs[0]} in a probability distribution"
        )

    # whether the sum of probabilities equals 1.
    sum_prob_dist = np.sum(probs)
    # compare against None (not truthiness) so that an explicit atol=0.0 is not
    # silently replaced by the default tolerance.
    if atol is None:
        atol = Settings.get_atol()
    if not np.isclose(sum_prob_dist, 1, atol=atol, rtol=0.0):
        raise ValueError(
            f"the sum of probabilities must equal 1. the sum of probabilities is {sum_prob_dist}"
        )

    # NOTE: seeding goes through NumPy's global random state, so it also
    # influences later ``np.random`` calls made elsewhere.
    if seed is not None:
        np.random.seed(seed)
    # generate random numbers. 0 <= rand_val[i] < 1 for all i = 0,..., data_num - 1
    rand_val = np.random.rand(data_num)

    # build the cumulative distribution once, then pick for every random number
    # the first index whose cumulative probability is strictly greater than it.
    cumulative = np.cumsum(probs, dtype=np.float64)
    outcomes = np.searchsorted(cumulative, rand_val, side="right")
    # a random number that is not below any cumulative value maps to the last outcome.
    outcomes = np.minimum(outcomes, len(probs) - 1)
    return outcomes.tolist()
def generate_dataset_from_prob_dists(
    prob_dists: List[np.ndarray], data_nums: List[int], seeds: List[int] = None,
) -> List[List[int]]:
    """generates a random dataset from probability distributions.

    the dataset is a list whose i-th element is the data generated by
    :func:`~quara.qcircuit.data_generator.generate_data_from_prob_dist`
    from ``prob_dists[i]``, ``data_nums[i]`` and, when ``seeds`` is given, ``seeds[i]``.

    Parameters
    ----------
    prob_dists : List[np.ndarray]
        a list of probability distributions.
    data_nums : List[int]
        a list of data lengths, one per probability distribution.
    seeds : List[int], optional
        a list of seeds, one per probability distribution, by default None.
        when None, every data is generated without a seed.

    Returns
    -------
    List[List[int]]
        generated dataset.

    Raises
    ------
    ValueError
        the length of ``prob_dists`` does not equal the length of ``data_nums``.
    ValueError
        ``seeds`` is not None and the length of ``prob_dists`` does not equal the length of ``seeds``.
    """
    num_prob_dists = len(prob_dists)

    # prob_dists and data_nums are consumed pairwise, so their lengths must match.
    if num_prob_dists != len(data_nums):
        raise ValueError(
            f"the length of prob_dists must equal the length of data_nums. the length of prob_dists is {num_prob_dists}. the length of data_nums is {len(data_nums)}"
        )

    # seeds, when supplied, must provide exactly one seed per probability distribution.
    if seeds is not None and num_prob_dists != len(seeds):
        raise ValueError(
            f"the length of prob_dists must equal the length of seeds. the length of prob_dists is {num_prob_dists}. the length of seeds is {len(seeds)}"
        )

    return [
        generate_data_from_prob_dist(
            prob_dist, data_num, None if seeds is None else seeds[position]
        )
        for position, (prob_dist, data_num) in enumerate(zip(prob_dists, data_nums))
    ]
def calc_empi_dist_sequence(
    measurement_num: int, data: List[int], num_sums: List[int]
) -> List[Tuple[int, np.ndarray]]:
    """calculates empirical distributions.

    for every ``num_sum`` in ``num_sums``, uses the first ``num_sum`` elements of
    ``data`` to calculate an empirical distribution. ``data`` is scanned once and
    only up to the last entry of ``num_sums``; later elements are not inspected.

    Parameters
    ----------
    measurement_num : int
        number of measurements (number of possible outcomes).
    data : List[int]
        data of measurement outcomes.
    num_sums : List[int]
        a list of the range of ``data`` to calculate empirical distributions.
        must be a strictly increasing sequence of positive integers.

    Returns
    -------
    List[Tuple[int, np.ndarray]]
        a list of (the range of ``data``, empirical distribution).
        the dtype of each empirical distribution is np.float64.
        an empty list when ``num_sums`` is empty.

    Raises
    ------
    ValueError
        ``measurement_num`` is not non-negative integer.
    ValueError
        the first element of ``num_sums`` is not a positive integer.
    ValueError
        there is an element of ``num_sums`` that is not less than or equal to length of ``data``.
    ValueError
        there is an element of ``data`` that is not non-negative and less than ``measurement_num``.
    ValueError
        ``num_sums`` is not an increasing sequence.

    Examples
    --------
    >>> measurement_num = 2
    >>> data = [1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1]
    >>> num_sums = [5, 10, 20]
    >>> empi_dist = calc_empi_dist_sequence(measurement_num, data, num_sums)
    >>> empi_dist
    [(5, array([0.4, 0.6])), (10, array([0.3, 0.7])), (20, array([0.3, 0.7]))]
    """
    # whether measurement_num is non-negative integer.
    if measurement_num < 0:
        raise ValueError(
            f"measurement_num must be non-negative integer. measurement_num is {measurement_num}"
        )

    empi_dists = []
    if len(num_sums) == 0:
        return empi_dists

    data_len = len(data)
    cumulative_frequency = np.zeros(measurement_num, dtype=int)

    # take the first num_sum from 'num_sums'
    next_num_sum_position = 0
    next_num_sum = num_sums[0]

    # a non-positive first entry can never be matched by the running count below;
    # without this check the function would silently return an empty list and
    # ignore every later entry of num_sums.
    if next_num_sum <= 0:
        raise ValueError(
            f"each number of num_sums must be a positive integer. num_sums of index {next_num_sum_position} is {next_num_sum}"
        )

    # whether each number of num_sums is less than or equal to length of data.
    if next_num_sum > data_len:
        raise ValueError(
            f"each number of num_sums must be less than or equal to length of data. num_sums of index {next_num_sum_position} is {next_num_sum}. length of data is {data_len}"
        )

    for index, d in enumerate(data):
        # whether 0 <= d < 'measurement_num'.
        if not 0 <= d < measurement_num:
            raise ValueError(
                f"for each data d, it must be 0 <= d < 'measurement_num'. data of index {index} is {d}"
            )
        cumulative_frequency[d] += 1

        seen = index + 1
        if seen != next_num_sum:
            continue

        # calculate empirical distribution; the division creates a new array,
        # so earlier results are not affected by later counting.
        empi_dists.append((next_num_sum, cumulative_frequency / seen))

        # take next num_sum from 'num_sums'
        next_num_sum_position += 1
        if next_num_sum_position == len(num_sums):
            # end of 'num_sums'
            return empi_dists
        former_num_sum = next_num_sum
        next_num_sum = num_sums[next_num_sum_position]

        # whether each number of num_sums is less than or equal to length of data.
        if next_num_sum > data_len:
            raise ValueError(
                f"each number of num_sums must be less than or equal to length of data. num_sums of index {next_num_sum_position} is {next_num_sum}"
            )

        # whether num_sums is an increasing sequence.
        if former_num_sum >= next_num_sum:
            raise ValueError(
                f"num_sums must be an increasing sequence. num_sums contains the following subsequence: {former_num_sum}, {next_num_sum}"
            )

    return empi_dists
def calc_empi_dists_sequence(
    measurement_nums: List[int],
    dataset: List[List[int]],
    list_num_sums: List[List[int]],
) -> List[List[Tuple[int, np.ndarray]]]:
    """calculates sequences of empirical distributions, one per data of ``dataset``.

    the i-th element of the result is computed by
    :func:`~quara.qcircuit.data_generator.calc_empi_dist_sequence`
    from ``measurement_nums[i]``, ``dataset[i]`` and ``list_num_sums[i]``.

    Parameters
    ----------
    measurement_nums : List[int]
        a list of measurement_num.
    dataset : List[List[int]]
        a dataset (a list of data).
    list_num_sums : List[List[int]]
        a list of num_sums.

    Returns
    -------
    List[List[Tuple[int, np.ndarray]]]
        for each data, a list of (the range of the data, empirical distribution).

    Raises
    ------
    ValueError
        the length of ``measurement_nums`` does not equal the length of ``dataset``.
    ValueError
        the length of ``measurement_nums`` does not equal the length of ``list_num_sums``.
    """
    num_measurements = len(measurement_nums)

    # the three lists are consumed in lockstep, so all lengths must agree.
    if num_measurements != len(dataset):
        raise ValueError(
            f"the length of measurement_nums must equal the length of dataset. the length of measurement_nums is {num_measurements}. the length of dataset is {len(dataset)}"
        )
    if num_measurements != len(list_num_sums):
        raise ValueError(
            f"the length of measurement_nums must equal the length of list_num_sums. the length of measurement_nums is {num_measurements}. the length of list_num_sums is {len(list_num_sums)}"
        )

    return [
        calc_empi_dist_sequence(measurement_num, data, num_sums)
        for measurement_num, data, num_sums in zip(
            measurement_nums, dataset, list_num_sums
        )
    ]