Skip to content
190 changes: 96 additions & 94 deletions docs/tutorials/randomized_benchmarking.ipynb

Large diffs are not rendered by default.

41 changes: 35 additions & 6 deletions qiskit_experiments/curve_analysis/curve_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
FitOptions,
)
from qiskit_experiments.curve_analysis.curve_fit import multi_curve_fit
from qiskit_experiments.curve_analysis.data_processing import multi_mean_xy_data, data_sort
from qiskit_experiments.curve_analysis.visualization import FitResultPlotters, PlotterStyle
from qiskit_experiments.data_processing import DataProcessor
from qiskit_experiments.data_processing.exceptions import DataProcessorError
Expand Down Expand Up @@ -199,6 +200,8 @@ class AnalysisExample(CurveAnalysis):
- Customize pre-data processing:
Override :meth:`~self._format_data`. For example, here you can apply smoothing
to y values, remove outlier, or apply filter function to the data.
By default, data is sorted by x values and the measured values at the same
x value are averaged.

- Create extra data from fit result:
Override :meth:`~self._extra_database_entry`. You need to return a list of
Expand Down Expand Up @@ -448,7 +451,6 @@ def _format_data(self, data: CurveData) -> CurveData:
"""An optional subroutine to perform data pre-processing.

Subclasses can override this method to apply pre-precessing to data values to fit.
Otherwise the analysis uses extracted data values as-is.

For example,

Expand All @@ -458,20 +460,42 @@ def _format_data(self, data: CurveData) -> CurveData:

etc...

By default, the analysis just takes average over the same x values and sort
data index by the x values in ascending order.

.. note::

The data returned by this method should have the label "fit_ready".

Returns:
Formatted CurveData instance.
"""
# take average over the same x value by keeping sigma
series, xdata, ydata, sigma, shots = multi_mean_xy_data(
series=data.data_index,
xdata=data.x,
ydata=data.y,
sigma=data.y_err,
shots=data.shots,
method="shots_weighted",
)

# sort by x value in ascending order
series, xdata, ydata, sigma, shots = data_sort(
series=series,
xdata=xdata,
ydata=ydata,
sigma=sigma,
shots=shots,
)

return CurveData(
label="fit_ready",
x=data.x,
y=data.y,
y_err=data.y_err,
data_index=data.data_index,
metadata=data.metadata,
x=xdata,
y=ydata,
y_err=sigma,
shots=shots,
data_index=series,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happened to the metadata?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

metadata is just removed from final data set because metadata cannot be averaged.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently it is only used by RB to compute EPG. Probably we need refactoring for this in another PR.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make sure that this PR doesn't break computation of EPG for RB

Copy link
Copy Markdown
Collaborator Author

@nkanazawa1989 nkanazawa1989 Oct 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That should be okey because this data set is not used to compute EPG. There is another data set with different label to compute it (one before taking average).

)

# pylint: disable=unused-argument
Expand Down Expand Up @@ -566,6 +590,9 @@ def _is_target_series(datum, **filters):
# Store metadata
metadata = np.asarray([datum["metadata"] for datum in data], dtype=object)

# Store shots
shots = np.asarray([datum.get("shots", np.nan) for datum in data])

# Format data
x_values = np.asarray(x_values, dtype=float)
y_values = np.asarray(y_values, dtype=float)
Expand All @@ -585,6 +612,7 @@ def _is_target_series(datum, **filters):
x=x_values,
y=y_values,
y_err=y_sigmas,
shots=shots,
data_index=data_index,
metadata=metadata,
)
Expand Down Expand Up @@ -733,6 +761,7 @@ def _data(
x=data.x[locs],
y=data.y[locs],
y_err=data.y_err[locs],
shots=data.shots[locs],
data_index=idx,
metadata=data.metadata[locs] if data.metadata is not None else None,
)
Expand Down
3 changes: 3 additions & 0 deletions qiskit_experiments/curve_analysis/curve_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ class CurveData:
# Error bar
y_err: np.ndarray

# Shots number
shots: np.ndarray

# Maping of data index to series index
data_index: Union[np.ndarray, int]

Expand Down
123 changes: 111 additions & 12 deletions qiskit_experiments/curve_analysis/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]:


def mean_xy_data(
xdata: np.ndarray, ydata: np.ndarray, sigma: Optional[np.ndarray] = None, method: str = "sample"
xdata: np.ndarray,
ydata: np.ndarray,
sigma: Optional[np.ndarray] = None,
shots: Optional[np.ndarray] = None,
method: str = "sample",
) -> Tuple[np.ndarray, ...]:
r"""Return (x, y_mean, sigma) data.

Expand All @@ -61,41 +65,58 @@ def mean_xy_data(
* ``"iwv"`` *Inverse-weighted variance*
:math:`\overline{y} = (\sum_{i=1}^N y_i / \sigma_i^2 ) \sigma^2`
:math:`\sigma^2 = 1 / (\sum_{i=1}^N 1 / \sigma_i^2)`
* ``"shots_weighted_variance"`` *Sample mean and variance with weights from shots*
:math:`\overline{y} = \sum_{i=1}^N n_i y_i / M`,
:math:`\sigma^2 = \sum_{i=1}^N (n_i \sigma_i / M)^2`,
where :math:`n_i` is the number of shots per data point and :math:`M = \sum_{i=1}^N n_i`
is a total number of shots from different circuit execution at the same x value.
If ``shots`` is not provided, this applies uniform weights to all values.

Args
xdata: 1D or 2D array of xdata from curve_fit_data or
multi_curve_fit_data
ydata: array of ydata returned from curve_fit_data or
multi_curve_fit_data
sigma: Optional, array of standard deviations in ydata.
shots: Optional, array of shots used to get a data point.
method: The method to use for computing y means and
standard deviations sigma (default: "sample").

Returns:
tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where
tuple: ``(x, y_mean, sigma, shots)``, where
``x`` is an arrays of unique x-values, ``y`` is an array of
sample mean y-values, and ``sigma`` is an array of sample standard
deviation of y values.
sample mean y-values, ``sigma`` is an array of sample standard
deviation of y values, and ``shots`` are the total number of experiment shots
used to evaluate the data point. If ``shots`` in the function call is ``None``,
the numbers appear in the returned value will represent just a number of
duplicated x value entries.

Raises:
QiskitError: if "ivw" method is used without providing a sigma.
"""
x_means = np.unique(xdata, axis=0)
y_means = np.zeros(x_means.size)
y_sigmas = np.zeros(x_means.size)
y_shots = np.zeros(x_means.size)

if shots is None or any(np.isnan(shots)):
# this will become standard average
shots = np.ones_like(xdata)

# Sample mean and variance method
if method == "sample":
for i in range(x_means.size):
# Get positions of y to average
idxs = xdata == x_means[i]
ys = ydata[idxs]
ns = shots[idxs]

# Compute sample mean and biased sample variance
y_means[i] = np.mean(ys)
y_sigmas[i] = np.sqrt(np.mean((y_means[i] - ys) ** 2))
y_shots[i] = np.sum(ns)

return x_means, y_means, y_sigmas
return x_means, y_means, y_sigmas, y_shots

# Inverse-weighted variance method
if method == "iwv":
Expand All @@ -107,14 +128,33 @@ def mean_xy_data(
# Get positions of y to average
idxs = xdata == x_means[i]
ys = ydata[idxs]
ns = shots[idxs]

# Compute the inverse-variance weighted y mean and variance
weights = 1 / sigma[idxs] ** 2
y_var = 1 / np.sum(weights)
y_means[i] = y_var * np.sum(weights * ys)
y_sigmas[i] = np.sqrt(y_var)
y_shots[i] = np.sum(ns)

return x_means, y_means, y_sigmas, y_shots

# Quadrature sum of variance
if method == "shots_weighted":
for i in range(x_means.size):
# Get positions of y to average
idxs = xdata == x_means[i]
ys = ydata[idxs]
ss = sigma[idxs]
ns = shots[idxs]
weights = ns / np.sum(ns)

# Compute sample mean and sum of variance with weights based on shots
y_means[i] = np.sum(weights * ys)
y_sigmas[i] = np.sqrt(np.sum(weights ** 2 * ss ** 2))
y_shots[i] = np.sum(ns)

return x_means, y_means, y_sigmas
return x_means, y_means, y_sigmas, y_shots

# Invalid method
raise QiskitError(f"Unsupported method {method}")
Expand All @@ -125,40 +165,99 @@ def multi_mean_xy_data(
xdata: np.ndarray,
ydata: np.ndarray,
sigma: Optional[np.ndarray] = None,
shots: Optional[np.ndarray] = None,
method: str = "sample",
):
r"""Return (series, x, y_mean, sigma) data.
Performs `mean_xy_data` for each series
and returns the concatenated results
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Take mean of multi series data set.

Args:
series: Series index.
xdata: 1D or 2D array of xdata from curve_fit_data or
multi_curve_fit_data
ydata: array of ydata returned from curve_fit_data or
multi_curve_fit_data
sigma: Optional, array of standard deviations in ydata.
shots: Optional, array of shots used to get a data point.
method: The method to use for computing y means and
standard deviations sigma (default: "sample").

Returns:
Tuple of (series, xdata, ydata, sigma, shots)

See also:
:py:func:`~qiskit_experiments.curve_analysis.data_processing.mean_xy_data`
"""
series_vals = np.unique(series)

series_means = []
xdata_means = []
ydata_means = []
sigma_means = []
shots_sums = []

# Get x, y, sigma data for series and process mean data
for series_val in series_vals:
idxs = series == series_val
sigma_i = sigma[idxs] if sigma is not None else None
x_mean, y_mean, sigma_mean = mean_xy_data(
xdata[idxs], ydata[idxs], sigma=sigma_i, method=method
shots_i = shots[idxs] if shots is not None else None

x_mean, y_mean, sigma_mean, shots_sum = mean_xy_data(
xdata[idxs], ydata[idxs], sigma=sigma_i, shots=shots_i, method=method
)
series_means.append(np.full(x_mean.size, series_val, dtype=int))
xdata_means.append(x_mean)
ydata_means.append(y_mean)
sigma_means.append(sigma_mean)
shots_sums.append(shots_sum)

# Concatenate lists
return (
np.concatenate(series_means),
np.concatenate(xdata_means),
np.concatenate(ydata_means),
np.concatenate(sigma_means),
np.concatenate(shots_sums),
)


def data_sort(
series: np.ndarray,
xdata: np.ndarray,
ydata: np.ndarray,
sigma: Optional[np.ndarray] = None,
shots: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Sort data.

Input x values may not be lined up in order, since experiment may accept user input array,
or data may be concatenated with previous scan. This sometimes confuses the algorithmic
generation of initial guesses especially when guess depends on derivative.

This returns data set that is sorted by xdata and series in ascending order.

Args:
series: Series index.
xdata: 1D or 2D array of xdata from curve_fit_data or
multi_curve_fit_data
ydata: array of ydata returned from curve_fit_data or
multi_curve_fit_data
sigma: Optional, array of standard deviations in ydata.
shots: Optional, array of shots used to get a data point.

Returns:
Tuple of (series, xdata, ydata, sigma, shots) sorted in ascending order of xdata and series.
"""
if sigma is None:
sigma = np.full(series.size, np.nan, dtype=float)

if shots is None:
shots = np.full(series.size, np.nan, dtype=float)

sorted_data = sorted(zip(series, xdata, ydata, sigma, shots), key=lambda d: (d[0], d[1]))

return np.asarray(sorted_data).T


def level2_probability(data: Dict[str, any], outcome: str) -> Tuple[float, float]:
"""Return the outcome probability mean and variance.

Expand Down
10 changes: 1 addition & 9 deletions qiskit_experiments/curve_analysis/guess.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from scipy import signal

from qiskit_experiments.exceptions import AnalysisError
from .data_processing import mean_xy_data


def frequency(
Expand All @@ -48,6 +47,7 @@ def frequency(

This function returns always positive frequency.
This function is sensitive to the DC offset.
This function assumes sorted, no-overlapping x values.

Args:
x: Array of x values.
Expand All @@ -58,14 +58,6 @@ def frequency(
Returns:
Frequency estimation of oscillation signal.
"""
# TODO averaging and sort should be done by data processor

# average the same data points
x, y, _ = mean_xy_data(x, y)

# sorted by x to be monotonic increase
x, y = np.asarray(sorted(zip(x, y), key=lambda xy: xy[0])).T

# to run FFT x interval should be identical
sampling_interval = np.unique(np.round(np.diff(x), decimals=20))

Expand Down
Loading