diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 157db0cb5d..d3be004436 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -134,8 +134,40 @@ jobs: python -m pip install cibuildwheel==2.2.2 - name: Build wheels env: - CIBW_BEFORE_ALL: "yum install -y yum-utils wget && wget https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-rhel6-10-1-local-10.1.243-418.87.00-1.0-1.x86_64.rpm && rpm -i cuda-repo-rhel6-10-1-local-10.1.243-418.87.00-1.0-1.x86_64.rpm && yum clean all && yum -y install cuda-10-1 openblas-devel" - CIBW_SKIP: "*-manylinux_i686 pp* *musllinux*" + CIBW_BEFORE_ALL: "yum install -y yum-utils wget && wget -q https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-rhel6-10-1-local-10.1.243-418.87.00-1.0-1.x86_64.rpm && rpm -i cuda-repo-rhel6-10-1-local-10.1.243-418.87.00-1.0-1.x86_64.rpm && yum clean all && yum -y install cuda-10-1 openblas-devel" + CIBW_SKIP: "*-manylinux_i686 cp310* pp* cp36* *musllinux*" + CIBW_ENVIRONMENT: QISKIT_AER_PACKAGE_NAME=qiskit-aer-gpu AER_THRUST_BACKEND=CUDA CUDACXX=/usr/local/cuda/bin/nvcc + run: | + python -m cibuildwheel --output-dir wheelhouse + - uses: actions/upload-artifact@v2 + with: + path: ./wheelhouse/*.whl + - name: Publish Wheels + env: + TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} + TWINE_USERNAME: qiskit + run : | + pip install -U twine + twine upload wheelhouse/* + gpu-build-310: + name: Build qiskit-aer-gpu wheels + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: '3.7' + - name: Add msbuild to PATH + uses: microsoft/setup-msbuild@v1.0.2 + if: runner.os == 'Windows' + - name: Install cibuildwheel + run: | + python -m pip install cibuildwheel==2.2.2 + - name: Build wheels + env: + CIBW_BEFORE_ALL: "yum install -y yum-utils wget && wget -q 
https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-rhel7-10-1-local-10.1.105-418.39-1.0-1.x86_64.rpm && rpm -i cuda-repo-rhel7-10-1-local-10.1.105-418.39-1.0-1.x86_64.rpm && yum clean all && yum -y install cuda-10-1 openblas-devel" + CIBW_BUILD: "cp310-manylinux_x86_64" CIBW_ENVIRONMENT: QISKIT_AER_PACKAGE_NAME=qiskit-aer-gpu AER_THRUST_BACKEND=CUDA CUDACXX=/usr/local/cuda/bin/nvcc run: | python -m cibuildwheel --output-dir wheelhouse diff --git a/CMakeLists.txt b/CMakeLists.txt index 4eeb296f69..8c7e0d296d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,7 +110,6 @@ if(STATIC_LINKING) endif() if(NOT MSVC) - enable_cxx_compiler_flag_if_supported("-ffast-math") if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") # PowerPC builds are not meant to be redistributable, we build them # in place, so we can have CPU = native. diff --git a/constraints.txt b/constraints.txt index 7934cfcbee..a0a43723c5 100644 --- a/constraints.txt +++ b/constraints.txt @@ -7,3 +7,8 @@ scipy>=1.0 # with modern importlib-metadata (4.8.1). importlib-metadata is only needed on # Python <3.8. importlib-metadata==4.6.4 + +# Jinja2 3.1.0 is incompatible with sphinx and/or jupyter until they are updated +# to work with the new jinja version (the jinja maintainers aren't going to +# fix things) pin to the previous working version. 
+jinja2==3.0.3 diff --git a/docs/conf.py b/docs/conf.py index 7bc02513e5..b8163803c0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -46,7 +46,7 @@ # The short X.Y version version = '' # The full version, including alpha/beta/rc tags -release = '0.10.3' +release = '0.10.4' # -- General configuration --------------------------------------------------- diff --git a/qiskit/providers/aer/VERSION.txt b/qiskit/providers/aer/VERSION.txt index a3f5a8ed4d..9b40aa6c21 100644 --- a/qiskit/providers/aer/VERSION.txt +++ b/qiskit/providers/aer/VERSION.txt @@ -1 +1 @@ -0.10.3 +0.10.4 diff --git a/qiskit/providers/aer/backends/aerbackend.py b/qiskit/providers/aer/backends/aerbackend.py index cc219e9fcb..2e802c66d1 100644 --- a/qiskit/providers/aer/backends/aerbackend.py +++ b/qiskit/providers/aer/backends/aerbackend.py @@ -288,9 +288,30 @@ def _run(self, qobj, job_id='', format_result=True): # Start timer start = time.time() + # Take metadata from headers of experiments to work around JSON serialization error + metadata_list = [] + metadata_index = 0 + for expr in qobj.experiments: + if hasattr(expr.header, "metadata"): + metadata_copy = expr.header.metadata.copy() + metadata_list.append(metadata_copy) + expr.header.metadata.clear() + if "id" in metadata_copy: + expr.header.metadata["id"] = metadata_copy["id"] + expr.header.metadata["metadata_index"] = metadata_index + metadata_index += 1 + # Run simulation output = self._execute(qobj) + # Recover metadata + metadata_index = 0 + for expr in qobj.experiments: + if hasattr(expr.header, "metadata"): + expr.header.metadata.clear() + expr.header.metadata.update(metadata_list[metadata_index]) + metadata_index += 1 + # Validate output if not isinstance(output, dict): logger.error("%s: simulation failed.", self.name()) @@ -305,6 +326,14 @@ def _run(self, qobj, job_id='', format_result=True): output["backend_name"] = self.name() output["backend_version"] = self.configuration().backend_version + # Push metadata to experiment headers + for 
result in output["results"]: + if ("header" in result and + "metadata" in result["header"] and + "metadata_index" in result["header"]["metadata"]): + metadata_index = result["header"]["metadata"]["metadata_index"] + result["header"]["metadata"] = metadata_list[metadata_index] + # Add execution time output["time_taken"] = time.time() - start diff --git a/qiskit/providers/aer/noise/device/models.py b/qiskit/providers/aer/noise/device/models.py index daa6c1ab96..c6ec388ec1 100644 --- a/qiskit/providers/aer/noise/device/models.py +++ b/qiskit/providers/aer/noise/device/models.py @@ -245,6 +245,7 @@ def _device_thermal_relaxation_error(qubits, error = None for qubit in qubits: t1, t2, freq = relax_params[qubit] + t2 = _truncate_t2_value(t1, t2) population = _excited_population(freq, temperature) if first: error = thermal_relaxation_error(t1, t2, gate_time, population) @@ -255,6 +256,17 @@ def _device_thermal_relaxation_error(qubits, return error +def _truncate_t2_value(t1, t2): + """Return t2 value truncated to 2 * t1 (for t2 > 2 * t1)""" + new_t2 = t2 + if t2 > 2 * t1: + new_t2 = 2 * t1 + warn("Device model returned an invalid T_2 relaxation time greater than" + f" the theoretical maximum value 2 * T_1 ({t2} > 2 * {t1})." 
+ " Truncating to maximum value.", UserWarning) + return new_t2 + + def _excited_population(freq, temperature): """Return excited state population""" population = 0 diff --git a/qiskit/providers/aer/noise/device/parameters.py b/qiskit/providers/aer/noise/device/parameters.py index ead1fa1836..7468e526d4 100644 --- a/qiskit/providers/aer/noise/device/parameters.py +++ b/qiskit/providers/aer/noise/device/parameters.py @@ -180,11 +180,6 @@ def thermal_relaxation_values(properties): # Convert to Gigahertz freq *= _GHZ_UNITS.get(freq_params.unit, 1) - # NOTE: T2 cannot be larger than 2 * T1 for a physical noise - # channel, however if a backend erroneously reports such a value we - # truncated it here: - t2 = min(2 * t1, t2) - values.append((t1, t2, freq)) return values diff --git a/qiskit/providers/aer/noise/noise_model.py b/qiskit/providers/aer/noise/noise_model.py index 6bcf58d265..bdbed55143 100644 --- a/qiskit/providers/aer/noise/noise_model.py +++ b/qiskit/providers/aer/noise/noise_model.py @@ -21,11 +21,10 @@ from numpy import ndarray from qiskit.circuit import Instruction, Delay -from qiskit.providers import BaseBackend, BackendV1, BackendV2 from qiskit.providers.exceptions import BackendPropertyError from qiskit.providers.models import BackendProperties from qiskit.transpiler import PassManager -from .device.models import _excited_population +from .device.models import _excited_population, _truncate_t2_value from .device.models import basic_device_gate_errors from .device.models import basic_device_readout_errors from .errors.quantum_error import QuantumError @@ -303,10 +302,14 @@ def from_backend(cls, backend, Raises: NoiseError: If the input backend is not valid. 
""" - if isinstance(backend, BackendV2): + backend_interface_version = getattr(backend, "version", None) + if not isinstance(backend_interface_version, int): + backend_interface_version = 0 + + if backend_interface_version == 2: raise NoiseError( "NoiseModel.from_backend does not currently support V2 Backends.") - if isinstance(backend, (BaseBackend, BackendV1)): + if backend_interface_version <= 1: properties = backend.properties() configuration = backend.configuration() basis_gates = configuration.basis_gates @@ -373,9 +376,11 @@ def from_backend(cls, backend, except BackendPropertyError: excited_state_populations = None try: + t1s = [properties.t1(q) for q in range(num_qubits)] + t2s = [properties.t2(q) for q in range(num_qubits)] delay_pass = RelaxationNoisePass( - t1s=[properties.t1(q) for q in range(num_qubits)], - t2s=[properties.t2(q) for q in range(num_qubits)], + t1s=t1s, + t2s=[_truncate_t2_value(t1, t2) for t1, t2 in zip(t1s, t2s)], dt=dt, op_types=Delay, excited_state_populations=excited_state_populations diff --git a/qiskit/providers/aer/pulse/system_models/pulse_system_model.py b/qiskit/providers/aer/pulse/system_models/pulse_system_model.py index 72efc860b0..67a0af9b59 100644 --- a/qiskit/providers/aer/pulse/system_models/pulse_system_model.py +++ b/qiskit/providers/aer/pulse/system_models/pulse_system_model.py @@ -17,7 +17,7 @@ from warnings import warn from collections import OrderedDict -from qiskit.providers import BaseBackend, Backend +from qiskit.providers import Backend from ...aererror import AerError from .hamiltonian_model import HamiltonianModel @@ -94,7 +94,7 @@ def from_backend(cls, backend, subsystem_list=None): AerError: If channel or u_channel_lo are invalid. 
""" - if not isinstance(backend, (BaseBackend, Backend)): + if not isinstance(backend, Backend): raise AerError("{} is not a Qiskit backend".format(backend)) # get relevant information from backend diff --git a/releasenotes/notes/density-multi-chunk-fix-e9effc67d0365418.yaml b/releasenotes/notes/density-multi-chunk-fix-e9effc67d0365418.yaml new file mode 100644 index 0000000000..0815925509 --- /dev/null +++ b/releasenotes/notes/density-multi-chunk-fix-e9effc67d0365418.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Fix cache blocking transpiler to recognize superop to be cache blocked. + This is a fix for + `issue 1479 <https://github.com/Qiskit/qiskit-aer/issues/1479>`__; + now density_matrix with noise models can be parallelized. + A new test, test_noise.TestNoise.test_kraus_gate_noise_on_QFT_cache_blocking, + is added to verify this issue. + This fix also includes a fix for + `issue 1483 <https://github.com/Qiskit/qiskit-aer/issues/1483>`__, + discovered by adding the new test case. + This fixes measure over chunks for statevector. diff --git a/releasenotes/notes/fix-invalid-t2-error-a3685e4a3ad0a1e7.yaml b/releasenotes/notes/fix-invalid-t2-error-a3685e4a3ad0a1e7.yaml new file mode 100644 index 0000000000..cb1abebbe7 --- /dev/null +++ b/releasenotes/notes/fix-invalid-t2-error-a3685e4a3ad0a1e7.yaml @@ -0,0 +1,12 @@ +--- +fixes: + - | + Fixes a bug in ``NoiseModel.from_backend()`` that raised an error when + a T2 value greater than 2 * T1 was supplied by the backend. + After this fix, the T2 value is truncated to 2 * T1 and + a user warning is issued when truncation occurs. + The bug was introduced in #1391; before that, ``NoiseModel.from_backend()`` had + silently truncated the T2 value to 2 * T1. + + See `Issue 1464 <https://github.com/Qiskit/qiskit-aer/issues/1464>`__ + for details. 
diff --git a/releasenotes/notes/fix-thrust-cpu-threads-67db86b2edcf06b3.yaml b/releasenotes/notes/fix-thrust-cpu-threads-67db86b2edcf06b3.yaml new file mode 100644 index 0000000000..5495540da3 --- /dev/null +++ b/releasenotes/notes/fix-thrust-cpu-threads-67db86b2edcf06b3.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + device=Thrust was very slow for small number of qubits because OpenMP + threading was always applied. This fix applies OpenMP threads as same + as device=CPU by using statevector_parallel_threshold. diff --git a/releasenotes/notes/no-fast-math-1de357a9650094f3.yaml b/releasenotes/notes/no-fast-math-1de357a9650094f3.yaml new file mode 100644 index 0000000000..829b474809 --- /dev/null +++ b/releasenotes/notes/no-fast-math-1de357a9650094f3.yaml @@ -0,0 +1,18 @@ +--- +upgrade: + - | + Qiskit Aer is no longer compiled with unsafe floating-point optimisations. + While most of the effects should have been localised to Qiskit Aer, some + aspects of subnormal handling may previously have been leaked into user code + by the library incorrectly setting the "flush to zero" mode. This will not + happen any more. +fixes: + - | + Qiskit Aer will no longer set the floating-point mode to "flush to zero" + when loaded. Downstream users may previously have seen warnings from Numpy + such as: + + The value of the smallest subnormal for type is zero. + + These will now no longer be emitted, and the floating-point handling will be + correct. diff --git a/releasenotes/notes/remove_circuit_metadata_from_qobj-324e7ea9b369ee67.yaml b/releasenotes/notes/remove_circuit_metadata_from_qobj-324e7ea9b369ee67.yaml new file mode 100644 index 0000000000..b5c0266890 --- /dev/null +++ b/releasenotes/notes/remove_circuit_metadata_from_qobj-324e7ea9b369ee67.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Fixed a potential issue with running simulations on circuits that have the + :attr:`.QuantumCircuit.metadata` attribute set. 
The :attr:`~.QuantumCircuit.metadata` + attribute can be any Python dictionary and previously qiskit-aer would attempt to + JSON serialize the contents of the attribute to process it with the rest + of the circuit input, even if the contents were not JSON serializable. This no longer + occurs as the :attr:`.QuantumCircuit.metadata` attribute is not used to run the + simulation so now the contents are not serialized and instead are directly attached + to the :class:`qiskit.result.Result` object without attempting to JSON serialize + the contents. + Fixed `#1435 <https://github.com/Qiskit/qiskit-aer/issues/1435>`__ diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp index 2f1f35639f..316effa8e4 100755 --- a/src/controllers/aer_controller.hpp +++ b/src/controllers/aer_controller.hpp @@ -1930,10 +1930,14 @@ bool Controller::validate_state(const state_t &state, const Circuit &circ, size_t required_mb = state.required_memory_mb(circ.num_qubits, circ.ops) / num_process_per_experiment_; size_t mem_size = (sim_device_ == Device::GPU) ? 
max_memory_mb_ + max_gpu_memory_mb_ : max_memory_mb_; memory_valid = (required_mb <= mem_size); - } - if (throw_except && !memory_valid) { - error_msg << "Insufficient memory to run circuit " << circ_name; - error_msg << " using the " << state.name() << " simulator."; + if (throw_except && !memory_valid) { + error_msg << "Insufficient memory to run circuit " << circ_name; + error_msg << " using the " << state.name() << " simulator."; + error_msg << " Required memory: " << required_mb << "M, max memory: " << max_memory_mb_ << "M"; + if (sim_device_ == Device::GPU) { + error_msg << " (Host) + " << max_gpu_memory_mb_ << "M (GPU)"; + } + } } if (noise_valid && circ_valid && memory_valid) { diff --git a/src/simulators/extended_stabilizer/ch_runner.hpp b/src/simulators/extended_stabilizer/ch_runner.hpp index 81303dd29d..b9303e727e 100644 --- a/src/simulators/extended_stabilizer/ch_runner.hpp +++ b/src/simulators/extended_stabilizer/ch_runner.hpp @@ -664,11 +664,7 @@ void Runner::metropolis_step(AER::RngEngine &rng) } complex_t ampsum(real_part, imag_part); double p_threshold = std::norm(ampsum)/std::norm(old_ampsum_); - #ifdef __FAST_MATH__ //isnan doesn't behave well under fastmath, so use absolute tolerance check instead - if(std::isinf(p_threshold) || std::abs(std::norm(old_ampsum_)-0.) 
< 1e-8) - #else if(std::isinf(p_threshold) || std::isnan(p_threshold)) - #endif { accept_ = true; old_ampsum_ = ampsum; diff --git a/src/simulators/statevector/chunk/chunk_container.hpp b/src/simulators/statevector/chunk/chunk_container.hpp index 5fd68798e4..69157fcb04 100644 --- a/src/simulators/statevector/chunk/chunk_container.hpp +++ b/src/simulators/statevector/chunk/chunk_container.hpp @@ -487,6 +487,10 @@ class ChunkContainer : public std::enable_shared_from_this& operator[](uint_t i) = 0; virtual uint_t Allocate(int idev,int chunk_bits,int num_qubits,uint_t chunks,uint_t buffers = AER_MAX_BUFFERS,bool multi_shots = false,int matrix_bit = AER_DEFAULT_MATRIX_BITS) = 0; @@ -820,7 +831,10 @@ void ChunkContainer::Execute(Function func,uint_t iChunk,uint_t count) #else uint_t size = count * func.size(chunk_bits_); auto ci = thrust::counting_iterator(0); - thrust::for_each_n(thrust::device, ci , size, func); + if(omp_threads_ > 1) + thrust::for_each_n(thrust::device, ci , size, func); + else + thrust::for_each_n(thrust::seq, ci , size, func); #endif } @@ -971,7 +985,10 @@ void ChunkContainer::ExecuteSum(double* pSum,Function func,uint_t iChunk auto ci = thrust::counting_iterator(0); double sum; - sum = thrust::transform_reduce(thrust::device, ci, ci + size, func,0.0,thrust::plus()); + if(omp_threads_ > 1) + sum = thrust::transform_reduce(thrust::device, ci, ci + size, func,0.0,thrust::plus()); + else + sum = thrust::transform_reduce(thrust::seq, ci, ci + size, func,0.0,thrust::plus()); if(count == 1 && pSum){ *pSum = sum; } @@ -1108,7 +1125,10 @@ void ChunkContainer::ExecuteSum2(double* pSum,Function func,uint_t iChun auto ci = thrust::counting_iterator(0); - ret = thrust::transform_reduce(thrust::device, ci, ci + size, func,zero,complex_sum()); + if(omp_threads_ > 1) + ret = thrust::transform_reduce(thrust::device, ci, ci + size, func,zero,complex_sum()); + else + ret = thrust::transform_reduce(thrust::seq, ci, ci + size, func,zero,complex_sum()); if(count == 1 
&& pSum){ *((thrust::complex*)pSum) = ret; diff --git a/src/simulators/statevector/chunk/device_chunk_container.hpp b/src/simulators/statevector/chunk/device_chunk_container.hpp index 34e92ab1c8..c9ce2c9a07 100644 --- a/src/simulators/statevector/chunk/device_chunk_container.hpp +++ b/src/simulators/statevector/chunk/device_chunk_container.hpp @@ -617,7 +617,10 @@ void DeviceChunkContainer::Zero(uint_t iChunk,uint_t count) #ifdef AER_THRUST_CUDA thrust::fill_n(thrust::cuda::par.on(stream_[iChunk]),data_.begin() + (iChunk << this->chunk_bits_),count,0.0); #else - thrust::fill_n(thrust::device,data_.begin() + (iChunk << this->chunk_bits_),count,0.0); + if(this->omp_threads_ > 1) + thrust::fill_n(thrust::device,data_.begin() + (iChunk << this->chunk_bits_),count,0.0); + else + thrust::fill_n(thrust::seq,data_.begin() + (iChunk << this->chunk_bits_),count,0.0); #endif } @@ -665,12 +668,22 @@ reg_t DeviceChunkContainer::sample_measure(uint_t iChunk,const std::vect cudaStreamSynchronize(stream_[iChunk]); #else - if(dot) - thrust::transform_inclusive_scan(thrust::device,iter.begin(),iter.end(),iter.begin(),complex_dot_scan(),thrust::plus>()); - else - thrust::inclusive_scan(thrust::device,iter.begin(),iter.end(),iter.begin(),thrust::plus>()); + if(this->omp_threads_ > 1){ + if(dot) + thrust::transform_inclusive_scan(thrust::device,iter.begin(),iter.end(),iter.begin(),complex_dot_scan(),thrust::plus>()); + else + thrust::inclusive_scan(thrust::device,iter.begin(),iter.end(),iter.begin(),thrust::plus>()); - thrust::lower_bound(thrust::device, iter.begin(), iter.end(), rnds.begin(), rnds.begin() + SHOTS, samples.begin() ,complex_less()); + thrust::lower_bound(thrust::device, iter.begin(), iter.end(), rnds.begin(), rnds.begin() + SHOTS, samples.begin() ,complex_less()); + } + else{ + if(dot) + thrust::transform_inclusive_scan(thrust::seq,iter.begin(),iter.end(),iter.begin(),complex_dot_scan(),thrust::plus>()); + else + 
thrust::inclusive_scan(thrust::seq,iter.begin(),iter.end(),iter.begin(),thrust::plus>()); + + thrust::lower_bound(thrust::seq, iter.begin(), iter.end(), rnds.begin(), rnds.begin() + SHOTS, samples.begin() ,complex_less()); + } #endif return samples; diff --git a/src/simulators/statevector/qubitvector.hpp b/src/simulators/statevector/qubitvector.hpp index 79fad5745b..79be2dc657 100755 --- a/src/simulators/statevector/qubitvector.hpp +++ b/src/simulators/statevector/qubitvector.hpp @@ -399,7 +399,7 @@ class QubitVector { // Get the sample_measure index size int get_sample_measure_index_size() {return sample_measure_index_size_;} - virtual bool enable_batch(bool flg) + virtual bool enable_batch(bool flg) const { return false; } diff --git a/src/simulators/statevector/qubitvector_thrust.hpp b/src/simulators/statevector/qubitvector_thrust.hpp index fdbbbbe2b5..c183ab9abf 100644 --- a/src/simulators/statevector/qubitvector_thrust.hpp +++ b/src/simulators/statevector/qubitvector_thrust.hpp @@ -34,6 +34,11 @@ #include "simulators/statevector/chunk/chunk_manager.hpp" +#ifdef _OPENMP +#include +#endif + + namespace AER { namespace QV { @@ -312,7 +317,7 @@ class QubitVectorThrust { #endif } - bool enable_batch(bool flg); + bool enable_batch(bool flg) const; virtual void apply_bfunc(const Operations::Op &op); virtual void set_conditional(int_t reg); @@ -450,7 +455,7 @@ class QubitVectorThrust { uint_t chunk_index_; bool multi_chunk_distribution_; bool multi_shots_; - bool enable_batch_; + mutable bool enable_batch_; bool register_blocking_; @@ -463,7 +468,7 @@ class QubitVectorThrust { // Config settings //----------------------------------------------------------------------- uint_t omp_threads_ = 1; // Disable multithreading by default - uint_t omp_threshold_ = 1; // Qubit threshold for multithreading when enabled + uint_t omp_threshold_ = 14; // Qubit threshold for multithreading when enabled int sample_measure_index_size_ = 1; // Sample measure indexing qubit size double 
json_chop_threshold_ = 0; // Threshold for choping small values // in JSON serialization @@ -1053,6 +1058,10 @@ void QubitVectorThrust::set_num_qubits(size_t num_qubits) register_blocking_ = false; + //set OpenMP threads for ThrustCPU + if(num_qubits_ > omp_threshold_ && omp_threads_ > 1) + chunk_.container()->set_omp_threads(omp_threads_); + #ifdef AER_DEBUG if(chunk_.pos() == 0){ spdlog::debug(" ==== Thrust qubit vector initialization {} qubits ====",num_qubits_); @@ -1274,7 +1283,7 @@ void QubitVectorThrust::set_conditional(int_t reg) } template -bool QubitVectorThrust::enable_batch(bool flg) +bool QubitVectorThrust::enable_batch(bool flg) const { bool prev = enable_batch_; @@ -1530,6 +1539,8 @@ void QubitVectorThrust::apply_function_sum(double* pSum,Function func,bo if(func.batch_enable() && ((multi_chunk_distribution_ && chunk_.device() >= 0 && num_qubits_ == num_qubits()) || (enable_batch_))){ if(chunk_.pos() != 0){ //only first chunk on device calculates all the chunks + if(pSum) + *pSum = 0.0; return; } count = chunk_.container()->num_chunks(); @@ -1555,6 +1566,10 @@ void QubitVectorThrust::apply_function_sum2(double* pSum,Function func,b if(func.batch_enable() && ((multi_chunk_distribution_ && chunk_.device() >= 0 && num_qubits_ == num_qubits()) || (enable_batch_))){ if(chunk_.pos() != 0){ //only first chunk on device calculates all the chunks + if(pSum){ + pSum[0] = 0.0; + pSum[1] = 0.0; + } return; } count = chunk_.container()->num_chunks(); @@ -1578,9 +1593,16 @@ void QubitVectorThrust::apply_function_sum2(double* pSum,Function func,b ******************************************************************************/ template -void QubitVectorThrust::set_omp_threads(int n) { +void QubitVectorThrust::set_omp_threads(int n) +{ if (n > 0) omp_threads_ = n; + +#ifdef _OPENMP + //disable nested parallel for ThrustCPU + if(omp_get_num_threads() > 1) + omp_threads_ = 1; +#endif } template @@ -3439,7 +3461,7 @@ double QubitVectorThrust::norm() const { double ret; 
#ifdef AER_THRUST_CUDA - if((multi_chunk_distribution_ && chunk_.device() >= 0) || enable_batch_){ + if(enable_batch_ && ((multi_chunk_distribution_ && chunk_.device() >= 0) || !multi_chunk_distribution_)){ if(chunk_.pos() != 0) return 0.0; //first chunk execute all in batch } diff --git a/src/simulators/statevector/statevector_state.hpp b/src/simulators/statevector/statevector_state.hpp index 5606be96e7..e9084b8713 100755 --- a/src/simulators/statevector/statevector_state.hpp +++ b/src/simulators/statevector/statevector_state.hpp @@ -1838,12 +1838,18 @@ std::vector State::sample_measure(const reg_t &qubits, //calculate per chunk sum if(BaseState::chunk_omp_parallel_){ #pragma omp parallel for if(BaseState::chunk_omp_parallel_) private(i) - for(i=0;i& //check if the operation can be reordered in front of waiting queue uint_t j,iq,jq; - //only gate and matrix can be reordered - if(op.type != Operations::OpType::gate && op.type != Operations::OpType::matrix && op.type != Operations::OpType::diagonal_matrix){ - //except for reset for density matrix - if(!density_matrix_ || op.type != Operations::OpType::reset){ - return false; - } - } + //only blockable ops can be reordered + if(!is_blockable_operation(op)) + return false; for(j=0;j& ops,std::vector mapped(block_bits_,false); - nq = blockedQubits.size(); - for(i=0;i mapped(block_bits_,false); + nq = blockedQubits.size(); + for(i=0;i block_bits_){ - throw std::runtime_error("CacheBlocking : Kraus operator, number of qubits should be smaller than chunk qubit size"); - break; - } + } + else{ + if(queue.size() == 0){ //if queue is empty, apply op here + bool restore_qubits = false; + if(ops[i].type == Operations::OpType::kraus){ + if(ops[i].qubits.size() > block_bits_){ + throw std::runtime_error("CacheBlocking : Kraus operator, number of qubits should be smaller than chunk qubit size"); + break; + } + if(!can_block(ops[i],blockedQubits)){ //if some qubits are out of chunk, queued for next step + queue.push_back(ops[i]); 
+ continue; + } + } + else if(ops[i].type == Operations::OpType::initialize){ + if(ops[i].qubits.size() <= block_bits_){ if(!can_block(ops[i],blockedQubits)){ //if some qubits are out of chunk, queued for next step queue.push_back(ops[i]); continue; } } - else if(ops[i].type == Operations::OpType::initialize){ - if(ops[i].qubits.size() <= block_bits_){ - if(!can_block(ops[i],blockedQubits)){ //if some qubits are out of chunk, queued for next step - queue.push_back(ops[i]); - continue; - } - } - //otherwise StateChunk have to parallelize initialize operation - } - else if(sample_measure_ && ops[i].type == Operations::OpType::measure){ - //currently sampling should be done with original qubit mapping (TO DO : sampling without inserting swaps) + //otherwise StateChunk have to parallelize initialize operation + } + else if(sample_measure_ && ops[i].type == Operations::OpType::measure){ + //currently sampling should be done with original qubit mapping (TO DO : sampling without inserting swaps) + restore_qubits = true; + } + else if(ops[i].type != Operations::OpType::measure && ops[i].type != Operations::OpType::reset && + ops[i].type != Operations::OpType::save_amps && ops[i].type != Operations::OpType::save_amps_sq && + ops[i].type != Operations::OpType::save_densmat){ + if(!(ops[i].type == Operations::OpType::snapshot && ops[i].name == "density_matrix")){ restore_qubits = true; } - else if(ops[i].type != Operations::OpType::measure && ops[i].type != Operations::OpType::reset && - ops[i].type != Operations::OpType::save_amps && ops[i].type != Operations::OpType::save_amps_sq && - ops[i].type != Operations::OpType::save_densmat){ - if(!(ops[i].type == Operations::OpType::snapshot && ops[i].name == "density_matrix")){ - restore_qubits = true; - } - } - - if(num_gates_added > 0 && !end_block_inserted){ //insert end of block to synchronize chunks - if(doSwap) - insert_sim_op(out,"end_blocking",blockedQubits); - else - insert_sim_op(out,"end_memory_blocking",blockedQubits); 
- } - else if(!end_block_inserted){ - out.pop_back(); - } - if(restore_qubits && doSwap) - restore_qubits_order(out); - - //mapping swapped qubits - if(doSwap){ - for(iq=0;iq 0){ - if(doSwap) - insert_sim_op(out,"end_blocking",blockedQubits); - else - insert_sim_op(out,"end_memory_blocking",blockedQubits); - } - else{ - //pop unnecessary operations - while(out.size() > pos_begin){ - out.pop_back(); - } - } - } -/* } - else{ - i = 0; - //add chunk swap and block ops (if blocking is enabled) - if(blocking_enabled_){ - while(i 0 && !end_block_inserted){ //insert end of block to synchronize chunks + if(doSwap) + insert_sim_op(out,"end_blocking",blockedQubits); + else + insert_sim_op(out,"end_memory_blocking",blockedQubits); } - else{ - break; + else if(!end_block_inserted){ + out.pop_back(); } - i++; - } - } + if(restore_qubits && doSwap) + restore_qubits_order(out); - insert_sim_op(out,"begin_register_blocking",blockedQubits); - //gather blocked gates - while(i < ops.size()){ - if(ops[i].type == Operations::OpType::gate || ops[i].type == Operations::OpType::matrix){ - if((ops[i].qubits.size() > 1 && ops[i].type == Operations::OpType::matrix) || ops[i].name == "pauli"){ - queue.push_back(ops[i]); - } - else{ - if(can_reorder(ops[i],queue)){ - if(is_diagonal_op(ops[i])){ - //diagonal gate can be applied - out.push_back(ops[i]); - num_gates_added++; - } - else{ - exist = false; - iq = ops[i].qubits[ops[i].qubits.size()-1]; //block target bit - nq = blockedQubits.size(); - for(j=0;j pos_begin + 1){ - out[pos_begin].qubits = blockedQubits; //store qubits to be blocked in the sim_op::begin_register_blocking - insert_sim_op(out,"end_register_blocking",blockedQubits); + if(!end_block_inserted){ + if(num_gates_added > 0){ + if(doSwap) + insert_sim_op(out,"end_blocking",blockedQubits); + else + insert_sim_op(out,"end_memory_blocking",blockedQubits); } else{ - out.pop_back(); + //pop unnecessary operations + while(out.size() > pos_begin){ + out.pop_back(); + } } - }*/ + } return 
num_gates_added; } diff --git a/test/terra/backends/aer_simulator/test_chunk.py b/test/terra/backends/aer_simulator/test_chunk.py index 1939818a7e..1ccaf35b85 100644 --- a/test/terra/backends/aer_simulator/test_chunk.py +++ b/test/terra/backends/aer_simulator/test_chunk.py @@ -88,6 +88,35 @@ def test_chunk_QuantumVolumeWithFusion(self, method, device): self.assertEqual(counts_no_chunk, counts) + @supported_methods(['statevector', 'density_matrix']) + def test_chunk_QFT(self, method, device): + """Test multi-chunk with QFT""" + opts_no_chunk = { + "fusion_enable": False, + "fusion_threshold": 10, + } + opts_chunk = copy.copy(opts_no_chunk) + opts_chunk["blocking_enable"] = True + opts_chunk["blocking_qubits"] = 2 + + backend = self.backend( + method=method, device=device, **opts_chunk) + backend_no_chunk = self.backend( + method=method, device=device, **opts_no_chunk) + + shots = 100 + num_qubits = 3 + circuit = transpile(QFT(num_qubits), backend=backend, + optimization_level=0) + circuit.measure_all() + + result = backend.run(circuit, shots=shots, memory=True).result() + counts = result.get_counts(circuit) + result_no_chunk = backend_no_chunk.run(circuit, shots=shots, memory=True).result() + counts_no_chunk = result_no_chunk.get_counts(circuit) + + self.assertEqual(counts_no_chunk, counts) + @supported_methods(['statevector', 'density_matrix']) def test_chunk_QFTWithFusion(self, method, device): """Test multi-chunk with fused QFT (testing multi-chunk diagonal matrix)""" diff --git a/test/terra/backends/aer_simulator/test_metadata.py b/test/terra/backends/aer_simulator/test_metadata.py new file mode 100644 index 0000000000..96902d322a --- /dev/null +++ b/test/terra/backends/aer_simulator/test_metadata.py @@ -0,0 +1,104 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2018, 2019, 2020, 2021, 2022. +# +# This code is licensed under the Apache License, Version 2.0. 
You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. +""" +AerSimulator Integration Tests +""" +from math import sqrt +from ddt import ddt +from qiskit import transpile, QuantumCircuit +from test.terra.reference import ref_algorithms + +from test.terra.backends.simulator_test_case import ( + SimulatorTestCase, supported_methods) + + +@ddt +class TestMetadata(SimulatorTestCase): + """AerSimulator algorithm tests in the default basis""" + + @supported_methods( + ['automatic', 'statevector', 'density_matrix', + 'matrix_product_state', 'extended_stabilizer']) + def test_single_circuit_metadata(self, method, device): + """Test circuits with object metadata.""" + backend = self.backend(method=method, device=device) + metadata = {1: object} + circuit = QuantumCircuit(1, name='circ0', metadata=metadata.copy()) + result = backend.run(circuit).result() + self.assertSuccess(result) + self.assertEqual(result.results[0].header.metadata, metadata) + self.assertEqual(circuit.metadata, metadata) + + @supported_methods( + ['automatic', 'statevector', 'density_matrix', + 'matrix_product_state', 'extended_stabilizer']) + def test_three_circuit_metadata(self, method, device): + """Test circuits with object metadata.""" + backend = self.backend(method=method, device=device) + + metadata0 = {0: object} + circuit0 = QuantumCircuit(1, name='circ0', metadata=metadata0.copy()) + + metadata1 = {1: object} + circuit1 = QuantumCircuit(1, name='circ1', metadata=metadata1.copy()) + + metadata2 = {2: object} + circuit2 = QuantumCircuit(1, name='circ2', metadata=metadata2.copy()) + + result = backend.run([circuit0, circuit1, circuit2]).result() + self.assertSuccess(result) + 
self.assertEqual(len(result.results), 3) + self.assertEqual(result.results[0].header.metadata, metadata0) + self.assertEqual(result.results[1].header.metadata, metadata1) + self.assertEqual(result.results[2].header.metadata, metadata2) + self.assertEqual(circuit0.metadata, metadata0) + self.assertEqual(circuit1.metadata, metadata1) + self.assertEqual(circuit2.metadata, metadata2) + + @supported_methods( + ['automatic', 'statevector', 'density_matrix', 'matrix_product_state']) + def test_three_parameterized_circuit_metadata(self, method, device): + """Test circuits with object metadata.""" + backend = self.backend(method=method, device=device) + + metadata0 = {0: object} + circuit0 = QuantumCircuit(1, name='circ0', metadata=metadata0.copy()) + circuit0.ry(0.1, 0) + circuit0.measure_all() + + metadata1 = {1: object} + circuit1 = QuantumCircuit(1, name='circ1', metadata=metadata1.copy()) + circuit1.ry(0.1, 0) + circuit1.measure_all() + + metadata2 = {2: object} + circuit2 = QuantumCircuit(1, name='circ2', metadata=metadata2.copy()) + circuit2.ry(0.1, 0) + circuit2.measure_all() + + parameterizations=[[[[0, 0], [0, 1]]], + [[[0, 0], [0, 1, 2]]], + []] + + result = backend.run([circuit0, circuit1, circuit2], + parameterizations=parameterizations).result() + self.assertSuccess(result) + self.assertEqual(len(result.results), 6) + self.assertEqual(result.results[0].header.metadata, metadata0) + self.assertEqual(result.results[1].header.metadata, metadata0) + self.assertEqual(result.results[2].header.metadata, metadata1) + self.assertEqual(result.results[3].header.metadata, metadata1) + self.assertEqual(result.results[4].header.metadata, metadata1) + self.assertEqual(result.results[5].header.metadata, metadata2) + self.assertEqual(circuit0.metadata, metadata0) + self.assertEqual(circuit1.metadata, metadata1) + self.assertEqual(circuit2.metadata, metadata2) diff --git a/test/terra/backends/aer_simulator/test_noise.py b/test/terra/backends/aer_simulator/test_noise.py index 
0c9e8f9d44..2549d22f2d 100644 --- a/test/terra/backends/aer_simulator/test_noise.py +++ b/test/terra/backends/aer_simulator/test_noise.py @@ -153,10 +153,7 @@ def test_kraus_gate_noise(self, method, device): self.assertSuccess(result) self.compare_counts(result, [circuit], [target], delta=0.05 * shots) - @supported_methods([ - 'automatic', 'statevector', 'density_matrix', 'matrix_product_state']) - def test_kraus_gate_noise_on_QFT(self, method, device): - """Test Kraus noise on a QFT circuit""" + def _test_kraus_gate_noise_on_QFT(self, **options): shots = 10000 # Build noise model @@ -166,8 +163,7 @@ def test_kraus_gate_noise_on_QFT(self, method, device): noise_model.add_all_qubit_quantum_error(error1, ['h']) noise_model.add_all_qubit_quantum_error(error2, ['cp', 'swap']) - backend = self.backend( - method=method, device=device, noise_model=noise_model) + backend = self.backend(**options, noise_model=noise_model) ideal_circuit = transpile(QFT(3), backend) # manaully build noise circuit @@ -188,6 +184,20 @@ def test_kraus_gate_noise_on_QFT(self, method, device): self.assertSuccess(result) self.compare_counts(result, [ideal_circuit], [ref_target], hex_counts=False, delta=0.1 * shots) + @supported_methods([ + 'automatic', 'statevector', 'density_matrix', 'matrix_product_state']) + def test_kraus_gate_noise_on_QFT(self, method, device): + """Test Kraus noise on a QFT circuit""" + self._test_kraus_gate_noise_on_QFT( + method=method, device=device) + + @supported_methods([ + 'statevector', 'density_matrix']) + def test_kraus_gate_noise_on_QFT_cache_blocking(self, method, device): + """Test Kraus noise on a QFT circuit with cache blocking""" + self._test_kraus_gate_noise_on_QFT( + method=method, device=device, blocking_qubits=2) + @supported_methods(ALL_METHODS) def test_clifford_circuit_noise(self, method, device): """Test simulation with mixed Clifford quantum errors in circuit.""" diff --git a/test/terra/backends/aer_simulator/test_options.py
b/test/terra/backends/aer_simulator/test_options.py index cf5e31ab8a..4400701e39 100644 --- a/test/terra/backends/aer_simulator/test_options.py +++ b/test/terra/backends/aer_simulator/test_options.py @@ -186,3 +186,31 @@ def test_mps_options(self): # Check that the approximated result is not identical to the exact # result, because that could mean there was actually no approximation self.assertLessEqual(state_fidelity(sv_left, sv_approx), 0.999) + + def test_statevector_memory(self): + """Test required memory is correctly checked in statevector""" + method = "statevector" + backend = self.backend(method=method) + + # attempt to simulate a circuit with too many qubits + n = 50 + circuit = QuantumCircuit(n) + for q in range(n): + circuit.h(q) + circuit.measure_all() + result = backend.run(circuit).result() + self.assertNotSuccess(result) + self.assertTrue('Insufficient memory' in result.results[0].status) + self.assertTrue('Required memory: {}'.format(2**(n-20)*16) in result.results[0].status) + + n = 30 + max_memory_mb = 16 + circuit = QuantumCircuit(n) + for q in range(n): + circuit.h(q) + circuit.measure_all() + result = backend.run(circuit, max_memory_mb=max_memory_mb).result() + self.assertNotSuccess(result) + self.assertTrue('Insufficient memory' in result.results[0].status) + self.assertTrue('Required memory: {}'.format(2**(n-20)*16) in result.results[0].status) + self.assertTrue('max memory: {}'.format(max_memory_mb) in result.results[0].status) diff --git a/test/terra/noise/test_noise_model.py b/test/terra/noise/test_noise_model.py index 24f05e1bfc..9abd2af487 100644 --- a/test/terra/noise/test_noise_model.py +++ b/test/terra/noise/test_noise_model.py @@ -19,17 +19,18 @@ import numpy as np from qiskit.providers.aer.backends import AerSimulator from qiskit.providers.aer.noise import NoiseModel -from qiskit.providers.aer.utils.noise_transformation import transform_noise_model +from qiskit.providers.aer.noise.device.models import _excited_population from 
qiskit.providers.aer.noise.errors.standard_errors import amplitude_damping_error from qiskit.providers.aer.noise.errors.standard_errors import kraus_error from qiskit.providers.aer.noise.errors.standard_errors import pauli_error from qiskit.providers.aer.noise.errors.standard_errors import reset_error -from test.terra.common import QiskitAerTestCase +from qiskit.providers.aer.noise.errors.standard_errors import thermal_relaxation_error +from qiskit.providers.aer.utils.noise_transformation import transform_noise_model from qiskit.circuit import QuantumRegister, ClassicalRegister, QuantumCircuit from qiskit.compiler import transpile -from qiskit.transpiler import TranspilerError from qiskit.test import mock +from test.terra.common import QiskitAerTestCase class TestNoiseModel(QiskitAerTestCase): @@ -229,6 +230,66 @@ def test_noise_model_from_mumbai(self): result = AerSimulator().run(circ, noise_model=noise_model).result() self.assertTrue(result.success) + def test_noise_model_from_invalid_t2_backend(self): + """Test that a user warning is issued when creating a noise model from an invalid T2 backend""" + from qiskit.providers.models.backendproperties import BackendProperties, Gate, Nduv + import datetime + + t1_ns, invalid_t2_ns = 75_1000, 200_1000 + u3_time_ns = 320 + frequency = 4919.96800692 + + class InvalidT2Fake1Q(mock.FakeBackend): + def __init__(self): + mock_time = datetime.datetime.now() + dt = 1.3333 + configuration = BackendProperties( + backend_name="invalid_t2", + backend_version="0.0.0", + num_qubits=1, + basis_gates=["u3"], + qubits=[ + [ + Nduv(date=mock_time, name="T1", unit="µs", value=t1_ns/1000), + Nduv(date=mock_time, name="T2", unit="µs", value=invalid_t2_ns/1000), + Nduv(date=mock_time, name="frequency", unit="MHz", value=frequency), + ], + ], + gates=[ + Gate( + gate="u3", + name="u3_0", + qubits=[0], + parameters=[ + Nduv(date=mock_time, name="gate_error", unit="", value=0.001), + Nduv(date=mock_time, name="gate_length", unit="ns", value=u3_time_ns), +
], + ), + ], + last_update_date=mock_time, + general=[], + ) + super().__init__(configuration) + + def defaults(self): + """defaults == configuration""" + return self._configuration + + def properties(self): + """properties == configuration""" + return self._configuration + + backend = InvalidT2Fake1Q() + with self.assertWarns(UserWarning): + noise_model = NoiseModel.from_backend(backend, gate_error=False) + expected = thermal_relaxation_error( + t1=t1_ns, + t2=2*t1_ns, + time=u3_time_ns, + excited_state_population=_excited_population(frequency, temperature=0) + ) + self.assertEqual(expected, noise_model._local_quantum_errors["u3"][(0, )]) + def test_transform_noise(self): org_error = reset_error(0.2) new_error = pauli_error([("I", 0.5), ("Z", 0.5)])