diff --git a/src/simulators/statevector/chunk/chunk_container.hpp b/src/simulators/statevector/chunk/chunk_container.hpp index b024313ad2..00e832902a 100644 --- a/src/simulators/statevector/chunk/chunk_container.hpp +++ b/src/simulators/statevector/chunk/chunk_container.hpp @@ -487,6 +487,7 @@ class ChunkContainer : public std::enable_shared_from_this @@ -705,21 +706,53 @@ template void ChunkContainer::allocate_chunks(void) { uint_t i; - chunks_.resize(num_chunks_); - buffers_.resize(num_buffers_); - checkpoints_.resize(num_checkpoint_); - for(i=0;i>(this->shared_from_this(),i); + if(num_chunks_ > 0){ + chunks_.resize(num_chunks_); + for(i=0;i>(this->shared_from_this(),i); + } + } + if(num_buffers_ > 0){ + buffers_.resize(num_buffers_); + for(i=0;i>(this->shared_from_this(),num_chunks_+i); + } + } + if(num_checkpoint_ > 0){ + checkpoints_.resize(num_checkpoint_); + for(i=0;i>(this->shared_from_this(),num_chunks_+num_buffers_+i); + } + } +} + +template +void ChunkContainer::deallocate_chunks(void) +{ + uint_t i; + + if(num_chunks_ > 0){ + for(i=0;i>(this->shared_from_this(),num_chunks_+i); + if(num_buffers_ > 0){ + for(i=0;i>(this->shared_from_this(),num_chunks_+num_buffers_+i); + if(num_checkpoint_ > 0){ + for(i=0;i::ChunkManager() #endif - chunks_.resize(num_places_*2 + 1); + chunks_.resize(num_places_*2 + 1,nullptr); iplace_host_ = num_places_ ; @@ -173,7 +173,6 @@ uint_t ChunkManager::Allocate(int chunk_bits,int nqubits,uint_t nchunks) char* str; bool multi_gpu = false; bool hybrid = false; - uint_t num_checkpoint,total_checkpoint = 0; bool multi_shot = false; //--- for test @@ -253,25 +252,13 @@ uint_t ChunkManager::Allocate(int chunk_bits,int nqubits,uint_t nchunks) nc /= 2; } - num_checkpoint = nc; chunks_[iDev] = std::make_shared>(); - -#ifdef AER_THRUST_CUDA - size_t freeMem,totalMem; - cudaSetDevice(iDev); - cudaMemGetInfo(&freeMem,&totalMem); - if(freeMem <= ( ((uint_t)sizeof(thrust::complex) * (nc + num_buffers + num_checkpoint)) << chunk_bits_)){ - num_checkpoint = 0; - } -#endif - - total_checkpoint += num_checkpoint; - num_chunks_ += chunks_[iDev]->Allocate(iDev,chunk_bits,nc,num_buffers,num_checkpoint); + num_chunks_ += chunks_[iDev]->Allocate(iDev,chunk_bits,nc,num_buffers); } if(num_chunks_ < nchunks){ //rest of chunks are stored on host chunks_[num_places_] = std::make_shared>(); - chunks_[num_places_]->Allocate(-1,chunk_bits,nchunks-num_chunks_,AER_MAX_BUFFERS); + chunks_[num_places_]->Allocate(-1,chunk_bits,nchunks-num_chunks_,num_buffers); num_places_ += 1; num_chunks_ = nchunks; } @@ -279,7 +266,11 @@ uint_t ChunkManager::Allocate(int chunk_bits,int nqubits,uint_t nchunks) //additional host buffer iplace_host_ = num_places_; chunks_[iplace_host_] = std::make_shared>(); +#ifdef AER_DISABLE_GDR chunks_[iplace_host_]->Allocate(-1,chunk_bits,0,AER_MAX_BUFFERS); +#else + chunks_[iplace_host_]->Allocate(-1,chunk_bits,0,0); +#endif } } @@ -292,9 +283,11 @@ void ChunkManager::Free(void) int i; for(i=0;iDeallocate(); - chunks_[i].reset(); + chunks_[i].reset(); + chunks_[i] = nullptr; + } } chunk_bits_ = 0; diff --git a/src/simulators/statevector/chunk/device_chunk_container.hpp b/src/simulators/statevector/chunk/device_chunk_container.hpp index 8fe2ba9250..ad1ea13f69 100644 --- a/src/simulators/statevector/chunk/device_chunk_container.hpp +++ b/src/simulators/statevector/chunk/device_chunk_container.hpp @@ -373,6 +373,8 @@ void DeviceChunkContainer::Deallocate(void) } stream_.clear(); #endif + + ChunkContainer::deallocate_chunks(); } template diff --git a/src/simulators/statevector/chunk/host_chunk_container.hpp b/src/simulators/statevector/chunk/host_chunk_container.hpp index a6b32d1375..f00dee8195 100644 --- a/src/simulators/statevector/chunk/host_chunk_container.hpp +++ b/src/simulators/statevector/chunk/host_chunk_container.hpp @@ -131,12 +131,16 @@ uint_t HostChunkContainer::Allocate(int idev,int bits,uint_t chunks,uint ChunkContainer::num_buffers_ = buffers; ChunkContainer::num_checkpoint_ = checkpoint; ChunkContainer::num_chunks_ = nc; - data_.resize((nc + buffers + checkpoint) << bits); - matrix_.resize(nc + buffers); - params_.resize(nc + buffers); + if(nc + buffers + checkpoint > 0) + data_.resize((nc + buffers + checkpoint) << bits); + if(nc + buffers > 0){ + matrix_.resize(nc + buffers); + params_.resize(nc + buffers); + } //allocate chunk classes - ChunkContainer::allocate_chunks(); + if(nc + buffers + checkpoint > 0) + ChunkContainer::allocate_chunks(); return nc; } @@ -147,9 +151,12 @@ uint_t HostChunkContainer::Resize(uint_t chunks,uint_t buffers,uint_t ch uint_t i; if(chunks + buffers + checkpoint > this->num_chunks_ + this->num_buffers_ + this->num_checkpoint_){ - data_.resize((chunks + buffers + checkpoint) << this->chunk_bits_); - matrix_.resize(chunks + buffers); - params_.resize(chunks + buffers); + if(chunks + buffers + checkpoint > 0) + data_.resize((chunks + buffers + checkpoint) << this->chunk_bits_); + if(chunks + buffers > 0){ + matrix_.resize(chunks + buffers); + params_.resize(chunks + buffers); + } } this->num_chunks_ = chunks; @@ -157,7 +164,8 @@ uint_t HostChunkContainer::Resize(uint_t chunks,uint_t buffers,uint_t ch this->num_checkpoint_ = checkpoint; //allocate chunk classes - ChunkContainer::allocate_chunks(); + if(chunks + buffers + checkpoint > 0) + ChunkContainer::allocate_chunks(); return chunks + buffers + checkpoint; } @@ -171,6 +179,8 @@ void HostChunkContainer::Deallocate(void) matrix_.shrink_to_fit(); params_.clear(); params_.shrink_to_fit(); + + ChunkContainer::deallocate_chunks(); } diff --git a/src/simulators/statevector/qubitvector_thrust.hpp b/src/simulators/statevector/qubitvector_thrust.hpp index c99cb24cc0..a760175fe0 100644 --- a/src/simulators/statevector/qubitvector_thrust.hpp +++ b/src/simulators/statevector/qubitvector_thrust.hpp @@ -517,7 +517,10 @@ QubitVectorThrust::QubitVectorThrust(size_t num_qubits) : num_qubits_(0) chunk_ = nullptr; chunk_index_ = 0; multi_chunk_distribution_ = false; + buffer_chunk_ = nullptr; checkpoint_ = nullptr; + send_chunk_ = nullptr; + recv_chunk_ = nullptr; #ifdef AER_DEBUG debug_count = 0; @@ -914,31 +917,14 @@ std::complex QubitVectorThrust::inner_product() const chunk_->set_device(); vec0 = (data_t*)chunk_->pointer(); + vec1 = (data_t*)checkpoint_->pointer(); #ifdef AER_THRUST_CUDA cudaStream_t strm = chunk_->stream(); - if(strm){ - if(chunk_->device() == checkpoint_->device()){ - vec1 = (data_t*)checkpoint_->pointer(); - - dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); - } - else{ - std::shared_ptr> pBuffer = chunk_manager_.MapBufferChunk(chunk_->place()); - pBuffer->CopyIn(checkpoint_); - vec1 = (data_t*)pBuffer->pointer(); - - dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); - chunk_manager_.UnmapBufferChunk(pBuffer); - } - } - else{ - vec1 = (data_t*)checkpoint_->pointer(); - + if(strm) + dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); + else dot = thrust::inner_product(thrust::omp::par,vec0,vec0 + data_size_*2,vec1,0.0); - } #else - vec1 = (data_t*)checkpoint_->pointer(); - if(num_qubits_ > omp_threshold_ && omp_threads_ > 1) dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0); else