Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 42 additions & 9 deletions src/simulators/statevector/chunk/chunk_container.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ class ChunkContainer : public std::enable_shared_from_this<ChunkContainer<data_t

//allocate storage for chunk classes
void allocate_chunks(void);
void deallocate_chunks(void);
};

template <typename data_t>
Expand Down Expand Up @@ -705,21 +706,53 @@ template <typename data_t>
// Create the Chunk objects for every pool of this container.
//
// Chunk ids are assigned consecutively across the three pools:
//   chunks_      : [0, num_chunks_)
//   buffers_     : [num_chunks_, num_chunks_ + num_buffers_)
//   checkpoints_ : [num_chunks_ + num_buffers_,
//                   num_chunks_ + num_buffers_ + num_checkpoint_)
//
// A pool whose count is zero is left untouched (it is neither resized nor
// repopulated).
//
// Each Chunk receives this->shared_from_this(), so the container must
// already be owned by a std::shared_ptr when this function is called.
void ChunkContainer<data_t>::allocate_chunks(void)
{
  if(num_chunks_ > 0){
    chunks_.resize(num_chunks_);
    for(uint_t i=0;i<num_chunks_;i++){
      chunks_[i] = std::make_shared<Chunk<data_t>>(this->shared_from_this(),i);
    }
  }

  if(num_buffers_ > 0){
    buffers_.resize(num_buffers_);
    for(uint_t i=0;i<num_buffers_;i++){
      buffers_[i] = std::make_shared<Chunk<data_t>>(this->shared_from_this(),num_chunks_+i);
    }
  }

  if(num_checkpoint_ > 0){
    checkpoints_.resize(num_checkpoint_);
    for(uint_t i=0;i<num_checkpoint_;i++){
      checkpoints_[i] = std::make_shared<Chunk<data_t>>(this->shared_from_this(),num_chunks_+num_buffers_+i);
    }
  }
}

// Release every Chunk object held by this container.
//
// Clearing a vector destroys each std::shared_ptr it holds, so this drops
// the container's reference to every chunk, buffer and checkpoint object.
//
// The vectors are cleared directly instead of being indexed through the
// num_chunks_/num_buffers_/num_checkpoint_ counters: those counters are not
// reset here, so indexing by them would run past the end of an already
// emptied vector if this function were called a second time.
template <typename data_t>
void ChunkContainer<data_t>::deallocate_chunks(void)
{
  chunks_.clear();
  buffers_.clear();
  checkpoints_.clear();
}


//------------------------------------------------------------------------------
} // end namespace QV
} // end namespace AER
Expand Down
29 changes: 11 additions & 18 deletions src/simulators/statevector/chunk/chunk_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ ChunkManager<data_t>::ChunkManager()

#endif

chunks_.resize(num_places_*2 + 1);
chunks_.resize(num_places_*2 + 1,nullptr);

iplace_host_ = num_places_ ;

Expand Down Expand Up @@ -173,7 +173,6 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits,int nqubits,uint_t nchunks)
char* str;
bool multi_gpu = false;
bool hybrid = false;
uint_t num_checkpoint,total_checkpoint = 0;
bool multi_shot = false;

//--- for test
Expand Down Expand Up @@ -253,33 +252,25 @@ uint_t ChunkManager<data_t>::Allocate(int chunk_bits,int nqubits,uint_t nchunks)
nc /= 2;
}

num_checkpoint = nc;
chunks_[iDev] = std::make_shared<DeviceChunkContainer<data_t>>();

#ifdef AER_THRUST_CUDA
size_t freeMem,totalMem;
cudaSetDevice(iDev);
cudaMemGetInfo(&freeMem,&totalMem);
if(freeMem <= ( ((uint_t)sizeof(thrust::complex<data_t>) * (nc + num_buffers + num_checkpoint)) << chunk_bits_)){
num_checkpoint = 0;
}
#endif

total_checkpoint += num_checkpoint;
num_chunks_ += chunks_[iDev]->Allocate(iDev,chunk_bits,nc,num_buffers,num_checkpoint);
num_chunks_ += chunks_[iDev]->Allocate(iDev,chunk_bits,nc,num_buffers);
}
if(num_chunks_ < nchunks){
//rest of chunks are stored on host
chunks_[num_places_] = std::make_shared<HostChunkContainer<data_t>>();
chunks_[num_places_]->Allocate(-1,chunk_bits,nchunks-num_chunks_,AER_MAX_BUFFERS);
chunks_[num_places_]->Allocate(-1,chunk_bits,nchunks-num_chunks_,num_buffers);
num_places_ += 1;
num_chunks_ = nchunks;
}

//additional host buffer
iplace_host_ = num_places_;
chunks_[iplace_host_] = std::make_shared<HostChunkContainer<data_t>>();
#ifdef AER_DISABLE_GDR
chunks_[iplace_host_]->Allocate(-1,chunk_bits,0,AER_MAX_BUFFERS);
#else
chunks_[iplace_host_]->Allocate(-1,chunk_bits,0,0);
#endif
}
}

Expand All @@ -292,9 +283,11 @@ void ChunkManager<data_t>::Free(void)
int i;

for(i=0;i<chunks_.size();i++){
if(chunks_[i])
if(chunks_[i]){
chunks_[i]->Deallocate();
chunks_[i].reset();
chunks_[i].reset();
chunks_[i] = nullptr;
}
}

chunk_bits_ = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/simulators/statevector/chunk/device_chunk_container.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,8 @@ void DeviceChunkContainer<data_t>::Deallocate(void)
}
stream_.clear();
#endif

ChunkContainer<data_t>::deallocate_chunks();
}

template <typename data_t>
Expand Down
26 changes: 18 additions & 8 deletions src/simulators/statevector/chunk/host_chunk_container.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,16 @@ uint_t HostChunkContainer<data_t>::Allocate(int idev,int bits,uint_t chunks,uint
ChunkContainer<data_t>::num_buffers_ = buffers;
ChunkContainer<data_t>::num_checkpoint_ = checkpoint;
ChunkContainer<data_t>::num_chunks_ = nc;
data_.resize((nc + buffers + checkpoint) << bits);
matrix_.resize(nc + buffers);
params_.resize(nc + buffers);
if(nc + buffers + checkpoint > 0)
data_.resize((nc + buffers + checkpoint) << bits);
if(nc + buffers > 0){
matrix_.resize(nc + buffers);
params_.resize(nc + buffers);
}

//allocate chunk classes
ChunkContainer<data_t>::allocate_chunks();
if(nc + buffers + checkpoint > 0)
ChunkContainer<data_t>::allocate_chunks();

return nc;
}
Expand All @@ -147,17 +151,21 @@ uint_t HostChunkContainer<data_t>::Resize(uint_t chunks,uint_t buffers,uint_t ch
uint_t i;

if(chunks + buffers + checkpoint > this->num_chunks_ + this->num_buffers_ + this->num_checkpoint_){
data_.resize((chunks + buffers + checkpoint) << this->chunk_bits_);
matrix_.resize(chunks + buffers);
params_.resize(chunks + buffers);
if(chunks + buffers + checkpoint > 0)
data_.resize((chunks + buffers + checkpoint) << this->chunk_bits_);
if(chunks + buffers > 0){
matrix_.resize(chunks + buffers);
params_.resize(chunks + buffers);
}
}

this->num_chunks_ = chunks;
this->num_buffers_ = buffers;
this->num_checkpoint_ = checkpoint;

//allocate chunk classes
ChunkContainer<data_t>::allocate_chunks();
if(chunks + buffers + checkpoint > 0)
ChunkContainer<data_t>::allocate_chunks();

return chunks + buffers + checkpoint;
}
Expand All @@ -171,6 +179,8 @@ void HostChunkContainer<data_t>::Deallocate(void)
matrix_.shrink_to_fit();
params_.clear();
params_.shrink_to_fit();

ChunkContainer<data_t>::deallocate_chunks();
}


Expand Down
28 changes: 7 additions & 21 deletions src/simulators/statevector/qubitvector_thrust.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,10 @@ QubitVectorThrust<data_t>::QubitVectorThrust(size_t num_qubits) : num_qubits_(0)
chunk_ = nullptr;
chunk_index_ = 0;
multi_chunk_distribution_ = false;
buffer_chunk_ = nullptr;
checkpoint_ = nullptr;
send_chunk_ = nullptr;
recv_chunk_ = nullptr;

#ifdef AER_DEBUG
debug_count = 0;
Expand Down Expand Up @@ -914,31 +917,14 @@ std::complex<double> QubitVectorThrust<data_t>::inner_product() const
chunk_->set_device();

vec0 = (data_t*)chunk_->pointer();
vec1 = (data_t*)checkpoint_->pointer();
#ifdef AER_THRUST_CUDA
cudaStream_t strm = chunk_->stream();
if(strm){
if(chunk_->device() == checkpoint_->device()){
vec1 = (data_t*)checkpoint_->pointer();

dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0);
}
else{
std::shared_ptr<Chunk<data_t>> pBuffer = chunk_manager_.MapBufferChunk(chunk_->place());
pBuffer->CopyIn(checkpoint_);
vec1 = (data_t*)pBuffer->pointer();

dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0);
chunk_manager_.UnmapBufferChunk(pBuffer);
}
}
else{
vec1 = (data_t*)checkpoint_->pointer();

if(strm)
dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0);
else
dot = thrust::inner_product(thrust::omp::par,vec0,vec0 + data_size_*2,vec1,0.0);
}
#else
vec1 = (data_t*)checkpoint_->pointer();

if(num_qubits_ > omp_threshold_ && omp_threads_ > 1)
dot = thrust::inner_product(thrust::device,vec0,vec0 + data_size_*2,vec1,0.0);
else
Expand Down