From d0e559729f87c8e3abf4b3c24183d0523ba13baa Mon Sep 17 00:00:00 2001
From: Yaoyi Chen <29800192+yaoyic@users.noreply.github.com>
Date: Wed, 30 Sep 2020 10:16:49 +0200
Subject: [PATCH 1/3] Synchronize CUDA context before force copying.

---
 platforms/cuda/src/CudaTorchKernels.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/platforms/cuda/src/CudaTorchKernels.cpp b/platforms/cuda/src/CudaTorchKernels.cpp
index c0a891b5..aedd1fe3 100644
--- a/platforms/cuda/src/CudaTorchKernels.cpp
+++ b/platforms/cuda/src/CudaTorchKernels.cpp
@@ -33,6 +33,7 @@
 #include "CudaTorchKernelSources.h"
 #include "openmm/internal/ContextImpl.h"
 #include <map>
+#include <cuda_runtime_api.h>
 using namespace TorchPlugin;
 using namespace OpenMM;
 using namespace std;
@@ -85,6 +86,8 @@ double CudaCalcTorchForceKernel::execute(ContextImpl& context, bool includeForce
         energyTensor.backward();
         // Note: "forceTensor" needs to be cloned due to a shared context (https://github.com/openmm/openmm-torch/issues/13)
         torch::Tensor forceTensor = posTensor.grad().clone();
+        // make sure that all calculations on the PyTorch side are properly finished before changing the CUDA context or starting the `addForcesKernel` of this plugin
+        cudaDeviceSynchronize();
         cu.setAsCurrent();
         void* data;
         if (cu.getUseDoublePrecision()) {

From 7211b1b08965c95527a0803052e9d8168a2a1360 Mon Sep 17 00:00:00 2001
From: Yaoyi Chen <29800192+yaoyic@users.noreply.github.com>
Date: Thu, 1 Oct 2020 12:29:28 +0200
Subject: [PATCH 2/3] Changing synchronizing strategy before retrieving force:
 whole device -> current context.

---
 platforms/cuda/src/CudaTorchKernels.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/platforms/cuda/src/CudaTorchKernels.cpp b/platforms/cuda/src/CudaTorchKernels.cpp
index aedd1fe3..bf680322 100644
--- a/platforms/cuda/src/CudaTorchKernels.cpp
+++ b/platforms/cuda/src/CudaTorchKernels.cpp
@@ -39,6 +39,15 @@
 using namespace TorchPlugin;
 using namespace OpenMM;
 using namespace std;
+// macro for checking the result of a synchronization operation on CUDA
+// copied from `openmm/platforms/cuda/src/CudaParallelKernels.cpp`
+#define CHECK_RESULT(result, prefix) \
+    if (result != CUDA_SUCCESS) { \
+        std::stringstream m; \
+        m<<prefix<<": "<<cu.getErrorString(result); \
+        throw OpenMMException(m.str()); \
+    }
[...]

From [...] Mon Sep 17 00:00:00 2001
From: Yaoyi Chen <29800192+yaoyic@users.noreply.github.com>
Date: Tue, 6 Oct 2020 09:15:45 +0200
Subject: [PATCH 3/3] Adding a synchronization before starting PyTorch
 operations.

---
 platforms/cuda/src/CudaTorchKernels.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/platforms/cuda/src/CudaTorchKernels.cpp b/platforms/cuda/src/CudaTorchKernels.cpp
index bf680322..3b5ed150 100644
--- a/platforms/cuda/src/CudaTorchKernels.cpp
+++ b/platforms/cuda/src/CudaTorchKernels.cpp
@@ -90,6 +90,8 @@ double CudaCalcTorchForceKernel::execute(ContextImpl& context, bool includeForce
     vector<torch::jit::IValue> inputs = {posTensor};
     if (usePeriodic)
         inputs.push_back(boxTensor);
+    // synchronizing the current context before switching to PyTorch
+    CHECK_RESULT(cuCtxSynchronize(), "Error synchronizing CUDA context");
     torch::Tensor energyTensor = module.forward(inputs).toTensor();
     if (includeForces) {
         energyTensor.backward();
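
Read together, the three patches bracket the PyTorch work with two waits on the shared CUDA context. The tail of PATCH 2/3 is truncated above, so the final form of the post-backward wait is inferred from that patch's subject line ("whole device -> current context"); assuming the cudaDeviceSynchronize() from PATCH 1/3 became the same CHECK_RESULT(cuCtxSynchronize(), ...) call, execute() ends up ordered like this (an outline, not the actual file):

    CHECK_RESULT(cuCtxSynchronize(), ...);  // fence 1 (PATCH 3/3): OpenMM's kernels are finished
    module.forward(inputs);                 // PyTorch queues its kernels asynchronously
    energyTensor.backward();
    posTensor.grad().clone();
    CHECK_RESULT(cuCtxSynchronize(), ...);  // fence 2 (PATCHes 1/3 + 2/3): PyTorch's kernels are finished
    cu.setAsCurrent();                      // now safe to switch contexts and launch addForcesKernel

Without the fences, kernels queued by OpenMM and by PyTorch into the shared context are not ordered with respect to one another (they may run on different streams), so the force copy could read gradients that are not ready yet.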
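
For reference outside the OpenMM tree, the same error-checked, context-scoped wait can be reproduced with nothing but the CUDA driver API. Below is a minimal sketch, not the plugin's code: checkCu and syncCurrentContext are hypothetical names, and it substitutes the driver API's cuGetErrorString() and std::runtime_error for OpenMM's cu.getErrorString() helper and OpenMMException.

    #include <cuda.h>      // CUDA driver API: cuInit, cuCtxSynchronize, cuGetErrorString
    #include <sstream>
    #include <stdexcept>
    #include <iostream>

    // Stand-in for the CHECK_RESULT macro above. A function evaluates its
    // argument exactly once, so passing a call like cuCtxSynchronize()
    // directly is safe even on the error path.
    static void checkCu(CUresult result, const char* prefix) {
        if (result != CUDA_SUCCESS) {
            const char* msg = nullptr;
            cuGetErrorString(result, &msg);
            std::stringstream m;
            m << prefix << ": " << (msg != nullptr ? msg : "unknown CUDA error");
            throw std::runtime_error(m.str());
        }
    }

    // Waits only for work queued in the *current* CUDA context, the narrower
    // wait the series moves to, whereas cudaDeviceSynchronize() from
    // PATCH 1/3 blocks until the whole device is idle.
    static void syncCurrentContext() {
        checkCu(cuCtxSynchronize(), "Error synchronizing CUDA context");
    }

    int main() {
        checkCu(cuInit(0), "Error initializing CUDA");
        CUdevice device;
        checkCu(cuDeviceGet(&device, 0), "Error getting CUDA device");
        CUcontext context;
        checkCu(cuCtxCreate(&context, 0, device), "Error creating CUDA context");
        // ... queue asynchronous work in this context here ...
        syncCurrentContext();   // returns once everything queued above is done
        checkCu(cuCtxDestroy(context), "Error destroying CUDA context");
        std::cout << "context synchronized cleanly" << std::endl;
        return 0;
    }

It builds against the driver library alone (e.g. g++ sync_demo.cpp -lcuda, include paths permitting; the file name is arbitrary).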