Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

using unified external error message for cufft api #36114

Merged
merged 1 commit into from
Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ if(WITH_GPU)
include(external/cub) # download cub
list(APPEND third_party_deps extern_cub)
endif()
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 061f3b7895aadcbe2c3ed592590f8b10) # download file externalErrorMsg.tar.gz
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa) # download file externalErrorMsg.tar.gz
if(WITH_TESTING)
# copy externalErrorMsg.pb, just for unittest can get error message correctly.
set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
Expand Down
5 changes: 2 additions & 3 deletions paddle/fluid/operators/spectral_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,7 @@ static inline std::string get_cufft_error_info(cufftResult error) {
}

static inline void CUFFT_CHECK(cufftResult error) {
if (error != CUFFT_SUCCESS) {
PADDLE_THROW(platform::errors::External(get_cufft_error_info(error)));
}
PADDLE_ENFORCE_CUDA_SUCCESS(error);
}

// This struct is used to easily compute hashes of the
Expand Down Expand Up @@ -413,6 +411,7 @@ void exec_fft(const DeviceContext& ctx, const Tensor* X, Tensor* out,
? framework::ToRealType(input.type())
: input.type();
auto fft_type = GetFFTTransformType(input.type(), output.type());

PlanKey Key(framework::vectorize(input.dims()),
framework::vectorize(output.dims()), signal_size, fft_type,
value_type);
Expand Down
14 changes: 14 additions & 0 deletions paddle/fluid/platform/enforce.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <cublas_v2.h>
#include <cudnn.h>
#include <cufft.h>
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
Expand Down Expand Up @@ -714,6 +715,7 @@ DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS, CURAND);
DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS, CUDNN);
DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS, CUBLAS);
DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS, CUSOLVER);
DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS, CUFFT);

#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess, NCCL);
Expand Down Expand Up @@ -751,6 +753,8 @@ inline const char* GetErrorMsgUrl(T status) {
return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/"
"types.html#ncclresult-t";
break;
case platform::proto::ApiType::CUFFT:
return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult";
default:
return "Unknown type of External API, can't get error message URL!";
break;
Expand Down Expand Up @@ -839,6 +843,7 @@ template std::string GetExternalErrorMsg<curandStatus_t>(curandStatus_t);
template std::string GetExternalErrorMsg<cudnnStatus_t>(cudnnStatus_t);
template std::string GetExternalErrorMsg<cublasStatus_t>(cublasStatus_t);
template std::string GetExternalErrorMsg<cusolverStatus_t>(cusolverStatus_t);
template std::string GetExternalErrorMsg<cufftResult_t>(cufftResult_t);
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
template std::string GetExternalErrorMsg<ncclResult_t>(ncclResult_t);
#endif
Expand Down Expand Up @@ -899,6 +904,15 @@ inline std::string build_nvidia_error_msg(cusolverStatus_t stat) {
return sout.str();
}

/*************** CUFFT ERROR ***************/
inline bool is_error(cufftResult_t stat) { return stat != CUFFT_SUCCESS; }

inline std::string build_nvidia_error_msg(cufftResult_t stat) {
std::ostringstream sout;
sout << "CUFFT error(" << stat << "). " << GetExternalErrorMsg(stat);
return sout.str();
}

/**************** NCCL ERROR ****************/
#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
inline bool is_error(ncclResult_t nccl_result) {
Expand Down
22 changes: 21 additions & 1 deletion paddle/fluid/platform/enforce_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/enforce.h"

#include <list>

#include "gtest/gtest.h"
#include "paddle/fluid/platform/enforce.h"

TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, paddle::platform::errors::Unavailable(
Expand Down Expand Up @@ -418,6 +419,25 @@ TEST(enforce, cuda_success) {
"negative vector size, for example).To correct: ensure that all the "
"parameters being passed have valid values"));

EXPECT_TRUE(CheckCudaStatusSuccess(CUFFT_SUCCESS));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_PLAN, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_ALLOC_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_TYPE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_VALUE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INTERNAL_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_EXEC_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_SETUP_FAILED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_SIZE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_UNALIGNED_DATA, "CUFFT error"));
EXPECT_TRUE(
CheckCudaStatusFailure(CUFFT_INCOMPLETE_PARAMETER_LIST, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_DEVICE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_PARSE_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NO_WORKSPACE, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_IMPLEMENTED, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_LICENSE_ERROR, "CUFFT error"));
EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_SUPPORTED, "CUFFT error"));

#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
EXPECT_TRUE(CheckCudaStatusSuccess(ncclSuccess));
EXPECT_TRUE(CheckCudaStatusFailure(ncclUnhandledCudaError, "NCCL error"));
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/platform/external_error.proto
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum ApiType {
CUBLAS = 3;
CUSOLVER = 4;
NCCL = 5;
CUFFT = 6;
}

message MessageDesc {
Expand Down
30 changes: 23 additions & 7 deletions tools/externalError/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
Usage:
#### **Introduction for crawling new error message:**

Please run:
```
bash start.sh
```

If you want to update all external error message, you need to run command `bash start.sh` in current directory,
and upload the generated file `externalErrorMsg.tar.gz` to https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz

1. add new spider code in spider.py for crawling error message from website.

2. run `bash start.sh` in current directory to generate new externalErrorMsg_${date}.tar.gz file, for example `externalErrorMsg_20210928.tar.gz`.

3. upload above tar file into bos https://paddlepaddledeps.bj.bcebos.com **paddlepaddledeps** bucket, and copy download link `${download_url}`. ***\*Be careful not to delete original tar file\****.

4. compute md5 value of above tar file `${md5}`, and modify cmake/third_party.cmake file

```
set(URL "${download_url}" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 ${md5})
```

for example:

```
set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE)
file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa)
```

5. commit your changes, and create pull request.
29 changes: 28 additions & 1 deletion tools/externalError/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
import urllib.request
import json
import collections
import sys, getopt
import sys
import getopt
import external_error_pb2
from html.parser import HTMLParser


def parsing(externalErrorDesc):
Expand Down Expand Up @@ -335,6 +337,31 @@ def parsing(externalErrorDesc):
_Messages.message = "'%s'. %s" % (error[0], m_message)
print("End crawling errorMessage for nvidia NCCL API!\n")

#*************************************************************************************************#
#*********************************** CUFFT Error Message **************************************#
print("start crawling errorMessage for nvidia CUFFT API--->")
url = 'https://docs.nvidia.com/cuda/cufft/index.html#cufftresult'

allMessageDesc = externalErrorDesc.errors.add()
allMessageDesc.type = external_error_pb2.CUFFT

html = urllib.request.urlopen(url).read().decode('utf-8')

class CUFFTHTMLParser(HTMLParser):
'''CUFFTHTML Parser
'''

def handle_data(self, data):
if 'typedef enum cufftResult_t' in data:
for line in data.strip().splitlines()[1:-1]:
status, code, desc = re.split('=|//', line.strip())
_Messages = allMessageDesc.messages.add()
_Messages.code = int(code.strip(' ,'))
_Messages.message = "'%s'. %s" % (status.strip(),
desc.strip())

CUFFTHTMLParser().feed(html)


def main(argv):
try:
Expand Down
2 changes: 1 addition & 1 deletion tools/externalError/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ fi
protobuf/bin/protoc -I../../paddle/fluid/platform/ --python_out . ../../paddle/fluid/platform/external_error.proto

python3.7 spider.py
tar czvf externalErrorMsg.tar.gz externalErrorMsg.pb
tar czvf externalErrorMsg_$(date +'%Y%m%d').tar.gz externalErrorMsg.pb