From 57bf367b3f5489db1e586d3f7ba36d1e8a403421 Mon Sep 17 00:00:00 2001 From: Xiaoxu Chen Date: Sun, 26 Sep 2021 10:16:12 +0000 Subject: [PATCH] use unified external error message for cufft api --- cmake/third_party.cmake | 4 +-- paddle/fluid/operators/spectral_op.cu | 5 ++-- paddle/fluid/platform/enforce.h | 14 ++++++++++ paddle/fluid/platform/enforce_test.cc | 22 +++++++++++++++- paddle/fluid/platform/external_error.proto | 1 + tools/externalError/README.md | 30 +++++++++++++++++----- tools/externalError/spider.py | 29 ++++++++++++++++++++- tools/externalError/start.sh | 2 +- 8 files changed, 92 insertions(+), 15 deletions(-) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 892ae270267a7..b3260ba27b072 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -251,8 +251,8 @@ if(WITH_GPU) include(external/cub) # download cub list(APPEND third_party_deps extern_cub) endif() - set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz" CACHE STRING "" FORCE) - file_download_and_uncompress(${URL} "externalError" MD5 061f3b7895aadcbe2c3ed592590f8b10) # download file externalErrorMsg.tar.gz + set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE) + file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa) # download file externalErrorMsg.tar.gz if(WITH_TESTING) # copy externalErrorMsg.pb, just for unittest can get error message correctly. set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data) diff --git a/paddle/fluid/operators/spectral_op.cu b/paddle/fluid/operators/spectral_op.cu index 9aa5ca39d737e..24dffaad41b5f 100644 --- a/paddle/fluid/operators/spectral_op.cu +++ b/paddle/fluid/operators/spectral_op.cu @@ -83,9 +83,7 @@ static inline std::string get_cufft_error_info(cufftResult error) { } static inline void CUFFT_CHECK(cufftResult error) { - if (error != CUFFT_SUCCESS) { - PADDLE_THROW(platform::errors::External(get_cufft_error_info(error))); - } + PADDLE_ENFORCE_CUDA_SUCCESS(error); } // This struct is used to easily compute hashes of the @@ -413,6 +411,7 @@ void exec_fft(const DeviceContext& ctx, const Tensor* X, Tensor* out, ? framework::ToRealType(input.type()) : input.type(); auto fft_type = GetFFTTransformType(input.type(), output.type()); + PlanKey Key(framework::vectorize(input.dims()), framework::vectorize(output.dims()), signal_size, fft_type, value_type); diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index c420a5a64be06..7427060add8b1 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -31,6 +31,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include #include +#include #include #include #include @@ -714,6 +715,7 @@ DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS, CURAND); DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS, CUDNN); DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS, CUBLAS); DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS, CUSOLVER); +DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS, CUFFT); #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess, NCCL); @@ -751,6 +753,8 @@ inline const char* GetErrorMsgUrl(T status) { return "https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/" "types.html#ncclresult-t"; break; + case platform::proto::ApiType::CUFFT: + return "https://docs.nvidia.com/cuda/cufft/index.html#cufftresult"; default: return "Unknown type of External API, can't get error message URL!"; break; @@ -839,6 +843,7 @@ template std::string GetExternalErrorMsg(curandStatus_t); template std::string GetExternalErrorMsg(cudnnStatus_t); template std::string GetExternalErrorMsg(cublasStatus_t); template std::string GetExternalErrorMsg(cusolverStatus_t); +template std::string GetExternalErrorMsg(cufftResult_t); #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) template std::string GetExternalErrorMsg(ncclResult_t); #endif @@ -899,6 +904,15 @@ inline std::string build_nvidia_error_msg(cusolverStatus_t stat) { return sout.str(); } +/*************** CUFFT ERROR ***************/ +inline bool is_error(cufftResult_t stat) { return stat != CUFFT_SUCCESS; } + +inline std::string build_nvidia_error_msg(cufftResult_t stat) { + std::ostringstream sout; + sout << "CUFFT error(" << stat << "). " << GetExternalErrorMsg(stat); + return sout.str(); +} + /**************** NCCL ERROR ****************/ #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) inline bool is_error(ncclResult_t nccl_result) { diff --git a/paddle/fluid/platform/enforce_test.cc b/paddle/fluid/platform/enforce_test.cc index 95a852ad6e92a..c6d5f171ddce4 100644 --- a/paddle/fluid/platform/enforce_test.cc +++ b/paddle/fluid/platform/enforce_test.cc @@ -9,10 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/platform/enforce.h" + #include #include "gtest/gtest.h" -#include "paddle/fluid/platform/enforce.h" TEST(ENFORCE, OK) { PADDLE_ENFORCE(true, paddle::platform::errors::Unavailable( @@ -418,6 +419,25 @@ TEST(enforce, cuda_success) { "negative vector size, for example).To correct: ensure that all the " "parameters being passed have valid values")); + EXPECT_TRUE(CheckCudaStatusSuccess(CUFFT_SUCCESS)); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_PLAN, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_ALLOC_FAILED, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_TYPE, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_VALUE, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INTERNAL_ERROR, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_EXEC_FAILED, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_SETUP_FAILED, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_SIZE, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_UNALIGNED_DATA, "CUFFT error")); + EXPECT_TRUE( + CheckCudaStatusFailure(CUFFT_INCOMPLETE_PARAMETER_LIST, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_INVALID_DEVICE, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_PARSE_ERROR, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NO_WORKSPACE, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_IMPLEMENTED, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_LICENSE_ERROR, "CUFFT error")); + EXPECT_TRUE(CheckCudaStatusFailure(CUFFT_NOT_SUPPORTED, "CUFFT error")); + #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) EXPECT_TRUE(CheckCudaStatusSuccess(ncclSuccess)); EXPECT_TRUE(CheckCudaStatusFailure(ncclUnhandledCudaError, "NCCL error")); diff --git a/paddle/fluid/platform/external_error.proto b/paddle/fluid/platform/external_error.proto index 2094de7e10f69..cbbf803492e64 100644 --- a/paddle/fluid/platform/external_error.proto +++ b/paddle/fluid/platform/external_error.proto @@ -24,6 +24,7 @@ enum ApiType { CUBLAS = 3; CUSOLVER = 4; NCCL = 5; + CUFFT = 6; } message MessageDesc { diff --git a/tools/externalError/README.md b/tools/externalError/README.md index 029efd8cb9491..0c2ac626991da 100644 --- a/tools/externalError/README.md +++ b/tools/externalError/README.md @@ -1,9 +1,25 @@ -Usage: +#### **Introduction for crawling new error message:** -Please run: -``` -bash start.sh -``` -If you want to update all external error message, you need to run command `bash start.sh` in current directory, -and upload the generated file `externalErrorMsg.tar.gz` to https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg.tar.gz + +1. add new spider code in spider.py for crawling error message from website. + +2. run `bash start.sh` in current directory to generate new externalErrorMsg_${date}.tar.gz file, for example `externalErrorMsg_20210928.tar.gz`. + +3. upload above tar file into bos https://paddlepaddledeps.bj.bcebos.com **paddlepaddledeps** bucket, and copy download link `${download_url}`. ***\*Be careful not to delete original tar file\****. + +4. compute md5 value of above tar file `${md5}`, and modify cmake/third_party.cmake file + + ``` + set(URL "${download_url}" CACHE STRING "" FORCE) + file_download_and_uncompress(${URL} "externalError" MD5 ${md5}) + ``` + + for example: + + ``` + set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE) + file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa) + ``` + +5. commit your changes, and create pull request. diff --git a/tools/externalError/spider.py b/tools/externalError/spider.py index a74d82f40ebeb..e07f05f561cb5 100644 --- a/tools/externalError/spider.py +++ b/tools/externalError/spider.py @@ -17,8 +17,10 @@ import urllib.request import json import collections -import sys, getopt +import sys +import getopt import external_error_pb2 +from html.parser import HTMLParser def parsing(externalErrorDesc): @@ -335,6 +337,31 @@ def parsing(externalErrorDesc): _Messages.message = "'%s'. %s" % (error[0], m_message) print("End crawling errorMessage for nvidia NCCL API!\n") + #*************************************************************************************************# + #*********************************** CUFFT Error Message **************************************# + print("start crawling errorMessage for nvidia CUFFT API--->") + url = 'https://docs.nvidia.com/cuda/cufft/index.html#cufftresult' + + allMessageDesc = externalErrorDesc.errors.add() + allMessageDesc.type = external_error_pb2.CUFFT + + html = urllib.request.urlopen(url).read().decode('utf-8') + + class CUFFTHTMLParser(HTMLParser): + '''CUFFTHTML Parser + ''' + + def handle_data(self, data): + if 'typedef enum cufftResult_t' in data: + for line in data.strip().splitlines()[1:-1]: + status, code, desc = re.split('=|//', line.strip()) + _Messages = allMessageDesc.messages.add() + _Messages.code = int(code.strip(' ,')) + _Messages.message = "'%s'. %s" % (status.strip(), + desc.strip()) + + CUFFTHTMLParser().feed(html) + def main(argv): try: diff --git a/tools/externalError/start.sh b/tools/externalError/start.sh index 32ef63c261268..82715dd47326c 100644 --- a/tools/externalError/start.sh +++ b/tools/externalError/start.sh @@ -32,4 +32,4 @@ fi protobuf/bin/protoc -I../../paddle/fluid/platform/ --python_out . ../../paddle/fluid/platform/external_error.proto python3.7 spider.py -tar czvf externalErrorMsg.tar.gz externalErrorMsg.pb +tar czvf externalErrorMsg_$(date +'%Y%m%d').tar.gz externalErrorMsg.pb