From e72fe9f24092fb2d259ad3f2b21fa77c6116379f Mon Sep 17 00:00:00 2001 From: kunpeng Date: Fri, 26 May 2023 10:02:07 +0000 Subject: [PATCH 01/48] First commit --- mllib-dal/src/main/native/Logger.cpp | 63 ++++++++++++++++++++++++++++ mllib-dal/src/main/native/Logger.h | 18 ++++++++ mllib-dal/src/main/native/Makefile | 1 + 3 files changed, 82 insertions(+) create mode 100644 mllib-dal/src/main/native/Logger.cpp create mode 100644 mllib-dal/src/main/native/Logger.h diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp new file mode 100644 index 000000000..395008f62 --- /dev/null +++ b/mllib-dal/src/main/native/Logger.cpp @@ -0,0 +1,63 @@ +#include "Logger.h" +#include + +std::tuple get_prefix(MessageType message_type) { + std::string prefix; + bool enable{true}; + switch (message_type) { + case NONE: + break; + case INFO: + prefix = "[INFO ]"; + break; + case WARN: + prefix = "[WARNING]"; + break; + case ERROR: + prefix = "[ERROR ]"; + break; + case DEBUG: + prefix = "[DEBUG ]"; + break; + case ASSERT: + prefix = "[ASSERT ]"; + break; + default: + break; + } + return {prefix + " ", enable}; +} + +int print2streamFromArgs(MessageType message_type, FILE *stream, const char *format, va_list args) { + // print prefix + auto [prefix, enable] = get_prefix(message_type); + if (!enable) + return 0; + fprintf(stream, "%s", prefix.c_str()); + + // print message + int ret = vfprintf(stream, format, args); + + return ret; +} + +int print2stream(MessageType message_type, FILE *stream, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stream, format, args); + va_end(args); + + return ret; +} + +int print(MessageType message_type, const std::string &msg) { + int ret = print2stream(message_type, stdout, msg.c_str()); + return ret; +} +int print(MessageType message_type, const char *format, ...) 
{ + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stdout, format, args); + va_end(args); + return ret; +} diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h new file mode 100644 index 000000000..1c137854c --- /dev/null +++ b/mllib-dal/src/main/native/Logger.h @@ -0,0 +1,18 @@ +#pragma once + +#include "iostream" +#include +#include + +// message type for print functions +enum MessageType { + NONE = 0, + INFO = 1, + WARN = 2, + ERROR = 3, + DEBUG = 4, + ASSERT = 5 +}; + +int print(MessageType message_type, const std::string &msg); +int print(MessageType message_type, const char *format, ...); diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile index b25072c04..e8cb6bfcc 100644 --- a/mllib-dal/src/main/native/Makefile +++ b/mllib-dal/src/main/native/Makefile @@ -83,6 +83,7 @@ endif CPP_SRCS += \ ./OneCCL.cpp ./OneDAL.cpp ./service.cpp ./error_handling.cpp \ + ./Logger.cpp \ ./KMeansImpl.cpp \ ./PCAImpl.cpp \ ./ALSDALImpl.cpp ./ALSShuffle.cpp \ From 1531964568a631d9b5cf9477b7bb6a6c75044142 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Sun, 4 Jun 2023 22:36:45 -0700 Subject: [PATCH 02/48] Logger tmp --- mllib-dal/src/main/native/LinearRegressionImpl.cpp | 3 ++- mllib-dal/src/main/native/service.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 433c73a6d..43737e37b 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -29,6 +29,7 @@ #include "OneCCL.h" #include "com_intel_oap_mllib_regression_LinearRegressionDALImpl.h" #include "service.h" +#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -152,7 +153,6 @@ ridge_regression_compute(size_t rankId, ccl::communicator &comm, InputDataArchive dataArch; localAlgorithm.getPartialResult()->serialize(dataArch); 
size_t perNodeArchLength = dataArch.getSizeOfArchive(); - // std::cout << "perNodeArchLength: " << perNodeArchLength << std::endl; serializedData = services::SharedPtr(new byte[perNodeArchLength * nBlocks]); @@ -221,6 +221,7 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, jlong pData, jlong pLabel, jboolean jfitIntercept, jint executorNum, jobject resultObj) { + print(INFO, "KP:oneDAL (native): GPU compute start , rankid %d\n", rankId); std::cout << "oneDAL (native): GPU compute start , rankid " << rankId << std::endl; const bool isRoot = (rankId == ccl_root); diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index 288f35243..66b179c6b 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -42,7 +42,7 @@ using namespace daal::data_management; #include #include "error_handling.h" -#include "oneapi/dal/table/detail/csr.hpp" +#include "oneapi/dal/table/csr.hpp" #include "oneapi/dal/table/homogen.hpp" using namespace oneapi::dal; From 8aede57b967fd128b4905c4324f59ca14e988104 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Sat, 24 Jun 2023 13:01:53 -0700 Subject: [PATCH 03/48] ALS done --- mllib-dal/src/main/native/ALSDALImpl.cpp | 44 +++++++++---------- mllib-dal/src/main/native/ALSShuffle.cpp | 7 +-- .../src/main/native/LinearRegressionImpl.cpp | 4 +- mllib-dal/src/main/native/Makefile | 3 +- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index d029d08dc..c87cd99f7 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -24,6 +24,8 @@ #include "ALSShuffle.h" +#include "Logger.h" + using namespace std; using namespace daal; using namespace daal::algorithms; @@ -212,7 +214,7 @@ void initializeStep2Local( void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nUsers, size_t nFactors) { - 
std::cout << "ALS (native): initializeModel " << std::endl; + print(INFO, "ALS (native): initializeModel \n"); auto t1 = std::chrono::high_resolution_clock::now(); @@ -231,8 +233,7 @@ void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "ALS (native): initializeModel took " << duration << " secs" - << std::endl; + print(INFO, "ALS (native): initializeModel took %d secs\n", duration); } training::DistributedPartialResultStep1Ptr computeStep1Local( @@ -314,7 +315,7 @@ computeStep4Local(const CSRNumericTablePtr &dataTable, void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nFactors, size_t maxIterations) { - std::cout << "ALS (native): trainModel" << std::endl; + print(INFO, "ALS (native): trainModel\n"); auto tStart = std::chrono::high_resolution_clock::now(); @@ -423,15 +424,13 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "ALS (native): iteration " << iteration << " took " - << duration << " secs" << std::endl; + print(INFO, "ALS (native): iteration %d took %f secs\n",iteration ,duration); } auto tEnd = std::chrono::high_resolution_clock::now(); auto durationTotal = std::chrono::duration_cast(tEnd - tStart).count(); - std::cout << "ALS (native): trainModel took " << durationTotal << " secs" - << std::endl; + print(INFO, "ALS (native): trainModel took %d secs\n", durationTotal); } static size_t getOffsetFromOffsetTable(NumericTablePtr offsetTable) { @@ -449,7 +448,7 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cShuffleData( JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, jobject infoObj) { // cout << "cShuffleData: rank " << rankId << endl; - cout << "RATING_SIZE: " << 
RATING_SIZE << endl; + print(INFO, "RATING_SIZE: %d\n", RATING_SIZE); ccl::communicator &comm = getComm(); @@ -493,19 +492,18 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( dataTable = *((CSRNumericTablePtr *)numTableAddr); - cout << "ALS (native): Input info: " << endl; - cout << "- NumberOfRows: " << dataTable->getNumberOfRows() << endl; - cout << "- NumberOfColumns: " << dataTable->getNumberOfColumns() << endl; - cout << "- NumberOfRatings: " << dataTable->getDataSize() << endl; - cout << "- fullNUsers: " << nUsers << endl; - cout << "- nFactors: " << nFactors << endl; + print(INFO, "ALS (native): Input info: "); + print(INFO, "- NumberOfRows: %d\n", dataTable->getNumberOfRows()); + print(INFO, "- NumberOfColumns: %d\n", dataTable->getNumberOfColumns()); + print(INFO, "- NumberOfRatings: %d\n", dataTable->getDataSize()); + print(INFO, "- fullNUsers: %d\n", nUsers); + print(INFO, "- nFactors: %d\n", nFactors); // Set number of threads for oneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executor_cores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew - << endl; + print(INFO, "oneDAL (native): Number of CPU threads used: %d\n", nThreadsNew); int nBlocks = executor_num; initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors); @@ -516,16 +514,16 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( auto pItem = itemsPartialResultLocal->get(training::outputOfStep4ForStep1) ->getFactors(); - std::cout << "\n=== Results for Rank " << rankId << "===\n" << std::endl; + print(INFO, "\n"); + print(INFO, "=== Results for Rank %d ===\n", rankId); + print(INFO, "\n"); printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, 20); printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); - std::cout << "User Offset: " << 
getOffsetFromOffsetTable(userOffset) - << std::endl; - std::cout << "Item Offset: " << getOffsetFromOffsetTable(itemOffset) - << std::endl; - std::cout << std::endl; + print(INFO, "User Offset: %d\n", getOffsetFromOffsetTable(userOffset)); + print(INFO, "Item Offset: %d\n", getOffsetFromOffsetTable(itemOffset)); + print(INFO, "\n"); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp b/mllib-dal/src/main/native/ALSShuffle.cpp index 7759e3cc0..1da81a93c 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -22,6 +22,8 @@ #include "ALSShuffle.h" +#include "Logger.h" + using namespace std; std::vector recvData; @@ -76,7 +78,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, // perNodeSendLens[i] << endl; sendBufSize += perNodeSendLens[i]; } - cout << "sendData size " << sendBufSize << endl; + print(INFO, "sendData size %d\n", sendBufSize); sendData.resize(sendBufSize); // Fill in send buffer @@ -119,8 +121,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, // std::distance(recvData.begin(), iter); newCsrRowNum = distinct_count(recvData); - cout << "newRatingsNum: " << newRatingsNum - << " newCsrRowNum: " << newCsrRowNum << endl; + print(INFO, "newRatingsNum: %d, newCsrRowNum: %d\n", newRatingsNum, newCsrRowNum); return recvData.data(); } diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 43737e37b..1849a2955 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -221,9 +221,7 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, jlong pData, jlong pLabel, jboolean jfitIntercept, jint executorNum, jobject resultObj) { - print(INFO, "KP:oneDAL (native): GPU compute start , rankid %d\n", rankId); - std::cout << "oneDAL (native): GPU compute start , rankid " << rankId - << std::endl; + 
print(INFO, "oneDAL (native): GPU compute start , rankid %d\n", rankId); const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile index e8cb6bfcc..5589f5bec 100644 --- a/mllib-dal/src/main/native/Makefile +++ b/mllib-dal/src/main/native/Makefile @@ -82,8 +82,8 @@ else ifeq ($(PLATFORM_PROFILE),CPU_GPU_PROFILE) endif CPP_SRCS += \ - ./OneCCL.cpp ./OneDAL.cpp ./service.cpp ./error_handling.cpp \ ./Logger.cpp \ + ./OneCCL.cpp ./OneDAL.cpp ./service.cpp ./error_handling.cpp \ ./KMeansImpl.cpp \ ./PCAImpl.cpp \ ./ALSDALImpl.cpp ./ALSShuffle.cpp \ @@ -99,6 +99,7 @@ CPP_SRCS += \ ./oneapi/dal/RowAccessorImpl.cpp OBJS += \ + ./Logger.o\ ./OneCCL.o ./OneDAL.o ./service.o ./error_handling.o \ ./KMeansImpl.o \ ./PCAImpl.o \ From 8d9e900d32135cdc3f28f1bea73f2820acd06290 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Sun, 25 Jun 2023 19:45:17 -0700 Subject: [PATCH 04/48] Correlation tmp --- mllib-dal/src/main/native/CorrelationImpl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index f42811d36..256c7d050 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -55,8 +55,7 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "Correleation (native): local step took " << duration / 1000 - << " secs" << std::endl; + print(INFO, "Correleation (native): local step took %d secs\n", duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -82,6 +81,7 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); + print(INFO, "Correleation (native): ccl_allgatherv 
took " std::cout << "Correleation (native): ccl_allgatherv took " << duration / 1000 << " secs" << std::endl; if (isRoot) { From 8f6c6a42a824d969b3938e1351715d2d9757a129 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Sun, 2 Jul 2023 23:53:13 -0700 Subject: [PATCH 05/48] Add table output --- mllib-dal/src/main/native/ALSDALImpl.cpp | 38 +++++++-------- mllib-dal/src/main/native/ALSShuffle.cpp | 4 +- mllib-dal/src/main/native/CorrelationImpl.cpp | 20 ++++---- .../src/main/native/LinearRegressionImpl.cpp | 2 +- mllib-dal/src/main/native/Logger.cpp | 48 ++++++++++++++++++- mllib-dal/src/main/native/Logger.h | 7 ++- 6 files changed, 87 insertions(+), 32 deletions(-) diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index c87cd99f7..363f0697c 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -214,7 +214,7 @@ void initializeStep2Local( void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nUsers, size_t nFactors) { - print(INFO, "ALS (native): initializeModel \n"); + logger::print(logger::INFO, "ALS (native): initializeModel \n"); auto t1 = std::chrono::high_resolution_clock::now(); @@ -233,7 +233,7 @@ void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - print(INFO, "ALS (native): initializeModel took %d secs\n", duration); + logger::print(logger::INFO, "ALS (native): initializeModel took %d secs\n", duration); } training::DistributedPartialResultStep1Ptr computeStep1Local( @@ -315,7 +315,7 @@ computeStep4Local(const CSRNumericTablePtr &dataTable, void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nFactors, size_t maxIterations) { - print(INFO, "ALS (native): trainModel\n"); + logger::print(logger::INFO, "ALS (native): trainModel\n"); 
auto tStart = std::chrono::high_resolution_clock::now(); @@ -424,13 +424,13 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - print(INFO, "ALS (native): iteration %d took %f secs\n",iteration ,duration); + logger::print(logger::INFO, "ALS (native): iteration %d took %f secs\n",iteration ,duration); } auto tEnd = std::chrono::high_resolution_clock::now(); auto durationTotal = std::chrono::duration_cast(tEnd - tStart).count(); - print(INFO, "ALS (native): trainModel took %d secs\n", durationTotal); + logger::print(logger::INFO, "ALS (native): trainModel took %d secs\n", durationTotal); } static size_t getOffsetFromOffsetTable(NumericTablePtr offsetTable) { @@ -448,7 +448,7 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cShuffleData( JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, jobject infoObj) { // cout << "cShuffleData: rank " << rankId << endl; - print(INFO, "RATING_SIZE: %d\n", RATING_SIZE); + logger::print(logger::INFO, "RATING_SIZE: %d\n", RATING_SIZE); ccl::communicator &comm = getComm(); @@ -492,18 +492,18 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( dataTable = *((CSRNumericTablePtr *)numTableAddr); - print(INFO, "ALS (native): Input info: "); - print(INFO, "- NumberOfRows: %d\n", dataTable->getNumberOfRows()); - print(INFO, "- NumberOfColumns: %d\n", dataTable->getNumberOfColumns()); - print(INFO, "- NumberOfRatings: %d\n", dataTable->getDataSize()); - print(INFO, "- fullNUsers: %d\n", nUsers); - print(INFO, "- nFactors: %d\n", nFactors); + logger::print(logger::INFO, "ALS (native): Input info: "); + logger::print(logger::INFO, "- NumberOfRows: %d\n", dataTable->getNumberOfRows()); + logger::print(logger::INFO, "- NumberOfColumns: %d\n", dataTable->getNumberOfColumns()); + logger::print(logger::INFO, "- NumberOfRatings: %d\n", dataTable->getDataSize()); 
+ logger::print(logger::INFO, "- fullNUsers: %d\n", nUsers); + logger::print(logger::INFO, "- nFactors: %d\n", nFactors); // Set number of threads for oneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executor_cores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - print(INFO, "oneDAL (native): Number of CPU threads used: %d\n", nThreadsNew); + logger::print(logger::INFO, "oneDAL (native): Number of CPU threads used: %d\n", nThreadsNew); int nBlocks = executor_num; initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors); @@ -514,16 +514,16 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( auto pItem = itemsPartialResultLocal->get(training::outputOfStep4ForStep1) ->getFactors(); - print(INFO, "\n"); - print(INFO, "=== Results for Rank %d ===\n", rankId); - print(INFO, "\n"); + logger::print(logger::INFO, "\n"); + logger::print(logger::INFO, "=== Results for Rank %d ===\n", rankId); + logger::print(logger::INFO, "\n"); printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, 20); printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); - print(INFO, "User Offset: %d\n", getOffsetFromOffsetTable(userOffset)); - print(INFO, "Item Offset: %d\n", getOffsetFromOffsetTable(itemOffset)); - print(INFO, "\n"); + logger::print(logger::INFO, "User Offset: %d\n", getOffsetFromOffsetTable(userOffset)); + logger::print(logger::INFO, "Item Offset: %d\n", getOffsetFromOffsetTable(itemOffset)); + logger::print(logger::INFO, "\n"); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp b/mllib-dal/src/main/native/ALSShuffle.cpp index 1da81a93c..9cb073223 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -78,7 +78,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, // perNodeSendLens[i] << endl; sendBufSize += 
perNodeSendLens[i]; } - print(INFO, "sendData size %d\n", sendBufSize); + logger::print(logger::INFO, "sendData size %d\n", sendBufSize); sendData.resize(sendBufSize); // Fill in send buffer @@ -121,7 +121,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, // std::distance(recvData.begin(), iter); newCsrRowNum = distinct_count(recvData); - print(INFO, "newRatingsNum: %d, newCsrRowNum: %d\n", newRatingsNum, newCsrRowNum); + logger::print(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d\n", newRatingsNum, newCsrRowNum); return recvData.data(); } diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index 256c7d050..e15ae17ee 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -25,6 +25,8 @@ #include "com_intel_oap_mllib_stat_CorrelationDALImpl.h" #include "service.h" +#include "Logger.h" + using namespace std; #ifdef CPU_GPU_PROFILE namespace covariance_gpu = oneapi::dal::covariance; @@ -55,7 +57,7 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - print(INFO, "Correleation (native): local step took %d secs\n", duration / 1000); + logger::print(logger::INFO, "Correleation (native): local step took %d secs\n", duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -81,9 +83,8 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - print(INFO, "Correleation (native): ccl_allgatherv took " - std::cout << "Correleation (native): ccl_allgatherv took " - << duration / 1000 << " secs" << std::endl; + logger::print(logger::INFO, "Correleation (native): ccl_allgatherv took %d secs\n", + duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on 
the master node */ @@ -121,8 +122,8 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "Correlation (native): master step took " - << duration / 1000 << " secs" << std::endl; + logger::print(logger::INFO, "Correleation (native): master step took %d secs\n", + duration / 1000); /* Print the results */ printNumericTable(result->get(covariance_cpu::correlation), @@ -149,7 +150,7 @@ static void doCorrelationOneAPICompute( JNIEnv *env, jlong pNumTabData, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::print(logger::INFO, "oneDAL (native): GPU compute start\n"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -160,7 +161,10 @@ static void doCorrelationOneAPICompute( auto t1 = std::chrono::high_resolution_clock::now(); const auto result_train = preview::compute(comm, cor_desc, htable); if (isRoot) { - std::cout << "Mean:\n" << result_train.get_means() << std::endl; + logger::print(logger::INFO, "Mean:\n"); + logger::print(logger::INFO, result_train.get_means()); + logger::print(logger::INFO, "Correlation:\n"); + logger::print(logger::INFO, result_train.get_cor_matrix()); std::cout << "Correlation:\n" << result_train.get_cor_matrix() << std::endl; auto t2 = std::chrono::high_resolution_clock::now(); diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 1849a2955..af0746149 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -221,7 +221,7 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, jlong pData, jlong pLabel, jboolean jfitIntercept, jint executorNum, jobject resultObj) { - print(INFO, "oneDAL (native): GPU compute start , rankid %d\n", rankId); + 
logger::print(logger::INFO, "oneDAL (native): GPU compute start , rankid %d\n", rankId); const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 395008f62..c642bc2a0 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -1,6 +1,10 @@ -#include "Logger.h" +#include +#include #include +#include "Logger.h" + +namespace logger{ std::tuple get_prefix(MessageType message_type) { std::string prefix; bool enable{true}; @@ -54,6 +58,7 @@ int print(MessageType message_type, const std::string &msg) { int ret = print2stream(message_type, stdout, msg.c_str()); return ret; } + int print(MessageType message_type, const char *format, ...) { va_list args; va_start(args, format); @@ -61,3 +66,44 @@ int print(MessageType message_type, const char *format, ...) { va_end(args); return ret; } + +int print(MessageType message_type, const oneapi::dal::table &table) { + auto [prefix, enable] = get_prefix(message_type); + if (!enable) + return 0; + + auto arr = oneapi::dal::row_accessor(table).pull(); + const auto x = arr.get_data(); + if (table.get_row_count() <= 10) { + for (std::int64_t i = 0; i < table.get_row_count(); i++) { + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + fprintf(stdout, "%s", prefix.c_str()); + std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) + << std::setprecision(6) << x[i * table.get_column_count() + j]; + } + std::cout << std::endl; + } + } + else { + for (std::int64_t i = 0; i < 5; i++) { + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + fprintf(stdout, "%s", prefix.c_str()); + std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) + << std::setprecision(6) << x[i * table.get_column_count() + j]; + } + std::cout << std::endl; + } + fprintf(stdout, "%s", prefix.c_str()); + std::cout << "..." << (table.get_row_count() - 10) << " lines skipped..." 
<< std::endl; + for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) { + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + fprintf(stdout, "%s", prefix.c_str()); + std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) + << std::setprecision(6) << x[i * table.get_column_count() + j]; + } + std::cout << std::endl; + } + } + return 0; +} +}; diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index 1c137854c..42b3ac4d5 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -1,9 +1,12 @@ #pragma once -#include "iostream" #include #include +#include "oneapi/dal/table/row_accessor.hpp" +#include "oneapi/dal/table/common.hpp" + +namespace logger{ // message type for print functions enum MessageType { NONE = 0, @@ -16,3 +19,5 @@ enum MessageType { int print(MessageType message_type, const std::string &msg); int print(MessageType message_type, const char *format, ...); +int print(MessageType message_type, const oneapi::dal::table &table); +}; From b382d2b39c0b33f9fad16b883d495e866d0334eb Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 18:50:26 -0700 Subject: [PATCH 06/48] Use looger function to print table --- mllib-dal/src/main/native/Logger.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index c642bc2a0..5442c7881 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -71,37 +71,35 @@ int print(MessageType message_type, const oneapi::dal::table &table) { auto [prefix, enable] = get_prefix(message_type); if (!enable) return 0; + FILE *output = stdout; auto arr = oneapi::dal::row_accessor(table).pull(); const auto x = arr.get_data(); if (table.get_row_count() <= 10) { for (std::int64_t i = 0; i < table.get_row_count(); i++) { for (std::int64_t j = 0; j < table.get_column_count(); 
j++) { - fprintf(stdout, "%s", prefix.c_str()); - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; + fprintf(output, "%s", prefix.c_str()); + fprintf(output, "%10f", x[i * table.get_column_count() + j]); } - std::cout << std::endl; + fprintf(output, "\n"); } } else { for (std::int64_t i = 0; i < 5; i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { fprintf(stdout, "%s", prefix.c_str()); - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; + fprintf(output, "%10f", x[i * table.get_column_count() + j]); } - std::cout << std::endl; + fprintf(output, "\n"); } - fprintf(stdout, "%s", prefix.c_str()); - std::cout << "..." << (table.get_row_count() - 10) << " lines skipped..." << std::endl; + fprintf(output, "%s", prefix.c_str()); + fprintf(output, "...%d lines skipped...\n", (table.get_row_count() - 10)); for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { fprintf(stdout, "%s", prefix.c_str()); - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; + fprintf(output, "%10f", x[i * table.get_column_count() + j]); } - std::cout << std::endl; + fprintf(output, "\n"); } } return 0; From f132289a8052acbaf4564fcf9112d567f358f6df Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 18:59:46 -0700 Subject: [PATCH 07/48] Correlation done --- mllib-dal/src/main/native/CorrelationImpl.cpp | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index e15ae17ee..35a14e244 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -165,14 +165,12 @@ static 
void doCorrelationOneAPICompute( logger::print(logger::INFO, result_train.get_means()); logger::print(logger::INFO, "Correlation:\n"); logger::print(logger::INFO, result_train.get_cor_matrix()); - std::cout << "Correlation:\n" - << result_train.get_cor_matrix() << std::endl; auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "Correlation batch(native): computing step took " - << duration / 1000 << " secs." << std::endl; + logger::print(logger::INFO, "Correlation batch(native): computing step took %d secs.\n", + duration / 1000); // Return all covariance & mean jclass clazz = env->GetObjectClass(resultObj); @@ -195,9 +193,9 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::print(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s\n", + ComputeDeviceString[computeDeviceOrdinal]); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); @@ -210,8 +208,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used" - << nThreadsNew << std::endl; + logger::print(logger::INFO, "oneDAL (native): Number of CPU threads used %d\n", + nThreadsNew); doCorrelationDaalCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -219,9 +217,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL 
(native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::print(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d\n", + nGPU, rankID); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -240,7 +237,7 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( } #endif default: { - std::cout << "no supported device!" << std::endl; + logger::print(logger::ERROR, "no supported device!\n"); exit(-1); } } From a72496e45e26e5d0ed68efa976aa403925cbc62f Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 19:09:11 -0700 Subject: [PATCH 08/48] Add println --- mllib-dal/src/main/native/Logger.cpp | 15 +++++++++++++++ mllib-dal/src/main/native/Logger.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 5442c7881..cb02dd930 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -67,6 +67,21 @@ int print(MessageType message_type, const char *format, ...) { return ret; } +int println(MessageType message_type, const std::string &msg) { + int ret = print2stream(message_type, stdout, msg.c_str()); + fprintf(stdout, "\n"); + return ret; +} + +int println(MessageType message_type, const char *format, ...) 
{ + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stdout, format, args); + va_end(args); + fprintf(stdout, "\n"); + return ret; +} + int print(MessageType message_type, const oneapi::dal::table &table) { auto [prefix, enable] = get_prefix(message_type); if (!enable) diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index 42b3ac4d5..831c179b5 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -20,4 +20,6 @@ enum MessageType { int print(MessageType message_type, const std::string &msg); int print(MessageType message_type, const char *format, ...); int print(MessageType message_type, const oneapi::dal::table &table); +int println(MessageType message_type, const char *format, ...); +int println(MessageType message_type, const std::string &msg); }; From 7fd20328bafa5cbd5f5c1bfddb407a2dc81f3711 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 19:24:31 -0700 Subject: [PATCH 09/48] Fix typo --- mllib-dal/src/main/native/CorrelationImpl.cpp | 4 ++-- mllib-dal/src/main/native/Logger.cpp | 2 +- mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index 35a14e244..6f47f06f1 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -194,7 +194,7 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { logger::print(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s\n", - ComputeDeviceString[computeDeviceOrdinal]); + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); @@ -218,7 +218,7 @@ 
Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); logger::print(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d\n", - nGPU, rankID); + nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index cb02dd930..2206df94a 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -108,7 +108,7 @@ int print(MessageType message_type, const oneapi::dal::table &table) { fprintf(output, "\n"); } fprintf(output, "%s", prefix.c_str()); - fprintf(output, "...%d lines skipped...\n", (table.get_row_count() - 10)); + fprintf(output, "...%ld lines skipped...\n", (table.get_row_count() - 10)); for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { fprintf(stdout, "%s", prefix.c_str()); diff --git a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp index 69149c106..49f8314d6 100644 --- a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*******************************************************************************/ #include -#include #include #include #include @@ -28,6 +27,7 @@ #include "com_intel_oneapi_dal_table_HomogenTableImpl.h" #include "service.h" +#include "Logger.h" using namespace std; using namespace oneapi::dal; From b4d64e8f2663aa31786d99d1a33f75942f223cc0 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 19:28:20 -0700 Subject: [PATCH 10/48] HomogenTable done --- .../src/main/native/oneapi/dal/HomogenTableImpl.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp index 49f8314d6..acfd5acb8 100644 --- a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp @@ -129,7 +129,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_iInit( printf("HomogenTable int init \n"); jint *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::println(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -173,7 +173,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_fInit( printf("HomogenTable float init \n"); jfloat *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::println(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -216,7 +216,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_dInit( printf("HomogenTable double init \n"); jdouble *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain 
critical array" << std::endl; + logger::println(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -260,7 +260,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_lInit( printf("HomogenTable long init \n"); jlong *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::println(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -460,7 +460,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHom const auto targetDataType = targetMetaData.get_data_type(0); const auto sourceDataType = sourceMetaData.get_data_type(0); if( targetDataType != sourceDataType ) { - std::cout << "different data type" << std::endl; + logger::println(logger::ERROR, "different data type"); exit(-1); } else { switch(targetDataType){ @@ -477,7 +477,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHom return MergeHomogenTable(targetTable, sourceTable, cComputeDevice); } default: { - std::cout << "no base type" << std::endl; + logger::println(logger::ERROR, "no base type"); exit(-1); } } From 97ae54a8acba460590ca5b59e2a4d084ee156e60 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 19:49:35 -0700 Subject: [PATCH 11/48] Summarizer done --- mllib-dal/src/main/native/Logger.cpp | 1 - mllib-dal/src/main/native/OneDAL.cpp | 2 +- mllib-dal/src/main/native/SummarizerImpl.cpp | 46 ++++++++++---------- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 2206df94a..168b4394c 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "Logger.h" diff --git a/mllib-dal/src/main/native/OneDAL.cpp 
b/mllib-dal/src/main/native/OneDAL.cpp index ff2323031..057e91025 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -58,7 +58,7 @@ JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetDoubleBatch( ((SerializationIfacePtr *)numTableAddr)->get()); jdouble *values = (jdouble *)env->GetPrimitiveArrayCritical(batch, 0); if (values == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::println(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } std::memcpy((*nt)[curRows], values, numRows * numCols * sizeof(double)); diff --git a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 1e99be460..6a08d62b7 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -24,6 +24,7 @@ #include "OneCCL.h" #include "com_intel_oap_mllib_stat_SummarizerDALImpl.h" #include "service.h" +#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -39,7 +40,7 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, ccl::communicator &comm, const NumericTablePtr &pData, size_t nBlocks, jobject resultObj) { - std::cout << "oneDAL (native): CPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): CPU compute start"); using daal::byte; auto t1 = std::chrono::high_resolution_clock::now(); @@ -56,8 +57,7 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "low_order_moments (native): local step took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, "low_order_moments (native): local step took %d secs", duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -83,8 +83,7 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, 
size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "low_order_moments (native): ccl_gather took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, "low_order_moments (native): ccl_gather took %d secs", duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -122,8 +121,7 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "low_order_moments (native): master step took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, "low_order_moments (native): master step took %d secs", duration / 1000); /* Print the results */ printNumericTable(result->get(low_order_moments::mean), @@ -202,7 +200,7 @@ static void doSummarizerOneAPICompute( JNIEnv *env, jlong pNumTabData, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -210,17 +208,20 @@ static void doSummarizerOneAPICompute( auto t1 = std::chrono::high_resolution_clock::now(); const auto result_train = preview::compute(comm, bs_desc, htable); if (isRoot) { - std::cout << "Minimum:\n" << result_train.get_min() << std::endl; - std::cout << "Maximum:\n" << result_train.get_max() << std::endl; - std::cout << "Mean:\n" << result_train.get_mean() << std::endl; - std::cout << "Variance:\n" << result_train.get_variance() << std::endl; + logger::println(logger::INFO, "Minimum"); + logger::print(logger::INFO, result_train.get_min()); + logger::println(logger::INFO, "Maximum"); + logger::print(logger::INFO, result_train.get_max()); + logger::println(logger::INFO, "Mean"); + 
logger::print(logger::INFO, result_train.get_mean()); + logger::println(logger::INFO, "Variation"); + logger::print(logger::INFO, result_train.get_variance()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = (float)std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "Summarizer (native): computing step took " - << duration / 1000 << " secs." << std::endl; + logger::println(logger::INFO, "Summarizer (native): computing step took %d secs", duration / 1000); // Return all covariance & mean jclass clazz = env->GetObjectClass(resultObj); @@ -260,9 +261,9 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); @@ -275,8 +276,8 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used " - << nThreadsNew << std::endl; + logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); doSummarizerDAALCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -284,9 +285,8 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + 
logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", + nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -305,7 +305,7 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( } #endif default: { - std::cout << "no supported device!" << std::endl; + logger::println(logger::ERROR, "no supported device!"); exit(-1); } } From 3f30bd5efa913ff476a19c0bd44db9092a9ffd5a Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 23:01:05 -0700 Subject: [PATCH 12/48] Kmeans done --- mllib-dal/src/main/native/KMeansImpl.cpp | 28 ++++++++++++------------ mllib-dal/src/main/native/OneDAL.cpp | 1 + 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index a7a40a90f..a0a75b8bb 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -26,6 +26,7 @@ #include "OneCCL.h" #include "com_intel_oap_mllib_clustering_KMeansDALImpl.h" #include "service.h" +#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -180,7 +181,7 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId, NumericTablePtr ¢roids, jint cluster_num, jdouble tolerance, jint iteration_num, jint executor_num, jobject resultObj) { - std::cout << "oneDAL (native): CPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): CPU compute start"); algorithmFPType totalCost; NumericTablePtr newCentroids; @@ -208,8 +209,8 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "KMeans (native): iteration " << it << " took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, "KMeans (native): iteration %d took %d secs", + it, duration / 1000); } if (rankId == ccl_root) { @@ -268,8 +269,8 @@ static jlong doKMeansOneAPICompute( auto 
duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "KMeans (native): training step took " << duration / 1000 - << " secs." << std::endl; + logger::println(logger::INFO, "KMeans (native): training step took %d secs", + duration / 1000); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); // Get Field references @@ -304,9 +305,9 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe jint clusterNum, jdouble tolerance, jint iterationNum, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + jlong ret = 0L; ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); @@ -321,8 +322,8 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used " - << nThreadsNew << std::endl; + logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); ret = doKMeansDaalCompute(env, obj, rankId, cclComm, pData, centroids, clusterNum, tolerance, iterationNum, executorNum, resultObj); @@ -331,9 +332,8 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", + nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -355,7 +355,7 @@ 
Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe } #endif default: { - std::cout << "no supported device!" << std::endl; + logger::println(logger::ERROR, "no supported device!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index 057e91025..37f37e4e7 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -19,6 +19,7 @@ #include "com_intel_oap_mllib_OneDAL__.h" #include "service.h" +#include "Logger.h" using namespace daal; using namespace daal::data_management; From ae47e71073f177a81309816ffa39607d469ed799 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 23:07:09 -0700 Subject: [PATCH 13/48] KMeans done --- mllib-dal/src/main/native/KMeansImpl.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index a0a75b8bb..16ac2551f 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -215,11 +215,11 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId, if (rankId == ccl_root) { if (it == iteration_num) - std::cout << "KMeans (native): reached " << iteration_num - << " max iterations." << std::endl; + logger::println(logger::INFO, "KMeans (native): reached %d max iterations.", + iteration_num); else - std::cout << "KMeans (native): converged in " << it - << " iterations." 
<< std::endl; + logger::println(logger::INFO, "KMeans (native): converged in %d iterations.", + iteration_num); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); @@ -246,7 +246,7 @@ static jlong doKMeansOneAPICompute( jdouble tolerance, jint iterationNum, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -261,10 +261,9 @@ static jlong doKMeansOneAPICompute( kmeans_gpu::train_result result_train = preview::train(comm, kmeans_desc, local_input); if (isRoot) { - std::cout << "Iteration count: " << result_train.get_iteration_count() - << std::endl; - std::cout << "Centroids:\n" - << result_train.get_model().get_centroids() << std::endl; + logger::println(logger::INFO, "Iteration count: %d", result_train.get_iteration_count()); + logger::println(logger::INFO, "Centroids:"); + logger::print(logger::INFO, result_train.get_model().get_centroids()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) From 78fe30fa80cf9bea506aa73f642ff26737433a1c Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 23:12:29 -0700 Subject: [PATCH 14/48] error_handling done --- mllib-dal/src/main/native/error_handling.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mllib-dal/src/main/native/error_handling.cpp b/mllib-dal/src/main/native/error_handling.cpp index 45967009a..dc238259e 100644 --- a/mllib-dal/src/main/native/error_handling.cpp +++ b/mllib-dal/src/main/native/error_handling.cpp @@ -21,39 +21,39 @@ !******************************************************************************/ #include "error_handling.h" +#include "Logger.h" #include void checkAllocation(void *ptr) { if (!ptr) { - std::cout << "Error: 
Memory allocation failed" << std::endl; + logger::println(logger::ERROR, "Error: Memory allocation failed"); exit(-1); } } void checkPtr(void *ptr) { if (!ptr) { - std::cout << "Error: NULL pointer" << std::endl; + logger::println(logger::ERROR, "Error: NULL pointer"); exit(-2); } } void fileOpenError(const char *filename) { - std::cout << "Unable to open file '" << filename << "'" << std::endl; + logger::println(logger::ERROR, "Unable to open file '%s'", filename); exit(fileError); } void fileReadError() { - std::cout << "Unable to read next line" << std::endl; + logger::println(logger::ERROR, "Unable to read next line"); exit(fileError); } void sparceFileReadError() { - std::cout << "Incorrect format of file" << std::endl; + logger::println(logger::ERROR, "Incorrect format of file"); exit(fileError); } void deviceError() { - std::cout << "Error: no supported device, please select HOST/CPU/GPU" - << std::endl; + logger::println(logger::ERROR, "Error: no supported device, please select HOST/CPU/GPU"); exit(-1); } From a39911e9d85a234d5b1f1e352a206254eb08a327 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Mon, 3 Jul 2023 23:46:43 -0700 Subject: [PATCH 15/48] DFR and NB --- .../native/DecisionForestRegressorImpl.cpp | 38 +++++++++---------- .../src/main/native/NaiveBayesDALImpl.cpp | 23 +++++------ 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index 1cc5e17a2..fb42074e6 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -29,6 +29,7 @@ #include "com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h" #include "oneapi/dal/algo/decision_forest.hpp" #include "service.h" +#include "Logger.h" using namespace std; using namespace oneapi::dal; @@ -179,11 +180,9 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID learningNodeConstructor 
= env->GetMethodID(learningNodeClass, "", "()V"); - std::cout << "Number of trees: " << m.get_tree_count() << std::endl; + logger::println(logger::INFO, "Number of trees: %d", m.get_tree_count()); for (std::int64_t i = 0, n = m.get_tree_count(); i < n; ++i) { - std::cout - << "Iterate over the C++ map and add each entry to the Java map" - << std::endl; + logger::println(logger::INFO, "Iterate over the C++ map and add each entry to the Java map"); // Create a new Java ArrayList to hold the LearningNode objects jobject jList = env->NewObject(listClass, listConstructor); m.traverse_depth_first(i, collect_nodes{env, classCount, jList, @@ -193,7 +192,7 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID mapPut = env->GetMethodID( mapClass, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); - std::cout << "convertJavaMap tree id = " << i << std::endl; + logger::println(logger::INFO, "convertJavaMap tree id = %d", i); // Create a new Integer object with the value key jobject jKey = env->NewObject( env->FindClass("java/lang/Integer"), // Find the Integer class @@ -212,14 +211,14 @@ static jobject doRFRegressorOneAPICompute( jboolean bootstrap, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table hFeaturetable = *reinterpret_cast(pNumTabFeature); homogen_table hLabeltable = *reinterpret_cast(pNumTabLabel); - std::cout << "doRFRegressorOneAPICompute get_column_count = " - << hFeaturetable.get_column_count() << std::endl; + logger::println(logger::INFO, "doRFRegressorOneAPICompute get_column_count = %d", + hFeaturetable.get_column_count()); const auto df_desc = df::descriptor{} .set_tree_count(treeCount) @@ -238,11 +237,12 @@ static jobject doRFRegressorOneAPICompute( preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable); jobject 
trees = nullptr; if (isRoot) { - std::cout << "Variable importance results:\n" - << result_train.get_var_importance() << std::endl; - std::cout << "OOB error: " << result_train.get_oob_err() << std::endl; - std::cout << "Prediction results:\n" - << result_infer.get_responses() << std::endl; + logger::println(logger::INFO, "Variable importance results:"); + logger::print(logger::INFO, result_train.get_var_importance()); + logger::println(logger::INFO, "OOB error:"); + logger::print(logger::INFO, result_train.get_oob_err()); + logger::println(logger::INFO, "Prediction results:"); + logger::print(logger::INFO, result_infer.get_responses()); // convert c++ map to java hashmap jint statsSize = 3; // spark create VarianceCalculator needs array of @@ -291,16 +291,17 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins, jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " << std::endl; + logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", + nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -321,8 +322,7 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra return hashmapObj; } default: { - std::cout << "RandomForest (native): The compute device " - << "is not supported!" 
<< std::endl; + logger::println(logger::ERROR, "RandomForest (native): The compute device is not supported!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp index 305a867df..85c95c4cd 100644 --- a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp +++ b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp @@ -3,6 +3,7 @@ #include "OneCCL.h" #include "com_intel_oap_mllib_classification_NaiveBayesDALImpl.h" #include "service.h" +#include "Logger.h" #define PROFILE 1 @@ -137,34 +138,28 @@ Java_com_intel_oap_mllib_classification_NaiveBayesDALImpl_cNaiveBayesDALCompute( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew - << endl; - + logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); auto t1 = std::chrono::high_resolution_clock::now(); // Support both dense and csr numeric table training::ResultPtr trainingResult; if (featuresTab->getDataLayout() == NumericTable::StorageLayout::csrArray) { - cout << "oneDAL (native): training model with fastCSR method" << endl; + logger::println(logger::INFO, "oneDAL (native): training model with fastCSR method"); trainingResult = trainModel(comm, featuresTab, labelsTab, class_num); } else { - cout << "oneDAL (native): training model with defaultDense method" - << endl; + logger::println(logger::INFO, "oneDAL (native): training model with defaultDense method"); trainingResult = trainModel( comm, featuresTab, labelsTab, class_num); } - cout << "oneDAL (native): training model finished" << endl; + logger::println(logger::INFO, "oneDAL (native): training model finished"); auto t2 = std::chrono::high_resolution_clock::now(); - - std::cout << "training took " - << (float)std::chrono::duration_cast( - t2 - t1) - .count() / - 1000 - << " secs" << std::endl; + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + 
logger::println(logger::INFO, "training took %d secs", duration / 1000); if (rankId == ccl_root) { multinomial_naive_bayes::ModelPtr model = From f84cfdf78315d7c3d1c23b8c70419569419d2666 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Tue, 4 Jul 2023 18:08:50 -0700 Subject: [PATCH 16/48] GPU done --- mllib-dal/src/main/native/GPU.cpp | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 4205b8c26..303aa47d5 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -3,6 +3,7 @@ #include #include "GPU.h" +#include "Logger.h" typedef std::shared_ptr queuePtr; @@ -17,7 +18,7 @@ static std::vector get_gpus() { return devices; } } - std::cout << "No GPUs!" << std::endl; + logger::println(logger::ERROR, "No GPUs!"); exit(-1); return {}; @@ -70,23 +71,19 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - std::cout - << "Not implemented for HOST/CPU device, Please run on GPU device." 
- << std::endl; + logger::println(logger::ERROR, "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); } case ComputeDevice::gpu: { - std::cout << "selector GPU" << std::endl; + logger::println(logger::INFO, "selector GPU"); auto local_rank = getLocalRank(comm, size, rankId); auto gpus = get_gpus(); - std::cout << "rank: " << rankId << " size: " << size - << " local_rank: " << local_rank << " n_gpu: " << n_gpu - << std::endl; + logger::println(logger::INFO, "rank: %d size: %d local_rank: %d n_gpu: %d", + rankId, size, local_rank, n_gpu); auto gpu_selected = gpu_indices[local_rank % n_gpu]; - std::cout << "GPU selected for current rank: " << gpu_selected - << std::endl; + logger::println(logger::INFO, "GPU selected for current rank: %d", gpu_selected); // In case gpu_selected index is larger than number of GPU SYCL devices auto rank_gpu = gpus[gpu_selected % gpus.size()]; @@ -95,7 +92,7 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, } default: { - std::cout << "No Device!" << std::endl; + logger::println(logger::ERROR, "No Device!"); exit(-1); } } @@ -107,19 +104,17 @@ sycl::queue getQueue(const ComputeDevice device) { switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - std::cout << "Not implemented for HOST/CPU device, Please run on " - "GPU device." - << std::endl; + logger::println(logger::ERROR, "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); } case ComputeDevice::gpu: { - std::cout << "selector GPU" << std::endl; + logger::println(logger::INFO, "selector GPU"); auto device_gpu = sycl::gpu_selector{}.select_device(); - std::cout << "selector GPU end" << std::endl; + logger::println(logger::INFO, "selector GPU end"); return getSyclQueue(device_gpu); } default: { - std::cout << "No Device!" 
<< std::endl; + logger::println(logger::ERROR, "No Device!"); exit(-1); } } From a424d7dbb72e41e10eb8936fb802f7162be37757 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Tue, 4 Jul 2023 18:12:53 -0700 Subject: [PATCH 17/48] LR done --- mllib-dal/src/main/native/ALSDALImpl.cpp | 2 +- mllib-dal/src/main/native/GPU.cpp | 2 +- .../src/main/native/LinearRegressionImpl.cpp | 15 +++++++-------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index 363f0697c..5021c1fcf 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -447,7 +447,7 @@ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cShuffleData( JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, jobject infoObj) { - // cout << "cShuffleData: rank " << rankId << endl; + //logger::println(logger::DEBUG, "cShuffleData: rank %d", rankId); logger::print(logger::INFO, "RATING_SIZE: %d\n", RATING_SIZE); ccl::communicator &comm = getComm(); diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 303aa47d5..307efdf1f 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -99,7 +99,7 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, } sycl::queue getQueue(const ComputeDevice device) { - std::cout << "Get Queue" << std::endl; + logger::println(logger::INFO, "Get Queue"); switch (device) { case ComputeDevice::host: diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index af0746149..90ace2fb0 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -262,9 +262,8 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra jint executorCores, jint computeDeviceOrdinal, jintArray 
gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); size_t rankId = cclComm.rank(); @@ -279,9 +278,9 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra if (useGPU) { #ifdef CPU_GPU_PROFILE int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", + nGpu, rankId); + jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); int size = cclComm.size(); auto queue = @@ -303,8 +302,8 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew - << endl; + logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); if (regParam == 0) { resultTable = linear_regression_compute( rankId, cclComm, pData, pLabel, fitIntercept, executorNum); From badf3540df0f189f6e912d3ef93463f09e11cf42 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Tue, 4 Jul 2023 18:44:52 -0700 Subject: [PATCH 18/48] PCA and DF classifier done --- mllib-dal/src/main/native/Common.hpp | 1 - .../native/DecisionForestClassifierImpl.cpp | 46 +++++++-------- mllib-dal/src/main/native/OutputHelpers.hpp | 56 ------------------- mllib-dal/src/main/native/PCAImpl.cpp | 48 +++++++--------- mllib-dal/src/main/native/Profile.hpp | 8 +-- 5 files changed, 48 insertions(+), 111 deletions(-) delete mode 100644 mllib-dal/src/main/native/OutputHelpers.hpp diff --git a/mllib-dal/src/main/native/Common.hpp 
b/mllib-dal/src/main/native/Common.hpp index baaf4b234..5ead8c8c1 100644 --- a/mllib-dal/src/main/native/Common.hpp +++ b/mllib-dal/src/main/native/Common.hpp @@ -22,5 +22,4 @@ #include "GPU.h" #include "Communicator.hpp" -#include "OutputHelpers.hpp" #include "oneapi/dal/table/homogen.hpp" diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index fa8556b27..3b7a7253d 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -29,6 +29,7 @@ #include "com_intel_oap_mllib_classification_RandomForestClassifierDALImpl.h" #include "oneapi/dal/algo/decision_forest.hpp" #include "service.h" +#include "Logger.h" using namespace std; using namespace oneapi::dal; @@ -180,11 +181,9 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID learningNodeConstructor = env->GetMethodID(learningNodeClass, "", "()V"); - std::cout << "Number of trees: " << m.get_tree_count() << std::endl; + logger::println(logger::INFO, "Number of trees: %d", m.get_tree_count()); for (std::int64_t i = 0, n = m.get_tree_count(); i < n; ++i) { - std::cout - << "Iterate over the C++ map and add each entry to the Java map" - << std::endl; + logger::println(logger::INFO, "Iterate over the C++ map and add each entry to the Java map"); // Create a new Java ArrayList to hold the LearningNode objects jobject jList = env->NewObject(listClass, listConstructor); m.traverse_depth_first(i, collect_nodes{env, classCount, jList, @@ -194,7 +193,7 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID mapPut = env->GetMethodID( mapClass, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); - std::cout << "convertJavaMap tree id = " << i << std::endl; + logger::println(logger::INFO, "convertJavaMap tree id = %d", i); // Create a new Integer object with the value key jobject jKey = env->NewObject( 
env->FindClass("java/lang/Integer"), // Find the Integer class @@ -215,16 +214,17 @@ static jobject doRFClassifierOneAPICompute( jint maxBins, jboolean bootstrap, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table hFeaturetable = *reinterpret_cast(pNumTabFeature); homogen_table hLabeltable = *reinterpret_cast(pNumTabLabel); - std::cout << "doRFClassifierOneAPICompute get_column_count = " - << hFeaturetable.get_column_count() << std::endl; - std::cout << "doRFClassifierOneAPICompute classCount = " << classCount - << std::endl; + logger::println(logger::INFO, "doRFClassifierOneAPICompute get_column_count = %d", + hFeaturetable.get_column_count()); + logger::println(logger::INFO, "doRFClassifierOneAPICompute classCount = %d", + classCount); + const auto df_desc = df::descriptor{} .set_class_count(classCount) @@ -249,13 +249,14 @@ static jobject doRFClassifierOneAPICompute( preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable); jobject trees = nullptr; if (isRoot) { - std::cout << "Variable importance results:\n" - << result_train.get_var_importance() << std::endl; - std::cout << "OOB error: " << result_train.get_oob_err() << std::endl; - std::cout << "Prediction results:\n" - << result_infer.get_responses() << std::endl; - std::cout << "Probabilities results:\n" - << result_infer.get_probabilities() << std::endl; + logger::println(logger::INFO, "Variable importance results:"); + logger::print(logger::INFO, result_train.get_var_importance()); + logger::println(logger::INFO, "OOB error:"); + logger::print(logger::INFO, result_train.get_oob_err()); + logger::println(logger::INFO, "Prediction results:"); + logger::print(logger::INFO, result_infer.get_responses()); + logger::println(logger::INFO, "Probabilities results:\n"); + logger::print(logger::INFO, 
result_infer.get_probabilities()); // convert to java hashmap trees = collect_model(env, result_train.get_model(), classCount); @@ -302,16 +303,16 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif jdouble minImpurityDecreaseSplitNode, jint maxTreeDepth, jlong seed, jint maxBins, jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " << std::endl; + logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels"); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", + nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -334,8 +335,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif return hashmapObj; } default: { - std::cout << "RandomForest (native): The compute device " - << "is not supported!" << std::endl; + logger::println(logger::ERROR, "RandomForest (native): The compute device is not supported!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/OutputHelpers.hpp b/mllib-dal/src/main/native/OutputHelpers.hpp deleted file mode 100644 index e86f0667f..000000000 --- a/mllib-dal/src/main/native/OutputHelpers.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/******************************************************************************* - * Copyright 2021 Intel Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ - -#pragma once - -#include -#include - -#include "oneapi/dal/table/row_accessor.hpp" -#include "oneapi/dal/table/common.hpp" - -inline std::ostream &operator<<(std::ostream &stream, const oneapi::dal::table &table) { - auto arr = oneapi::dal::row_accessor(table).pull(); - const auto x = arr.get_data(); - - if (table.get_row_count() <= 10) { - for (std::int64_t i = 0; i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; - } - std::cout << std::endl; - } - } - else { - for (std::int64_t i = 0; i < 5; i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; - } - std::cout << std::endl; - } - std::cout << "..." << (table.get_row_count() - 10) << " lines skipped..." 
<< std::endl; - for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; - } - std::cout << std::endl; - } - } - return stream; -} diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index 33a0aa2a7..ee4626c86 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -26,6 +26,7 @@ #include "OneCCL.h" #include "com_intel_oap_mllib_feature_PCADALImpl.h" #include "service.h" +#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -42,7 +43,7 @@ typedef double algorithmFPType; /* Algorithm floating-point type */ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, ccl::communicator &comm, NumericTablePtr &pData, size_t nBlocks, jobject resultObj) { - std::cout << "oneDAL (native): CPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): CPU compute start"); using daal::byte; auto t1 = std::chrono::high_resolution_clock::now(); @@ -59,8 +60,7 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << " PCA (native): Covariance local step took " << duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, "PCA (native): Covariance local step took %d secs", duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -85,8 +85,7 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "PCA (native): Covariance gather to master took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, "PCA (native): Covariance gather to master took %d secs", 
duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -125,8 +124,7 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "PCA (native): Covariance master step took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, "PCA (native): Covariance master step took %d secs", duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -146,8 +144,7 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "PCA (native): master step took " << duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, "PCA (native): master step took %d secs", duration / 1000); /* Print the results */ pca_cpu::ResultPtr result = algorithm.getResult(); @@ -183,7 +180,7 @@ static void doPCAOneAPICompute( JNIEnv *env, jlong pNumTabData, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -196,8 +193,7 @@ static void doPCAOneAPICompute( auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "PCA (native): Covariance step took " << duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, "PCA (native): Covariance step took %d secs", duration / 1000); if (isRoot) { using float_t = double; using method_t = pca_gpu::method::precomputed; @@ -212,8 +208,7 @@ static void doPCAOneAPICompute( duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "PCA (native): Eigen step took " << duration / 1000 - << " secs." 
<< std::endl; + logger::println(logger::INFO, "PCA (native): Eigen step took %d secs", duration / 1000); // Return all eigenvalues & eigenvectors // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); @@ -222,10 +217,10 @@ static void doPCAOneAPICompute( env->GetFieldID(clazz, "pcNumericTable", "J"); jfieldID explainedVarianceNumericTableField = env->GetFieldID(clazz, "explainedVarianceNumericTable", "J"); - std::cout << "Eigenvectors:\n" - << result_train.get_eigenvectors() << std::endl; - std::cout << "Eigenvalues:\n" - << result_train.get_eigenvalues() << std::endl; + logger::println(logger::INFO, "Eigenvectors:"); + logger::print(logger::INFO, result_train.get_eigenvectors()); + logger::println(logger::INFO, "Eigenvalues:"); + logger::print(logger::INFO, result_train.get_eigenvalues()); HomogenTablePtr eigenvectors = std::make_shared(result_train.get_eigenvectors()); @@ -248,9 +243,9 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + ccl::communicator &cclComm = getComm(); size_t rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); @@ -263,8 +258,8 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used " - << nThreadsNew << std::endl; + logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); doPCADAALCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -272,9 
+267,8 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", + nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -293,7 +287,7 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( } #endif default: { - std::cout << "no supported device!" << std::endl; + logger::println(logger::ERROR, "no supported device!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/Profile.hpp b/mllib-dal/src/main/native/Profile.hpp index d790c4066..429e65b65 100644 --- a/mllib-dal/src/main/native/Profile.hpp +++ b/mllib-dal/src/main/native/Profile.hpp @@ -3,6 +3,7 @@ #include #include #include +#include "Logger.h" class Profiler { public: @@ -10,7 +11,7 @@ class Profiler { void startProfile(std::string s = "") { action = s; - std::cout << subject << " (native): start " << action << std::endl; + logger::println(logger::INFO, "%s (native): start %s", subject.c_str(), action.c_str()); startTime = std::chrono::high_resolution_clock::now(); } @@ -19,12 +20,11 @@ class Profiler { auto duration = std::chrono::duration_cast( end_time - startTime) .count(); - std::cout << subject << " (native): " << action << " took " << (float)duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, "%s (native): %s took %f secs", subject.c_str(), action.c_str(), (float)duration / 1000); } void println(std::string msg) { - std::cout << subject << " (native): " << msg << std::endl; + logger::println(logger::INFO, "%s (native): %s", subject.c_str(), msg.c_str()); } private: From 4ac5e55b46b86ed9a955005689c4e05cc18fd26a Mon Sep 17 00:00:00 2001 From: kunpeng Date: Tue, 4 Jul 2023 19:03:24 -0700 Subject: [PATCH 19/48] table --- 
mllib-dal/src/main/native/ALSShuffle.cpp | 12 ++++-------- mllib-dal/src/main/native/service.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp b/mllib-dal/src/main/native/ALSShuffle.cpp index 9cb073223..a307857b2 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -74,8 +74,8 @@ Rating *shuffle_all2all(ccl::communicator &comm, // Calculate send buffer size for (size_t i = 0; i < nBlocks; i++) { perNodeSendLens[i] = partitions[i].size() * RATING_SIZE; - // cout << "rank " << rankId << " Send partition " << i << " size " << - // perNodeSendLens[i] << endl; + // logger::println(logger::INFO, "rank %d Send partition %d size %d", + // rankId, i, perNodeSendLens[i]); sendBufSize += perNodeSendLens[i]; } logger::print(logger::INFO, "sendData size %d\n", sendBufSize); @@ -96,8 +96,8 @@ Rating *shuffle_all2all(ccl::communicator &comm, // Calculate recv buffer size for (size_t i = 0; i < nBlocks; i++) { - // cout << "rank " << rankId << " Recv partition " << i << " size " << - // perNodeRecvLens[i] << endl; + // logger::println(logger::INFO, "rank %d Reciv partition %d size %d", + // rankId, i, perNodeSendLens[i]); recvBufSize += perNodeRecvLens[i]; } @@ -111,10 +111,6 @@ Rating *shuffle_all2all(ccl::communicator &comm, sort(recvData.begin(), recvData.end(), compareRatingByUser); - // for (auto r : recvData) { - // cout << r.user << " " << r.item << " " << r.rating << endl; - // } - newRatingsNum = recvData.size(); // RatingPartition::iterator iter = std::unique(recvData.begin(), // recvData.end(), compareRatingUserEquality); newCsrRowNum = diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index 66b179c6b..288f35243 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -42,7 +42,7 @@ using namespace daal::data_management; #include #include "error_handling.h" -#include 
"oneapi/dal/table/csr.hpp" +#include "oneapi/dal/table/detail/csr.hpp" #include "oneapi/dal/table/homogen.hpp" using namespace oneapi::dal; From 2c7afcf85a7b22213d647906806848e058ac3ff7 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Wed, 5 Jul 2023 09:50:41 +0000 Subject: [PATCH 20/48] Format cpp --- mllib-dal/src/main/native/ALSDALImpl.cpp | 30 +++++--- mllib-dal/src/main/native/ALSShuffle.cpp | 7 +- mllib-dal/src/main/native/CorrelationImpl.cpp | 36 ++++++---- .../native/DecisionForestClassifierImpl.cpp | 29 +++++--- .../native/DecisionForestRegressorImpl.cpp | 32 +++++---- mllib-dal/src/main/native/GPU.cpp | 16 +++-- mllib-dal/src/main/native/KMeansImpl.cpp | 45 +++++++----- .../src/main/native/LinearRegressionImpl.cpp | 21 +++--- mllib-dal/src/main/native/Logger.cpp | 69 ++++++++++--------- mllib-dal/src/main/native/Logger.h | 6 +- .../src/main/native/NaiveBayesDALImpl.cpp | 14 ++-- mllib-dal/src/main/native/OneDAL.cpp | 5 +- mllib-dal/src/main/native/PCAImpl.cpp | 43 ++++++++---- mllib-dal/src/main/native/SummarizerImpl.cpp | 42 +++++++---- mllib-dal/src/main/native/error_handling.cpp | 3 +- 15 files changed, 244 insertions(+), 154 deletions(-) diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index 5021c1fcf..8f5164504 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -233,7 +233,8 @@ void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, "ALS (native): initializeModel took %d secs\n", duration); + logger::print(logger::INFO, "ALS (native): initializeModel took %d secs\n", + duration); } training::DistributedPartialResultStep1Ptr computeStep1Local( @@ -424,13 +425,15 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = 
std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, "ALS (native): iteration %d took %f secs\n",iteration ,duration); + logger::print(logger::INFO, "ALS (native): iteration %d took %f secs\n", + iteration, duration); } auto tEnd = std::chrono::high_resolution_clock::now(); auto durationTotal = std::chrono::duration_cast(tEnd - tStart).count(); - logger::print(logger::INFO, "ALS (native): trainModel took %d secs\n", durationTotal); + logger::print(logger::INFO, "ALS (native): trainModel took %d secs\n", + durationTotal); } static size_t getOffsetFromOffsetTable(NumericTablePtr offsetTable) { @@ -447,7 +450,7 @@ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cShuffleData( JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, jobject infoObj) { - //logger::println(logger::DEBUG, "cShuffleData: rank %d", rankId); + // logger::println(logger::DEBUG, "cShuffleData: rank %d", rankId); logger::print(logger::INFO, "RATING_SIZE: %d\n", RATING_SIZE); ccl::communicator &comm = getComm(); @@ -493,9 +496,12 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( dataTable = *((CSRNumericTablePtr *)numTableAddr); logger::print(logger::INFO, "ALS (native): Input info: "); - logger::print(logger::INFO, "- NumberOfRows: %d\n", dataTable->getNumberOfRows()); - logger::print(logger::INFO, "- NumberOfColumns: %d\n", dataTable->getNumberOfColumns()); - logger::print(logger::INFO, "- NumberOfRatings: %d\n", dataTable->getDataSize()); + logger::print(logger::INFO, "- NumberOfRows: %d\n", + dataTable->getNumberOfRows()); + logger::print(logger::INFO, "- NumberOfColumns: %d\n", + dataTable->getNumberOfColumns()); + logger::print(logger::INFO, "- NumberOfRatings: %d\n", + dataTable->getDataSize()); logger::print(logger::INFO, "- fullNUsers: %d\n", nUsers); logger::print(logger::INFO, "- nFactors: %d\n", nFactors); @@ -503,7 +509,9 @@ 
Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( services::Environment::getInstance()->setNumberOfThreads(executor_cores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::print(logger::INFO, "oneDAL (native): Number of CPU threads used: %d\n", nThreadsNew); + logger::print(logger::INFO, + "oneDAL (native): Number of CPU threads used: %d\n", + nThreadsNew); int nBlocks = executor_num; initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors); @@ -521,8 +529,10 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( 20); printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); - logger::print(logger::INFO, "User Offset: %d\n", getOffsetFromOffsetTable(userOffset)); - logger::print(logger::INFO, "Item Offset: %d\n", getOffsetFromOffsetTable(itemOffset)); + logger::print(logger::INFO, "User Offset: %d\n", + getOffsetFromOffsetTable(userOffset)); + logger::print(logger::INFO, "Item Offset: %d\n", + getOffsetFromOffsetTable(itemOffset)); logger::print(logger::INFO, "\n"); // Get the class of the input object diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp b/mllib-dal/src/main/native/ALSShuffle.cpp index a307857b2..a6924a296 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -75,7 +75,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, for (size_t i = 0; i < nBlocks; i++) { perNodeSendLens[i] = partitions[i].size() * RATING_SIZE; // logger::println(logger::INFO, "rank %d Send partition %d size %d", - // rankId, i, perNodeSendLens[i]); + // rankId, i, perNodeSendLens[i]); sendBufSize += perNodeSendLens[i]; } logger::print(logger::INFO, "sendData size %d\n", sendBufSize); @@ -97,7 +97,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, // Calculate recv buffer size for (size_t i = 0; i < nBlocks; i++) { // logger::println(logger::INFO, "rank %d Reciv partition %d size %d", - // rankId, i, 
perNodeSendLens[i]); + // rankId, i, perNodeSendLens[i]); recvBufSize += perNodeRecvLens[i]; } @@ -117,7 +117,8 @@ Rating *shuffle_all2all(ccl::communicator &comm, // std::distance(recvData.begin(), iter); newCsrRowNum = distinct_count(recvData); - logger::print(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d\n", newRatingsNum, newCsrRowNum); + logger::print(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d\n", + newRatingsNum, newCsrRowNum); return recvData.data(); } diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index 6f47f06f1..d21f8f165 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -57,7 +57,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, "Correleation (native): local step took %d secs\n", duration / 1000); + logger::print(logger::INFO, + "Correleation (native): local step took %d secs\n", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -83,8 +85,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, "Correleation (native): ccl_allgatherv took %d secs\n", - duration / 1000); + logger::print(logger::INFO, + "Correleation (native): ccl_allgatherv took %d secs\n", + duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -122,8 +125,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::print(logger::INFO, "Correleation (native): master step took %d secs\n", - duration / 1000); + logger::print(logger::INFO, + "Correleation 
(native): master step took %d secs\n", + duration / 1000); /* Print the results */ printNumericTable(result->get(covariance_cpu::correlation), @@ -169,8 +173,10 @@ static void doCorrelationOneAPICompute( auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::print(logger::INFO, "Correlation batch(native): computing step took %d secs.\n", - duration / 1000); + logger::print( + logger::INFO, + "Correlation batch(native): computing step took %d secs.\n", + duration / 1000); // Return all covariance & mean jclass clazz = env->GetObjectClass(resultObj); @@ -193,8 +199,9 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - logger::print(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s\n", - ComputeDeviceString[computeDeviceOrdinal].c_str()); + logger::print(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s\n", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); @@ -208,8 +215,9 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::print(logger::INFO, "oneDAL (native): Number of CPU threads used %d\n", - nThreadsNew); + logger::print(logger::INFO, + "oneDAL (native): Number of CPU threads used %d\n", + nThreadsNew); doCorrelationDaalCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -217,8 +225,10 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - logger::print(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d\n", - nGpu, rankId); + logger::print( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) 
rankid %d\n", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index 3b7a7253d..0bc427dbd 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -25,11 +25,11 @@ #ifdef CPU_GPU_PROFILE #include "Common.hpp" +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_classification_RandomForestClassifierDALImpl.h" #include "oneapi/dal/algo/decision_forest.hpp" #include "service.h" -#include "Logger.h" using namespace std; using namespace oneapi::dal; @@ -183,7 +183,9 @@ jobject collect_model(JNIEnv *env, const df::model &m, logger::println(logger::INFO, "Number of trees: %d", m.get_tree_count()); for (std::int64_t i = 0, n = m.get_tree_count(); i < n; ++i) { - logger::println(logger::INFO, "Iterate over the C++ map and add each entry to the Java map"); + logger::println( + logger::INFO, + "Iterate over the C++ map and add each entry to the Java map"); // Create a new Java ArrayList to hold the LearningNode objects jobject jList = env->NewObject(listClass, listConstructor); m.traverse_depth_first(i, collect_nodes{env, classCount, jList, @@ -220,10 +222,11 @@ static jobject doRFClassifierOneAPICompute( *reinterpret_cast(pNumTabFeature); homogen_table hLabeltable = *reinterpret_cast(pNumTabLabel); - logger::println(logger::INFO, "doRFClassifierOneAPICompute get_column_count = %d", - hFeaturetable.get_column_count()); + logger::println(logger::INFO, + "doRFClassifierOneAPICompute get_column_count = %d", + hFeaturetable.get_column_count()); logger::println(logger::INFO, "doRFClassifierOneAPICompute classCount = %d", - classCount); + classCount); const auto df_desc = df::descriptor{} @@ -252,11 +255,11 @@ static jobject doRFClassifierOneAPICompute( logger::println(logger::INFO, "Variable importance results:"); 
logger::print(logger::INFO, result_train.get_var_importance()); logger::println(logger::INFO, "OOB error:"); - logger::print(logger::INFO, result_train.get_oob_err()); + logger::print(logger::INFO, result_train.get_oob_err()); logger::println(logger::INFO, "Prediction results:"); - logger::print(logger::INFO, result_infer.get_responses()); + logger::print(logger::INFO, result_infer.get_responses()); logger::println(logger::INFO, "Probabilities results:\n"); - logger::print(logger::INFO, result_infer.get_probabilities()); + logger::print(logger::INFO, result_infer.get_probabilities()); // convert to java hashmap trees = collect_model(env, result_train.get_model(), classCount); @@ -311,8 +314,10 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif switch (device) { case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", - nGpu, rankId); + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -335,7 +340,9 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif return hashmapObj; } default: { - logger::println(logger::ERROR, "RandomForest (native): The compute device is not supported!"); + logger::println( + logger::ERROR, + "RandomForest (native): The compute device is not supported!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index fb42074e6..87c5e3fad 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -25,11 +25,11 @@ #ifdef CPU_GPU_PROFILE #include "Common.hpp" +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h" #include 
"oneapi/dal/algo/decision_forest.hpp" #include "service.h" -#include "Logger.h" using namespace std; using namespace oneapi::dal; @@ -182,7 +182,9 @@ jobject collect_model(JNIEnv *env, const df::model &m, logger::println(logger::INFO, "Number of trees: %d", m.get_tree_count()); for (std::int64_t i = 0, n = m.get_tree_count(); i < n; ++i) { - logger::println(logger::INFO, "Iterate over the C++ map and add each entry to the Java map"); + logger::println( + logger::INFO, + "Iterate over the C++ map and add each entry to the Java map"); // Create a new Java ArrayList to hold the LearningNode objects jobject jList = env->NewObject(listClass, listConstructor); m.traverse_depth_first(i, collect_nodes{env, classCount, jList, @@ -217,8 +219,9 @@ static jobject doRFRegressorOneAPICompute( *reinterpret_cast(pNumTabFeature); homogen_table hLabeltable = *reinterpret_cast(pNumTabLabel); - logger::println(logger::INFO, "doRFRegressorOneAPICompute get_column_count = %d", - hFeaturetable.get_column_count()); + logger::println(logger::INFO, + "doRFRegressorOneAPICompute get_column_count = %d", + hFeaturetable.get_column_count()); const auto df_desc = df::descriptor{} .set_tree_count(treeCount) @@ -238,11 +241,11 @@ static jobject doRFRegressorOneAPICompute( jobject trees = nullptr; if (isRoot) { logger::println(logger::INFO, "Variable importance results:"); - logger::print(logger::INFO, result_train.get_var_importance()); + logger::print(logger::INFO, result_train.get_var_importance()); logger::println(logger::INFO, "OOB error:"); - logger::print(logger::INFO, result_train.get_oob_err()); + logger::print(logger::INFO, result_train.get_oob_err()); logger::println(logger::INFO, "Prediction results:"); - logger::print(logger::INFO, result_infer.get_responses()); + logger::print(logger::INFO, result_infer.get_responses()); // convert c++ map to java hashmap jint statsSize = 3; // spark create VarianceCalculator needs array of @@ -291,8 +294,9 @@ 
Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins, jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", - ComputeDeviceString[computeDeviceOrdinal].c_str()); + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); @@ -300,8 +304,10 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra switch (device) { case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", - nGpu, rankId); + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -322,7 +328,9 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra return hashmapObj; } default: { - logger::println(logger::ERROR, "RandomForest (native): The compute device is not supported!"); + logger::println( + logger::ERROR, + "RandomForest (native): The compute device is not supported!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 307efdf1f..6959ea0be 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -71,7 +71,9 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - logger::println(logger::ERROR, "Not implemented for HOST/CPU device, Please run on GPU device."); + logger::println( + logger::ERROR, + "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); } case 
ComputeDevice::gpu: { @@ -79,11 +81,13 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, auto local_rank = getLocalRank(comm, size, rankId); auto gpus = get_gpus(); - logger::println(logger::INFO, "rank: %d size: %d local_rank: %d n_gpu: %d", - rankId, size, local_rank, n_gpu); + logger::println(logger::INFO, + "rank: %d size: %d local_rank: %d n_gpu: %d", rankId, + size, local_rank, n_gpu); auto gpu_selected = gpu_indices[local_rank % n_gpu]; - logger::println(logger::INFO, "GPU selected for current rank: %d", gpu_selected); + logger::println(logger::INFO, "GPU selected for current rank: %d", + gpu_selected); // In case gpu_selected index is larger than number of GPU SYCL devices auto rank_gpu = gpus[gpu_selected % gpus.size()]; @@ -104,7 +108,9 @@ sycl::queue getQueue(const ComputeDevice device) { switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - logger::println(logger::ERROR, "Not implemented for HOST/CPU device, Please run on GPU device."); + logger::println( + logger::ERROR, + "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); } case ComputeDevice::gpu: { diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index 16ac2551f..17088cce6 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -23,10 +23,10 @@ #include "oneapi/dal/algo/kmeans.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_clustering_KMeansDALImpl.h" #include "service.h" -#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -209,17 +209,20 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::println(logger::INFO, "KMeans (native): iteration %d took %d secs", - it, duration / 1000); + logger::println(logger::INFO, + "KMeans (native): iteration %d took %d secs", it, + duration / 1000); } 
if (rankId == ccl_root) { if (it == iteration_num) - logger::println(logger::INFO, "KMeans (native): reached %d max iterations.", - iteration_num); + logger::println(logger::INFO, + "KMeans (native): reached %d max iterations.", + iteration_num); else - logger::println(logger::INFO, "KMeans (native): converged in %d iterations.", - iteration_num); + logger::println(logger::INFO, + "KMeans (native): converged in %d iterations.", + iteration_num); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); @@ -261,15 +264,17 @@ static jlong doKMeansOneAPICompute( kmeans_gpu::train_result result_train = preview::train(comm, kmeans_desc, local_input); if (isRoot) { - logger::println(logger::INFO, "Iteration count: %d", result_train.get_iteration_count()); - logger::println(logger::INFO, "Centroids:"); - logger::print(logger::INFO, result_train.get_model().get_centroids()); + logger::println(logger::INFO, "Iteration count: %d", + result_train.get_iteration_count()); + logger::println(logger::INFO, "Centroids:"); + logger::print(logger::INFO, result_train.get_model().get_centroids()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::println(logger::INFO, "KMeans (native): training step took %d secs", - duration / 1000); + logger::println(logger::INFO, + "KMeans (native): training step took %d secs", + duration / 1000); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); // Get Field references @@ -304,8 +309,9 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe jint clusterNum, jdouble tolerance, jint iterationNum, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", - ComputeDeviceString[computeDeviceOrdinal].c_str()); + logger::println(logger::INFO, + "oneDAL (native): use 
DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); jlong ret = 0L; ccl::communicator &cclComm = getComm(); @@ -321,8 +327,9 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", - nThreadsNew); + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); ret = doKMeansDaalCompute(env, obj, rankId, cclComm, pData, centroids, clusterNum, tolerance, iterationNum, executorNum, resultObj); @@ -331,8 +338,10 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", - nGpu, rankId); + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 90ace2fb0..14940e8f2 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -26,10 +26,10 @@ #include "oneapi/dal/algo/linear_regression.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_regression_LinearRegressionDALImpl.h" #include "service.h" -#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -221,7 +221,8 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, jlong pData, jlong pLabel, jboolean jfitIntercept, jint executorNum, jobject resultObj) { - logger::print(logger::INFO, "oneDAL (native): GPU compute start , rankid %d\n", rankId); + logger::print(logger::INFO, + "oneDAL (native): GPU compute start , 
rankid %d\n", rankId); const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); @@ -262,8 +263,9 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", - ComputeDeviceString[computeDeviceOrdinal].c_str()); + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); size_t rankId = cclComm.rank(); @@ -278,8 +280,10 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra if (useGPU) { #ifdef CPU_GPU_PROFILE int nGpu = env->GetArrayLength(gpuIdxArray); - logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", - nGpu, rankId); + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); int size = cclComm.size(); @@ -302,8 +306,9 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", - nThreadsNew); + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); if (regParam == 0) { resultTable = linear_regression_compute( rankId, cclComm, pData, pLabel, fitIntercept, executorNum); diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 168b4394c..a4850a850 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -3,35 +3,36 @@ #include "Logger.h" -namespace logger{ +namespace logger { std::tuple get_prefix(MessageType message_type) { std::string prefix; bool 
enable{true}; switch (message_type) { - case NONE: - break; - case INFO: - prefix = "[INFO ]"; - break; - case WARN: - prefix = "[WARNING]"; - break; - case ERROR: - prefix = "[ERROR ]"; - break; - case DEBUG: - prefix = "[DEBUG ]"; - break; - case ASSERT: - prefix = "[ASSERT ]"; - break; - default: - break; + case NONE: + break; + case INFO: + prefix = "[INFO ]"; + break; + case WARN: + prefix = "[WARNING]"; + break; + case ERROR: + prefix = "[ERROR ]"; + break; + case DEBUG: + prefix = "[DEBUG ]"; + break; + case ASSERT: + prefix = "[ASSERT ]"; + break; + default: + break; } return {prefix + " ", enable}; } -int print2streamFromArgs(MessageType message_type, FILE *stream, const char *format, va_list args) { +int print2streamFromArgs(MessageType message_type, FILE *stream, + const char *format, va_list args) { // print prefix auto [prefix, enable] = get_prefix(message_type); if (!enable) @@ -44,7 +45,8 @@ int print2streamFromArgs(MessageType message_type, FILE *stream, const char *for return ret; } -int print2stream(MessageType message_type, FILE *stream, const char *format, ...) { +int print2stream(MessageType message_type, FILE *stream, const char *format, + ...) 
{ va_list args; va_start(args, format); int ret = print2streamFromArgs(message_type, stream, format, args); @@ -93,29 +95,30 @@ int print(MessageType message_type, const oneapi::dal::table &table) { for (std::int64_t i = 0; i < table.get_row_count(); i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { fprintf(output, "%s", prefix.c_str()); - fprintf(output, "%10f", x[i * table.get_column_count() + j]); + fprintf(output, "%10f", x[i * table.get_column_count() + j]); } - fprintf(output, "\n"); + fprintf(output, "\n"); } - } - else { + } else { for (std::int64_t i = 0; i < 5; i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { fprintf(stdout, "%s", prefix.c_str()); - fprintf(output, "%10f", x[i * table.get_column_count() + j]); + fprintf(output, "%10f", x[i * table.get_column_count() + j]); } - fprintf(output, "\n"); + fprintf(output, "\n"); } fprintf(output, "%s", prefix.c_str()); - fprintf(output, "...%ld lines skipped...\n", (table.get_row_count() - 10)); - for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) { + fprintf(output, "...%ld lines skipped...\n", + (table.get_row_count() - 10)); + for (std::int64_t i = table.get_row_count() - 5; + i < table.get_row_count(); i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { fprintf(stdout, "%s", prefix.c_str()); - fprintf(output, "%10f", x[i * table.get_column_count() + j]); + fprintf(output, "%10f", x[i * table.get_column_count() + j]); } - fprintf(output, "\n"); + fprintf(output, "\n"); } } return 0; } -}; +}; // namespace logger diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index 831c179b5..4da5ec672 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -3,10 +3,10 @@ #include #include -#include "oneapi/dal/table/row_accessor.hpp" #include "oneapi/dal/table/common.hpp" +#include "oneapi/dal/table/row_accessor.hpp" -namespace logger{ +namespace logger { // message 
type for print functions enum MessageType { NONE = 0, @@ -22,4 +22,4 @@ int print(MessageType message_type, const char *format, ...); int print(MessageType message_type, const oneapi::dal::table &table); int println(MessageType message_type, const char *format, ...); int println(MessageType message_type, const std::string &msg); -}; +}; // namespace logger diff --git a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp index 85c95c4cd..38a60241e 100644 --- a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp +++ b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp @@ -1,9 +1,9 @@ #include +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_classification_NaiveBayesDALImpl.h" #include "service.h" -#include "Logger.h" #define PROFILE 1 @@ -138,18 +138,22 @@ Java_com_intel_oap_mllib_classification_NaiveBayesDALImpl_cNaiveBayesDALCompute( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", - nThreadsNew); + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); auto t1 = std::chrono::high_resolution_clock::now(); // Support both dense and csr numeric table training::ResultPtr trainingResult; if (featuresTab->getDataLayout() == NumericTable::StorageLayout::csrArray) { - logger::println(logger::INFO, "oneDAL (native): training model with fastCSR method"); + logger::println(logger::INFO, + "oneDAL (native): training model with fastCSR method"); trainingResult = trainModel(comm, featuresTab, labelsTab, class_num); } else { - logger::println(logger::INFO, "oneDAL (native): training model with defaultDense method"); + logger::println( + logger::INFO, + "oneDAL (native): training model with defaultDense method"); trainingResult = trainModel( comm, featuresTab, labelsTab, class_num); } diff --git a/mllib-dal/src/main/native/OneDAL.cpp 
b/mllib-dal/src/main/native/OneDAL.cpp index 37f37e4e7..ee39509f0 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -17,9 +17,9 @@ #include #include +#include "Logger.h" #include "com_intel_oap_mllib_OneDAL__.h" #include "service.h" -#include "Logger.h" using namespace daal; using namespace daal::data_management; @@ -59,7 +59,8 @@ JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetDoubleBatch( ((SerializationIfacePtr *)numTableAddr)->get()); jdouble *values = (jdouble *)env->GetPrimitiveArrayCritical(batch, 0); if (values == NULL) { - logger::println(logger::ERROR, "Error: unable to obtain critical array"); + logger::println(logger::ERROR, + "Error: unable to obtain critical array"); exit(-1); } std::memcpy((*nt)[curRows], values, numRows * numCols * sizeof(double)); diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index ee4626c86..76b125899 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -23,10 +23,10 @@ #include "oneapi/dal/algo/pca.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_feature_PCADALImpl.h" #include "service.h" -#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -60,7 +60,9 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::println(logger::INFO, "PCA (native): Covariance local step took %d secs", duration / 1000); + logger::println(logger::INFO, + "PCA (native): Covariance local step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -85,7 +87,9 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - logger::println(logger::INFO, "PCA (native): Covariance gather to master took %d secs", duration / 
1000); + logger::println(logger::INFO, + "PCA (native): Covariance gather to master took %d secs", + duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -124,7 +128,9 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::println(logger::INFO, "PCA (native): Covariance master step took %d secs", duration / 1000); + logger::println(logger::INFO, + "PCA (native): Covariance master step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -144,7 +150,8 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::println(logger::INFO, "PCA (native): master step took %d secs", duration / 1000); + logger::println(logger::INFO, "PCA (native): master step took %d secs", + duration / 1000); /* Print the results */ pca_cpu::ResultPtr result = algorithm.getResult(); @@ -193,7 +200,8 @@ static void doPCAOneAPICompute( auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::println(logger::INFO, "PCA (native): Covariance step took %d secs", duration / 1000); + logger::println(logger::INFO, "PCA (native): Covariance step took %d secs", + duration / 1000); if (isRoot) { using float_t = double; using method_t = pca_gpu::method::precomputed; @@ -208,7 +216,8 @@ static void doPCAOneAPICompute( duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::println(logger::INFO, "PCA (native): Eigen step took %d secs", duration / 1000); + logger::println(logger::INFO, "PCA (native): Eigen step took %d secs", + duration / 1000); // Return all eigenvalues & eigenvectors // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); @@ -218,9 +227,9 @@ static void doPCAOneAPICompute( jfieldID 
explainedVarianceNumericTableField = env->GetFieldID(clazz, "explainedVarianceNumericTable", "J"); logger::println(logger::INFO, "Eigenvectors:"); - logger::print(logger::INFO, result_train.get_eigenvectors()); + logger::print(logger::INFO, result_train.get_eigenvectors()); logger::println(logger::INFO, "Eigenvalues:"); - logger::print(logger::INFO, result_train.get_eigenvalues()); + logger::print(logger::INFO, result_train.get_eigenvalues()); HomogenTablePtr eigenvectors = std::make_shared(result_train.get_eigenvectors()); @@ -243,8 +252,9 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", - ComputeDeviceString[computeDeviceOrdinal].c_str()); + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); size_t rankId = cclComm.rank(); @@ -258,8 +268,9 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", - nThreadsNew); + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); doPCADAALCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -267,8 +278,10 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", - nGpu, rankId); + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 
0); diff --git a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 6a08d62b7..378b13b00 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -21,10 +21,10 @@ #include "oneapi/dal/algo/basic_statistics.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_stat_SummarizerDALImpl.h" #include "service.h" -#include "Logger.h" using namespace std; #ifdef CPU_GPU_PROFILE @@ -57,7 +57,9 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::println(logger::INFO, "low_order_moments (native): local step took %d secs", duration / 1000); + logger::println(logger::INFO, + "low_order_moments (native): local step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -83,7 +85,9 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - logger::println(logger::INFO, "low_order_moments (native): ccl_gather took %d secs", duration / 1000); + logger::println(logger::INFO, + "low_order_moments (native): ccl_gather took %d secs", + duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -121,7 +125,9 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::println(logger::INFO, "low_order_moments (native): master step took %d secs", duration / 1000); + logger::println(logger::INFO, + "low_order_moments (native): master step took %d secs", + duration / 1000); /* Print the results */ printNumericTable(result->get(low_order_moments::mean), @@ -209,19 +215,21 @@ static void doSummarizerOneAPICompute( const auto 
result_train = preview::compute(comm, bs_desc, htable); if (isRoot) { logger::println(logger::INFO, "Minimum"); - logger::print(logger::INFO, result_train.get_min()); + logger::print(logger::INFO, result_train.get_min()); logger::println(logger::INFO, "Maximum"); - logger::print(logger::INFO, result_train.get_max()); + logger::print(logger::INFO, result_train.get_max()); logger::println(logger::INFO, "Mean"); - logger::print(logger::INFO, result_train.get_mean()); + logger::print(logger::INFO, result_train.get_mean()); logger::println(logger::INFO, "Variation"); - logger::print(logger::INFO, result_train.get_variance()); + logger::print(logger::INFO, result_train.get_variance()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = (float)std::chrono::duration_cast(t2 - t1) .count(); - logger::println(logger::INFO, "Summarizer (native): computing step took %d secs", duration / 1000); + logger::println(logger::INFO, + "Summarizer (native): computing step took %d secs", + duration / 1000); // Return all covariance & mean jclass clazz = env->GetObjectClass(resultObj); @@ -261,8 +269,9 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", - ComputeDeviceString[computeDeviceOrdinal].c_str()); + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); @@ -276,8 +285,9 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::println(logger::INFO, "oneDAL (native): Number of CPU threads used %d", - nThreadsNew); + logger::println(logger::INFO, + "oneDAL (native): 
Number of CPU threads used %d", + nThreadsNew); doSummarizerDAALCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -285,8 +295,10 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - logger::println(logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", - nGpu, rankId); + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); diff --git a/mllib-dal/src/main/native/error_handling.cpp b/mllib-dal/src/main/native/error_handling.cpp index dc238259e..9e12e1c21 100644 --- a/mllib-dal/src/main/native/error_handling.cpp +++ b/mllib-dal/src/main/native/error_handling.cpp @@ -54,6 +54,7 @@ void sparceFileReadError() { } void deviceError() { - logger::println(logger::ERROR, "Error: no supported device, please select HOST/CPU/GPU"); + logger::println(logger::ERROR, + "Error: no supported device, please select HOST/CPU/GPU"); exit(-1); } From 8076a487ca1926825844ca1abd0a3ab927534492 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 6 Jul 2023 17:38:38 -0700 Subject: [PATCH 21/48] All done --- mllib-dal/src/main/native/Logger.cpp | 28 ++++++++++++++++++ mllib-dal/src/main/native/Logger.h | 5 ++++ mllib-dal/src/main/native/OneCCL.cpp | 20 ++++++------- mllib-dal/src/main/native/service.cpp | 42 +++++++++++---------------- 4 files changed, 60 insertions(+), 35 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index a4850a850..433fabc77 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -83,6 +83,34 @@ int println(MessageType message_type, const char *format, ...) 
{ return ret; } +int printerr(MessageType message_type, const std::string &msg) { + int ret = print2stream(message_type, stdout, msg.c_str()); + return ret; +} + +int printerr(MessageType message_type, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stderr, format, args); + va_end(args); + return ret; +} + +int printerrln(MessageType message_type, const std::string &msg) { + int ret = print2stream(message_type, stderr, msg.c_str()); + fprintf(stderr, "\n"); + return ret; +} + +int printerrln(MessageType message_type, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stderr, format, args); + va_end(args); + fprintf(stderr, "\n"); + return ret; +} + int print(MessageType message_type, const oneapi::dal::table &table) { auto [prefix, enable] = get_prefix(message_type); if (!enable) diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index 4da5ec672..2a6172918 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -22,4 +22,9 @@ int print(MessageType message_type, const char *format, ...); int print(MessageType message_type, const oneapi::dal::table &table); int println(MessageType message_type, const char *format, ...); int println(MessageType message_type, const std::string &msg); + +int printerr(MessageType message_type, const std::string &msg); +int printerr(MessageType message_type, const char *format, ...); +int printerrln(MessageType message_type, const char *format, ...); +int printerrln(MessageType message_type, const std::string &msg); }; // namespace logger diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index 38557d615..e119e8216 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -30,6 +30,7 @@ #include "OneCCL.h" #include "com_intel_oap_mllib_OneCCL__.h" +#include 
"Logger.h" extern const size_t ccl_root = 0; @@ -47,7 +48,8 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, jobject param) { - std::cerr << "OneCCL (native): init" << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): init"); + auto t1 = std::chrono::high_resolution_clock::now(); @@ -68,8 +70,7 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cerr << "OneCCL (native): init took " << duration << " secs" - << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): init took %d secs", duration); rank_id = getComm().rank(); comm_size = getComm().size(); @@ -92,7 +93,7 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( */ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject) { - std::cerr << "OneCCL (native): init dpcpp" << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): init dpcpp"); ccl::init(); return 1; @@ -100,7 +101,7 @@ Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject) { JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1cleanup(JNIEnv *env, jobject obj) { - std::cerr << "OneCCL (native): cleanup" << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): cleanup"); g_kvs.pop_back(); g_comms.pop_back(); } @@ -135,7 +136,7 @@ static int fill_local_host_ip() { int family = AF_UNSPEC; char local_ip[CCL_IP_LEN]; if (getifaddrs(&ifaddr) < 0) { - std::cerr << "OneCCL (native): can not get host IP" << std::endl; + logger::printerrln(logger::ERROR, "OneCCL (native): can not get host IP"); return -1; } @@ -157,7 +158,7 @@ static int fill_local_host_ip() { if (res != 0) { std::string s("OneCCL (native): getnameinfo error > "); s.append(gai_strerror(res)); - std::cerr << s << std::endl; + 
logger::printerrln(logger::ERROR, s); return -1; } local_host_ips.push_back(local_ip); @@ -165,8 +166,7 @@ static int fill_local_host_ip() { } } if (local_host_ips.empty()) { - std::cerr << "OneCCL (native): can't find interface to get host IP" - << std::endl; + logger::printerrln(logger::ERROR, "OneCCL (native): can't find interface to get host IP"); return -1; } @@ -177,7 +177,7 @@ static int fill_local_host_ip() { static bool is_valid_ip(char ip[]) { if (fill_local_host_ip() == -1) { - std::cerr << "OneCCL (native): get local host ip error" << std::endl; + logger::printerrln(logger::ERROR, "OneCCL (native): get local host ip error"); return false; }; diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index 66bd1d06b..9de698d9a 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -1,5 +1,6 @@ #include "service.h" #include "error_handling.h" +#include "Logger.h" using namespace daal; using namespace daal::data_management; @@ -36,18 +37,15 @@ template void printArray(T *array, const size_t nPrintedCols, const size_t nPrintedRows, const size_t nCols, const std::string &message, size_t interval = 10) { - std::cout << std::setiosflags(std::ios::left); - std::cout << message << std::endl; + logger::println(logger::INFO, message); for (size_t i = 0; i < nPrintedRows; i++) { + logger::print(logger::INFO, ""); for (size_t j = 0; j < nPrintedCols; j++) { - std::cout << std::setw(interval) - << std::setiosflags(std::ios::fixed) - << std::setprecision(3); - std::cout << array[i * nCols + j]; + logger::print(logger::NONE, "%*.3f", interval, array[i * nCols + j]); } - std::cout << std::endl; + logger::println(logger::NONE, ""); } - std::cout << std::endl; + logger::println(logger::INFO, ""); } template @@ -59,44 +57,38 @@ void printArray(T *array, const size_t nCols, const size_t nRows, template void printLowerArray(T *array, const size_t nPrintedRows, const std::string &message, size_t 
interval = 10) { - std::cout << std::setiosflags(std::ios::left); - std::cout << message << std::endl; + logger::println(logger::INFO, message); int ind = 0; for (size_t i = 0; i < nPrintedRows; i++) { + logger::print(logger::INFO, ""); for (size_t j = 0; j <= i; j++) { - std::cout << std::setw(interval) - << std::setiosflags(std::ios::fixed) - << std::setprecision(3); - std::cout << array[ind++]; + logger::print(logger::NONE, "%*.3f", interval, array[ind++]); } - std::cout << std::endl; + logger::println(logger::NONE, ""); } - std::cout << std::endl; + logger::println(logger::INFO, ""); } template void printUpperArray(T *array, const size_t nPrintedCols, const size_t nPrintedRows, const size_t nCols, const std::string &message, size_t interval = 10) { - std::cout << std::setiosflags(std::ios::left); - std::cout << message << std::endl; + logger::println(logger::INFO, message); int ind = 0; for (size_t i = 0; i < nPrintedRows; i++) { + logger::print(logger::INFO, ""); for (size_t j = 0; j < i; j++) { - std::cout << " "; + logger::print(logger::NONE, " "); } for (size_t j = i; j < nPrintedCols; j++) { - std::cout << std::setw(interval) - << std::setiosflags(std::ios::fixed) - << std::setprecision(3); - std::cout << array[ind++]; + logger::print(logger::NONE, "%*.3f", interval, array[ind++]); } for (size_t j = nPrintedCols; j < nCols; j++) { ind++; } - std::cout << std::endl; + logger::println(logger::NONE, ""); } - std::cout << std::endl; + logger::println(logger::INFO, ""); } void printNumericTable(NumericTable *dataTable, const char *message = "", From 2934766be2317eed2d0a460da8064b372a80371b Mon Sep 17 00:00:00 2001 From: kunpeng Date: Fri, 7 Jul 2023 08:14:52 +0000 Subject: [PATCH 22/48] Format --- mllib-dal/src/main/native/OneCCL.cpp | 16 ++++++++++------ mllib-dal/src/main/native/service.cpp | 5 +++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index 
e119e8216..c50099f9d 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -28,9 +28,9 @@ #include +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_OneCCL__.h" -#include "Logger.h" extern const size_t ccl_root = 0; @@ -49,7 +49,6 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( jobject param) { logger::printerrln(logger::INFO, "OneCCL (native): init"); - auto t1 = std::chrono::high_resolution_clock::now(); @@ -70,7 +69,8 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::printerrln(logger::INFO, "OneCCL (native): init took %d secs", duration); + logger::printerrln(logger::INFO, "OneCCL (native): init took %d secs", + duration); rank_id = getComm().rank(); comm_size = getComm().size(); @@ -136,7 +136,8 @@ static int fill_local_host_ip() { int family = AF_UNSPEC; char local_ip[CCL_IP_LEN]; if (getifaddrs(&ifaddr) < 0) { - logger::printerrln(logger::ERROR, "OneCCL (native): can not get host IP"); + logger::printerrln(logger::ERROR, + "OneCCL (native): can not get host IP"); return -1; } @@ -166,7 +167,9 @@ static int fill_local_host_ip() { } } if (local_host_ips.empty()) { - logger::printerrln(logger::ERROR, "OneCCL (native): can't find interface to get host IP"); + logger::printerrln( + logger::ERROR, + "OneCCL (native): can't find interface to get host IP"); return -1; } @@ -177,7 +180,8 @@ static int fill_local_host_ip() { static bool is_valid_ip(char ip[]) { if (fill_local_host_ip() == -1) { - logger::printerrln(logger::ERROR, "OneCCL (native): get local host ip error"); + logger::printerrln(logger::ERROR, + "OneCCL (native): get local host ip error"); return false; }; diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index 9de698d9a..32bd4dcd1 100644 --- a/mllib-dal/src/main/native/service.cpp 
+++ b/mllib-dal/src/main/native/service.cpp @@ -1,6 +1,6 @@ #include "service.h" -#include "error_handling.h" #include "Logger.h" +#include "error_handling.h" using namespace daal; using namespace daal::data_management; @@ -41,7 +41,8 @@ void printArray(T *array, const size_t nPrintedCols, const size_t nPrintedRows, for (size_t i = 0; i < nPrintedRows; i++) { logger::print(logger::INFO, ""); for (size_t j = 0; j < nPrintedCols; j++) { - logger::print(logger::NONE, "%*.3f", interval, array[i * nCols + j]); + logger::print(logger::NONE, "%*.3f", interval, + array[i * nCols + j]); } logger::println(logger::NONE, ""); } From 696db11f9e216b18f9ad809ff26d992139842bc9 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 20 Jul 2023 15:54:01 -0700 Subject: [PATCH 23/48] Replace print() --- mllib-dal/src/main/native/ALSDALImpl.cpp | 40 +++++++++---------- mllib-dal/src/main/native/ALSShuffle.cpp | 4 +- mllib-dal/src/main/native/CorrelationImpl.cpp | 32 +++++++-------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index 8f5164504..2bb0ce841 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -214,7 +214,7 @@ void initializeStep2Local( void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nUsers, size_t nFactors) { - logger::print(logger::INFO, "ALS (native): initializeModel \n"); + logger::println(logger::INFO, "ALS (native): initializeModel "); auto t1 = std::chrono::high_resolution_clock::now(); @@ -233,7 +233,7 @@ void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, "ALS (native): initializeModel took %d secs\n", + logger::println(logger::INFO, "ALS (native): initializeModel took %d secs", duration); } @@ 
-316,7 +316,7 @@ computeStep4Local(const CSRNumericTablePtr &dataTable, void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nFactors, size_t maxIterations) { - logger::print(logger::INFO, "ALS (native): trainModel\n"); + logger::println(logger::INFO, "ALS (native): trainModel"); auto tStart = std::chrono::high_resolution_clock::now(); @@ -425,14 +425,14 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, "ALS (native): iteration %d took %f secs\n", + logger::println(logger::INFO, "ALS (native): iteration %d took %f secs", iteration, duration); } auto tEnd = std::chrono::high_resolution_clock::now(); auto durationTotal = std::chrono::duration_cast(tEnd - tStart).count(); - logger::print(logger::INFO, "ALS (native): trainModel took %d secs\n", + logger::println(logger::INFO, "ALS (native): trainModel took %d secs", durationTotal); } @@ -451,7 +451,7 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cShuffleData( JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, jobject infoObj) { // logger::println(logger::DEBUG, "cShuffleData: rank %d", rankId); - logger::print(logger::INFO, "RATING_SIZE: %d\n", RATING_SIZE); + logger::println(logger::INFO, "RATING_SIZE: %d", RATING_SIZE); ccl::communicator &comm = getComm(); @@ -495,22 +495,22 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( dataTable = *((CSRNumericTablePtr *)numTableAddr); - logger::print(logger::INFO, "ALS (native): Input info: "); - logger::print(logger::INFO, "- NumberOfRows: %d\n", + logger::println(logger::INFO, "ALS (native): Input info:"); + logger::println(logger::INFO, "- NumberOfRows: %d", dataTable->getNumberOfRows()); - logger::print(logger::INFO, "- NumberOfColumns: %d\n", + logger::println(logger::INFO, "- NumberOfColumns: %d", 
dataTable->getNumberOfColumns()); - logger::print(logger::INFO, "- NumberOfRatings: %d\n", + logger::println(logger::INFO, "- NumberOfRatings: %d", dataTable->getDataSize()); - logger::print(logger::INFO, "- fullNUsers: %d\n", nUsers); - logger::print(logger::INFO, "- nFactors: %d\n", nFactors); + logger::println(logger::INFO, "- fullNUsers: %d", nUsers); + logger::println(logger::INFO, "- nFactors: %d", nFactors); // Set number of threads for oneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executor_cores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::print(logger::INFO, - "oneDAL (native): Number of CPU threads used: %d\n", + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used: %d", nThreadsNew); int nBlocks = executor_num; @@ -522,18 +522,18 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( auto pItem = itemsPartialResultLocal->get(training::outputOfStep4ForStep1) ->getFactors(); - logger::print(logger::INFO, "\n"); - logger::print(logger::INFO, "=== Results for Rank %d ===\n", rankId); - logger::print(logger::INFO, "\n"); + logger::println(logger::INFO, ""); + logger::println(logger::INFO, "=== Results for Rank %d ===", rankId); + logger::println(logger::INFO, ""); printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, 20); printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); - logger::print(logger::INFO, "User Offset: %d\n", + logger::println(logger::INFO, "User Offset: %d", getOffsetFromOffsetTable(userOffset)); - logger::print(logger::INFO, "Item Offset: %d\n", + logger::println(logger::INFO, "Item Offset: %d", getOffsetFromOffsetTable(itemOffset)); - logger::print(logger::INFO, "\n"); + logger::println(logger::INFO, ""); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp 
b/mllib-dal/src/main/native/ALSShuffle.cpp index a6924a296..4a3ce5e56 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -78,7 +78,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, // rankId, i, perNodeSendLens[i]); sendBufSize += perNodeSendLens[i]; } - logger::print(logger::INFO, "sendData size %d\n", sendBufSize); + logger::println(logger::INFO, "sendData size %d", sendBufSize); sendData.resize(sendBufSize); // Fill in send buffer @@ -117,7 +117,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, // std::distance(recvData.begin(), iter); newCsrRowNum = distinct_count(recvData); - logger::print(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d\n", + logger::println(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d", newRatingsNum, newCsrRowNum); return recvData.data(); diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index d21f8f165..7d86bca44 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -57,8 +57,8 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, - "Correleation (native): local step took %d secs\n", + logger::println(logger::INFO, + "Correleation (native): local step took %d secs", duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -85,8 +85,8 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - logger::print(logger::INFO, - "Correleation (native): ccl_allgatherv took %d secs\n", + logger::println(logger::INFO, + "Correleation (native): ccl_allgatherv took %d secs", duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); @@ -125,8 +125,8 @@ static void 
doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::print(logger::INFO, - "Correleation (native): master step took %d secs\n", + logger::println(logger::INFO, + "Correleation (native): master step took %d secs", duration / 1000); /* Print the results */ @@ -154,7 +154,7 @@ static void doCorrelationOneAPICompute( JNIEnv *env, jlong pNumTabData, preview::spmd::communicator comm, jobject resultObj) { - logger::print(logger::INFO, "oneDAL (native): GPU compute start\n"); + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -165,17 +165,17 @@ static void doCorrelationOneAPICompute( auto t1 = std::chrono::high_resolution_clock::now(); const auto result_train = preview::compute(comm, cor_desc, htable); if (isRoot) { - logger::print(logger::INFO, "Mean:\n"); + logger::println(logger::INFO, "Mean:"); logger::print(logger::INFO, result_train.get_means()); - logger::print(logger::INFO, "Correlation:\n"); + logger::println(logger::INFO, "Correlation:"); logger::print(logger::INFO, result_train.get_cor_matrix()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) .count(); - logger::print( + logger::println( logger::INFO, - "Correlation batch(native): computing step took %d secs.\n", + "Correlation batch(native): computing step took %d secs.", duration / 1000); // Return all covariance & mean jclass clazz = env->GetObjectClass(resultObj); @@ -199,8 +199,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - logger::print(logger::INFO, - "oneDAL (native): use DPC++ kernels; device %s\n", + logger::println(logger::INFO, + "oneDAL (native): use 
DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); @@ -225,9 +225,9 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - logger::print( + logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d\n", nGpu, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -247,7 +247,7 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( } #endif default: { - logger::print(logger::ERROR, "no supported device!\n"); + logger::println(logger::ERROR, "no supported device!"); exit(-1); } } From d270f98bb398d83a8a261f399cf98d43411bd7c5 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 20 Jul 2023 16:18:10 -0700 Subject: [PATCH 24/48] Move print(table to service --- mllib-dal/src/main/native/CorrelationImpl.cpp | 8 ++-- .../native/DecisionForestClassifierImpl.cpp | 8 ++-- .../native/DecisionForestRegressorImpl.cpp | 6 +-- mllib-dal/src/main/native/KMeansImpl.cpp | 2 +- .../src/main/native/LinearRegressionImpl.cpp | 4 +- mllib-dal/src/main/native/Logger.cpp | 38 ------------------- mllib-dal/src/main/native/Logger.h | 4 -- mllib-dal/src/main/native/Makefile | 2 +- mllib-dal/src/main/native/PCAImpl.cpp | 4 +- mllib-dal/src/main/native/SummarizerImpl.cpp | 8 ++-- mllib-dal/src/main/native/service.cpp | 29 ++++++++++++++ mllib-dal/src/main/native/service.h | 1 + 12 files changed, 51 insertions(+), 63 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index 7d86bca44..f099b10b6 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -166,9 +166,9 @@ static void doCorrelationOneAPICompute( const auto result_train = preview::compute(comm, 
cor_desc, htable); if (isRoot) { logger::println(logger::INFO, "Mean:"); - logger::print(logger::INFO, result_train.get_means()); + printHomegenTable(result_train.get_means()); logger::println(logger::INFO, "Correlation:"); - logger::print(logger::INFO, result_train.get_cor_matrix()); + printHomegenTable(result_train.get_cor_matrix()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) @@ -215,8 +215,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - logger::print(logger::INFO, - "oneDAL (native): Number of CPU threads used %d\n", + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", nThreadsNew); doCorrelationDaalCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index 0bc427dbd..cb9a2764e 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -253,13 +253,13 @@ static jobject doRFClassifierOneAPICompute( jobject trees = nullptr; if (isRoot) { logger::println(logger::INFO, "Variable importance results:"); - logger::print(logger::INFO, result_train.get_var_importance()); + printHomegenTable(result_train.get_var_importance()); logger::println(logger::INFO, "OOB error:"); - logger::print(logger::INFO, result_train.get_oob_err()); + printHomegenTable(result_train.get_oob_err()); logger::println(logger::INFO, "Prediction results:"); - logger::print(logger::INFO, result_infer.get_responses()); + printHomegenTable(result_infer.get_responses()); logger::println(logger::INFO, "Probabilities results:\n"); - logger::print(logger::INFO, result_infer.get_probabilities()); + printHomegenTable(result_infer.get_probabilities()); // convert to java hashmap trees = 
collect_model(env, result_train.get_model(), classCount); diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index 87c5e3fad..a18f54ad2 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -241,11 +241,11 @@ static jobject doRFRegressorOneAPICompute( jobject trees = nullptr; if (isRoot) { logger::println(logger::INFO, "Variable importance results:"); - logger::print(logger::INFO, result_train.get_var_importance()); + printHomegenTable(result_train.get_var_importance()); logger::println(logger::INFO, "OOB error:"); - logger::print(logger::INFO, result_train.get_oob_err()); + printHomegenTable(result_train.get_oob_err()); logger::println(logger::INFO, "Prediction results:"); - logger::print(logger::INFO, result_infer.get_responses()); + printHomegenTable(result_infer.get_responses()); // convert c++ map to java hashmap jint statsSize = 3; // spark create VarianceCalculator needs array of diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index 17088cce6..790e58a32 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -267,7 +267,7 @@ static jlong doKMeansOneAPICompute( logger::println(logger::INFO, "Iteration count: %d", result_train.get_iteration_count()); logger::println(logger::INFO, "Centroids:"); - logger::print(logger::INFO, result_train.get_model().get_centroids()); + printHomegenTable(result_train.get_model().get_centroids()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 14940e8f2..4b2da372a 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -221,8 
+221,8 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, jlong pData, jlong pLabel, jboolean jfitIntercept, jint executorNum, jobject resultObj) { - logger::print(logger::INFO, - "oneDAL (native): GPU compute start , rankid %d\n", rankId); + logger::println(logger::INFO, + "oneDAL (native): GPU compute start , rankid %d", rankId); const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 433fabc77..000e9e9bf 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -111,42 +111,4 @@ int printerrln(MessageType message_type, const char *format, ...) { return ret; } -int print(MessageType message_type, const oneapi::dal::table &table) { - auto [prefix, enable] = get_prefix(message_type); - if (!enable) - return 0; - FILE *output = stdout; - - auto arr = oneapi::dal::row_accessor(table).pull(); - const auto x = arr.get_data(); - if (table.get_row_count() <= 10) { - for (std::int64_t i = 0; i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - fprintf(output, "%s", prefix.c_str()); - fprintf(output, "%10f", x[i * table.get_column_count() + j]); - } - fprintf(output, "\n"); - } - } else { - for (std::int64_t i = 0; i < 5; i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - fprintf(stdout, "%s", prefix.c_str()); - fprintf(output, "%10f", x[i * table.get_column_count() + j]); - } - fprintf(output, "\n"); - } - fprintf(output, "%s", prefix.c_str()); - fprintf(output, "...%ld lines skipped...\n", - (table.get_row_count() - 10)); - for (std::int64_t i = table.get_row_count() - 5; - i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - fprintf(stdout, "%s", prefix.c_str()); - fprintf(output, "%10f", x[i * table.get_column_count() + j]); - } - fprintf(output, "\n"); - } - } - return 0; -} }; // 
namespace logger diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index 2a6172918..3d77a2f4d 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -3,9 +3,6 @@ #include #include -#include "oneapi/dal/table/common.hpp" -#include "oneapi/dal/table/row_accessor.hpp" - namespace logger { // message type for print functions enum MessageType { @@ -19,7 +16,6 @@ enum MessageType { int print(MessageType message_type, const std::string &msg); int print(MessageType message_type, const char *format, ...); -int print(MessageType message_type, const oneapi::dal::table &table); int println(MessageType message_type, const char *format, ...); int println(MessageType message_type, const std::string &msg); diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile index 5589f5bec..40869ca4d 100644 --- a/mllib-dal/src/main/native/Makefile +++ b/mllib-dal/src/main/native/Makefile @@ -99,7 +99,7 @@ CPP_SRCS += \ ./oneapi/dal/RowAccessorImpl.cpp OBJS += \ - ./Logger.o\ + ./Logger.o \ ./OneCCL.o ./OneDAL.o ./service.o ./error_handling.o \ ./KMeansImpl.o \ ./PCAImpl.o \ diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index 76b125899..8f3337dcc 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -227,9 +227,9 @@ static void doPCAOneAPICompute( jfieldID explainedVarianceNumericTableField = env->GetFieldID(clazz, "explainedVarianceNumericTable", "J"); logger::println(logger::INFO, "Eigenvectors:"); - logger::print(logger::INFO, result_train.get_eigenvectors()); + printHomegenTable(result_train.get_eigenvectors()); logger::println(logger::INFO, "Eigenvalues:"); - logger::print(logger::INFO, result_train.get_eigenvalues()); + printHomegenTable(result_train.get_eigenvalues()); HomogenTablePtr eigenvectors = std::make_shared(result_train.get_eigenvectors()); diff --git 
a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 378b13b00..794c7e473 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -215,13 +215,13 @@ static void doSummarizerOneAPICompute( const auto result_train = preview::compute(comm, bs_desc, htable); if (isRoot) { logger::println(logger::INFO, "Minimum"); - logger::print(logger::INFO, result_train.get_min()); + printHomegenTable(result_train.get_min()); logger::println(logger::INFO, "Maximum"); - logger::print(logger::INFO, result_train.get_max()); + printHomegenTable(result_train.get_max()); logger::println(logger::INFO, "Mean"); - logger::print(logger::INFO, result_train.get_mean()); + printHomegenTable(result_train.get_mean()); logger::println(logger::INFO, "Variation"); - logger::print(logger::INFO, result_train.get_variance()); + printHomegenTable(result_train.get_variance()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = (float)std::chrono::duration_cast(t2 - diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index 32bd4dcd1..37e372fe8 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -235,4 +235,33 @@ NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen) { return ntSycl; } + +void printHomegenTable(const oneapi::dal::table &table) { + auto arr = oneapi::dal::row_accessor(table).pull(); + const auto x = arr.get_data(); + if (table.get_row_count() <= 10) { + for (std::int64_t i = 0; i < table.get_row_count(); i++) { + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); + } + logger::println(""); + } + } else { + for (std::int64_t i = 0; i < 5; i++) { + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); + } + 
logger::println(""); + } + logger::println(logger::INFO, "...%ld lines skipped...", (table.get_row_count() - 10)); + for (std::int64_t i = table.get_row_count() - 5; + i < table.get_row_count(); i++) { + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); + } + logger::println(""); + } + } + return 0; +} #endif diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index 288f35243..f084422f3 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -68,4 +68,5 @@ void saveCSRTablePtrToVector(const CSRTablePtr &ptr); #ifdef CPU_GPU_PROFILE NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen); +void printHomegenTable(const oneapi::dal::table &table); #endif From a0b88d8bc445e033aa1a4531b5a54c62ca455b52 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Fri, 21 Jul 2023 06:56:00 +0000 Subject: [PATCH 25/48] Format --- mllib-dal/src/main/native/ALSDALImpl.cpp | 20 +++++++++---------- mllib-dal/src/main/native/ALSShuffle.cpp | 2 +- mllib-dal/src/main/native/CorrelationImpl.cpp | 20 +++++++++---------- .../src/main/native/LinearRegressionImpl.cpp | 2 +- mllib-dal/src/main/native/service.cpp | 18 ++++++++++------- 5 files changed, 33 insertions(+), 29 deletions(-) diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index 2bb0ce841..1c3c2f654 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -234,7 +234,7 @@ void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto duration = std::chrono::duration_cast(t2 - t1).count(); logger::println(logger::INFO, "ALS (native): initializeModel took %d secs", - duration); + duration); } training::DistributedPartialResultStep1Ptr computeStep1Local( @@ -426,14 +426,14 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto duration 
= std::chrono::duration_cast(t2 - t1).count(); logger::println(logger::INFO, "ALS (native): iteration %d took %f secs", - iteration, duration); + iteration, duration); } auto tEnd = std::chrono::high_resolution_clock::now(); auto durationTotal = std::chrono::duration_cast(tEnd - tStart).count(); logger::println(logger::INFO, "ALS (native): trainModel took %d secs", - durationTotal); + durationTotal); } static size_t getOffsetFromOffsetTable(NumericTablePtr offsetTable) { @@ -497,11 +497,11 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( logger::println(logger::INFO, "ALS (native): Input info:"); logger::println(logger::INFO, "- NumberOfRows: %d", - dataTable->getNumberOfRows()); + dataTable->getNumberOfRows()); logger::println(logger::INFO, "- NumberOfColumns: %d", - dataTable->getNumberOfColumns()); + dataTable->getNumberOfColumns()); logger::println(logger::INFO, "- NumberOfRatings: %d", - dataTable->getDataSize()); + dataTable->getDataSize()); logger::println(logger::INFO, "- fullNUsers: %d", nUsers); logger::println(logger::INFO, "- nFactors: %d", nFactors); @@ -510,8 +510,8 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); logger::println(logger::INFO, - "oneDAL (native): Number of CPU threads used: %d", - nThreadsNew); + "oneDAL (native): Number of CPU threads used: %d", + nThreadsNew); int nBlocks = executor_num; initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors); @@ -530,9 +530,9 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); logger::println(logger::INFO, "User Offset: %d", - getOffsetFromOffsetTable(userOffset)); + getOffsetFromOffsetTable(userOffset)); logger::println(logger::INFO, "Item Offset: %d", - getOffsetFromOffsetTable(itemOffset)); + getOffsetFromOffsetTable(itemOffset)); logger::println(logger::INFO, ""); // 
Get the class of the input object diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp b/mllib-dal/src/main/native/ALSShuffle.cpp index 4a3ce5e56..b07d27076 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -118,7 +118,7 @@ Rating *shuffle_all2all(ccl::communicator &comm, newCsrRowNum = distinct_count(recvData); logger::println(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d", - newRatingsNum, newCsrRowNum); + newRatingsNum, newCsrRowNum); return recvData.data(); } diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index f099b10b6..b21f98030 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -58,8 +58,8 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1).count(); logger::println(logger::INFO, - "Correleation (native): local step took %d secs", - duration / 1000); + "Correleation (native): local step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -86,8 +86,8 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); logger::println(logger::INFO, - "Correleation (native): ccl_allgatherv took %d secs", - duration / 1000); + "Correleation (native): ccl_allgatherv took %d secs", + duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -126,8 +126,8 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, std::chrono::duration_cast(t2 - t1) .count(); logger::println(logger::INFO, - "Correleation (native): master step took %d secs", - duration / 1000); + "Correleation (native): master step took %d secs", + duration / 1000); /* Print the results */ 
printNumericTable(result->get(covariance_cpu::correlation), @@ -200,8 +200,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { logger::println(logger::INFO, - "oneDAL (native): use DPC++ kernels; device %s", - ComputeDeviceString[computeDeviceOrdinal].c_str()); + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); @@ -216,8 +216,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); logger::println(logger::INFO, - "oneDAL (native): Number of CPU threads used %d", - nThreadsNew); + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); doCorrelationDaalCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 4b2da372a..b4de714f8 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -222,7 +222,7 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, jboolean jfitIntercept, jint executorNum, jobject resultObj) { logger::println(logger::INFO, - "oneDAL (native): GPU compute start , rankid %d", rankId); + "oneDAL (native): GPU compute start , rankid %d", rankId); const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index 37e372fe8..9d86cbf69 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -242,24 +242,28 @@ void printHomegenTable(const oneapi::dal::table &table) { if (table.get_row_count() <= 10) { for (std::int64_t i = 0; i < table.get_row_count(); 
i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { - logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); + logger::print(logger::INFO, "%10f", + x[i * table.get_column_count() + j]); } - logger::println(""); + logger::println(""); } } else { for (std::int64_t i = 0; i < 5; i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { - logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); + logger::print(logger::INFO, "%10f", + x[i * table.get_column_count() + j]); } - logger::println(""); + logger::println(""); } - logger::println(logger::INFO, "...%ld lines skipped...", (table.get_row_count() - 10)); + logger::println(logger::INFO, "...%ld lines skipped...", + (table.get_row_count() - 10)); for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) { for (std::int64_t j = 0; j < table.get_column_count(); j++) { - logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); + logger::print(logger::INFO, "%10f", + x[i * table.get_column_count() + j]); } - logger::println(""); + logger::println(""); } } return 0; From 540680d6c4ca55349abc14cc78b132c2bb5cb81d Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 20 Jul 2023 19:11:22 -0700 Subject: [PATCH 26/48] Change pringf --- .../native/oneapi/dal/HomogenTableImpl.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp index acfd5acb8..8b73d4586 100644 --- a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp @@ -71,7 +71,7 @@ static data_layout getDataLayout(jint cLayout) { template inline jlong MergeHomogenTable(homogen_table &targetTable, homogen_table &sourceTable, const jint computeDeviceOrdinal) { - printf("oneDal merge HomogenTable \n"); + logger::println(logger::INFO, "oneDal merge 
HomogenTable"); const T *targetData = targetTable.get_data(); const int targetDatasize = targetTable.get_column_count() * targetTable.get_row_count(); @@ -126,7 +126,7 @@ template JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_iInit( JNIEnv *env, jobject, jlong cRowCount, jlong cColCount, jintArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable int init \n"); + logger::println(logger::INFO, "HomogenTable int init"); jint *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { logger::println(logger::ERROR, "Error: unable to obtain critical array"); @@ -170,7 +170,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_iInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_fInit( JNIEnv *env, jobject, jlong cRowCount, jlong cColCount, jfloatArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable float init \n"); + logger::println(logger::INFO, "HomogenTable float init"); jfloat *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { logger::println(logger::ERROR, "Error: unable to obtain critical array"); @@ -213,7 +213,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_fInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_dInit( JNIEnv *env, jobject, jlong cRowCount, jlong cColCount, jdoubleArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable double init \n"); + logger::println(logger::INFO, "HomogenTable double init"); jdouble *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { logger::println(logger::ERROR, "Error: unable to obtain critical array"); @@ -257,7 +257,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_dInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_lInit( JNIEnv *env, jobject, jlong cRowCount, 
jlong cColCount, jlongArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable long init \n"); + logger::println(logger::INFO, "HomogenTable long init"); jlong *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { logger::println(logger::ERROR, "Error: unable to obtain critical array"); @@ -301,7 +301,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_lInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetColumnCount( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getcolumncount %ld \n", cTableAddr); + logger::println(logger::INFO, "HomogenTable getcolumncount %ld", cTableAddr); homogen_table htable = *reinterpret_cast(cTableAddr); return htable.get_column_count(); } @@ -314,7 +314,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetColumnCount( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetRowCount( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getrowcount \n"); + logger::println(logger::INFO, "HomogenTable getrowcount"); homogen_table htable = *reinterpret_cast(cTableAddr); return htable.get_row_count(); } @@ -327,7 +327,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetRowCount( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetKind(JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getkind \n"); + logger::println(logger::INFO, "HomogenTable getkind"); homogen_table htable = *reinterpret_cast(cTableAddr); return htable.get_kind(); } @@ -340,7 +340,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetKind(JNIEnv *env, jobject, JNIEXPORT jint JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDataLayout( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getDataLayout \n"); + logger::println(logger::INFO, "HomogenTable getDataLayout"); homogen_table htable = *reinterpret_cast(cTableAddr); return 
(jint)htable.get_data_layout(); } @@ -353,7 +353,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDataLayout( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetMetaData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getMetaData \n"); + logger::println(logger::INFO, "HomogenTable getMetaData"); homogen_table htable = *reinterpret_cast(cTableAddr); const table_metadata *mdata = reinterpret_cast(&htable.get_metadata()); TableMetadataPtr metaPtr = std::make_shared(*mdata); @@ -370,7 +370,7 @@ JNIEXPORT jintArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetIntData(JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getIntData \n"); + logger::println(logger::INFO, "HomogenTable getIntData"); homogen_table htable = *reinterpret_cast(cTableAddr); const int *data = htable.get_data(); const int datasize = htable.get_column_count() * htable.get_row_count(); @@ -387,7 +387,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetIntData(JNIEnv *env, JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetFloatData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getFloatData \n"); + logger::println(logger::INFO, "HomogenTable getFloatData"); homogen_table htable = *reinterpret_cast(cTableAddr); const float *data = htable.get_data(); const int datasize = htable.get_column_count() * htable.get_row_count(); @@ -405,7 +405,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetFloatData( JNIEXPORT jlongArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetLongData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getLongData \n"); + logger::println(logger::INFO, "HomogenTable getLongData"); homogen_table htable = *reinterpret_cast(cTableAddr); const long *data = htable.get_data(); const int datasize = htable.get_column_count() * htable.get_row_count(); @@ -423,7 +423,7 @@ 
Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetLongData( JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDoubleData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getDoubleData \n"); + logger::println(logger::INFO, "HomogenTable getDoubleData"); homogen_table htable = *reinterpret_cast(cTableAddr); const double *data = htable.get_data(); const int datasize = htable.get_column_count() * htable.get_row_count(); @@ -439,7 +439,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDoubleData( */ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cEmptyTableInit (JNIEnv *env, jobject) { - printf(" init empty HomogenTable \n"); + logger::println(logger::INFO, " init empty HomogenTable"); HomogenTablePtr tablePtr = std::make_shared(); saveHomogenTablePtrToVector(tablePtr); return (jlong)tablePtr.get(); @@ -451,7 +451,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cEmptyT */ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHomogenTable (JNIEnv *env, jobject, jlong targetTablePtr, jlong sourceTablePtr, jint cComputeDevice){ - printf("oneDal addHomogenTable \n"); + logger::println(logger::INFO, "oneDal addHomogenTable"); homogen_table targetTable = *reinterpret_cast(targetTablePtr); homogen_table sourceTable = *reinterpret_cast(sourceTablePtr); const auto targetMetaData = targetTable.get_metadata(); From da72f392bc600a23550ad8f53cdd28702a6da5d1 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Wed, 9 Aug 2023 18:04:40 -0700 Subject: [PATCH 27/48] Remove spaces --- mllib-dal/src/main/native/Logger.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 000e9e9bf..eab798aa9 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -11,19 +11,19 @@ std::tuple get_prefix(MessageType message_type) { 
case NONE: break; case INFO: - prefix = "[INFO ]"; + prefix = "[INFO]"; break; case WARN: prefix = "[WARNING]"; break; case ERROR: - prefix = "[ERROR ]"; + prefix = "[ERROR]"; break; case DEBUG: - prefix = "[DEBUG ]"; + prefix = "[DEBUG]"; break; case ASSERT: - prefix = "[ASSERT ]"; + prefix = "[ASSERT]"; break; default: break; From 9c5fa2ba2785456135489fd3d6330dd9b157cef2 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Wed, 9 Aug 2023 18:24:02 -0700 Subject: [PATCH 28/48] Make all error print in stderr --- mllib-dal/src/main/native/CorrelationImpl.cpp | 3 +-- .../src/main/native/DecisionForestClassifierImpl.cpp | 5 +---- .../src/main/native/DecisionForestRegressorImpl.cpp | 5 +---- mllib-dal/src/main/native/GPU.cpp | 10 +++++----- mllib-dal/src/main/native/KMeansImpl.cpp | 3 +-- mllib-dal/src/main/native/OneDAL.cpp | 2 +- mllib-dal/src/main/native/PCAImpl.cpp | 3 +-- mllib-dal/src/main/native/SummarizerImpl.cpp | 3 +-- mllib-dal/src/main/native/error_handling.cpp | 12 ++++++------ 9 files changed, 18 insertions(+), 28 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index b21f98030..7cf285470 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -247,8 +247,7 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( } #endif default: { - logger::println(logger::ERROR, "no supported device!"); - exit(-1); + deviceError(); } } return 0; diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index cb9a2764e..07c97440f 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -340,10 +340,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif return hashmapObj; } default: { - logger::println( - logger::ERROR, - "RandomForest (native): The 
compute device is not supported!"); - exit(-1); + deviceError(); } } return nullptr; diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index a18f54ad2..351f2eede 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -328,10 +328,7 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra return hashmapObj; } default: { - logger::println( - logger::ERROR, - "RandomForest (native): The compute device is not supported!"); - exit(-1); + deviceError(); } } return nullptr; diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 6959ea0be..4d60f9d78 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -18,7 +18,7 @@ static std::vector get_gpus() { return devices; } } - logger::println(logger::ERROR, "No GPUs!"); + logger::printerrln(logger::ERROR, "No GPUs!"); exit(-1); return {}; @@ -71,7 +71,7 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - logger::println( + logger::printerrln( logger::ERROR, "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); @@ -96,7 +96,7 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, } default: { - logger::println(logger::ERROR, "No Device!"); + logger::printerrln(logger::ERROR, "No Device!"); exit(-1); } } @@ -108,7 +108,7 @@ sycl::queue getQueue(const ComputeDevice device) { switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - logger::println( + logger::printerrln( logger::ERROR, "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); @@ -120,7 +120,7 @@ sycl::queue getQueue(const ComputeDevice device) { return getSyclQueue(device_gpu); } default: { - logger::println(logger::ERROR, 
"No Device!"); + logger::printerrln(logger::ERROR, "No Device!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index 790e58a32..d356fd73e 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -363,8 +363,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe } #endif default: { - logger::println(logger::ERROR, "no supported device!"); - exit(-1); + deviceError(); } } return ret; diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index ee39509f0..af8560f5e 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -59,7 +59,7 @@ JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetDoubleBatch( ((SerializationIfacePtr *)numTableAddr)->get()); jdouble *values = (jdouble *)env->GetPrimitiveArrayCritical(batch, 0); if (values == NULL) { - logger::println(logger::ERROR, + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index 8f3337dcc..db23e1ff4 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -300,8 +300,7 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( } #endif default: { - logger::println(logger::ERROR, "no supported device!"); - exit(-1); + deviceError(); } } return 0; diff --git a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 794c7e473..9aeebe2db 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -317,8 +317,7 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( } #endif default: { - logger::println(logger::ERROR, "no supported device!"); - exit(-1); + deviceError(); } } return 0; diff --git 
a/mllib-dal/src/main/native/error_handling.cpp b/mllib-dal/src/main/native/error_handling.cpp index 9e12e1c21..111a18980 100644 --- a/mllib-dal/src/main/native/error_handling.cpp +++ b/mllib-dal/src/main/native/error_handling.cpp @@ -26,35 +26,35 @@ void checkAllocation(void *ptr) { if (!ptr) { - logger::println(logger::ERROR, "Error: Memory allocation failed"); + logger::printerrln(logger::ERROR, "Error: Memory allocation failed"); exit(-1); } } void checkPtr(void *ptr) { if (!ptr) { - logger::println(logger::ERROR, "Error: NULL pointer"); + logger::printerrln(logger::ERROR, "Error: NULL pointer"); exit(-2); } } void fileOpenError(const char *filename) { - logger::println(logger::ERROR, "Unable to open file '%s'", filename); + logger::printerrln(logger::ERROR, "Unable to open file '%s'", filename); exit(fileError); } void fileReadError() { - logger::println(logger::ERROR, "Unable to read next line"); + logger::printerrln(logger::ERROR, "Unable to read next line"); exit(fileError); } void sparceFileReadError() { - logger::println(logger::ERROR, "Incorrect format of file"); + logger::printerrln(logger::ERROR, "Incorrect format of file"); exit(fileError); } void deviceError() { - logger::println(logger::ERROR, + logger::printerrln(logger::ERROR, "Error: no supported device, please select HOST/CPU/GPU"); exit(-1); } From f3c909d96016db095a5d007edefb5cd4315f1d5e Mon Sep 17 00:00:00 2001 From: kunpeng Date: Wed, 9 Aug 2023 19:28:50 -0700 Subject: [PATCH 29/48] Add switch --- mllib-dal/src/main/native/Logger.cpp | 6 ++++-- mllib-dal/src/main/native/Logger.h | 2 ++ mllib-dal/src/main/native/OneDAL.cpp | 6 ++++++ .../src/main/native/javah/com_intel_oap_mllib_OneDAL__.h | 8 ++++++++ mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala | 3 +++ mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala | 5 +++++ 6 files changed, 28 insertions(+), 2 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 
eab798aa9..5d0fcf431 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -4,9 +4,11 @@ #include "Logger.h" namespace logger { + +bool isLoggerEnabled = true; + std::tuple get_prefix(MessageType message_type) { std::string prefix; - bool enable{true}; switch (message_type) { case NONE: break; @@ -28,7 +30,7 @@ std::tuple get_prefix(MessageType message_type) { default: break; } - return {prefix + " ", enable}; + return {prefix + " ", isLoggerEnabled}; } int print2streamFromArgs(MessageType message_type, FILE *stream, diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index 3d77a2f4d..ef79dfe9e 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -14,6 +14,8 @@ enum MessageType { ASSERT = 5 }; +extern bool isLoggerEnabled; + int print(MessageType message_type, const std::string &msg); int print(MessageType message_type, const char *format, ...); int println(MessageType message_type, const char *format, ...); diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index af8560f5e..911184db7 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -157,3 +157,9 @@ Java_com_intel_oap_mllib_OneDAL_00024_cNewCSRNumericTableDouble( return (jlong)ret; } + + +JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetCppLoggerConf + (JNIEnv *env, jobject, jboolean isEnabled) { + logger::isLoggerEnabled = isEnabled; +} diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h index bf1a4388d..293c2286b 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h @@ -63,6 +63,14 @@ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cNewCSRNumericTabl JNIEXPORT jlong JNICALL 
Java_com_intel_oap_mllib_OneDAL_00024_cNewCSRNumericTableDouble (JNIEnv *, jobject, jdoubleArray, jlongArray, jlongArray, jlong, jlong); +/* + * Class: com_intel_oap_mllib_OneDAL__ + * Method: cSetCppLoggerConf + * Signature: (Z)V + */ +JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetCppLoggerConf + (JNIEnv *, jobject, jboolean); + #ifdef __cplusplus } #endif diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala index be4b8aa00..577b49c7c 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala @@ -49,6 +49,7 @@ import scala.concurrent.{Await, Future} object OneDAL { LibLoader.loadLibraries() + cSetCppLoggerConf(Utils.isCPPLoggerEnabled()) private val logger = Logger.getLogger("util.OneDAL") private val logLevel = Level.INFO @@ -752,4 +753,6 @@ object OneDAL { @native def cNewCSRNumericTableDouble(data: Array[Double], colIndices: Array[Long], rowOffsets: Array[Long], nFeatures: Long, nVectors: Long): Long + + @native def cSetCppLoggerConf(enable: Boolean) } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala index e471eba97..8df02e1b9 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala @@ -143,6 +143,11 @@ object Utils { sc.getConf.getBoolean("spark.oap.mllib.performance.recording", false) } + def isCPPLoggerEnabled(): Boolean = { + val sc = SparkSession.active.sparkContext + sc.getConf.getBoolean("spark.oap.mllib.logger.cpp.enabled", false) + } + def getOneCCLIPPort(data: RDD[_]): String = { val executorIPAddress = Utils.sparkFirstExecutorIP(data.sparkContext) val kvsIP = data.sparkContext.getConf.get("spark.oap.mllib.oneccl.kvs.ip", From 14bc53ddb006cf22a1ef82c0e89efe72ec2d8151 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 
10:18:16 +0000 Subject: [PATCH 30/48] Code style --- mllib-dal/src/main/native/CorrelationImpl.cpp | 2 +- mllib-dal/src/main/native/OneDAL.cpp | 7 +++---- mllib-dal/src/main/native/error_handling.cpp | 5 +++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index 7cf285470..d3f95bb0e 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -247,7 +247,7 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( } #endif default: { - deviceError(); + deviceError(); } } return 0; diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index 911184db7..51768b14f 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -60,7 +60,7 @@ JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetDoubleBatch( jdouble *values = (jdouble *)env->GetPrimitiveArrayCritical(batch, 0); if (values == NULL) { logger::printerrln(logger::ERROR, - "Error: unable to obtain critical array"); + "Error: unable to obtain critical array"); exit(-1); } std::memcpy((*nt)[curRows], values, numRows * numCols * sizeof(double)); @@ -158,8 +158,7 @@ Java_com_intel_oap_mllib_OneDAL_00024_cNewCSRNumericTableDouble( return (jlong)ret; } - -JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetCppLoggerConf - (JNIEnv *env, jobject, jboolean isEnabled) { +JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetCppLoggerConf( + JNIEnv *env, jobject, jboolean isEnabled) { logger::isLoggerEnabled = isEnabled; } diff --git a/mllib-dal/src/main/native/error_handling.cpp b/mllib-dal/src/main/native/error_handling.cpp index 111a18980..987903b04 100644 --- a/mllib-dal/src/main/native/error_handling.cpp +++ b/mllib-dal/src/main/native/error_handling.cpp @@ -54,7 +54,8 @@ void sparceFileReadError() { } void deviceError() { - 
logger::printerrln(logger::ERROR, - "Error: no supported device, please select HOST/CPU/GPU"); + logger::printerrln( + logger::ERROR, + "Error: no supported device, please select HOST/CPU/GPU"); exit(-1); } From 5dca00b5fa5f5b77ca10b2fb9113bf00b6d4db70 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 00:22:03 -0700 Subject: [PATCH 31/48] Recover --- .../src/main/native/javah/com_intel_oap_mllib_OneDAL__.h | 8 -------- mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala | 3 --- mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala | 5 ----- 3 files changed, 16 deletions(-) diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h index 293c2286b..bf1a4388d 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneDAL__.h @@ -63,14 +63,6 @@ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cNewCSRNumericTabl JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cNewCSRNumericTableDouble (JNIEnv *, jobject, jdoubleArray, jlongArray, jlongArray, jlong, jlong); -/* - * Class: com_intel_oap_mllib_OneDAL__ - * Method: cSetCppLoggerConf - * Signature: (Z)V - */ -JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetCppLoggerConf - (JNIEnv *, jobject, jboolean); - #ifdef __cplusplus } #endif diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala index 577b49c7c..be4b8aa00 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneDAL.scala @@ -49,7 +49,6 @@ import scala.concurrent.{Await, Future} object OneDAL { LibLoader.loadLibraries() - cSetCppLoggerConf(Utils.isCPPLoggerEnabled()) private val logger = Logger.getLogger("util.OneDAL") private val logLevel = Level.INFO @@ -753,6 +752,4 @@ object OneDAL { @native def 
cNewCSRNumericTableDouble(data: Array[Double], colIndices: Array[Long], rowOffsets: Array[Long], nFeatures: Long, nVectors: Long): Long - - @native def cSetCppLoggerConf(enable: Boolean) } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala index 8df02e1b9..e471eba97 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/Utils.scala @@ -143,11 +143,6 @@ object Utils { sc.getConf.getBoolean("spark.oap.mllib.performance.recording", false) } - def isCPPLoggerEnabled(): Boolean = { - val sc = SparkSession.active.sparkContext - sc.getConf.getBoolean("spark.oap.mllib.logger.cpp.enabled", false) - } - def getOneCCLIPPort(data: RDD[_]): String = { val executorIPAddress = Utils.sparkFirstExecutorIP(data.sparkContext) val kvsIP = data.sparkContext.getConf.get("spark.oap.mllib.oneccl.kvs.ip", From 96c05e9b386efe1145da3a5da30bf9b2c925968a Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 00:36:33 -0700 Subject: [PATCH 32/48] Add environment control --- mllib-dal/src/main/native/Logger.cpp | 13 ++++++++++++- mllib-dal/src/main/native/Logger.h | 3 +-- mllib-dal/src/main/native/OneDAL.cpp | 9 +-------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 5d0fcf431..154464427 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -1,14 +1,24 @@ #include +#include #include #include "Logger.h" namespace logger { -bool isLoggerEnabled = true; std::tuple get_prefix(MessageType message_type) { std::string prefix; + bool isLoggerEnabled = false; + if(const char* env_p = std::getenv("OAP_MLLIB_LOGGER_CPP_ENABLED")) { + if(std::strncmp(env_p, "0", 1) == 0) { + isLoggerEnabled = false; + } else if(std::strncmp(env_p, "1", 1) == 0) { + isLoggerEnabled = true; + } else { + isLoggerEnabled = false; + } + } 
switch (message_type) { case NONE: break; @@ -30,6 +40,7 @@ std::tuple get_prefix(MessageType message_type) { default: break; } + return {prefix + " ", isLoggerEnabled}; } diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index ef79dfe9e..7f1d64708 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace logger { @@ -14,8 +15,6 @@ enum MessageType { ASSERT = 5 }; -extern bool isLoggerEnabled; - int print(MessageType message_type, const std::string &msg); int print(MessageType message_type, const char *format, ...); int println(MessageType message_type, const char *format, ...); diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index 51768b14f..ff2323031 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -17,7 +17,6 @@ #include #include -#include "Logger.h" #include "com_intel_oap_mllib_OneDAL__.h" #include "service.h" @@ -59,8 +58,7 @@ JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetDoubleBatch( ((SerializationIfacePtr *)numTableAddr)->get()); jdouble *values = (jdouble *)env->GetPrimitiveArrayCritical(batch, 0); if (values == NULL) { - logger::printerrln(logger::ERROR, - "Error: unable to obtain critical array"); + std::cout << "Error: unable to obtain critical array" << std::endl; exit(-1); } std::memcpy((*nt)[curRows], values, numRows * numCols * sizeof(double)); @@ -157,8 +155,3 @@ Java_com_intel_oap_mllib_OneDAL_00024_cNewCSRNumericTableDouble( return (jlong)ret; } - -JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cSetCppLoggerConf( - JNIEnv *env, jobject, jboolean isEnabled) { - logger::isLoggerEnabled = isEnabled; -} From 1684abec31c30f0091a4f01ce8c597a40b68cadb Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 15:24:08 +0000 Subject: [PATCH 33/48] Format --- mllib-dal/src/main/native/Logger.cpp | 11 
+++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 154464427..f24651863 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -1,23 +1,22 @@ -#include #include +#include #include #include "Logger.h" namespace logger { - std::tuple get_prefix(MessageType message_type) { std::string prefix; bool isLoggerEnabled = false; - if(const char* env_p = std::getenv("OAP_MLLIB_LOGGER_CPP_ENABLED")) { - if(std::strncmp(env_p, "0", 1) == 0) { + if (const char *env_p = std::getenv("OAP_MLLIB_LOGGER_CPP_ENABLED")) { + if (std::strncmp(env_p, "0", 1) == 0) { isLoggerEnabled = false; - } else if(std::strncmp(env_p, "1", 1) == 0) { + } else if (std::strncmp(env_p, "1", 1) == 0) { isLoggerEnabled = true; } else { isLoggerEnabled = false; - } + } } switch (message_type) { case NONE: From 73ce1a567d2231971efa0fe9d11bababd188157c Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 01:02:54 -0700 Subject: [PATCH 34/48] Clean up --- examples/scala/linear-regression-scala/run-gpu.sh | 3 ++- mllib-dal/pom.xml | 2 +- .../src/main/native/oneapi/dal/HomogenTableImpl.cpp | 12 ++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/scala/linear-regression-scala/run-gpu.sh b/examples/scala/linear-regression-scala/run-gpu.sh index bb8896c4a..35faf9067 100755 --- a/examples/scala/linear-regression-scala/run-gpu.sh +++ b/examples/scala/linear-regression-scala/run-gpu.sh @@ -16,8 +16,9 @@ WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 +export OAP_MLLIB_LOGGER_CPP_ENABLED=1 # Should run in standalone mode -time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ + time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --num-executors $SPARK_NUM_EXECUTORS \ --executor-cores $SPARK_EXECUTOR_CORES \ --total-executor-cores $SPARK_TOTAL_CORES \ diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml index 
24d7e460a..32224e3ef 100644 --- a/mllib-dal/pom.xml +++ b/mllib-dal/pom.xml @@ -198,7 +198,7 @@ com.fasterxml.jackson.core jackson-databind - 2.12.7.1 + 2.10.5 org.apache.commons diff --git a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp index 8b73d4586..06f673125 100644 --- a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp @@ -129,7 +129,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_iInit( logger::println(logger::INFO, "HomogenTable int init"); jint *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - logger::println(logger::ERROR, "Error: unable to obtain critical array"); + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -173,7 +173,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_fInit( logger::println(logger::INFO, "HomogenTable float init"); jfloat *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - logger::println(logger::ERROR, "Error: unable to obtain critical array"); + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -216,7 +216,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_dInit( logger::println(logger::INFO, "HomogenTable double init"); jdouble *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - logger::println(logger::ERROR, "Error: unable to obtain critical array"); + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -260,7 +260,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_lInit( logger::println(logger::INFO, "HomogenTable long init"); 
jlong *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - logger::println(logger::ERROR, "Error: unable to obtain critical array"); + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -460,7 +460,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHom const auto targetDataType = targetMetaData.get_data_type(0); const auto sourceDataType = sourceMetaData.get_data_type(0); if( targetDataType != sourceDataType ) { - logger::println(logger::ERROR, "different data type"); + logger::printerrln(logger::ERROR, "different data type"); exit(-1); } else { switch(targetDataType){ @@ -477,7 +477,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHom return MergeHomogenTable(targetTable, sourceTable, cComputeDevice); } default: { - logger::println(logger::ERROR, "no base type"); + logger::printerrln(logger::ERROR, "no base type"); exit(-1); } } From ea983b981f9d15d4f284724ab9e8282330407a5e Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 01:43:50 -0700 Subject: [PATCH 35/48] Fix other printf, fix new line bug --- mllib-dal/src/main/native/Logger.cpp | 37 +++++++++++++++---- .../native/oneapi/dal/ColumnAccessorImpl.cpp | 7 ++-- .../native/oneapi/dal/RowAccessorImpl.cpp | 7 ++-- .../native/oneapi/dal/SimpleMetadataImpl.cpp | 8 ++-- mllib-dal/src/main/native/service.cpp | 6 +-- 5 files changed, 44 insertions(+), 21 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index f24651863..a221bca8a 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -57,6 +57,21 @@ int print2streamFromArgs(MessageType message_type, FILE *stream, return ret; } +int print2streamFromArgsln(MessageType message_type, FILE *stream, + const char *format, va_list args) { + // print prefix + auto [prefix, enable] = 
get_prefix(message_type); + if (!enable) + return 0; + fprintf(stream, "%s", prefix.c_str()); + + // print message + int ret = vfprintf(stream, format, args); + fprintf(stream, "\n"); + + return ret; +} + int print2stream(MessageType message_type, FILE *stream, const char *format, ...) { va_list args; @@ -67,6 +82,16 @@ int print2stream(MessageType message_type, FILE *stream, const char *format, return ret; } +int print2streamln(MessageType message_type, FILE *stream, const char *format, + ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgsln(message_type, stream, format, args); + va_end(args); + + return ret; +} + int print(MessageType message_type, const std::string &msg) { int ret = print2stream(message_type, stdout, msg.c_str()); return ret; @@ -81,17 +106,15 @@ int print(MessageType message_type, const char *format, ...) { } int println(MessageType message_type, const std::string &msg) { - int ret = print2stream(message_type, stdout, msg.c_str()); - fprintf(stdout, "\n"); + int ret = print2streamln(message_type, stdout, msg.c_str()); return ret; } int println(MessageType message_type, const char *format, ...) { va_list args; va_start(args, format); - int ret = print2streamFromArgs(message_type, stdout, format, args); + int ret = print2streamFromArgsln(message_type, stdout, format, args); va_end(args); - fprintf(stdout, "\n"); return ret; } @@ -109,17 +132,15 @@ int printerr(MessageType message_type, const char *format, ...) { } int printerrln(MessageType message_type, const std::string &msg) { - int ret = print2stream(message_type, stderr, msg.c_str()); - fprintf(stderr, "\n"); + int ret = print2streamln(message_type, stderr, msg.c_str()); return ret; } int printerrln(MessageType message_type, const char *format, ...) 
{ va_list args; va_start(args, format); - int ret = print2streamFromArgs(message_type, stderr, format, args); + int ret = print2streamFromArgsln(message_type, stderr, format, args); va_end(args); - fprintf(stderr, "\n"); return ret; } diff --git a/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp index 8f67b3256..fb64251b7 100644 --- a/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp @@ -23,6 +23,7 @@ #include #ifdef CPU_GPU_PROFILE +#include "Logger.h" #include "Common.hpp" #include "com_intel_oneapi_dal_table_ColumnAccessor.h" @@ -40,7 +41,7 @@ using namespace oneapi::dal; JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPullDouble (JNIEnv *env, jobject, jlong cTableAddr, jlong cColumnIndex, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal) { - printf("ColumnAccessor PullDouble \n"); + logger::println(logger::INFO, "ColumnAccessor PullDouble"); homogen_table htable = *reinterpret_cast(cTableAddr); column_accessor acc{ htable }; oneapi::dal::array col_values; @@ -74,7 +75,7 @@ JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cP JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPullFloat (JNIEnv *env, jobject, jlong cTableAddr, jlong cColumnIndex, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal) { - printf("ColumnAccessor PullFloat \n"); + logger::println(logger::INFO, "ColumnAccessor PullFloat"); homogen_table htable = *reinterpret_cast(cTableAddr); column_accessor acc{ htable }; oneapi::dal::array col_values; @@ -108,7 +109,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPu JNIEXPORT jintArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPullInt (JNIEnv *env, jobject, jlong cTableAddr, jlong cColumnIndex, jlong cRowStartIndex, jlong cRowEndIndex, 
jint computeDeviceOrdinal) { - printf("ColumnAccessor PullInt \n"); + logger::println(logger::INFO, "ColumnAccessor PullInt"); homogen_table htable = *reinterpret_cast(cTableAddr); column_accessor acc { htable }; oneapi::dal::array col_values; diff --git a/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp index a79406b65..2e91c99c1 100644 --- a/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp @@ -23,6 +23,7 @@ #include #ifdef CPU_GPU_PROFILE +#include "Logger.h" #include "Common.hpp" #include "com_intel_oneapi_dal_table_RowAccessor.h" @@ -40,7 +41,7 @@ using namespace oneapi::dal; JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullDouble (JNIEnv *env, jobject, jlong cTableAddr, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal){ - printf("RowAccessor PullDouble \n"); + logger::println(logger::INFO, "RowAccessor PullDouble"); homogen_table htable = *reinterpret_cast(cTableAddr); row_accessor acc {htable}; jdoubleArray newDoubleArray = nullptr; @@ -74,7 +75,7 @@ JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPull JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullFloat (JNIEnv *env, jobject, jlong cTableAddr, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal){ - printf("RowAccessor PullFloat \n"); + logger::println(logger::INFO, "RowAccessor PullFloat"); homogen_table htable = *reinterpret_cast(cTableAddr); row_accessor acc { htable }; jfloatArray newFloatArray = nullptr; @@ -108,7 +109,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullF JNIEXPORT jintArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullInt (JNIEnv *env, jobject, jlong cTableAddr, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal){ - printf("RowAccessor PullInt \n"); + 
logger::println(logger::INFO, "RowAccessor PullInt"); homogen_table htable = *reinterpret_cast(cTableAddr); row_accessor acc { htable }; jintArray newIntArray = nullptr; diff --git a/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp index 2fc370458..a2aa49941 100644 --- a/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp @@ -22,7 +22,7 @@ #include #ifdef CPU_GPU_PROFILE - +#include "Logger.h" #include "com_intel_oneapi_dal_table_SimpleMetadataImpl.h" #include "oneapi/dal/table/homogen.hpp" @@ -37,7 +37,7 @@ using namespace oneapi::dal; JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureCount( JNIEnv *env, jobject, jlong cTableAddr) { - printf("SimpleMetadata getfeaturecount \n"); + logger::println(logger::INFO, "SimpleMetadata getfeaturecount"); table_metadata mdata = *reinterpret_cast(cTableAddr); return (jlong)mdata.get_feature_count(); @@ -51,7 +51,7 @@ Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureCount( JNIEXPORT jint JNICALL Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureType( JNIEnv *env, jobject, jlong cTableAddr, jint cindex) { - printf("SimpleMetadata getfeaturetype \n"); + logger::println(logger::INFO, "SimpleMetadata getfeaturetype"); table_metadata mdata = *reinterpret_cast(cTableAddr); return (jint)mdata.get_feature_type(cindex); } @@ -64,7 +64,7 @@ Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureType( JNIEXPORT jint JNICALL Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetDataType( JNIEnv *env, jobject, jlong cTableAddr, jint cindex) { - printf("SimpleMetadata getdatatype \n"); + logger::println(logger::INFO, "SimpleMetadata getdatatype"); table_metadata mdata = *reinterpret_cast(cTableAddr); return (jint)mdata.get_data_type(cindex); diff --git a/mllib-dal/src/main/native/service.cpp 
b/mllib-dal/src/main/native/service.cpp index 9d86cbf69..b41a52b05 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -245,7 +245,7 @@ void printHomegenTable(const oneapi::dal::table &table) { logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); } - logger::println(""); + logger::println(logger::NONE, ""); } } else { for (std::int64_t i = 0; i < 5; i++) { @@ -253,7 +253,7 @@ void printHomegenTable(const oneapi::dal::table &table) { logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); } - logger::println(""); + logger::println(logger::NONE, ""); } logger::println(logger::INFO, "...%ld lines skipped...", (table.get_row_count() - 10)); @@ -263,7 +263,7 @@ void printHomegenTable(const oneapi::dal::table &table) { logger::print(logger::INFO, "%10f", x[i * table.get_column_count() + j]); } - logger::println(""); + logger::println(logger::NONE, ""); } } return 0; From 8bac817a2bf225a58941512c15d0efc77cf67ede Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 16:20:32 +0000 Subject: [PATCH 36/48] Format --- mllib-dal/src/main/native/Logger.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index a221bca8a..b0f90e80f 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -58,7 +58,7 @@ int print2streamFromArgs(MessageType message_type, FILE *stream, } int print2streamFromArgsln(MessageType message_type, FILE *stream, - const char *format, va_list args) { + const char *format, va_list args) { // print prefix auto [prefix, enable] = get_prefix(message_type); if (!enable) @@ -83,7 +83,7 @@ int print2stream(MessageType message_type, FILE *stream, const char *format, } int print2streamln(MessageType message_type, FILE *stream, const char *format, - ...) { + ...) 
{ va_list args; va_start(args, format); int ret = print2streamFromArgsln(message_type, stream, format, args); From e90087483e8c2a7282e9d7f2b14665c02f78ba06 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 01:57:19 -0700 Subject: [PATCH 37/48] Recover pom --- mllib-dal/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml index 32224e3ef..24d7e460a 100644 --- a/mllib-dal/pom.xml +++ b/mllib-dal/pom.xml @@ -198,7 +198,7 @@ com.fasterxml.jackson.core jackson-databind - 2.10.5 + 2.12.7.1 org.apache.commons From c215128ed7b472e6ba0c0eb44c880752c0e5a77a Mon Sep 17 00:00:00 2001 From: Xiaochang Wu Date: Fri, 11 Aug 2023 10:30:01 +0800 Subject: [PATCH 38/48] Update run-gpu.sh --- examples/scala/linear-regression-scala/run-gpu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/scala/linear-regression-scala/run-gpu.sh b/examples/scala/linear-regression-scala/run-gpu.sh index 35faf9067..6d13c71c8 100755 --- a/examples/scala/linear-regression-scala/run-gpu.sh +++ b/examples/scala/linear-regression-scala/run-gpu.sh @@ -18,7 +18,7 @@ TASK_GPU_AMOUNT=1 export OAP_MLLIB_LOGGER_CPP_ENABLED=1 # Should run in standalone mode - time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --num-executors $SPARK_NUM_EXECUTORS \ --executor-cores $SPARK_EXECUTOR_CORES \ --total-executor-cores $SPARK_TOTAL_CORES \ From af87e65789f937552ff9c0bfdb8bf3d990f9046c Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 10 Aug 2023 23:11:43 -0700 Subject: [PATCH 39/48] Add level --- mllib-dal/src/main/native/Logger.cpp | 26 ++++++++++++++++++-------- mllib-dal/src/main/native/Logger.h | 12 ++++++------ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index b0f90e80f..db726e628 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ 
b/mllib-dal/src/main/native/Logger.cpp @@ -6,17 +6,27 @@ namespace logger { +class LoggerLevel { +public: + int level; + LoggerLevel(){ + level = 2; + if (const char *env_p = std::getenv("OAP_MLLIB_LOGGER_CPP_LEVEL")) { + level = atoi(env_p); + } + if (level > 5 || level < 0 || level == 3) { + level = 2; + } + } + int get_level(){return level;} +} logger_level; + + std::tuple get_prefix(MessageType message_type) { std::string prefix; bool isLoggerEnabled = false; - if (const char *env_p = std::getenv("OAP_MLLIB_LOGGER_CPP_ENABLED")) { - if (std::strncmp(env_p, "0", 1) == 0) { - isLoggerEnabled = false; - } else if (std::strncmp(env_p, "1", 1) == 0) { - isLoggerEnabled = true; - } else { - isLoggerEnabled = false; - } + if (message_type >= logger_level.get_level()) { + isLoggerEnabled = true; } switch (message_type) { case NONE: diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h index 7f1d64708..84484aa80 100644 --- a/mllib-dal/src/main/native/Logger.h +++ b/mllib-dal/src/main/native/Logger.h @@ -7,12 +7,12 @@ namespace logger { // message type for print functions enum MessageType { - NONE = 0, - INFO = 1, - WARN = 2, - ERROR = 3, - DEBUG = 4, - ASSERT = 5 + DEBUG = 0, + ASSERT = 1, + INFO = 2, + NONE = 3, + WARN = 4, + ERROR = 5 }; int print(MessageType message_type, const std::string &msg); From 139c0d6329ce3b65cb3cb8ac202b5984c78c4d1b Mon Sep 17 00:00:00 2001 From: kunpeng Date: Fri, 11 Aug 2023 13:52:46 +0000 Subject: [PATCH 40/48] Format --- mllib-dal/src/main/native/Logger.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index db726e628..9bfdb22f0 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -7,9 +7,9 @@ namespace logger { class LoggerLevel { -public: + public: int level; - LoggerLevel(){ + LoggerLevel() { level = 2; if (const char *env_p = 
std::getenv("OAP_MLLIB_LOGGER_CPP_LEVEL")) { level = atoi(env_p); @@ -18,10 +18,9 @@ class LoggerLevel { level = 2; } } - int get_level(){return level;} + int get_level() { return level; } } logger_level; - std::tuple get_prefix(MessageType message_type) { std::string prefix; bool isLoggerEnabled = false; From 9a4ac84c386fd17a9509e9da835ca28348592560 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Fri, 11 Aug 2023 14:03:47 +0000 Subject: [PATCH 41/48] Remove variable --- examples/scala/linear-regression-scala/run-gpu.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/scala/linear-regression-scala/run-gpu.sh b/examples/scala/linear-regression-scala/run-gpu.sh index 6d13c71c8..bb8896c4a 100755 --- a/examples/scala/linear-regression-scala/run-gpu.sh +++ b/examples/scala/linear-regression-scala/run-gpu.sh @@ -16,7 +16,6 @@ WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 -export OAP_MLLIB_LOGGER_CPP_ENABLED=1 # Should run in standalone mode time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --num-executors $SPARK_NUM_EXECUTORS \ From 3c736b15d17244ac7d22652bc44549faca4dc7f3 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Wed, 16 Aug 2023 00:13:18 -0700 Subject: [PATCH 42/48] Fix typo --- mllib-dal/pom.xml | 2 +- mllib-dal/src/main/native/PCAImpl.cpp | 2 +- mllib-dal/src/main/native/service.cpp | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml index 24d7e460a..32224e3ef 100644 --- a/mllib-dal/pom.xml +++ b/mllib-dal/pom.xml @@ -198,7 +198,7 @@ com.fasterxml.jackson.core jackson-databind - 2.12.7.1 + 2.10.5 org.apache.commons diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index db23e1ff4..67ed79bb0 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -23,10 +23,10 @@ #include "oneapi/dal/algo/pca.hpp" #endif +#include "service.h" #include "Logger.h" #include "OneCCL.h" #include 
"com_intel_oap_mllib_feature_PCADALImpl.h" -#include "service.h" using namespace std; #ifdef CPU_GPU_PROFILE diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index b41a52b05..f0048d4c4 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -1,6 +1,7 @@ #include "service.h" #include "Logger.h" #include "error_handling.h" +#include "oneapi/dal/table/row_accessor.hpp" using namespace daal; using namespace daal::data_management; @@ -201,7 +202,7 @@ void saveHomogenTablePtrToVector(const HomogenTablePtr &ptr) { g_kmtx.unlock(); } -#ifdef CPU_GPU_PRFILE +#ifdef CPU_GPU_PROFILE NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen) { int nRows = ntHomogen->getNumberOfRows(); int nColumns = ntHomogen->getNumberOfColumns(); @@ -266,6 +267,6 @@ void printHomegenTable(const oneapi::dal::table &table) { logger::println(logger::NONE, ""); } } - return 0; + return; } #endif From 435503ec634ad1b5e37af03feb3c5f5e58feb3c1 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Wed, 16 Aug 2023 21:46:07 -0700 Subject: [PATCH 43/48] Add flush --- mllib-dal/src/main/native/Logger.cpp | 3 +++ mllib-dal/src/main/native/service.cpp | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 9bfdb22f0..9862a26c6 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -62,6 +62,7 @@ int print2streamFromArgs(MessageType message_type, FILE *stream, // print message int ret = vfprintf(stream, format, args); + fflush(stream); return ret; } @@ -76,7 +77,9 @@ int print2streamFromArgsln(MessageType message_type, FILE *stream, // print message int ret = vfprintf(stream, format, args); + fflush(stream); fprintf(stream, "\n"); + fflush(stream); return ret; } diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index f0048d4c4..fdd24602e 100644 --- 
a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -1,7 +1,11 @@ #include "service.h" #include "Logger.h" #include "error_handling.h" + +#ifdef CPU_GPU_PROFILE +#include "oneapi/dal/table/common.hpp" #include "oneapi/dal/table/row_accessor.hpp" +#endif using namespace daal; using namespace daal::data_management; @@ -238,6 +242,7 @@ NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen) { } void printHomegenTable(const oneapi::dal::table &table) { + logger::println(logger::INFO, "printerr"); auto arr = oneapi::dal::row_accessor(table).pull(); const auto x = arr.get_data(); if (table.get_row_count() <= 10) { From ff1b95596f49598c732a9b9dfbda580fe425636f Mon Sep 17 00:00:00 2001 From: kunpeng Date: Tue, 22 Aug 2023 19:42:23 -0700 Subject: [PATCH 44/48] Fix device call host function --- mllib-dal/pom.xml | 2 +- mllib-dal/src/main/native/service.cpp | 38 ------------------------- mllib-dal/src/main/native/service.h | 40 ++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml index 32224e3ef..24d7e460a 100644 --- a/mllib-dal/pom.xml +++ b/mllib-dal/pom.xml @@ -198,7 +198,7 @@ com.fasterxml.jackson.core jackson-databind - 2.10.5 + 2.12.7.1 org.apache.commons diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index fdd24602e..3a8bfae27 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -2,10 +2,6 @@ #include "Logger.h" #include "error_handling.h" -#ifdef CPU_GPU_PROFILE -#include "oneapi/dal/table/common.hpp" -#include "oneapi/dal/table/row_accessor.hpp" -#endif using namespace daal; using namespace daal::data_management; @@ -240,38 +236,4 @@ NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen) { return ntSycl; } - -void printHomegenTable(const oneapi::dal::table &table) { - logger::println(logger::INFO, "printerr"); - auto arr = 
oneapi::dal::row_accessor(table).pull(); - const auto x = arr.get_data(); - if (table.get_row_count() <= 10) { - for (std::int64_t i = 0; i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - logger::print(logger::INFO, "%10f", - x[i * table.get_column_count() + j]); - } - logger::println(logger::NONE, ""); - } - } else { - for (std::int64_t i = 0; i < 5; i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - logger::print(logger::INFO, "%10f", - x[i * table.get_column_count() + j]); - } - logger::println(logger::NONE, ""); - } - logger::println(logger::INFO, "...%ld lines skipped...", - (table.get_row_count() - 10)); - for (std::int64_t i = table.get_row_count() - 5; - i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - logger::print(logger::INFO, "%10f", - x[i * table.get_column_count() + j]); - } - logger::println(logger::NONE, ""); - } - } - return; -} #endif diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index f084422f3..33936d8aa 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -44,6 +44,7 @@ using namespace daal::data_management; #include "error_handling.h" #include "oneapi/dal/table/detail/csr.hpp" #include "oneapi/dal/table/homogen.hpp" +#include "Logger.h" using namespace oneapi::dal; using namespace oneapi::dal::detail; @@ -67,6 +68,43 @@ void saveHomogenTablePtrToVector(const HomogenTablePtr &ptr); void saveCSRTablePtrToVector(const CSRTablePtr &ptr); #ifdef CPU_GPU_PROFILE +#include "oneapi/dal/table/row_accessor.hpp" +#include "oneapi/dal/table/common.hpp" + NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen); -void printHomegenTable(const oneapi::dal::table &table); +inline void printHomegenTable(const oneapi::dal::table &table) { + auto arr = oneapi::dal::row_accessor(table).pull(); + const auto x = arr.get_data(); + if (table.get_row_count() 
<= 10) { + for (std::int64_t i = 0; i < table.get_row_count(); i++) { + logger::print(logger::INFO, ""); + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::NONE, "%10f", + x[i * table.get_column_count() + j]); + } + logger::println(logger::NONE, ""); + } + } else { + for (std::int64_t i = 0; i < 5; i++) { + logger::print(logger::INFO, ""); + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::INFO, "%10f", + x[i * table.get_column_count() + j]); + } + logger::println(logger::NONE, ""); + } + logger::println(logger::INFO, "...%ld lines skipped...", + (table.get_row_count() - 10)); + for (std::int64_t i = table.get_row_count() - 5; + i < table.get_row_count(); i++) { + logger::print(logger::INFO, ""); + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::NONE, "%10f", + x[i * table.get_column_count() + j]); + } + logger::println(logger::NONE, ""); + } + } + return; +} #endif From fa3cda7b4c70c2682cfe2ed3ef065949e16f49d0 Mon Sep 17 00:00:00 2001 From: kunpeng Date: Wed, 23 Aug 2023 10:20:04 +0000 Subject: [PATCH 45/48] Format --- mllib-dal/src/main/native/PCAImpl.cpp | 2 +- mllib-dal/src/main/native/service.cpp | 1 - mllib-dal/src/main/native/service.h | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index 67ed79bb0..db23e1ff4 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -23,10 +23,10 @@ #include "oneapi/dal/algo/pca.hpp" #endif -#include "service.h" #include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_feature_PCADALImpl.h" +#include "service.h" using namespace std; #ifdef CPU_GPU_PROFILE diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index 3a8bfae27..632f1b0d4 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp 
@@ -2,7 +2,6 @@ #include "Logger.h" #include "error_handling.h" - using namespace daal; using namespace daal::data_management; using namespace daal::services; diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index 33936d8aa..82cf60282 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -41,10 +41,10 @@ using namespace daal::data_management; #include #include +#include "Logger.h" #include "error_handling.h" #include "oneapi/dal/table/detail/csr.hpp" #include "oneapi/dal/table/homogen.hpp" -#include "Logger.h" using namespace oneapi::dal; using namespace oneapi::dal::detail; @@ -68,8 +68,8 @@ void saveHomogenTablePtrToVector(const HomogenTablePtr &ptr); void saveCSRTablePtrToVector(const CSRTablePtr &ptr); #ifdef CPU_GPU_PROFILE -#include "oneapi/dal/table/row_accessor.hpp" #include "oneapi/dal/table/common.hpp" +#include "oneapi/dal/table/row_accessor.hpp" NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen); inline void printHomegenTable(const oneapi::dal::table &table) { From f832381200a8f31cc1dea723fc659e70364f7c7d Mon Sep 17 00:00:00 2001 From: kunpeng Date: Tue, 29 Aug 2023 18:40:57 -0700 Subject: [PATCH 46/48] OneDAL rename --- mllib-dal/src/main/native/ALSDALImpl.cpp | 4 ++-- .../src/main/native/DecisionForestRegressorImpl.cpp | 6 +++--- mllib-dal/src/main/native/KMeansImpl.cpp | 12 ++++++------ mllib-dal/src/main/native/NaiveBayesDALImpl.cpp | 10 +++++----- mllib-dal/src/main/native/OneDAL.cpp | 4 ++-- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index 1c3c2f654..9b175aabd 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -505,12 +505,12 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( logger::println(logger::INFO, "- fullNUsers: %d", nUsers); logger::println(logger::INFO, "- nFactors: %d", 
nFactors); - // Set number of threads for oneDAL to use for each rank + // Set number of threads for OneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executor_cores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); logger::println(logger::INFO, - "oneDAL (native): Number of CPU threads used: %d", + "OneDAL (native): Number of CPU threads used: %d", nThreadsNew); int nBlocks = executor_num; diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index 351f2eede..d00c2794d 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -213,7 +213,7 @@ static jobject doRFRegressorOneAPICompute( jboolean bootstrap, preview::spmd::communicator comm, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): GPU compute start"); + logger::println(logger::INFO, "OneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table hFeaturetable = *reinterpret_cast(pNumTabFeature); @@ -295,7 +295,7 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra jlong seed, jint maxbins, jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) { logger::println(logger::INFO, - "oneDAL (native): use DPC++ kernels; device %s", + "OneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); @@ -306,7 +306,7 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); diff --git 
a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index d356fd73e..8f11def31 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -181,7 +181,7 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId, NumericTablePtr ¢roids, jint cluster_num, jdouble tolerance, jint iteration_num, jint executor_num, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): CPU compute start"); + logger::println(logger::INFO, "OneDAL (native): CPU compute start"); algorithmFPType totalCost; NumericTablePtr newCentroids; @@ -249,7 +249,7 @@ static jlong doKMeansOneAPICompute( jdouble tolerance, jint iterationNum, preview::spmd::communicator comm, jobject resultObj) { - logger::println(logger::INFO, "oneDAL (native): GPU compute start"); + logger::println(logger::INFO, "OneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -310,7 +310,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { logger::println(logger::INFO, - "oneDAL (native): use DPC++ kernels; device %s", + "OneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); jlong ret = 0L; @@ -322,13 +322,13 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe case ComputeDevice::cpu: { NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); NumericTablePtr centroids = *((NumericTablePtr *)pNumTabCenters); - // Set number of threads for oneDAL to use for each rank + // Set number of threads for OneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executorCores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); logger::println(logger::INFO, - "oneDAL (native): Number 
of CPU threads used %d", + "OneDAL (native): Number of CPU threads used %d", nThreadsNew); ret = doKMeansDaalCompute(env, obj, rankId, cclComm, pData, centroids, clusterNum, tolerance, iterationNum, @@ -340,7 +340,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); diff --git a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp index 38a60241e..01b93c787 100644 --- a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp +++ b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp @@ -133,13 +133,13 @@ Java_com_intel_oap_mllib_classification_NaiveBayesDALImpl_cNaiveBayesDALCompute( NumericTablePtr featuresTab = *((NumericTablePtr *)pFeaturesTab); NumericTablePtr labelsTab = *((NumericTablePtr *)pLabelsTab); - // Set number of threads for oneDAL to use for each rank + // Set number of threads for OneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executor_cores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); logger::println(logger::INFO, - "oneDAL (native): Number of CPU threads used %d", + "OneDAL (native): Number of CPU threads used %d", nThreadsNew); auto t1 = std::chrono::high_resolution_clock::now(); @@ -147,18 +147,18 @@ Java_com_intel_oap_mllib_classification_NaiveBayesDALImpl_cNaiveBayesDALCompute( training::ResultPtr trainingResult; if (featuresTab->getDataLayout() == NumericTable::StorageLayout::csrArray) { logger::println(logger::INFO, - "oneDAL (native): training model with fastCSR method"); + "OneDAL (native): training model with fastCSR method"); trainingResult = trainModel(comm, featuresTab, labelsTab, class_num); } else { logger::println( 
logger::INFO, - "oneDAL (native): training model with defaultDense method"); + "OneDAL (native): training model with defaultDense method"); trainingResult = trainModel( comm, featuresTab, labelsTab, class_num); } - logger::println(logger::INFO, "oneDAL (native): training model finished"); + logger::println(logger::INFO, "OneDAL (native): training model finished"); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index ff2323031..320d5f199 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -23,7 +23,7 @@ using namespace daal; using namespace daal::data_management; -// Use oneDAL lib function +// Use OneDAL lib function extern bool daal_check_is_intel_cpu(); JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cAddNumericTable( @@ -76,7 +76,7 @@ JNIEXPORT jboolean JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cCheckPlatformCompatibility(JNIEnv *, jobject) { // Only guarantee compatibility and performance on Intel platforms, use - // oneDAL lib function + // OneDAL lib function return daal_check_is_intel_cpu(); } From b8d30fdcd006f9aed2a43374a9e26dee2c8c079b Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 31 Aug 2023 01:04:14 -0700 Subject: [PATCH 47/48] fix typo --- mllib-dal/src/main/native/Logger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp index 9862a26c6..6ecb5fcf3 100644 --- a/mllib-dal/src/main/native/Logger.cpp +++ b/mllib-dal/src/main/native/Logger.cpp @@ -131,7 +131,7 @@ int println(MessageType message_type, const char *format, ...) 
{ } int printerr(MessageType message_type, const std::string &msg) { - int ret = print2stream(message_type, stdout, msg.c_str()); + int ret = print2stream(message_type, stderr, msg.c_str()); return ret; } From cc4fd70180ae6e54a28a31d79cc8ff7034897c2b Mon Sep 17 00:00:00 2001 From: kunpeng Date: Thu, 31 Aug 2023 01:12:11 -0700 Subject: [PATCH 48/48] Fix typo --- mllib-dal/src/main/native/service.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index 82cf60282..ca25ca9f9 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -88,7 +88,7 @@ inline void printHomegenTable(const oneapi::dal::table &table) { for (std::int64_t i = 0; i < 5; i++) { logger::print(logger::INFO, ""); for (std::int64_t j = 0; j < table.get_column_count(); j++) { - logger::print(logger::INFO, "%10f", + logger::print(logger::NONE, "%10f", x[i * table.get_column_count() + j]); } logger::println(logger::NONE, "");