diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp index d029d08dc..9b175aabd 100644 --- a/mllib-dal/src/main/native/ALSDALImpl.cpp +++ b/mllib-dal/src/main/native/ALSDALImpl.cpp @@ -24,6 +24,8 @@ #include "ALSShuffle.h" +#include "Logger.h" + using namespace std; using namespace daal; using namespace daal::algorithms; @@ -212,7 +214,7 @@ void initializeStep2Local( void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nUsers, size_t nFactors) { - std::cout << "ALS (native): initializeModel " << std::endl; + logger::println(logger::INFO, "ALS (native): initializeModel "); auto t1 = std::chrono::high_resolution_clock::now(); @@ -231,8 +233,8 @@ void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "ALS (native): initializeModel took " << duration << " secs" - << std::endl; + logger::println(logger::INFO, "ALS (native): initializeModel took %d secs", + duration); } training::DistributedPartialResultStep1Ptr computeStep1Local( @@ -314,7 +316,7 @@ computeStep4Local(const CSRNumericTablePtr &dataTable, void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, size_t nBlocks, size_t nFactors, size_t maxIterations) { - std::cout << "ALS (native): trainModel" << std::endl; + logger::println(logger::INFO, "ALS (native): trainModel"); auto tStart = std::chrono::high_resolution_clock::now(); @@ -423,15 +425,15 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "ALS (native): iteration " << iteration << " took " - << duration << " secs" << std::endl; + logger::println(logger::INFO, "ALS (native): iteration %d took %d secs", + iteration, duration); } auto tEnd
= std::chrono::high_resolution_clock::now(); auto durationTotal = std::chrono::duration_cast(tEnd - tStart).count(); - std::cout << "ALS (native): trainModel took " << durationTotal << " secs" - << std::endl; + logger::println(logger::INFO, "ALS (native): trainModel took %d secs", + durationTotal); } static size_t getOffsetFromOffsetTable(NumericTablePtr offsetTable) { @@ -448,8 +450,8 @@ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cShuffleData( JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks, jobject infoObj) { - // cout << "cShuffleData: rank " << rankId << endl; - cout << "RATING_SIZE: " << RATING_SIZE << endl; + // logger::println(logger::DEBUG, "cShuffleData: rank %d", rankId); + logger::println(logger::INFO, "RATING_SIZE: %d", RATING_SIZE); ccl::communicator &comm = getComm(); @@ -493,19 +495,23 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( dataTable = *((CSRNumericTablePtr *)numTableAddr); - cout << "ALS (native): Input info: " << endl; - cout << "- NumberOfRows: " << dataTable->getNumberOfRows() << endl; - cout << "- NumberOfColumns: " << dataTable->getNumberOfColumns() << endl; - cout << "- NumberOfRatings: " << dataTable->getDataSize() << endl; - cout << "- fullNUsers: " << nUsers << endl; - cout << "- nFactors: " << nFactors << endl; - - // Set number of threads for oneDAL to use for each rank + logger::println(logger::INFO, "ALS (native): Input info:"); + logger::println(logger::INFO, "- NumberOfRows: %d", + dataTable->getNumberOfRows()); + logger::println(logger::INFO, "- NumberOfColumns: %d", + dataTable->getNumberOfColumns()); + logger::println(logger::INFO, "- NumberOfRatings: %d", + dataTable->getDataSize()); + logger::println(logger::INFO, "- fullNUsers: %d", nUsers); + logger::println(logger::INFO, "- nFactors: %d", nFactors); + + // Set number of threads for OneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executor_cores); int 
nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew - << endl; + logger::println(logger::INFO, + "OneDAL (native): Number of CPU threads used: %d", + nThreadsNew); int nBlocks = executor_num; initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors); @@ -516,16 +522,18 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS( auto pItem = itemsPartialResultLocal->get(training::outputOfStep4ForStep1) ->getFactors(); - std::cout << "\n=== Results for Rank " << rankId << "===\n" << std::endl; + logger::println(logger::INFO, ""); + logger::println(logger::INFO, "=== Results for Rank %d ===", rankId); + logger::println(logger::INFO, ""); printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, 20); printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20); - std::cout << "User Offset: " << getOffsetFromOffsetTable(userOffset) - << std::endl; - std::cout << "Item Offset: " << getOffsetFromOffsetTable(itemOffset) - << std::endl; - std::cout << std::endl; + logger::println(logger::INFO, "User Offset: %d", + getOffsetFromOffsetTable(userOffset)); + logger::println(logger::INFO, "Item Offset: %d", + getOffsetFromOffsetTable(itemOffset)); + logger::println(logger::INFO, ""); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); diff --git a/mllib-dal/src/main/native/ALSShuffle.cpp b/mllib-dal/src/main/native/ALSShuffle.cpp index 7759e3cc0..b07d27076 100644 --- a/mllib-dal/src/main/native/ALSShuffle.cpp +++ b/mllib-dal/src/main/native/ALSShuffle.cpp @@ -22,6 +22,8 @@ #include "ALSShuffle.h" +#include "Logger.h" + using namespace std; std::vector recvData; @@ -72,11 +74,11 @@ Rating *shuffle_all2all(ccl::communicator &comm, // Calculate send buffer size for (size_t i = 0; i < nBlocks; i++) { perNodeSendLens[i] = partitions[i].size() * RATING_SIZE; - // cout << "rank " << rankId << " Send 
partition " << i << " size " << - // perNodeSendLens[i] << endl; + // logger::println(logger::INFO, "rank %d Send partition %d size %d", + // rankId, i, perNodeSendLens[i]); sendBufSize += perNodeSendLens[i]; } - cout << "sendData size " << sendBufSize << endl; + logger::println(logger::INFO, "sendData size %d", sendBufSize); sendData.resize(sendBufSize); // Fill in send buffer @@ -94,8 +96,8 @@ Rating *shuffle_all2all(ccl::communicator &comm, // Calculate recv buffer size for (size_t i = 0; i < nBlocks; i++) { - // cout << "rank " << rankId << " Recv partition " << i << " size " << - // perNodeRecvLens[i] << endl; + // logger::println(logger::INFO, "rank %d Recv partition %d size %d", + // rankId, i, perNodeRecvLens[i]); recvBufSize += perNodeRecvLens[i]; } @@ -109,18 +111,14 @@ Rating *shuffle_all2all(ccl::communicator &comm, sort(recvData.begin(), recvData.end(), compareRatingByUser); - // for (auto r : recvData) { - // cout << r.user << " " << r.item << " " << r.rating << endl; - // } - newRatingsNum = recvData.size(); // RatingPartition::iterator iter = std::unique(recvData.begin(), // recvData.end(), compareRatingUserEquality); newCsrRowNum = // std::distance(recvData.begin(), iter); newCsrRowNum = distinct_count(recvData); - cout << "newRatingsNum: " << newRatingsNum - << " newCsrRowNum: " << newCsrRowNum << endl; + logger::println(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d", + newRatingsNum, newCsrRowNum); return recvData.data(); } diff --git a/mllib-dal/src/main/native/Common.hpp b/mllib-dal/src/main/native/Common.hpp index baaf4b234..5ead8c8c1 100644 --- a/mllib-dal/src/main/native/Common.hpp +++ b/mllib-dal/src/main/native/Common.hpp @@ -22,5 +22,4 @@ #include "GPU.h" #include "Communicator.hpp" -#include "OutputHelpers.hpp" #include "oneapi/dal/table/homogen.hpp" diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index f42811d36..d3f95bb0e 100644 ---
a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -25,6 +25,8 @@ #include "com_intel_oap_mllib_stat_CorrelationDALImpl.h" #include "service.h" +#include "Logger.h" + using namespace std; #ifdef CPU_GPU_PROFILE namespace covariance_gpu = oneapi::dal::covariance; @@ -55,8 +57,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "Correleation (native): local step took " << duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, + "Correlation (native): local step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -82,8 +85,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "Correleation (native): ccl_allgatherv took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "Correlation (native): ccl_allgatherv took %d secs", + duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -121,8 +125,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "Correlation (native): master step took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "Correlation (native): master step took %d secs", + duration / 1000); /* Print the results */ printNumericTable(result->get(covariance_cpu::correlation), @@ -149,7 +154,7 @@ static void doCorrelationOneAPICompute( JNIEnv *env, jlong pNumTabData, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU
compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -160,15 +165,18 @@ static void doCorrelationOneAPICompute( auto t1 = std::chrono::high_resolution_clock::now(); const auto result_train = preview::compute(comm, cor_desc, htable); if (isRoot) { - std::cout << "Mean:\n" << result_train.get_means() << std::endl; - std::cout << "Correlation:\n" - << result_train.get_cor_matrix() << std::endl; + logger::println(logger::INFO, "Mean:"); + printHomegenTable(result_train.get_means()); + logger::println(logger::INFO, "Correlation:"); + printHomegenTable(result_train.get_cor_matrix()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "Correlation batch(native): computing step took " - << duration / 1000 << " secs." << std::endl; + logger::println( + logger::INFO, + "Correlation batch(native): computing step took %d secs.", + duration / 1000); // Return all covariance & mean jclass clazz = env->GetObjectClass(resultObj); @@ -191,9 +199,10 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); @@ -206,8 +215,9 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used" - << nThreadsNew << std::endl; + 
logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); doCorrelationDaalCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -215,9 +225,10 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -236,8 +247,7 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( } #endif default: { - std::cout << "no supported device!" << std::endl; - exit(-1); + deviceError(); } } return 0; diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index fa8556b27..07c97440f 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -25,6 +25,7 @@ #ifdef CPU_GPU_PROFILE #include "Common.hpp" +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_classification_RandomForestClassifierDALImpl.h" #include "oneapi/dal/algo/decision_forest.hpp" @@ -180,11 +181,11 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID learningNodeConstructor = env->GetMethodID(learningNodeClass, "", "()V"); - std::cout << "Number of trees: " << m.get_tree_count() << std::endl; + logger::println(logger::INFO, "Number of trees: %d", m.get_tree_count()); for (std::int64_t i = 0, n = m.get_tree_count(); i < n; ++i) { - std::cout - << "Iterate over the C++ map and add each entry to the Java map" - << std::endl; + logger::println( + logger::INFO, + "Iterate over the C++ map and add each entry to the Java map"); // Create a new Java 
ArrayList to hold the LearningNode objects jobject jList = env->NewObject(listClass, listConstructor); m.traverse_depth_first(i, collect_nodes{env, classCount, jList, @@ -194,7 +195,7 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID mapPut = env->GetMethodID( mapClass, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); - std::cout << "convertJavaMap tree id = " << i << std::endl; + logger::println(logger::INFO, "convertJavaMap tree id = %d", i); // Create a new Integer object with the value key jobject jKey = env->NewObject( env->FindClass("java/lang/Integer"), // Find the Integer class @@ -215,16 +216,18 @@ static jobject doRFClassifierOneAPICompute( jint maxBins, jboolean bootstrap, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table hFeaturetable = *reinterpret_cast(pNumTabFeature); homogen_table hLabeltable = *reinterpret_cast(pNumTabLabel); - std::cout << "doRFClassifierOneAPICompute get_column_count = " - << hFeaturetable.get_column_count() << std::endl; - std::cout << "doRFClassifierOneAPICompute classCount = " << classCount - << std::endl; + logger::println(logger::INFO, + "doRFClassifierOneAPICompute get_column_count = %d", + hFeaturetable.get_column_count()); + logger::println(logger::INFO, "doRFClassifierOneAPICompute classCount = %d", + classCount); + const auto df_desc = df::descriptor{} .set_class_count(classCount) @@ -249,13 +252,14 @@ static jobject doRFClassifierOneAPICompute( preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable); jobject trees = nullptr; if (isRoot) { - std::cout << "Variable importance results:\n" - << result_train.get_var_importance() << std::endl; - std::cout << "OOB error: " << result_train.get_oob_err() << std::endl; - std::cout << "Prediction results:\n" - << 
result_infer.get_responses() << std::endl; - std::cout << "Probabilities results:\n" - << result_infer.get_probabilities() << std::endl; + logger::println(logger::INFO, "Variable importance results:"); + printHomegenTable(result_train.get_var_importance()); + logger::println(logger::INFO, "OOB error:"); + printHomegenTable(result_train.get_oob_err()); + logger::println(logger::INFO, "Prediction results:"); + printHomegenTable(result_infer.get_responses()); + logger::println(logger::INFO, "Probabilities results:"); + printHomegenTable(result_infer.get_probabilities()); // convert to java hashmap trees = collect_model(env, result_train.get_model(), classCount); @@ -302,16 +306,18 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif jdouble minImpurityDecreaseSplitNode, jint maxTreeDepth, jlong seed, jint maxBins, jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " << std::endl; + logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels"); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -334,9 +340,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif return hashmapObj; } default: { - std::cout << "RandomForest (native): The compute device " - << "is not supported!"
<< std::endl; - exit(-1); + deviceError(); } } return nullptr; diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index 1cc5e17a2..d00c2794d 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -25,6 +25,7 @@ #ifdef CPU_GPU_PROFILE #include "Common.hpp" +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h" #include "oneapi/dal/algo/decision_forest.hpp" @@ -179,11 +180,11 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID learningNodeConstructor = env->GetMethodID(learningNodeClass, "", "()V"); - std::cout << "Number of trees: " << m.get_tree_count() << std::endl; + logger::println(logger::INFO, "Number of trees: %d", m.get_tree_count()); for (std::int64_t i = 0, n = m.get_tree_count(); i < n; ++i) { - std::cout - << "Iterate over the C++ map and add each entry to the Java map" - << std::endl; + logger::println( + logger::INFO, + "Iterate over the C++ map and add each entry to the Java map"); // Create a new Java ArrayList to hold the LearningNode objects jobject jList = env->NewObject(listClass, listConstructor); m.traverse_depth_first(i, collect_nodes{env, classCount, jList, @@ -193,7 +194,7 @@ jobject collect_model(JNIEnv *env, const df::model &m, jmethodID mapPut = env->GetMethodID( mapClass, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); - std::cout << "convertJavaMap tree id = " << i << std::endl; + logger::println(logger::INFO, "convertJavaMap tree id = %d", i); // Create a new Integer object with the value key jobject jKey = env->NewObject( env->FindClass("java/lang/Integer"), // Find the Integer class @@ -212,14 +213,15 @@ static jobject doRFRegressorOneAPICompute( jboolean bootstrap, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): compute start" << std::endl; 
+ logger::println(logger::INFO, "OneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table hFeaturetable = *reinterpret_cast(pNumTabFeature); homogen_table hLabeltable = *reinterpret_cast(pNumTabLabel); - std::cout << "doRFRegressorOneAPICompute get_column_count = " - << hFeaturetable.get_column_count() << std::endl; + logger::println(logger::INFO, + "doRFRegressorOneAPICompute get_column_count = %d", + hFeaturetable.get_column_count()); const auto df_desc = df::descriptor{} .set_tree_count(treeCount) @@ -238,11 +240,12 @@ static jobject doRFRegressorOneAPICompute( preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable); jobject trees = nullptr; if (isRoot) { - std::cout << "Variable importance results:\n" - << result_train.get_var_importance() << std::endl; - std::cout << "OOB error: " << result_train.get_oob_err() << std::endl; - std::cout << "Prediction results:\n" - << result_infer.get_responses() << std::endl; + logger::println(logger::INFO, "Variable importance results:"); + printHomegenTable(result_train.get_var_importance()); + logger::println(logger::INFO, "OOB error:"); + printHomegenTable(result_train.get_oob_err()); + logger::println(logger::INFO, "Prediction results:"); + printHomegenTable(result_infer.get_responses()); // convert c++ map to java hashmap jint statsSize = 3; // spark create VarianceCalculator needs array of @@ -291,16 +294,20 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins, jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " << std::endl; + logger::println(logger::INFO, + "OneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = 
getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println( + logger::INFO, + "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -321,9 +328,7 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra return hashmapObj; } default: { - std::cout << "RandomForest (native): The compute device " - << "is not supported!" << std::endl; - exit(-1); + deviceError(); } } return nullptr; diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 4205b8c26..4d60f9d78 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -3,6 +3,7 @@ #include #include "GPU.h" +#include "Logger.h" typedef std::shared_ptr queuePtr; @@ -17,7 +18,7 @@ static std::vector get_gpus() { return devices; } } - std::cout << "No GPUs!" << std::endl; + logger::printerrln(logger::ERROR, "No GPUs!"); exit(-1); return {}; @@ -70,23 +71,23 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - std::cout - << "Not implemented for HOST/CPU device, Please run on GPU device." 
- << std::endl; + logger::printerrln( + logger::ERROR, + "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); } case ComputeDevice::gpu: { - std::cout << "selector GPU" << std::endl; + logger::println(logger::INFO, "selector GPU"); auto local_rank = getLocalRank(comm, size, rankId); auto gpus = get_gpus(); - std::cout << "rank: " << rankId << " size: " << size - << " local_rank: " << local_rank << " n_gpu: " << n_gpu - << std::endl; + logger::println(logger::INFO, + "rank: %d size: %d local_rank: %d n_gpu: %d", rankId, + size, local_rank, n_gpu); auto gpu_selected = gpu_indices[local_rank % n_gpu]; - std::cout << "GPU selected for current rank: " << gpu_selected - << std::endl; + logger::println(logger::INFO, "GPU selected for current rank: %d", + gpu_selected); // In case gpu_selected index is larger than number of GPU SYCL devices auto rank_gpu = gpus[gpu_selected % gpus.size()]; @@ -95,31 +96,31 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, } default: { - std::cout << "No Device!" << std::endl; + logger::printerrln(logger::ERROR, "No Device!"); exit(-1); } } } sycl::queue getQueue(const ComputeDevice device) { - std::cout << "Get Queue" << std::endl; + logger::println(logger::INFO, "Get Queue"); switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { - std::cout << "Not implemented for HOST/CPU device, Please run on " - "GPU device." - << std::endl; + logger::printerrln( + logger::ERROR, + "Not implemented for HOST/CPU device, Please run on GPU device."); exit(-1); } case ComputeDevice::gpu: { - std::cout << "selector GPU" << std::endl; + logger::println(logger::INFO, "selector GPU"); auto device_gpu = sycl::gpu_selector{}.select_device(); - std::cout << "selector GPU end" << std::endl; + logger::println(logger::INFO, "selector GPU end"); return getSyclQueue(device_gpu); } default: { - std::cout << "No Device!" 
<< std::endl; + logger::printerrln(logger::ERROR, "No Device!"); exit(-1); } } diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index a7a40a90f..8f11def31 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -23,6 +23,7 @@ #include "oneapi/dal/algo/kmeans.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_clustering_KMeansDALImpl.h" #include "service.h" @@ -180,7 +181,7 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId, NumericTablePtr ¢roids, jint cluster_num, jdouble tolerance, jint iteration_num, jint executor_num, jobject resultObj) { - std::cout << "oneDAL (native): CPU compute start" << std::endl; + logger::println(logger::INFO, "OneDAL (native): CPU compute start"); algorithmFPType totalCost; NumericTablePtr newCentroids; @@ -208,17 +209,20 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "KMeans (native): iteration " << it << " took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "KMeans (native): iteration %d took %d secs", it, + duration / 1000); } if (rankId == ccl_root) { if (it == iteration_num) - std::cout << "KMeans (native): reached " << iteration_num - << " max iterations." << std::endl; + logger::println(logger::INFO, + "KMeans (native): reached %d max iterations.", + iteration_num); else - std::cout << "KMeans (native): converged in " << it - << " iterations." 
<< std::endl; + logger::println(logger::INFO, + "KMeans (native): converged in %d iterations.", + it); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); @@ -245,7 +249,7 @@ static jlong doKMeansOneAPICompute( jdouble tolerance, jint iterationNum, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::println(logger::INFO, "OneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -260,16 +264,17 @@ static jlong doKMeansOneAPICompute( kmeans_gpu::train_result result_train = preview::train(comm, kmeans_desc, local_input); if (isRoot) { - std::cout << "Iteration count: " << result_train.get_iteration_count() - << std::endl; - std::cout << "Centroids:\n" - << result_train.get_model().get_centroids() << std::endl; + logger::println(logger::INFO, "Iteration count: %d", + result_train.get_iteration_count()); + logger::println(logger::INFO, "Centroids:"); + printHomegenTable(result_train.get_model().get_centroids()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "KMeans (native): training step took " << duration / 1000 - << " secs."
<< std::endl; + logger::println(logger::INFO, + "KMeans (native): training step took %d secs", + duration / 1000); // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); // Get Field references @@ -304,9 +309,10 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe jint clusterNum, jdouble tolerance, jint iterationNum, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, + "OneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + jlong ret = 0L; ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); @@ -316,13 +322,14 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe case ComputeDevice::cpu: { NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); NumericTablePtr centroids = *((NumericTablePtr *)pNumTabCenters); - // Set number of threads for oneDAL to use for each rank + // Set number of threads for OneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executorCores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used " - << nThreadsNew << std::endl; + logger::println(logger::INFO, + "OneDAL (native): Number of CPU threads used %d", + nThreadsNew); ret = doKMeansDaalCompute(env, obj, rankId, cclComm, pData, centroids, clusterNum, tolerance, iterationNum, executorNum, resultObj); @@ -331,9 +338,10 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << 
std::endl; + logger::println( + logger::INFO, + "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -355,8 +363,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe } #endif default: { - std::cout << "no supported device!" << std::endl; - exit(-1); + deviceError(); } } return ret; diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 433c73a6d..b4de714f8 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -26,6 +26,7 @@ #include "oneapi/dal/algo/linear_regression.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_regression_LinearRegressionDALImpl.h" #include "service.h" @@ -152,7 +153,6 @@ ridge_regression_compute(size_t rankId, ccl::communicator &comm, InputDataArchive dataArch; localAlgorithm.getPartialResult()->serialize(dataArch); size_t perNodeArchLength = dataArch.getSizeOfArchive(); - // std::cout << "perNodeArchLength: " << perNodeArchLength << std::endl; serializedData = services::SharedPtr(new byte[perNodeArchLength * nBlocks]); @@ -221,8 +221,8 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, jlong pData, jlong pLabel, jboolean jfitIntercept, jint executorNum, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start , rankid " << rankId - << std::endl; + logger::println(logger::INFO, + "oneDAL (native): GPU compute start , rankid %d", rankId); const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); @@ -263,9 +263,9 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << 
std::endl; + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); ccl::communicator &cclComm = getComm(); size_t rankId = cclComm.rank(); @@ -280,9 +280,11 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra if (useGPU) { #ifdef CPU_GPU_PROFILE int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); + jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); int size = cclComm.size(); auto queue = @@ -304,8 +306,9 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew - << endl; + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); if (regParam == 0) { resultTable = linear_regression_compute( rankId, cclComm, pData, pLabel, fitIntercept, executorNum); diff --git a/mllib-dal/src/main/native/Logger.cpp b/mllib-dal/src/main/native/Logger.cpp new file mode 100644 index 000000000..6ecb5fcf3 --- /dev/null +++ b/mllib-dal/src/main/native/Logger.cpp @@ -0,0 +1,159 @@ +#include +#include +#include + +#include "Logger.h" + +namespace logger { + +class LoggerLevel { + public: + int level; + LoggerLevel() { + level = 2; + if (const char *env_p = std::getenv("OAP_MLLIB_LOGGER_CPP_LEVEL")) { + level = atoi(env_p); + } + if (level > 5 || level < 0 || level == 3) { + level = 2; + } + } + int get_level() { return level; } +} logger_level; + +std::tuple get_prefix(MessageType message_type) { + std::string prefix; + bool isLoggerEnabled = false; + if (message_type >= logger_level.get_level()) { + isLoggerEnabled = true; + } + 
switch (message_type) { + case NONE: + break; + case INFO: + prefix = "[INFO]"; + break; + case WARN: + prefix = "[WARNING]"; + break; + case ERROR: + prefix = "[ERROR]"; + break; + case DEBUG: + prefix = "[DEBUG]"; + break; + case ASSERT: + prefix = "[ASSERT]"; + break; + default: + break; + } + + return {prefix + " ", isLoggerEnabled}; +} + +int print2streamFromArgs(MessageType message_type, FILE *stream, + const char *format, va_list args) { + // print prefix + auto [prefix, enable] = get_prefix(message_type); + if (!enable) + return 0; + fprintf(stream, "%s", prefix.c_str()); + + // print message + int ret = vfprintf(stream, format, args); + fflush(stream); + + return ret; +} + +int print2streamFromArgsln(MessageType message_type, FILE *stream, + const char *format, va_list args) { + // print prefix + auto [prefix, enable] = get_prefix(message_type); + if (!enable) + return 0; + fprintf(stream, "%s", prefix.c_str()); + + // print message + int ret = vfprintf(stream, format, args); + fflush(stream); + fprintf(stream, "\n"); + fflush(stream); + + return ret; +} + +int print2stream(MessageType message_type, FILE *stream, const char *format, + ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stream, format, args); + va_end(args); + + return ret; +} + +int print2streamln(MessageType message_type, FILE *stream, const char *format, + ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgsln(message_type, stream, format, args); + va_end(args); + + return ret; +} + +int print(MessageType message_type, const std::string &msg) { + int ret = print2stream(message_type, stdout, msg.c_str()); + return ret; +} + +int print(MessageType message_type, const char *format, ...) 
{ + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stdout, format, args); + va_end(args); + return ret; +} + +int println(MessageType message_type, const std::string &msg) { + int ret = print2streamln(message_type, stdout, msg.c_str()); + return ret; +} + +int println(MessageType message_type, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgsln(message_type, stdout, format, args); + va_end(args); + return ret; +} + +int printerr(MessageType message_type, const std::string &msg) { + int ret = print2stream(message_type, stderr, msg.c_str()); + return ret; +} + +int printerr(MessageType message_type, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = print2streamFromArgs(message_type, stderr, format, args); + va_end(args); + return ret; +} + +int printerrln(MessageType message_type, const std::string &msg) { + int ret = print2streamln(message_type, stderr, msg.c_str()); + return ret; +} + +int printerrln(MessageType message_type, const char *format, ...) 
{ + va_list args; + va_start(args, format); + int ret = print2streamFromArgsln(message_type, stderr, format, args); + va_end(args); + return ret; +} + +}; // namespace logger diff --git a/mllib-dal/src/main/native/Logger.h b/mllib-dal/src/main/native/Logger.h new file mode 100644 index 000000000..84484aa80 --- /dev/null +++ b/mllib-dal/src/main/native/Logger.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +namespace logger { +// message type for print functions +enum MessageType { + DEBUG = 0, + ASSERT = 1, + INFO = 2, + NONE = 3, + WARN = 4, + ERROR = 5 +}; + +int print(MessageType message_type, const std::string &msg); +int print(MessageType message_type, const char *format, ...); +int println(MessageType message_type, const char *format, ...); +int println(MessageType message_type, const std::string &msg); + +int printerr(MessageType message_type, const std::string &msg); +int printerr(MessageType message_type, const char *format, ...); +int printerrln(MessageType message_type, const char *format, ...); +int printerrln(MessageType message_type, const std::string &msg); +}; // namespace logger diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile index b25072c04..40869ca4d 100644 --- a/mllib-dal/src/main/native/Makefile +++ b/mllib-dal/src/main/native/Makefile @@ -82,6 +82,7 @@ else ifeq ($(PLATFORM_PROFILE),CPU_GPU_PROFILE) endif CPP_SRCS += \ + ./Logger.cpp \ ./OneCCL.cpp ./OneDAL.cpp ./service.cpp ./error_handling.cpp \ ./KMeansImpl.cpp \ ./PCAImpl.cpp \ @@ -98,6 +99,7 @@ CPP_SRCS += \ ./oneapi/dal/RowAccessorImpl.cpp OBJS += \ + ./Logger.o \ ./OneCCL.o ./OneDAL.o ./service.o ./error_handling.o \ ./KMeansImpl.o \ ./PCAImpl.o \ diff --git a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp index 305a867df..01b93c787 100644 --- a/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp +++ b/mllib-dal/src/main/native/NaiveBayesDALImpl.cpp @@ -1,5 +1,6 @@ #include +#include 
"Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_classification_NaiveBayesDALImpl.h" #include "service.h" @@ -132,39 +133,37 @@ Java_com_intel_oap_mllib_classification_NaiveBayesDALImpl_cNaiveBayesDALCompute( NumericTablePtr featuresTab = *((NumericTablePtr *)pFeaturesTab); NumericTablePtr labelsTab = *((NumericTablePtr *)pLabelsTab); - // Set number of threads for oneDAL to use for each rank + // Set number of threads for OneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executor_cores); int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew - << endl; - + logger::println(logger::INFO, + "OneDAL (native): Number of CPU threads used %d", + nThreadsNew); auto t1 = std::chrono::high_resolution_clock::now(); // Support both dense and csr numeric table training::ResultPtr trainingResult; if (featuresTab->getDataLayout() == NumericTable::StorageLayout::csrArray) { - cout << "oneDAL (native): training model with fastCSR method" << endl; + logger::println(logger::INFO, + "OneDAL (native): training model with fastCSR method"); trainingResult = trainModel(comm, featuresTab, labelsTab, class_num); } else { - cout << "oneDAL (native): training model with defaultDense method" - << endl; + logger::println( + logger::INFO, + "OneDAL (native): training model with defaultDense method"); trainingResult = trainModel( comm, featuresTab, labelsTab, class_num); } - cout << "oneDAL (native): training model finished" << endl; + logger::println(logger::INFO, "OneDAL (native): training model finished"); auto t2 = std::chrono::high_resolution_clock::now(); - - std::cout << "training took " - << (float)std::chrono::duration_cast( - t2 - t1) - .count() / - 1000 - << " secs" << std::endl; + auto duration = + std::chrono::duration_cast(t2 - t1).count(); + logger::println(logger::INFO, "training took %d secs", duration / 1000); if (rankId == ccl_root) { 
multinomial_naive_bayes::ModelPtr model = diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index 38557d615..c50099f9d 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -28,6 +28,7 @@ #include +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_OneCCL__.h" @@ -47,7 +48,7 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, jobject param) { - std::cerr << "OneCCL (native): init" << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): init"); auto t1 = std::chrono::high_resolution_clock::now(); @@ -68,8 +69,8 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cerr << "OneCCL (native): init took " << duration << " secs" - << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): init took %d secs", + duration); rank_id = getComm().rank(); comm_size = getComm().size(); @@ -92,7 +93,7 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( */ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject) { - std::cerr << "OneCCL (native): init dpcpp" << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): init dpcpp"); ccl::init(); return 1; @@ -100,7 +101,7 @@ Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject) { JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1cleanup(JNIEnv *env, jobject obj) { - std::cerr << "OneCCL (native): cleanup" << std::endl; + logger::printerrln(logger::INFO, "OneCCL (native): cleanup"); g_kvs.pop_back(); g_comms.pop_back(); } @@ -135,7 +136,8 @@ static int fill_local_host_ip() { int family = AF_UNSPEC; char local_ip[CCL_IP_LEN]; if (getifaddrs(&ifaddr) < 0) { - std::cerr << "OneCCL (native): can not 
get host IP" << std::endl; + logger::printerrln(logger::ERROR, + "OneCCL (native): can not get host IP"); return -1; } @@ -157,7 +159,7 @@ static int fill_local_host_ip() { if (res != 0) { std::string s("OneCCL (native): getnameinfo error > "); s.append(gai_strerror(res)); - std::cerr << s << std::endl; + logger::printerrln(logger::ERROR, s); return -1; } local_host_ips.push_back(local_ip); @@ -165,8 +167,9 @@ static int fill_local_host_ip() { } } if (local_host_ips.empty()) { - std::cerr << "OneCCL (native): can't find interface to get host IP" - << std::endl; + logger::printerrln( + logger::ERROR, + "OneCCL (native): can't find interface to get host IP"); return -1; } @@ -177,7 +180,8 @@ static int fill_local_host_ip() { static bool is_valid_ip(char ip[]) { if (fill_local_host_ip() == -1) { - std::cerr << "OneCCL (native): get local host ip error" << std::endl; + logger::printerrln(logger::ERROR, + "OneCCL (native): get local host ip error"); return false; }; diff --git a/mllib-dal/src/main/native/OneDAL.cpp b/mllib-dal/src/main/native/OneDAL.cpp index ff2323031..320d5f199 100644 --- a/mllib-dal/src/main/native/OneDAL.cpp +++ b/mllib-dal/src/main/native/OneDAL.cpp @@ -23,7 +23,7 @@ using namespace daal; using namespace daal::data_management; -// Use oneDAL lib function +// Use OneDAL lib function extern bool daal_check_is_intel_cpu(); JNIEXPORT void JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cAddNumericTable( @@ -76,7 +76,7 @@ JNIEXPORT jboolean JNICALL Java_com_intel_oap_mllib_OneDAL_00024_cCheckPlatformCompatibility(JNIEnv *, jobject) { // Only guarantee compatibility and performance on Intel platforms, use - // oneDAL lib function + // OneDAL lib function return daal_check_is_intel_cpu(); } diff --git a/mllib-dal/src/main/native/OutputHelpers.hpp b/mllib-dal/src/main/native/OutputHelpers.hpp deleted file mode 100644 index e86f0667f..000000000 --- a/mllib-dal/src/main/native/OutputHelpers.hpp +++ /dev/null @@ -1,56 +0,0 @@ 
-/******************************************************************************* - * Copyright 2021 Intel Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ - -#pragma once - -#include -#include - -#include "oneapi/dal/table/row_accessor.hpp" -#include "oneapi/dal/table/common.hpp" - -inline std::ostream &operator<<(std::ostream &stream, const oneapi::dal::table &table) { - auto arr = oneapi::dal::row_accessor(table).pull(); - const auto x = arr.get_data(); - - if (table.get_row_count() <= 10) { - for (std::int64_t i = 0; i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; - } - std::cout << std::endl; - } - } - else { - for (std::int64_t i = 0; i < 5; i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; - } - std::cout << std::endl; - } - std::cout << "..." << (table.get_row_count() - 10) << " lines skipped..." 
<< std::endl; - for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) { - for (std::int64_t j = 0; j < table.get_column_count(); j++) { - std::cout << std::setw(10) << std::setiosflags(std::ios::fixed) - << std::setprecision(6) << x[i * table.get_column_count() + j]; - } - std::cout << std::endl; - } - } - return stream; -} diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index 33a0aa2a7..db23e1ff4 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -23,6 +23,7 @@ #include "oneapi/dal/algo/pca.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_feature_PCADALImpl.h" #include "service.h" @@ -42,7 +43,7 @@ typedef double algorithmFPType; /* Algorithm floating-point type */ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, ccl::communicator &comm, NumericTablePtr &pData, size_t nBlocks, jobject resultObj) { - std::cout << "oneDAL (native): CPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): CPU compute start"); using daal::byte; auto t1 = std::chrono::high_resolution_clock::now(); @@ -59,8 +60,9 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << " PCA (native): Covariance local step took " << duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, + "PCA (native): Covariance local step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -85,8 +87,9 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "PCA (native): Covariance gather to master took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "PCA (native): Covariance gather to master took %d secs", 
+ duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -125,8 +128,9 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "PCA (native): Covariance master step took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "PCA (native): Covariance master step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -146,8 +150,8 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "PCA (native): master step took " << duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, "PCA (native): master step took %d secs", + duration / 1000); /* Print the results */ pca_cpu::ResultPtr result = algorithm.getResult(); @@ -183,7 +187,7 @@ static void doPCAOneAPICompute( JNIEnv *env, jlong pNumTabData, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -196,8 +200,8 @@ static void doPCAOneAPICompute( auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "PCA (native): Covariance step took " << duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, "PCA (native): Covariance step took %d secs", + duration / 1000); if (isRoot) { using float_t = double; using method_t = pca_gpu::method::precomputed; @@ -212,8 +216,8 @@ static void doPCAOneAPICompute( duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "PCA (native): Eigen step took " << duration / 1000 - << " secs." 
<< std::endl; + logger::println(logger::INFO, "PCA (native): Eigen step took %d secs", + duration / 1000); // Return all eigenvalues & eigenvectors // Get the class of the input object jclass clazz = env->GetObjectClass(resultObj); @@ -222,10 +226,10 @@ static void doPCAOneAPICompute( env->GetFieldID(clazz, "pcNumericTable", "J"); jfieldID explainedVarianceNumericTableField = env->GetFieldID(clazz, "explainedVarianceNumericTable", "J"); - std::cout << "Eigenvectors:\n" - << result_train.get_eigenvectors() << std::endl; - std::cout << "Eigenvalues:\n" - << result_train.get_eigenvalues() << std::endl; + logger::println(logger::INFO, "Eigenvectors:"); + printHomegenTable(result_train.get_eigenvectors()); + logger::println(logger::INFO, "Eigenvalues:"); + printHomegenTable(result_train.get_eigenvalues()); HomogenTablePtr eigenvectors = std::make_shared(result_train.get_eigenvectors()); @@ -248,9 +252,10 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + ccl::communicator &cclComm = getComm(); size_t rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); @@ -263,8 +268,9 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used " - << nThreadsNew << std::endl; + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); doPCADAALCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -272,9 +278,10 @@ 
Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -293,8 +300,7 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( } #endif default: { - std::cout << "no supported device!" << std::endl; - exit(-1); + deviceError(); } } return 0; diff --git a/mllib-dal/src/main/native/Profile.hpp b/mllib-dal/src/main/native/Profile.hpp index d790c4066..429e65b65 100644 --- a/mllib-dal/src/main/native/Profile.hpp +++ b/mllib-dal/src/main/native/Profile.hpp @@ -3,6 +3,7 @@ #include #include #include +#include "Logger.h" class Profiler { public: @@ -10,7 +11,7 @@ class Profiler { void startProfile(std::string s = "") { action = s; - std::cout << subject << " (native): start " << action << std::endl; + logger::println(logger::INFO, "%s (native): start %s", subject.c_str(), action.c_str()); startTime = std::chrono::high_resolution_clock::now(); } @@ -19,12 +20,11 @@ class Profiler { auto duration = std::chrono::duration_cast( end_time - startTime) .count(); - std::cout << subject << " (native): " << action << " took " << (float)duration / 1000 - << " secs" << std::endl; + logger::println(logger::INFO, "%s (native): start %s took %f secs", subject.c_str(), action.c_str(), (float)duration / 1000); } void println(std::string msg) { - std::cout << subject << " (native): " << msg << std::endl; + logger::println(logger::INFO, "%s (native): %s", subject.c_str(), msg.c_str()); } private: diff --git a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 1e99be460..9aeebe2db 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ 
b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -21,6 +21,7 @@ #include "oneapi/dal/algo/basic_statistics.hpp" #endif +#include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_stat_SummarizerDALImpl.h" #include "service.h" @@ -39,7 +40,7 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, ccl::communicator &comm, const NumericTablePtr &pData, size_t nBlocks, jobject resultObj) { - std::cout << "oneDAL (native): CPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): CPU compute start"); using daal::byte; auto t1 = std::chrono::high_resolution_clock::now(); @@ -56,8 +57,9 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "low_order_moments (native): local step took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "low_order_moments (native): local step took %d secs", + duration / 1000); t1 = std::chrono::high_resolution_clock::now(); @@ -83,8 +85,9 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "low_order_moments (native): ccl_gather took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "low_order_moments (native): ccl_gather took %d secs", + duration / 1000); if (isRoot) { auto t1 = std::chrono::high_resolution_clock::now(); /* Create an algorithm to compute covariance on the master node */ @@ -122,8 +125,9 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId, auto duration = std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "low_order_moments (native): master step took " - << duration / 1000 << " secs" << std::endl; + logger::println(logger::INFO, + "low_order_moments (native): master step took %d secs", + duration / 1000); /* Print the 
results */ printNumericTable(result->get(low_order_moments::mean), @@ -202,7 +206,7 @@ static void doSummarizerOneAPICompute( JNIEnv *env, jlong pNumTabData, preview::spmd::communicator comm, jobject resultObj) { - std::cout << "oneDAL (native): GPU compute start" << std::endl; + logger::println(logger::INFO, "oneDAL (native): GPU compute start"); const bool isRoot = (comm.get_rank() == ccl_root); homogen_table htable = *reinterpret_cast(pNumTabData); @@ -210,17 +214,22 @@ static void doSummarizerOneAPICompute( auto t1 = std::chrono::high_resolution_clock::now(); const auto result_train = preview::compute(comm, bs_desc, htable); if (isRoot) { - std::cout << "Minimum:\n" << result_train.get_min() << std::endl; - std::cout << "Maximum:\n" << result_train.get_max() << std::endl; - std::cout << "Mean:\n" << result_train.get_mean() << std::endl; - std::cout << "Variance:\n" << result_train.get_variance() << std::endl; + logger::println(logger::INFO, "Minimum"); + printHomegenTable(result_train.get_min()); + logger::println(logger::INFO, "Maximum"); + printHomegenTable(result_train.get_max()); + logger::println(logger::INFO, "Mean"); + printHomegenTable(result_train.get_mean()); + logger::println(logger::INFO, "Variation"); + printHomegenTable(result_train.get_variance()); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = (float)std::chrono::duration_cast(t2 - t1) .count(); - std::cout << "Summarizer (native): computing step took " - << duration / 1000 << " secs." 
<< std::endl; + logger::println(logger::INFO, + "Summarizer (native): computing step took %d secs", + duration / 1000); // Return all covariance & mean jclass clazz = env->GetObjectClass(resultObj); @@ -260,9 +269,10 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { - std::cout << "oneDAL (native): use DPC++ kernels " - << "; device " << ComputeDeviceString[computeDeviceOrdinal] - << std::endl; + logger::println(logger::INFO, + "oneDAL (native): use DPC++ kernels; device %s", + ComputeDeviceString[computeDeviceOrdinal].c_str()); + ccl::communicator &cclComm = getComm(); int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); @@ -275,8 +285,9 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads(); - std::cout << "oneDAL (native): Number of CPU threads used " - << nThreadsNew << std::endl; + logger::println(logger::INFO, + "oneDAL (native): Number of CPU threads used %d", + nThreadsNew); doSummarizerDAALCompute(env, obj, rankId, cclComm, pData, executorNum, resultObj); break; @@ -284,9 +295,10 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { int nGpu = env->GetArrayLength(gpuIdxArray); - std::cout << "oneDAL (native): use GPU kernels with " << nGpu - << " GPU(s)" - << " rankid " << rankId << std::endl; + logger::println( + logger::INFO, + "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, + rankId); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); @@ -305,8 +317,7 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( } #endif default: { - std::cout << "no supported device!" 
<< std::endl; - exit(-1); + deviceError(); } } return 0; diff --git a/mllib-dal/src/main/native/error_handling.cpp b/mllib-dal/src/main/native/error_handling.cpp index 45967009a..987903b04 100644 --- a/mllib-dal/src/main/native/error_handling.cpp +++ b/mllib-dal/src/main/native/error_handling.cpp @@ -21,39 +21,41 @@ !******************************************************************************/ #include "error_handling.h" +#include "Logger.h" #include void checkAllocation(void *ptr) { if (!ptr) { - std::cout << "Error: Memory allocation failed" << std::endl; + logger::printerrln(logger::ERROR, "Error: Memory allocation failed"); exit(-1); } } void checkPtr(void *ptr) { if (!ptr) { - std::cout << "Error: NULL pointer" << std::endl; + logger::printerrln(logger::ERROR, "Error: NULL pointer"); exit(-2); } } void fileOpenError(const char *filename) { - std::cout << "Unable to open file '" << filename << "'" << std::endl; + logger::printerrln(logger::ERROR, "Unable to open file '%s'", filename); exit(fileError); } void fileReadError() { - std::cout << "Unable to read next line" << std::endl; + logger::printerrln(logger::ERROR, "Unable to read next line"); exit(fileError); } void sparceFileReadError() { - std::cout << "Incorrect format of file" << std::endl; + logger::printerrln(logger::ERROR, "Incorrect format of file"); exit(fileError); } void deviceError() { - std::cout << "Error: no supported device, please select HOST/CPU/GPU" - << std::endl; + logger::printerrln( + logger::ERROR, + "Error: no supported device, please select HOST/CPU/GPU"); exit(-1); } diff --git a/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp index 8f67b3256..fb64251b7 100644 --- a/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/ColumnAccessorImpl.cpp @@ -23,6 +23,7 @@ #include #ifdef CPU_GPU_PROFILE +#include "Logger.h" #include "Common.hpp" #include 
"com_intel_oneapi_dal_table_ColumnAccessor.h" @@ -40,7 +41,7 @@ using namespace oneapi::dal; JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPullDouble (JNIEnv *env, jobject, jlong cTableAddr, jlong cColumnIndex, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal) { - printf("ColumnAccessor PullDouble \n"); + logger::println(logger::INFO, "ColumnAccessor PullDouble"); homogen_table htable = *reinterpret_cast(cTableAddr); column_accessor acc{ htable }; oneapi::dal::array col_values; @@ -74,7 +75,7 @@ JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cP JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPullFloat (JNIEnv *env, jobject, jlong cTableAddr, jlong cColumnIndex, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal) { - printf("ColumnAccessor PullFloat \n"); + logger::println(logger::INFO, "ColumnAccessor PullFloat"); homogen_table htable = *reinterpret_cast(cTableAddr); column_accessor acc{ htable }; oneapi::dal::array col_values; @@ -108,7 +109,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPu JNIEXPORT jintArray JNICALL Java_com_intel_oneapi_dal_table_ColumnAccessor_cPullInt (JNIEnv *env, jobject, jlong cTableAddr, jlong cColumnIndex, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal) { - printf("ColumnAccessor PullInt \n"); + logger::println(logger::INFO, "ColumnAccessor PullInt"); homogen_table htable = *reinterpret_cast(cTableAddr); column_accessor acc { htable }; oneapi::dal::array col_values; diff --git a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp index 69149c106..06f673125 100644 --- a/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/HomogenTableImpl.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*******************************************************************************/ #include -#include #include #include #include @@ -28,6 +27,7 @@ #include "com_intel_oneapi_dal_table_HomogenTableImpl.h" #include "service.h" +#include "Logger.h" using namespace std; using namespace oneapi::dal; @@ -71,7 +71,7 @@ static data_layout getDataLayout(jint cLayout) { template inline jlong MergeHomogenTable(homogen_table &targetTable, homogen_table &sourceTable, const jint computeDeviceOrdinal) { - printf("oneDal merge HomogenTable \n"); + logger::println(logger::INFO, "oneDal merge HomogenTable"); const T *targetData = targetTable.get_data(); const int targetDatasize = targetTable.get_column_count() * targetTable.get_row_count(); @@ -126,10 +126,10 @@ template JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_iInit( JNIEnv *env, jobject, jlong cRowCount, jlong cColCount, jintArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable int init \n"); + logger::println(logger::INFO, "HomogenTable int init"); jint *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -170,10 +170,10 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_iInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_fInit( JNIEnv *env, jobject, jlong cRowCount, jlong cColCount, jfloatArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable float init \n"); + logger::println(logger::INFO, "HomogenTable float init"); jfloat *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); 
} const std::vector dependencies = {}; @@ -213,10 +213,10 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_fInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_dInit( JNIEnv *env, jobject, jlong cRowCount, jlong cColCount, jdoubleArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable double init \n"); + logger::println(logger::INFO, "HomogenTable double init"); jdouble *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -257,10 +257,10 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_dInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_lInit( JNIEnv *env, jobject, jlong cRowCount, jlong cColCount, jlongArray cData, jint cLayout, jint computeDeviceOrdinal) { - printf("HomogenTable long init \n"); + logger::println(logger::INFO, "HomogenTable long init"); jlong *fData = static_cast(env->GetPrimitiveArrayCritical(cData, NULL)); if (fData == NULL) { - std::cout << "Error: unable to obtain critical array" << std::endl; + logger::printerrln(logger::ERROR, "Error: unable to obtain critical array"); exit(-1); } const std::vector dependencies = {}; @@ -301,7 +301,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_lInit( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetColumnCount( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getcolumncount %ld \n", cTableAddr); + logger::println(logger::INFO, "HomogenTable getcolumncount %ld", cTableAddr); homogen_table htable = *reinterpret_cast(cTableAddr); return htable.get_column_count(); } @@ -314,7 +314,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetColumnCount( JNIEXPORT jlong JNICALL 
Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetRowCount( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getrowcount \n"); + logger::println(logger::INFO, "HomogenTable getrowcount"); homogen_table htable = *reinterpret_cast(cTableAddr); return htable.get_row_count(); } @@ -327,7 +327,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetRowCount( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetKind(JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getkind \n"); + logger::println(logger::INFO, "HomogenTable getkind"); homogen_table htable = *reinterpret_cast(cTableAddr); return htable.get_kind(); } @@ -340,7 +340,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetKind(JNIEnv *env, jobject, JNIEXPORT jint JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDataLayout( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getDataLayout \n"); + logger::println(logger::INFO, "HomogenTable getDataLayout"); homogen_table htable = *reinterpret_cast(cTableAddr); return (jint)htable.get_data_layout(); } @@ -353,7 +353,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDataLayout( JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetMetaData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getMetaData \n"); + logger::println(logger::INFO, "HomogenTable getMetaData"); homogen_table htable = *reinterpret_cast(cTableAddr); const table_metadata *mdata = reinterpret_cast(&htable.get_metadata()); TableMetadataPtr metaPtr = std::make_shared(*mdata); @@ -370,7 +370,7 @@ JNIEXPORT jintArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetIntData(JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getIntData \n"); + logger::println(logger::INFO, "HomogenTable getIntData"); homogen_table htable = *reinterpret_cast(cTableAddr); const int *data = htable.get_data(); const int datasize = htable.get_column_count() * 
htable.get_row_count(); @@ -387,7 +387,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetIntData(JNIEnv *env, JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetFloatData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getFloatData \n"); + logger::println(logger::INFO, "HomogenTable getFloatData"); homogen_table htable = *reinterpret_cast(cTableAddr); const float *data = htable.get_data(); const int datasize = htable.get_column_count() * htable.get_row_count(); @@ -405,7 +405,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetFloatData( JNIEXPORT jlongArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetLongData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getLongData \n"); + logger::println(logger::INFO, "HomogenTable getLongData"); homogen_table htable = *reinterpret_cast(cTableAddr); const long *data = htable.get_data(); const int datasize = htable.get_column_count() * htable.get_row_count(); @@ -423,7 +423,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetLongData( JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDoubleData( JNIEnv *env, jobject, jlong cTableAddr) { - printf("HomogenTable getDoubleData \n"); + logger::println(logger::INFO, "HomogenTable getDoubleData"); homogen_table htable = *reinterpret_cast(cTableAddr); const double *data = htable.get_data(); const int datasize = htable.get_column_count() * htable.get_row_count(); @@ -439,7 +439,7 @@ Java_com_intel_oneapi_dal_table_HomogenTableImpl_cGetDoubleData( */ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cEmptyTableInit (JNIEnv *env, jobject) { - printf(" init empty HomogenTable \n"); + logger::println(logger::INFO, " init empty HomogenTable"); HomogenTablePtr tablePtr = std::make_shared(); saveHomogenTablePtrToVector(tablePtr); return (jlong)tablePtr.get(); @@ -451,7 +451,7 @@ JNIEXPORT jlong JNICALL 
Java_com_intel_oneapi_dal_table_HomogenTableImpl_cEmptyT */ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHomogenTable (JNIEnv *env, jobject, jlong targetTablePtr, jlong sourceTablePtr, jint cComputeDevice){ - printf("oneDal addHomogenTable \n"); + logger::println(logger::INFO, "oneDal addHomogenTable"); homogen_table targetTable = *reinterpret_cast(targetTablePtr); homogen_table sourceTable = *reinterpret_cast(sourceTablePtr); const auto targetMetaData = targetTable.get_metadata(); @@ -460,7 +460,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHom const auto targetDataType = targetMetaData.get_data_type(0); const auto sourceDataType = sourceMetaData.get_data_type(0); if( targetDataType != sourceDataType ) { - std::cout << "different data type" << std::endl; + logger::printerrln(logger::ERROR, "different data type"); exit(-1); } else { switch(targetDataType){ @@ -477,7 +477,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_HomogenTableImpl_cAddHom return MergeHomogenTable(targetTable, sourceTable, cComputeDevice); } default: { - std::cout << "no base type" << std::endl; + logger::printerrln(logger::ERROR, "no base type"); exit(-1); } } diff --git a/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp index a79406b65..2e91c99c1 100644 --- a/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/RowAccessorImpl.cpp @@ -23,6 +23,7 @@ #include #ifdef CPU_GPU_PROFILE +#include "Logger.h" #include "Common.hpp" #include "com_intel_oneapi_dal_table_RowAccessor.h" @@ -40,7 +41,7 @@ using namespace oneapi::dal; JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullDouble (JNIEnv *env, jobject, jlong cTableAddr, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal){ - printf("RowAccessor PullDouble \n"); + logger::println(logger::INFO, "RowAccessor 
PullDouble"); homogen_table htable = *reinterpret_cast(cTableAddr); row_accessor acc {htable}; jdoubleArray newDoubleArray = nullptr; @@ -74,7 +75,7 @@ JNIEXPORT jdoubleArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPull JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullFloat (JNIEnv *env, jobject, jlong cTableAddr, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal){ - printf("RowAccessor PullFloat \n"); + logger::println(logger::INFO, "RowAccessor PullFloat"); homogen_table htable = *reinterpret_cast(cTableAddr); row_accessor acc { htable }; jfloatArray newFloatArray = nullptr; @@ -108,7 +109,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullF JNIEXPORT jintArray JNICALL Java_com_intel_oneapi_dal_table_RowAccessor_cPullInt (JNIEnv *env, jobject, jlong cTableAddr, jlong cRowStartIndex, jlong cRowEndIndex, jint computeDeviceOrdinal){ - printf("RowAccessor PullInt \n"); + logger::println(logger::INFO, "RowAccessor PullInt"); homogen_table htable = *reinterpret_cast(cTableAddr); row_accessor acc { htable }; jintArray newIntArray = nullptr; diff --git a/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp b/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp index 2fc370458..a2aa49941 100644 --- a/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp +++ b/mllib-dal/src/main/native/oneapi/dal/SimpleMetadataImpl.cpp @@ -22,7 +22,7 @@ #include #ifdef CPU_GPU_PROFILE - +#include "Logger.h" #include "com_intel_oneapi_dal_table_SimpleMetadataImpl.h" #include "oneapi/dal/table/homogen.hpp" @@ -37,7 +37,7 @@ using namespace oneapi::dal; JNIEXPORT jlong JNICALL Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureCount( JNIEnv *env, jobject, jlong cTableAddr) { - printf("SimpleMetadata getfeaturecount \n"); + logger::println(logger::INFO, "SimpleMetadata getfeaturecount"); table_metadata mdata = *reinterpret_cast(cTableAddr); return 
(jlong)mdata.get_feature_count(); @@ -51,7 +51,7 @@ Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureCount( JNIEXPORT jint JNICALL Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureType( JNIEnv *env, jobject, jlong cTableAddr, jint cindex) { - printf("SimpleMetadata getfeaturetype \n"); + logger::println(logger::INFO, "SimpleMetadata getfeaturetype"); table_metadata mdata = *reinterpret_cast(cTableAddr); return (jint)mdata.get_feature_type(cindex); } @@ -64,7 +64,7 @@ Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetFeatureType( JNIEXPORT jint JNICALL Java_com_intel_oneapi_dal_table_SimpleMetadataImpl_cGetDataType( JNIEnv *env, jobject, jlong cTableAddr, jint cindex) { - printf("SimpleMetadata getdatatype \n"); + logger::println(logger::INFO, "SimpleMetadata getdatatype"); table_metadata mdata = *reinterpret_cast(cTableAddr); return (jint)mdata.get_data_type(cindex); diff --git a/mllib-dal/src/main/native/service.cpp b/mllib-dal/src/main/native/service.cpp index 66bd1d06b..632f1b0d4 100644 --- a/mllib-dal/src/main/native/service.cpp +++ b/mllib-dal/src/main/native/service.cpp @@ -1,4 +1,5 @@ #include "service.h" +#include "Logger.h" #include "error_handling.h" using namespace daal; @@ -36,18 +37,16 @@ template void printArray(T *array, const size_t nPrintedCols, const size_t nPrintedRows, const size_t nCols, const std::string &message, size_t interval = 10) { - std::cout << std::setiosflags(std::ios::left); - std::cout << message << std::endl; + logger::println(logger::INFO, message); for (size_t i = 0; i < nPrintedRows; i++) { + logger::print(logger::INFO, ""); for (size_t j = 0; j < nPrintedCols; j++) { - std::cout << std::setw(interval) - << std::setiosflags(std::ios::fixed) - << std::setprecision(3); - std::cout << array[i * nCols + j]; + logger::print(logger::NONE, "%*.3f", interval, + array[i * nCols + j]); } - std::cout << std::endl; + logger::println(logger::NONE, ""); } - std::cout << std::endl; + 
logger::println(logger::INFO, ""); } template @@ -59,44 +58,38 @@ void printArray(T *array, const size_t nCols, const size_t nRows, template void printLowerArray(T *array, const size_t nPrintedRows, const std::string &message, size_t interval = 10) { - std::cout << std::setiosflags(std::ios::left); - std::cout << message << std::endl; + logger::println(logger::INFO, message); int ind = 0; for (size_t i = 0; i < nPrintedRows; i++) { + logger::print(logger::INFO, ""); for (size_t j = 0; j <= i; j++) { - std::cout << std::setw(interval) - << std::setiosflags(std::ios::fixed) - << std::setprecision(3); - std::cout << array[ind++]; + logger::print(logger::NONE, "%*.3f", interval, array[ind++]); } - std::cout << std::endl; + logger::println(logger::NONE, ""); } - std::cout << std::endl; + logger::println(logger::INFO, ""); } template void printUpperArray(T *array, const size_t nPrintedCols, const size_t nPrintedRows, const size_t nCols, const std::string &message, size_t interval = 10) { - std::cout << std::setiosflags(std::ios::left); - std::cout << message << std::endl; + logger::println(logger::INFO, message); int ind = 0; for (size_t i = 0; i < nPrintedRows; i++) { + logger::print(logger::INFO, ""); for (size_t j = 0; j < i; j++) { - std::cout << " "; + logger::print(logger::NONE, " "); } for (size_t j = i; j < nPrintedCols; j++) { - std::cout << std::setw(interval) - << std::setiosflags(std::ios::fixed) - << std::setprecision(3); - std::cout << array[ind++]; + logger::print(logger::NONE, "%*.3f", interval, array[ind++]); } for (size_t j = nPrintedCols; j < nCols; j++) { ind++; } - std::cout << std::endl; + logger::println(logger::NONE, ""); } - std::cout << std::endl; + logger::println(logger::INFO, ""); } void printNumericTable(NumericTable *dataTable, const char *message = "", @@ -208,7 +201,7 @@ void saveHomogenTablePtrToVector(const HomogenTablePtr &ptr) { g_kmtx.unlock(); } -#ifdef CPU_GPU_PRFILE +#ifdef CPU_GPU_PROFILE NumericTablePtr 
homegenToSyclHomogen(NumericTablePtr ntHomogen) { int nRows = ntHomogen->getNumberOfRows(); int nColumns = ntHomogen->getNumberOfColumns(); diff --git a/mllib-dal/src/main/native/service.h b/mllib-dal/src/main/native/service.h index 288f35243..ca25ca9f9 100644 --- a/mllib-dal/src/main/native/service.h +++ b/mllib-dal/src/main/native/service.h @@ -41,6 +41,7 @@ using namespace daal::data_management; #include #include +#include "Logger.h" #include "error_handling.h" #include "oneapi/dal/table/detail/csr.hpp" #include "oneapi/dal/table/homogen.hpp" @@ -67,5 +68,43 @@ void saveHomogenTablePtrToVector(const HomogenTablePtr &ptr); void saveCSRTablePtrToVector(const CSRTablePtr &ptr); #ifdef CPU_GPU_PROFILE +#include "oneapi/dal/table/common.hpp" +#include "oneapi/dal/table/row_accessor.hpp" + NumericTablePtr homegenToSyclHomogen(NumericTablePtr ntHomogen); +inline void printHomegenTable(const oneapi::dal::table &table) { + auto arr = oneapi::dal::row_accessor(table).pull(); + const auto x = arr.get_data(); + if (table.get_row_count() <= 10) { + for (std::int64_t i = 0; i < table.get_row_count(); i++) { + logger::print(logger::INFO, ""); + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::NONE, "%10f", + x[i * table.get_column_count() + j]); + } + logger::println(logger::NONE, ""); + } + } else { + for (std::int64_t i = 0; i < 5; i++) { + logger::print(logger::INFO, ""); + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::NONE, "%10f", + x[i * table.get_column_count() + j]); + } + logger::println(logger::NONE, ""); + } + logger::println(logger::INFO, "...%ld lines skipped...", + (table.get_row_count() - 10)); + for (std::int64_t i = table.get_row_count() - 5; + i < table.get_row_count(); i++) { + logger::print(logger::INFO, ""); + for (std::int64_t j = 0; j < table.get_column_count(); j++) { + logger::print(logger::NONE, "%10f", + x[i * table.get_column_count() + j]); + } + 
logger::println(logger::NONE, ""); + } + } + return; +} #endif