Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
minmingzhu committed Nov 15, 2024
1 parent 6310909 commit 6a09a34
Show file tree
Hide file tree
Showing 24 changed files with 23 additions and 72 deletions.
2 changes: 1 addition & 1 deletion mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif
jint treeCount, jint numFeaturesPerNode, jint minObservationsLeafNode,
jint minObservationsSplitNode, jdouble minWeightFractionLeafNode,
jdouble minImpurityDecreaseSplitNode, jint maxTreeDepth, jlong seed,
jint maxBins, jboolean bootstrap, jintArray gpuIdxArray, jstring ip_port,
jint maxBins, jboolean bootstrap, jintArray gpuIdxArray,
jstring breakdown_name, jobject resultObj) {
logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels");

Expand Down
1 change: 0 additions & 1 deletion mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra
numFeaturesPerNode, minObservationsLeafNode, maxTreeDepth, seed,
maxbins, bootstrap, comm, c_breakdown_name, resultObj);

env->ReleaseStringUTFChars(ip_port, str);
env->ReleaseStringUTFChars(breakdown_name, cstr);
return hashmapObj;
}
Expand Down
35 changes: 1 addition & 34 deletions mllib-dal/src/main/native/GPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ std::vector<sycl::device> get_gpus() {
}

static int getLocalRank(ccl::communicator &comm, int size, int rank) {
const int MPI_MAX_PROCESSOR_NAME = 128;
/* Obtain local rank among nodes sharing the same host name */
char zero = static_cast<char>(0);
std::vector<char> name(MPI_MAX_PROCESSOR_NAME + 1, zero);
Expand Down Expand Up @@ -112,37 +113,3 @@ sycl::queue getQueue(const ComputeDevice device) {
}
}
}

preview::spmd::communicator<preview::spmd::device_memory_access::usm>
/**
 * Create a oneDAL SPMD (device/USM) communicator backed by oneCCL.
 *
 * Initializes the oneCCL runtime, builds a main KVS from the given
 * rendezvous ip:port, binds a SYCL queue to the first available GPU,
 * and constructs the collective communicator for this rank.
 *
 * @param executorNum    total number of executors (oneCCL world size)
 * @param rank           this process's rank within the communicator
 * @param ccl_ip_port    "ip:port" rendezvous address for the oneCCL KVS
 * @param breakdown_name logger instance name used for timing breakdown output
 * @return a device-memory (USM) SPMD communicator for collective ops
 */
createDalCommunicator(const jint executorNum, const jint rank,
                      const ccl::string ccl_ip_port, std::string breakdown_name) {
    auto gpus = get_gpus();

    auto t1 = std::chrono::high_resolution_clock::now();

    // oneCCL runtime must be initialized before any KVS/communicator calls.
    ccl::init();

    auto t2 = std::chrono::high_resolution_clock::now();
    auto duration =
        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
            .count();

    logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, OneCCL singleton init took %f secs.", rank, duration / 1000 );

    // Rendezvous: all ranks agree on the main key-value store via ip:port.
    auto kvs_attr = ccl::create_kvs_attr();

    kvs_attr.set<ccl::kvs_attr_id::ip_port>(ccl_ip_port);

    ccl::shared_ptr_class<ccl::kvs> kvs = ccl::create_main_kvs(kvs_attr);

    // NOTE(review): always binds to gpus[0]; presumably device selection by
    // rank happens elsewhere (or one GPU per process) — confirm with callers.
    sycl::queue queue{gpus[0]};
    t1 = std::chrono::high_resolution_clock::now();
    auto comm = preview::spmd::make_communicator<preview::spmd::backend::ccl>(
        queue, executorNum, rank, kvs);
    t2 = std::chrono::high_resolution_clock::now();
    duration =
        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
            .count();
    // BUGFIX: was `c_breakdown_name`, an undeclared identifier in this scope;
    // the parameter is `breakdown_name` (matches the init-timing log above).
    logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, create communicator took %f secs.", rank, duration / 1000 );
    return comm;
}
1 change: 0 additions & 1 deletion mllib-dal/src/main/native/GPU.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include "Communicator.hpp"
#include "service.h"
#include <CL/cl.h>
#include <CL/sycl.hpp>
Expand Down
2 changes: 1 addition & 1 deletion mllib-dal/src/main/native/KMeansImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe
auto comm = getDalComm();
ret = doKMeansOneAPICompute(env, pNumTabData, numRows, numCols,
pNumTabCenters, clusterNum, tolerance,
iterationNum, comm, resultObj);
iterationNum, comm, c_breakdown_name, resultObj);
env->ReleaseStringUTFChars(breakdown_name, cstr);

break;
Expand Down
1 change: 0 additions & 1 deletion mllib-dal/src/main/native/OneCCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init(
jint computeDeviceOrdinal, jobject param) {

logger::println(logger::INFO, "OneCCL (native): init");

const char *str = env->GetStringUTFChars(ip_port, 0);
ccl::string ccl_ip_port(str);
auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port);
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ object OneCCL extends Logging {
var cclParam = new CCLParam()

def init(executor_num: Int, rank: Int, ip_port: String,
computeDeviceOrdinal: Int = Common.ComputeDevice.CPU.ordinal()): Unit = {
computeDevice: Int = Common.ComputeDevice.CPU.ordinal()): Unit = {

logInfo(s"Initializing with IP_PORT: ${ip_port}")

// cclParam is output from native code
c_init(executor_num, rank, ip_port, cclParam)
c_init(executor_num, rank, ip_port, computeDevice, cclParam)

// executor number should equal to oneCCL world size
assert(executor_num == cclParam.getCommSize,
Expand Down Expand Up @@ -63,7 +63,8 @@ object OneCCL extends Logging {

@native def c_getAvailPort(localIP: String): Int

@native private def c_init(size: Int, rank: Int, ip_port: String, param: CCLParam): Int
@native private def c_init(size: Int, rank: Int, ip_port: String,
computeDevice: Int, param: CCLParam): Int

@native private def c_cleanup(): Unit
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ class RandomForestClassifierDALImpl(val uid: String,
maxBins,
bootstrap,
gpuIndices,
kvsIPPort,
trainingBreakdownName,
result)

Expand Down Expand Up @@ -162,7 +161,6 @@ class RandomForestClassifierDALImpl(val uid: String,
maxBins: Int,
bootstrap: Boolean,
gpuIndices: Array[Int],
kvsIPPort: String,
breakdownName: String,
result: RandomForestResult):
java.util.HashMap[java.lang.Integer, java.util.ArrayList[LearningNode]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class KMeansDALImpl(var nClusters: Int,
executorCores,
computeDevice.ordinal(),
gpuIndices,
kvsIPPort,
trainingBreakdownName,
result
)
Expand Down Expand Up @@ -151,7 +150,6 @@ class KMeansDALImpl(var nClusters: Int,
executorCores: Int,
computeDeviceOrdinal: Int,
gpuIndices: Array[Int],
kvsIPPort: String,
breakdownName: String,
result: KMeansResult): Long
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ class PCADALImpl(val k: Int,
executorCores,
computeDevice.ordinal(),
gpuIndices,
kvsIPPort,
trainingBreakdownName,
result
)
Expand Down Expand Up @@ -223,7 +222,6 @@ class PCADALImpl(val k: Int,
executorCores: Int,
computeDeviceOrdinal: Int,
gpuIndices: Array[Int],
kvsIPPort: String,
breakdownName: String,
result: PCAResult): Long
}
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean,
executorCores,
computeDevice.ordinal(),
gpuIndices,
kvsIPPort,
trainingBreakdownName,
result
)
Expand Down Expand Up @@ -203,7 +202,6 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean,
executorCores: Int,
computeDeviceOrdinal: Int,
gpuIndices: Array[Int],
kvsIPPort: String,
breakdownName: String,
result: LiRResult): Long

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ class RandomForestRegressorDALImpl(val uid: String,
maxbins,
bootstrap,
gpuIndices,
kvsIPPort,
trainingBreakdownName,
result)

Expand Down Expand Up @@ -157,7 +156,6 @@ class RandomForestRegressorDALImpl(val uid: String,
maxbins: Int,
bootstrap: Boolean,
gpuIndices: Array[Int],
kvsIPPort: String,
breakdownName: String,
result: RandomForestResult): java.util.HashMap[java.lang.Integer, java.util.ArrayList[LearningNode]]
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ class CorrelationDALImpl(
executorCores,
computeDevice.ordinal(),
gpuIndices,
kvsIPPort,
trainingBreakdownName,
result
)
Expand Down Expand Up @@ -127,7 +126,6 @@ class CorrelationDALImpl(
executorCores: Int,
computeDeviceOrdinal: Int,
gpuIndices: Array[Int],
kvsIPPort: String,
breakdownName: String,
result: CorrelationResult): Long
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ class SummarizerDALImpl(val executorNum: Int,
executorCores,
computeDevice.ordinal(),
gpuIndices,
kvsIPPort,
trainingBreakdownName,
result
)
Expand Down Expand Up @@ -160,7 +159,6 @@ class SummarizerDALImpl(val executorNum: Int,
executorCores: Int,
computeDeviceOrdinal: Int,
gpuIndices: Array[Int],
kvsIPPort: String,
breakdownName: String,
result: SummarizerResult): Long
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ConvertHomogenTableSuite extends FunctionsSuite with Logging {
val metadata = table.getMetaData
for (i <- 0 until 10) {
assert(metadata.getDataType(i) == FLOAT64)
assert(metadata.getFeatureType(i) == CommonJob.FeatureType.RATIO)
assert(metadata.getFeatureType(i) == Common.FeatureType.RATIO)
}

assertArrayEquals(table.getDoubleData, TestCommon.convertArray(data))
Expand All @@ -75,7 +75,7 @@ class ConvertHomogenTableSuite extends FunctionsSuite with Logging {
val metadata = table.getMetaData
for (i <- 0 until 10) {
assert(metadata.getDataType(i) == FLOAT64)
assert(metadata.getFeatureType(i) == CommonJob.FeatureType.RATIO)
assert(metadata.getFeatureType(i) == Common.FeatureType.RATIO)
}
assertArrayEquals(table.getDoubleData, data)

Expand Down Expand Up @@ -105,7 +105,7 @@ class ConvertHomogenTableSuite extends FunctionsSuite with Logging {
val metadata = table.getMetaData
for (i <- 0 until 10) {
assert(metadata.getDataType(i) == FLOAT64)
assert(metadata.getFeatureType(i) == CommonJob.FeatureType.RATIO)
assert(metadata.getFeatureType(i) == Common.FeatureType.RATIO)
}

assertArrayEquals(table.getDoubleData, TestCommon.convertArray(data))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class SummarizerHomogenTableSuite extends FunctionsSuite with Logging{

val sourceData = TestCommon.readCSV("src/test/resources/data/covcormoments_dense.csv")

val dataTable = new HomogenTable(sourceData.length, sourceData(0).length, TestCommon.convertArray(sourceData), CommonJob.ComputeDevice.HOST);
val dataTable = new HomogenTable(sourceData.length, sourceData(0).length, TestCommon.convertArray(sourceData), Common.ComputeDevice.HOST);
val summarizerDAL = new SummarizerDALImpl(1, 1)
val gpuIndices = Array(0)
val result = new SummarizerResult()
Expand Down
10 changes: 5 additions & 5 deletions mllib-dal/src/test/scala/com/intel/oap/mllib/TestCommon.scala
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,15 @@ object TestCommon {
matrix
}

def getComputeDevice: CommonJob.ComputeDevice = {
def getComputeDevice: Common.ComputeDevice = {
val device = System.getProperty("computeDevice")
var computeDevice: CommonJob.ComputeDevice = CommonJob.ComputeDevice.HOST
var computeDevice: Common.ComputeDevice = Common.ComputeDevice.HOST
if(device != null) {
// scalastyle:off caselocale
device.toUpperCase match {
case "HOST" => computeDevice = CommonJob.ComputeDevice.HOST
case "CPU" => computeDevice = CommonJob.ComputeDevice.CPU
case "GPU" => computeDevice = CommonJob.ComputeDevice.GPU
case "HOST" => computeDevice = Common.ComputeDevice.HOST
case "CPU" => computeDevice = Common.ComputeDevice.CPU
case "GPU" => computeDevice = Common.ComputeDevice.GPU
case _ => "Invalid Device"
}
// scalastyle:on caselocale
Expand Down

0 comments on commit 6a09a34

Please sign in to comment.