
Commit e7ad4bc

[CLML] Changes corresponding to OpenCL workspace refactorization (#13972)
* [CLML] Changes corresponding to OpenCL workspace refactorization: OpenCL context access is changed as part of the OpenCL backend refactoring.
* Address review comments.
1 parent aa927df commit e7ad4bc
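In short, the refactored OpenCL workspace no longer exposes a single workspace->context; each platform owns its own context, so every CLML call site now indexes workspace->contexts[platform_id]. A minimal sketch of the new access pattern, assembled from the hunks below (the member and map names come straight from the diff; the standalone snippet itself is illustrative, not a function in the tree):

  // Resolve handles once (as InitCLML now does), then reuse them at call sites.
  workspace = cl::OpenCLWorkspace::Global();
  workspace->Init();
  tentry = workspace->GetThreadEntry();
  // TVM device index -> OpenCL device handle
  device_id = workspace->GetCLDeviceID(tentry->device.device_id);
  // device handle -> owning platform
  platform_id = workspace->device_to_platform[device_id];
  // platform -> its cl_context (previously the single workspace->context)
  cl_context ctx = workspace->contexts[platform_id];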

File tree

2 files changed: +70 −59 lines

src/runtime/contrib/clml/clml_runtime.cc

Lines changed: 68 additions & 59 deletions
@@ -140,15 +140,17 @@ class CLMLRuntime : public JSONRuntimeBase {
   void InitCLML() {
     // Setup CLML Context
     cl_int result = 0;
-
     workspace = cl::OpenCLWorkspace::Global();
     workspace->Init();
     tentry = workspace->GetThreadEntry();

     if (!ExtensionStringPresent()) {
-      LOG(WARNING) << "CLML Runtime Init: Qualcomm extn not present.\n";
+      LOG(FATAL) << "CLML Runtime Init: Qualcomm extn not present.\n";
       return;
     }
+    device_id = workspace->GetCLDeviceID(tentry->device.device_id);
+    platform_id = workspace->device_to_platform[device_id];
+
     // Query and Get CLML Interface
     static const cl_uint MAX_VERSIONS = 256;
     cl_int majorVersions[MAX_VERSIONS];
@@ -492,7 +494,7 @@ class CLMLRuntime : public JSONRuntimeBase {
       JSONGraphNode node = it->second.second;
       void* node_data = nullptr;

-      allocateTensorMemory(h_ClmlIntf, workspace->context, tensor_desc);
+      allocateTensorMemory(h_ClmlIntf, workspace->contexts[platform_id], tensor_desc);

       if (node.GetOpType() == "const") {
         node_data = data_entry_[EntryID(it->first, 0)]->data;
@@ -581,11 +583,9 @@ class CLMLRuntime : public JSONRuntimeBase {

   bool ExtensionStringPresent(void) {
     cl_int result = 0;
-    if (workspace->platform_id == nullptr) {
-      return 0;
-    }
     size_t reqd_size = 0;
-    cl_device_id device_id = workspace->devices[workspace->GetThreadEntry()->device.device_id];
+    cl_device_id device_id =
+        workspace->GetCLDeviceID(workspace->GetThreadEntry()->device.device_id);
     result = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, 0, NULL, &reqd_size);
     ICHECK(reqd_size > 0u && result == CL_SUCCESS) << "clGetDeviceInfo:" << result;

@@ -607,7 +607,8 @@ class CLMLRuntime : public JSONRuntimeBase {

     cl_ml_tensor_desc_qcom desc = {
         dtype, layout, dims.n, dims.c, dims.h, dims.w, 0, CL_TENSOR_DIMENSIONS_4D_QCOM, { 0 }};
-    result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &tensor);
+    result =
+        h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc, &tensor);
     ICHECK(tensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
     (void)result;
     return tensor;
@@ -619,11 +620,12 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_int result = CL_OUT_OF_HOST_MEMORY;
     cl_mem buffer = NULL;

-    result =
-        h_ClmlIntf->clGetMLTensorMemorySizeQCOM(workspace->context, pTensorMemDesc->tensor, &size);
+    result = h_ClmlIntf->clGetMLTensorMemorySizeQCOM(workspace->contexts[platform_id],
+                                                     pTensorMemDesc->tensor, &size);
     ICHECK(result == CL_SUCCESS) << "clGetMLTensorMemorySizeQCOM:" << result;

-    buffer = clCreateBuffer(workspace->context, CL_MEM_READ_WRITE, size, NULL, &result);
+    buffer =
+        clCreateBuffer(workspace->contexts[platform_id], CL_MEM_READ_WRITE, size, NULL, &result);
     ICHECK(result == CL_SUCCESS) << "clCreateBuffer:" << result;

     pTensorMemDesc->memory = buffer;
@@ -686,7 +688,8 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_channel_type cl_dtype = MakeCLDataType(tvm_dtype);

     auto tensor_dsc = std::make_shared<cl_ml_tensor_memory_desc_qcom>();
-    tensor_dsc->tensor = DeviceMakeCLMLTensor(workspace->context, dims, layout, cl_dtype);
+    tensor_dsc->tensor =
+        DeviceMakeCLMLTensor(workspace->contexts[platform_id], dims, layout, cl_dtype);
     return tensor_dsc;
   }

@@ -800,8 +803,8 @@ class CLMLRuntime : public JSONRuntimeBase {
     } else {
       cl_ml_tensor_desc_qcom desc = {};
       desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
-      result =
-          h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+      result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+                                                &layer_.unusedTensor);
       ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
       bias->tensor = layer_.unusedTensor;
     }
@@ -821,13 +824,13 @@ class CLMLRuntime : public JSONRuntimeBase {
     if (!has_bn) {
       if (!has_act) {
         result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
-            workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
-            output->tensor, &op, NULL);
+            workspace->contexts[platform_id], 0, &conv_desc, input->tensor, weight->tensor,
+            bias->tensor, output->tensor, &op, NULL);
         ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
       } else {
         result = h_ClmlIntf->clCreateMLOpFusedConvolutionActivationForwardQCOM(
-            workspace->context, 0, &conv_desc, &act_desc, input->tensor, weight->tensor,
-            bias->tensor, NULL, output->tensor, &op, tuning_cache);
+            workspace->contexts[platform_id], 0, &conv_desc, &act_desc, input->tensor,
+            weight->tensor, bias->tensor, NULL, output->tensor, &op, tuning_cache);
         ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
       }
       layer_.func_ins.push_back(input);
@@ -854,15 +857,15 @@ class CLMLRuntime : public JSONRuntimeBase {
       cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};
       if (!has_act) {
         result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormForwardQCOM(
-            workspace->context, 0, &conv_desc, &bn_desc, input->tensor, weight->tensor,
-            bias->tensor, output->tensor, bn_mean->tensor, bn_var->tensor, bn_scale->tensor,
-            bn_bias->tensor, &op, tuning_cache);
+            workspace->contexts[platform_id], 0, &conv_desc, &bn_desc, input->tensor,
+            weight->tensor, bias->tensor, output->tensor, bn_mean->tensor, bn_var->tensor,
+            bn_scale->tensor, bn_bias->tensor, &op, tuning_cache);
         ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
       } else {
         result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormActivationForwardQCOM(
-            workspace->context, 0, &conv_desc, &bn_desc, &act_desc, input->tensor, weight->tensor,
-            bias->tensor, output->tensor, NULL, bn_mean->tensor, bn_var->tensor, bn_scale->tensor,
-            bn_bias->tensor, &op, tuning_cache);
+            workspace->contexts[platform_id], 0, &conv_desc, &bn_desc, &act_desc, input->tensor,
+            weight->tensor, bias->tensor, output->tensor, NULL, bn_mean->tensor, bn_var->tensor,
+            bn_scale->tensor, bn_bias->tensor, &op, tuning_cache);

         ICHECK(op && result == CL_SUCCESS) << "Convolution Error:" << result;
       }
@@ -895,13 +898,13 @@ class CLMLRuntime : public JSONRuntimeBase {

     cl_ml_tensor_desc_qcom desc = {};
     desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
-    result =
-        h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+    result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+                                              &layer_.unusedTensor);
     ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << ":" << result;

-    result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM(workspace->context, 0, &act_desc,
-                                                           input->tensor, layer_.unusedTensor,
-                                                           output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM(
+        workspace->contexts[platform_id], 0, &act_desc, input->tensor, layer_.unusedTensor,
+        output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Activation Error:" << result;

     layer_.func_ins.push_back(input);
@@ -947,8 +950,8 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};

     result = h_ClmlIntf->clCreateMLOpBatchNormForwardQCOM(
-        workspace->context, 0, &bn_desc, input->tensor, bn_mean->tensor, bn_var->tensor,
-        bn_scale->tensor, bn_bias->tensor, output->tensor, &op, tuning_cache);
+        workspace->contexts[platform_id], 0, &bn_desc, input->tensor, bn_mean->tensor,
+        bn_var->tensor, bn_scale->tensor, bn_bias->tensor, output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Batchnorm Error:" << result;

     layer->function.push_back(op);
@@ -997,12 +1000,13 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_ml_tensor_desc_qcom desc = {};
     cl_ml_tensor_qcom unusedTensor = NULL;
     desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
-    result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &unusedTensor);
+    result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+                                              &unusedTensor);
     ICHECK(unusedTensor && result == CL_SUCCESS) << ":" << result;

-    result =
-        h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->context, 0, &pool_desc, input->tensor,
-                                                   unusedTensor, output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->contexts[platform_id], 0,
+                                                        &pool_desc, input->tensor, unusedTensor,
+                                                        output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Pooling Error:" << result;

     layer_.func_ins.push_back(input);
@@ -1043,13 +1047,13 @@ class CLMLRuntime : public JSONRuntimeBase {

     cl_ml_tensor_desc_qcom desc = {};
     desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
-    result =
-        h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+    result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+                                              &layer_.unusedTensor);
     ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << ":" << result;

-    result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(workspace->context, 0, &pool_desc,
-                                                        input->tensor, layer_.unusedTensor,
-                                                        output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM(
+        workspace->contexts[platform_id], 0, &pool_desc, input->tensor, layer_.unusedTensor,
+        output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Pooling Error:" << result;

     layer_.func_ins.push_back(input);
@@ -1079,7 +1083,7 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_ml_op_softmax_desc_qcom softmax_desc = {CL_SOFTMAX_ALGORITHM_ACCURATE_QCOM,
                                                CL_SOFTMAX_MODE_INSTANCE_QCOM, cl_arithmetic_mode};

-    result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM(workspace->context, 0, &softmax_desc,
+    result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM(workspace->contexts[platform_id], 0, &softmax_desc,
                                                  input->tensor, output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "SoftMax Error:" << result;

@@ -1125,8 +1129,8 @@ class CLMLRuntime : public JSONRuntimeBase {
         {clml_padding[0], clml_padding[1], clml_padding[2], clml_padding[3], 0, 0, 0, 0},
         cl_arithmetic_mode};

-    result = h_ClmlIntf->clCreateMLOpPadQCOM(workspace->context, 0, &pad_desc, input->tensor,
-                                             output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpPadQCOM(workspace->contexts[platform_id], 0, &pad_desc,
+                                             input->tensor, output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Pad Error:" << result;

     layer_.func_ins.push_back(input);
@@ -1150,7 +1154,7 @@ class CLMLRuntime : public JSONRuntimeBase {
                                              cl_dtype);
     auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

-    result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->context, 0, input->tensor,
+    result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->contexts[platform_id], 0, input->tensor,
                                                  output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Reshape Error:" << result;

@@ -1175,7 +1179,7 @@ class CLMLRuntime : public JSONRuntimeBase {
                                              cl_dtype);
     auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

-    result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->context, 0, input->tensor,
+    result = h_ClmlIntf->clCreateMLOpReshapeQCOM(workspace->contexts[platform_id], 0, input->tensor,
                                                  output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Reshape Error:" << result;

@@ -1210,8 +1214,8 @@ class CLMLRuntime : public JSONRuntimeBase {
     }
     cl_ml_op_concat_desc_qcom concatDesc = {axis, (cl_uint)inputSize, cl_arithmetic_mode};

-    result = h_ClmlIntf->clCreateMLOpConcatQCOM(workspace->context, 0, &concatDesc, concatInputs,
-                                                output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpConcatQCOM(workspace->contexts[platform_id], 0, &concatDesc,
+                                                concatInputs, output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Concat Error:" << result;

     layer->function.push_back(op);
@@ -1250,8 +1254,8 @@ class CLMLRuntime : public JSONRuntimeBase {
     } else {
       cl_ml_tensor_desc_qcom desc = {};
       desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
-      result =
-          h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+      result = h_ClmlIntf->clCreateMLTensorQCOM(workspace->contexts[platform_id], NULL, &desc,
+                                                &layer_.unusedTensor);
       ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
       bias->tensor = layer_.unusedTensor;
     }
@@ -1269,8 +1273,8 @@ class CLMLRuntime : public JSONRuntimeBase {
                                   cl_arithmetic_mode};

     result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
-        workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
-        output->tensor, &op, NULL);
+        workspace->contexts[platform_id], 0, &conv_desc, input->tensor, weight->tensor,
+        bias->tensor, output->tensor, &op, NULL);
     ICHECK(op && result == CL_SUCCESS) << "Fully Connected Error:" << result;

     layer->function.push_back(op);
@@ -1300,8 +1304,8 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_ml_op_clip_desc_qcom clip_desc = {
         CL_CLIP_BY_VALUE_QCOM, {{a_max}, CL_FLOAT}, {{a_min}, CL_FLOAT}, cl_arithmetic_mode};

-    result = h_ClmlIntf->clCreateMLOpClipQCOM(workspace->context, 0, &clip_desc, input->tensor,
-                                              output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpClipQCOM(workspace->contexts[platform_id], 0, &clip_desc,
+                                              input->tensor, output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Clip Error:" << result;

     layer_.func_ins.push_back(input);
@@ -1342,8 +1346,9 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_ml_op_binary_desc_qcom add_desc = {
         binary_op, {{1.0}, CL_FLOAT}, {{1.0}, CL_FLOAT}, {{0.0}, CL_FLOAT}, cl_arithmetic_mode};

-    result = h_ClmlIntf->clCreateMLOpBinaryQCOM(workspace->context, 0, &add_desc, input_a->tensor,
-                                                input_b->tensor, output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpBinaryQCOM(workspace->contexts[platform_id], 0, &add_desc,
+                                                input_a->tensor, input_b->tensor, output->tensor,
+                                                &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << op_name << " Node Error:" << result;

     layer_.func_ins.push_back(input_a);
@@ -1371,8 +1376,9 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_uint block_size = std::stoi(node.GetAttr<std::vector<std::string>>("block_size")[0]);

     cl_ml_op_depthtospace_desc_qcom dtos_desc = {block_size, cl_arithmetic_mode};
-    result = h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM(
-        workspace->context, 0, &dtos_desc, input->tensor, output->tensor, &op, tuning_cache);
+    result =
+        h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM(workspace->contexts[platform_id], 0, &dtos_desc,
+                                                 input->tensor, output->tensor, &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "DepthToSpace Layer Error:" << result;

     layer_.func_ins.push_back(input);
@@ -1399,8 +1405,9 @@ class CLMLRuntime : public JSONRuntimeBase {
     cl_bool align_corners = std::stoi(node.GetAttr<std::vector<std::string>>("align_corners")[0]);

     cl_ml_op_resize_bilinear_desc_qcom resize_desc = {align_corners, false, cl_arithmetic_mode};
-    result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM(
-        workspace->context, 0, &resize_desc, input->tensor, output->tensor, &op, tuning_cache);
+    result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM(workspace->contexts[platform_id], 0,
+                                                        &resize_desc, input->tensor, output->tensor,
+                                                        &op, tuning_cache);
     ICHECK(op && result == CL_SUCCESS) << "Resize Layer Error:" << result;

     layer_.func_ins.push_back(input);
@@ -1418,6 +1425,8 @@ class CLMLRuntime : public JSONRuntimeBase {
   GET_ML_API_INTERFACE* h_ClmlIntf = NULL;
   cl::OpenCLWorkspace* workspace = NULL;
   cl::OpenCLThreadEntry* tentry = NULL;
+  cl_device_id device_id;
+  cl_platform_id platform_id;
   cl_ml_tuningcache_qcom tuning_cache = NULL;
   bool is_tuning_run;
   char* tuning_file;

src/runtime/opencl/opencl_common.h

Lines changed: 2 additions & 0 deletions
@@ -282,6 +282,8 @@ class OpenCLWorkspace : public DeviceAPI {

     return prop & CL_QUEUE_PROFILING_ENABLE;
   }
+  // Check if the device is present or not
+  bool IsDeviceExists(unsigned int device_id) { return device_id < devices.size(); }
   // Enable queue profiling, recreate if required
   void EnableQueueProfiling(Device dev, bool enable) {
     bool is_enabled = cl::OpenCLWorkspace::Global()->IsProfiling(dev);
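The new helper is a plain bounds check against the workspace's device table. A hypothetical call site, assuming a Device dev variable is in scope as in the surrounding methods (this guard is illustrative and not part of the commit):

  auto* ws = cl::OpenCLWorkspace::Global();
  // Validate the TVM device index before resolving the OpenCL handle.
  if (!ws->IsDeviceExists(dev.device_id)) {
    LOG(FATAL) << "OpenCL device index " << dev.device_id << " is out of range";
  }
  // Safe after the bounds check above.
  cl_device_id did = ws->GetCLDeviceID(dev.device_id);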
