@@ -140,15 +140,17 @@ class CLMLRuntime : public JSONRuntimeBase {
140140 void InitCLML () {
141141 // Setup CLML Context
142142 cl_int result = 0 ;
143-
144143 workspace = cl::OpenCLWorkspace::Global ();
145144 workspace->Init ();
146145 tentry = workspace->GetThreadEntry ();
147146
148147 if (!ExtensionStringPresent ()) {
149- LOG (WARNING ) << " CLML Runtime Init: Qualcomm extn not present.\n " ;
148+ LOG (FATAL ) << " CLML Runtime Init: Qualcomm extn not present.\n " ;
150149 return ;
151150 }
151+ device_id = workspace->GetCLDeviceID (tentry->device .device_id );
152+ platform_id = workspace->device_to_platform [device_id];
153+
152154 // Query and Get CLML Interface
153155 static const cl_uint MAX_VERSIONS = 256 ;
154156 cl_int majorVersions[MAX_VERSIONS];
@@ -492,7 +494,7 @@ class CLMLRuntime : public JSONRuntimeBase {
492494 JSONGraphNode node = it->second .second ;
493495 void * node_data = nullptr ;
494496
495- allocateTensorMemory (h_ClmlIntf, workspace->context , tensor_desc);
497+ allocateTensorMemory (h_ClmlIntf, workspace->contexts [platform_id] , tensor_desc);
496498
497499 if (node.GetOpType () == " const" ) {
498500 node_data = data_entry_[EntryID (it->first , 0 )]->data ;
@@ -581,11 +583,9 @@ class CLMLRuntime : public JSONRuntimeBase {
581583
582584 bool ExtensionStringPresent (void ) {
583585 cl_int result = 0 ;
584- if (workspace->platform_id == nullptr ) {
585- return 0 ;
586- }
587586 size_t reqd_size = 0 ;
588- cl_device_id device_id = workspace->devices [workspace->GetThreadEntry ()->device .device_id ];
587+ cl_device_id device_id =
588+ workspace->GetCLDeviceID (workspace->GetThreadEntry ()->device .device_id );
589589 result = clGetDeviceInfo (device_id, CL_DEVICE_EXTENSIONS, 0 , NULL , &reqd_size);
590590 ICHECK (reqd_size > 0u && result == CL_SUCCESS) << " clGetDeviceInfo:" << result;
591591
@@ -607,7 +607,8 @@ class CLMLRuntime : public JSONRuntimeBase {
607607
608608 cl_ml_tensor_desc_qcom desc = {
609609 dtype, layout, dims.n , dims.c , dims.h , dims.w , 0 , CL_TENSOR_DIMENSIONS_4D_QCOM, { 0 }};
610- result = h_ClmlIntf->clCreateMLTensorQCOM (workspace->context , NULL , &desc, &tensor);
610+ result =
611+ h_ClmlIntf->clCreateMLTensorQCOM (workspace->contexts [platform_id], NULL , &desc, &tensor);
611612 ICHECK (tensor && result == CL_SUCCESS) << " clCreateMLTensorQCOM:" << result;
612613 (void )result;
613614 return tensor;
@@ -619,11 +620,12 @@ class CLMLRuntime : public JSONRuntimeBase {
619620 cl_int result = CL_OUT_OF_HOST_MEMORY;
620621 cl_mem buffer = NULL ;
621622
622- result =
623- h_ClmlIntf-> clGetMLTensorMemorySizeQCOM (workspace-> context , pTensorMemDesc->tensor , &size);
623+ result = h_ClmlIntf-> clGetMLTensorMemorySizeQCOM (workspace-> contexts [platform_id],
624+ pTensorMemDesc->tensor , &size);
624625 ICHECK (result == CL_SUCCESS) << " clGetMLTensorMemorySizeQCOM:" << result;
625626
626- buffer = clCreateBuffer (workspace->context , CL_MEM_READ_WRITE, size, NULL , &result);
627+ buffer =
628+ clCreateBuffer (workspace->contexts [platform_id], CL_MEM_READ_WRITE, size, NULL , &result);
627629 ICHECK (result == CL_SUCCESS) << " clCreateBuffer:" << result;
628630
629631 pTensorMemDesc->memory = buffer;
@@ -686,7 +688,8 @@ class CLMLRuntime : public JSONRuntimeBase {
686688 cl_channel_type cl_dtype = MakeCLDataType (tvm_dtype);
687689
688690 auto tensor_dsc = std::make_shared<cl_ml_tensor_memory_desc_qcom>();
689- tensor_dsc->tensor = DeviceMakeCLMLTensor (workspace->context , dims, layout, cl_dtype);
691+ tensor_dsc->tensor =
692+ DeviceMakeCLMLTensor (workspace->contexts [platform_id], dims, layout, cl_dtype);
690693 return tensor_dsc;
691694 }
692695
@@ -800,8 +803,8 @@ class CLMLRuntime : public JSONRuntimeBase {
800803 } else {
801804 cl_ml_tensor_desc_qcom desc = {};
802805 desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
803- result =
804- h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> context , NULL , &desc, &layer_.unusedTensor );
806+ result = h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> contexts [platform_id], NULL , &desc,
807+ &layer_.unusedTensor );
805808 ICHECK (layer_.unusedTensor && result == CL_SUCCESS) << " clCreateMLTensorQCOM:" << result;
806809 bias->tensor = layer_.unusedTensor ;
807810 }
@@ -821,13 +824,13 @@ class CLMLRuntime : public JSONRuntimeBase {
821824 if (!has_bn) {
822825 if (!has_act) {
823826 result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM (
824- workspace->context , 0 , &conv_desc, input->tensor , weight-> tensor , bias ->tensor ,
825- output->tensor , &op, NULL );
827+ workspace->contexts [platform_id] , 0 , &conv_desc, input->tensor , weight->tensor ,
828+ bias-> tensor , output->tensor , &op, NULL );
826829 ICHECK (op && result == CL_SUCCESS) << " Convolution Error:" << result;
827830 } else {
828831 result = h_ClmlIntf->clCreateMLOpFusedConvolutionActivationForwardQCOM (
829- workspace->context , 0 , &conv_desc, &act_desc, input-> tensor , weight ->tensor ,
830- bias->tensor , NULL , output->tensor , &op, tuning_cache);
832+ workspace->contexts [platform_id] , 0 , &conv_desc, &act_desc, input->tensor ,
833+ weight-> tensor , bias->tensor , NULL , output->tensor , &op, tuning_cache);
831834 ICHECK (op && result == CL_SUCCESS) << " Convolution Error:" << result;
832835 }
833836 layer_.func_ins .push_back (input);
@@ -854,15 +857,15 @@ class CLMLRuntime : public JSONRuntimeBase {
854857 cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};
855858 if (!has_act) {
856859 result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormForwardQCOM (
857- workspace->context , 0 , &conv_desc, &bn_desc, input-> tensor , weight ->tensor ,
858- bias ->tensor , output ->tensor , bn_mean ->tensor , bn_var ->tensor , bn_scale ->tensor ,
859- bn_bias->tensor , &op, tuning_cache);
860+ workspace->contexts [platform_id] , 0 , &conv_desc, &bn_desc, input->tensor ,
861+ weight ->tensor , bias ->tensor , output ->tensor , bn_mean ->tensor , bn_var ->tensor ,
862+ bn_scale-> tensor , bn_bias->tensor , &op, tuning_cache);
860863 ICHECK (op && result == CL_SUCCESS) << " Convolution Error:" << result;
861864 } else {
862865 result = h_ClmlIntf->clCreateMLOpFusedConvolutionBatchNormActivationForwardQCOM (
863- workspace->context , 0 , &conv_desc, &bn_desc, &act_desc, input-> tensor , weight ->tensor ,
864- bias->tensor , output->tensor , NULL , bn_mean->tensor , bn_var-> tensor , bn_scale ->tensor ,
865- bn_bias->tensor , &op, tuning_cache);
866+ workspace->contexts [platform_id] , 0 , &conv_desc, &bn_desc, &act_desc, input->tensor ,
867+ weight-> tensor , bias->tensor , output->tensor , NULL , bn_mean->tensor , bn_var->tensor ,
868+ bn_scale-> tensor , bn_bias->tensor , &op, tuning_cache);
866869
867870 ICHECK (op && result == CL_SUCCESS) << " Convolution Error:" << result;
868871 }
@@ -895,13 +898,13 @@ class CLMLRuntime : public JSONRuntimeBase {
895898
896899 cl_ml_tensor_desc_qcom desc = {};
897900 desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
898- result =
899- h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> context , NULL , &desc, &layer_.unusedTensor );
901+ result = h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> contexts [platform_id], NULL , &desc,
902+ &layer_.unusedTensor );
900903 ICHECK (layer_.unusedTensor && result == CL_SUCCESS) << " :" << result;
901904
902- result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM (workspace-> context , 0 , &act_desc,
903- input->tensor , layer_.unusedTensor ,
904- output->tensor , &op, tuning_cache);
905+ result = h_ClmlIntf->clCreateMLOpActivationForwardQCOM (
906+ workspace-> contexts [platform_id], 0 , &act_desc, input->tensor , layer_.unusedTensor ,
907+ output->tensor , &op, tuning_cache);
905908 ICHECK (op && result == CL_SUCCESS) << " Activation Error:" << result;
906909
907910 layer_.func_ins .push_back (input);
@@ -947,8 +950,8 @@ class CLMLRuntime : public JSONRuntimeBase {
947950 cl_ml_op_batchnorm_desc_qcom bn_desc = {CL_BATCHNORM_MODE_SPATIAL_QCOM, cl_arithmetic_mode};
948951
949952 result = h_ClmlIntf->clCreateMLOpBatchNormForwardQCOM (
950- workspace->context , 0 , &bn_desc, input->tensor , bn_mean-> tensor , bn_var ->tensor ,
951- bn_scale->tensor , bn_bias->tensor , output->tensor , &op, tuning_cache);
953+ workspace->contexts [platform_id] , 0 , &bn_desc, input->tensor , bn_mean->tensor ,
954+ bn_var-> tensor , bn_scale->tensor , bn_bias->tensor , output->tensor , &op, tuning_cache);
952955 ICHECK (op && result == CL_SUCCESS) << " Batchnorm Error:" << result;
953956
954957 layer->function .push_back (op);
@@ -997,12 +1000,13 @@ class CLMLRuntime : public JSONRuntimeBase {
9971000 cl_ml_tensor_desc_qcom desc = {};
9981001 cl_ml_tensor_qcom unusedTensor = NULL ;
9991002 desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
1000- result = h_ClmlIntf->clCreateMLTensorQCOM (workspace->context , NULL , &desc, &unusedTensor);
1003+ result = h_ClmlIntf->clCreateMLTensorQCOM (workspace->contexts [platform_id], NULL , &desc,
1004+ &unusedTensor);
10011005 ICHECK (unusedTensor && result == CL_SUCCESS) << " :" << result;
10021006
1003- result =
1004- h_ClmlIntf-> clCreateMLOpPoolingForwardQCOM (workspace-> context , 0 , &pool_desc, input->tensor ,
1005- unusedTensor, output->tensor , &op, tuning_cache);
1007+ result = h_ClmlIntf-> clCreateMLOpPoolingForwardQCOM (workspace-> contexts [platform_id], 0 ,
1008+ &pool_desc, input->tensor , unusedTensor ,
1009+ output->tensor , &op, tuning_cache);
10061010 ICHECK (op && result == CL_SUCCESS) << " Pooling Error:" << result;
10071011
10081012 layer_.func_ins .push_back (input);
@@ -1043,13 +1047,13 @@ class CLMLRuntime : public JSONRuntimeBase {
10431047
10441048 cl_ml_tensor_desc_qcom desc = {};
10451049 desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
1046- result =
1047- h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> context , NULL , &desc, &layer_.unusedTensor );
1050+ result = h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> contexts [platform_id], NULL , &desc,
1051+ &layer_.unusedTensor );
10481052 ICHECK (layer_.unusedTensor && result == CL_SUCCESS) << " :" << result;
10491053
1050- result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM (workspace-> context , 0 , &pool_desc,
1051- input->tensor , layer_.unusedTensor ,
1052- output->tensor , &op, tuning_cache);
1054+ result = h_ClmlIntf->clCreateMLOpPoolingForwardQCOM (
1055+ workspace-> contexts [platform_id], 0 , &pool_desc, input->tensor , layer_.unusedTensor ,
1056+ output->tensor , &op, tuning_cache);
10531057 ICHECK (op && result == CL_SUCCESS) << " Pooling Error:" << result;
10541058
10551059 layer_.func_ins .push_back (input);
@@ -1079,7 +1083,7 @@ class CLMLRuntime : public JSONRuntimeBase {
10791083 cl_ml_op_softmax_desc_qcom softmax_desc = {CL_SOFTMAX_ALGORITHM_ACCURATE_QCOM,
10801084 CL_SOFTMAX_MODE_INSTANCE_QCOM, cl_arithmetic_mode};
10811085
1082- result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM (workspace->context , 0 , &softmax_desc,
1086+ result = h_ClmlIntf->clCreateMLOpSoftmaxQCOM (workspace->contexts [platform_id] , 0 , &softmax_desc,
10831087 input->tensor , output->tensor , &op, tuning_cache);
10841088 ICHECK (op && result == CL_SUCCESS) << " SoftMax Error:" << result;
10851089
@@ -1125,8 +1129,8 @@ class CLMLRuntime : public JSONRuntimeBase {
11251129 {clml_padding[0 ], clml_padding[1 ], clml_padding[2 ], clml_padding[3 ], 0 , 0 , 0 , 0 },
11261130 cl_arithmetic_mode};
11271131
1128- result = h_ClmlIntf->clCreateMLOpPadQCOM (workspace->context , 0 , &pad_desc, input-> tensor ,
1129- output->tensor , &op, tuning_cache);
1132+ result = h_ClmlIntf->clCreateMLOpPadQCOM (workspace->contexts [platform_id] , 0 , &pad_desc,
1133+ input-> tensor , output->tensor , &op, tuning_cache);
11301134 ICHECK (op && result == CL_SUCCESS) << " Pad Error:" << result;
11311135
11321136 layer_.func_ins .push_back (input);
@@ -1150,7 +1154,7 @@ class CLMLRuntime : public JSONRuntimeBase {
11501154 cl_dtype);
11511155 auto output = MakeCLMLTensorFromJSONNode (node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
11521156
1153- result = h_ClmlIntf->clCreateMLOpReshapeQCOM (workspace->context , 0 , input->tensor ,
1157+ result = h_ClmlIntf->clCreateMLOpReshapeQCOM (workspace->contexts [platform_id] , 0 , input->tensor ,
11541158 output->tensor , &op, tuning_cache);
11551159 ICHECK (op && result == CL_SUCCESS) << " Reshape Error:" << result;
11561160
@@ -1175,7 +1179,7 @@ class CLMLRuntime : public JSONRuntimeBase {
11751179 cl_dtype);
11761180 auto output = MakeCLMLTensorFromJSONNode (node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
11771181
1178- result = h_ClmlIntf->clCreateMLOpReshapeQCOM (workspace->context , 0 , input->tensor ,
1182+ result = h_ClmlIntf->clCreateMLOpReshapeQCOM (workspace->contexts [platform_id] , 0 , input->tensor ,
11791183 output->tensor , &op, tuning_cache);
11801184 ICHECK (op && result == CL_SUCCESS) << " Reshape Error:" << result;
11811185
@@ -1210,8 +1214,8 @@ class CLMLRuntime : public JSONRuntimeBase {
12101214 }
12111215 cl_ml_op_concat_desc_qcom concatDesc = {axis, (cl_uint)inputSize, cl_arithmetic_mode};
12121216
1213- result = h_ClmlIntf->clCreateMLOpConcatQCOM (workspace->context , 0 , &concatDesc, concatInputs ,
1214- output->tensor , &op, tuning_cache);
1217+ result = h_ClmlIntf->clCreateMLOpConcatQCOM (workspace->contexts [platform_id] , 0 , &concatDesc,
1218+ concatInputs, output->tensor , &op, tuning_cache);
12151219 ICHECK (op && result == CL_SUCCESS) << " Concat Error:" << result;
12161220
12171221 layer->function .push_back (op);
@@ -1250,8 +1254,8 @@ class CLMLRuntime : public JSONRuntimeBase {
12501254 } else {
12511255 cl_ml_tensor_desc_qcom desc = {};
12521256 desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
1253- result =
1254- h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> context , NULL , &desc, &layer_.unusedTensor );
1257+ result = h_ClmlIntf-> clCreateMLTensorQCOM (workspace-> contexts [platform_id], NULL , &desc,
1258+ &layer_.unusedTensor );
12551259 ICHECK (layer_.unusedTensor && result == CL_SUCCESS) << " clCreateMLTensorQCOM:" << result;
12561260 bias->tensor = layer_.unusedTensor ;
12571261 }
@@ -1269,8 +1273,8 @@ class CLMLRuntime : public JSONRuntimeBase {
12691273 cl_arithmetic_mode};
12701274
12711275 result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM (
1272- workspace->context , 0 , &conv_desc, input->tensor , weight-> tensor , bias ->tensor ,
1273- output->tensor , &op, NULL );
1276+ workspace->contexts [platform_id] , 0 , &conv_desc, input->tensor , weight->tensor ,
1277+ bias-> tensor , output->tensor , &op, NULL );
12741278 ICHECK (op && result == CL_SUCCESS) << " Fully Connected Error:" << result;
12751279
12761280 layer->function .push_back (op);
@@ -1300,8 +1304,8 @@ class CLMLRuntime : public JSONRuntimeBase {
13001304 cl_ml_op_clip_desc_qcom clip_desc = {
13011305 CL_CLIP_BY_VALUE_QCOM, {{a_max}, CL_FLOAT}, {{a_min}, CL_FLOAT}, cl_arithmetic_mode};
13021306
1303- result = h_ClmlIntf->clCreateMLOpClipQCOM (workspace->context , 0 , &clip_desc, input-> tensor ,
1304- output->tensor , &op, tuning_cache);
1307+ result = h_ClmlIntf->clCreateMLOpClipQCOM (workspace->contexts [platform_id] , 0 , &clip_desc,
1308+ input-> tensor , output->tensor , &op, tuning_cache);
13051309 ICHECK (op && result == CL_SUCCESS) << " Clip Error:" << result;
13061310
13071311 layer_.func_ins .push_back (input);
@@ -1342,8 +1346,9 @@ class CLMLRuntime : public JSONRuntimeBase {
13421346 cl_ml_op_binary_desc_qcom add_desc = {
13431347 binary_op, {{1.0 }, CL_FLOAT}, {{1.0 }, CL_FLOAT}, {{0.0 }, CL_FLOAT}, cl_arithmetic_mode};
13441348
1345- result = h_ClmlIntf->clCreateMLOpBinaryQCOM (workspace->context , 0 , &add_desc, input_a->tensor ,
1346- input_b->tensor , output->tensor , &op, tuning_cache);
1349+ result = h_ClmlIntf->clCreateMLOpBinaryQCOM (workspace->contexts [platform_id], 0 , &add_desc,
1350+ input_a->tensor , input_b->tensor , output->tensor ,
1351+ &op, tuning_cache);
13471352 ICHECK (op && result == CL_SUCCESS) << op_name << " Node Error:" << result;
13481353
13491354 layer_.func_ins .push_back (input_a);
@@ -1371,8 +1376,9 @@ class CLMLRuntime : public JSONRuntimeBase {
13711376 cl_uint block_size = std::stoi (node.GetAttr <std::vector<std::string>>(" block_size" )[0 ]);
13721377
13731378 cl_ml_op_depthtospace_desc_qcom dtos_desc = {block_size, cl_arithmetic_mode};
1374- result = h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM (
1375- workspace->context , 0 , &dtos_desc, input->tensor , output->tensor , &op, tuning_cache);
1379+ result =
1380+ h_ClmlIntf->clCreateMLOpDepthToSpaceQCOM (workspace->contexts [platform_id], 0 , &dtos_desc,
1381+ input->tensor , output->tensor , &op, tuning_cache);
13761382 ICHECK (op && result == CL_SUCCESS) << " DepthToSpace Layer Error:" << result;
13771383
13781384 layer_.func_ins .push_back (input);
@@ -1399,8 +1405,9 @@ class CLMLRuntime : public JSONRuntimeBase {
13991405 cl_bool align_corners = std::stoi (node.GetAttr <std::vector<std::string>>(" align_corners" )[0 ]);
14001406
14011407 cl_ml_op_resize_bilinear_desc_qcom resize_desc = {align_corners, false , cl_arithmetic_mode};
1402- result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM (
1403- workspace->context , 0 , &resize_desc, input->tensor , output->tensor , &op, tuning_cache);
1408+ result = h_ClmlIntf->clCreateMLOpResizeBilinearQCOM (workspace->contexts [platform_id], 0 ,
1409+ &resize_desc, input->tensor , output->tensor ,
1410+ &op, tuning_cache);
14041411 ICHECK (op && result == CL_SUCCESS) << " Resize Layer Error:" << result;
14051412
14061413 layer_.func_ins .push_back (input);
@@ -1418,6 +1425,8 @@ class CLMLRuntime : public JSONRuntimeBase {
14181425 GET_ML_API_INTERFACE* h_ClmlIntf = NULL ;
14191426 cl::OpenCLWorkspace* workspace = NULL ;
14201427 cl::OpenCLThreadEntry* tentry = NULL ;
1428+ cl_device_id device_id;
1429+ cl_platform_id platform_id;
14211430 cl_ml_tuningcache_qcom tuning_cache = NULL ;
14221431 bool is_tuning_run;
14231432 char * tuning_file;
0 commit comments