@@ -2210,6 +2210,22 @@ pi_result cuda_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
22102210 return retErr;
22112211}
22122212
2213+ pi_result cuda_piextKernelSetArgSampler (pi_kernel kernel, pi_uint32 arg_index,
2214+ const pi_sampler *arg_value) {
2215+
2216+ assert (kernel != nullptr );
2217+ assert (arg_value != nullptr );
2218+
2219+ pi_result retErr = PI_SUCCESS;
2220+ try {
2221+ pi_uint32 samplerProps = (*arg_value)->props_ ;
2222+ kernel->set_kernel_arg (arg_index, sizeof (pi_uint32), (void *)&samplerProps);
2223+ } catch (pi_result err) {
2224+ retErr = err;
2225+ }
2226+ return retErr;
2227+ }
2228+
22132229pi_result cuda_piEnqueueKernelLaunch (
22142230 pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim,
22152231 const size_t *global_work_offset, const size_t *global_work_size,
@@ -2989,32 +3005,139 @@ pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle,
29893005 return {};
29903006}
29913007
2992- // / \TODO Not implemented in CUDA, need untie from OpenCL
3008+ // / Creates a PI sampler object
3009+ // /
3010+ // / \param[in] context The context the sampler is created for.
3011+ // / \param[in] sampler_properties The properties for the sampler.
3012+ // / \param[out] result_sampler Set to the resulting sampler object.
3013+ // /
3014+ // / \return PI_SUCCESS on success. PI_INVALID_VALUE if given an invalid property
3015+ // / or if there is multiple of properties from the same category.
29933016pi_result cuda_piSamplerCreate (pi_context context,
2994- const cl_sampler_properties *sampler_properties,
3017+ const pi_sampler_properties *sampler_properties,
29953018 pi_sampler *result_sampler) {
2996- cl::sycl::detail::pi::die (" cuda_piSamplerCreate not implemented" );
2997- return {};
3019+ std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler (context)};
3020+
3021+ bool propSeen[3 ] = {false , false , false };
3022+ for (size_t i = 0 ; sampler_properties[i] != 0 ; i += 2 ) {
3023+ switch (sampler_properties[i]) {
3024+ case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS:
3025+ if (propSeen[0 ]) {
3026+ return PI_INVALID_VALUE;
3027+ }
3028+ propSeen[0 ] = true ;
3029+ retImplSampl->props_ |= sampler_properties[i + 1 ];
3030+ break ;
3031+ case PI_SAMPLER_PROPERTIES_FILTER_MODE:
3032+ if (propSeen[1 ]) {
3033+ return PI_INVALID_VALUE;
3034+ }
3035+ propSeen[1 ] = true ;
3036+ retImplSampl->props_ |=
3037+ (sampler_properties[i + 1 ] - PI_SAMPLER_FILTER_MODE_NEAREST) << 1 ;
3038+ break ;
3039+ case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE:
3040+ if (propSeen[2 ]) {
3041+ return PI_INVALID_VALUE;
3042+ }
3043+ propSeen[2 ] = true ;
3044+ retImplSampl->props_ |=
3045+ (sampler_properties[i + 1 ] - PI_SAMPLER_ADDRESSING_MODE_NONE) << 2 ;
3046+ break ;
3047+ default :
3048+ return PI_INVALID_VALUE;
3049+ }
3050+ }
3051+
3052+ if (!propSeen[0 ]) {
3053+ retImplSampl->props_ |= CL_TRUE;
3054+ }
3055+ // Default filter mode to CL_FILTER_NEAREST
3056+ if (!propSeen[2 ]) {
3057+ retImplSampl->props_ |= (CL_ADDRESS_CLAMP % CL_ADDRESS_NONE) << 2 ;
3058+ }
3059+
3060+ *result_sampler = retImplSampl.release ();
3061+ return PI_SUCCESS;
29983062}
29993063
3000- // / \TODO Not implemented in CUDA, need untie from OpenCL
3064+ // / Gets information from a PI sampler object
3065+ // /
3066+ // / \param[in] sampler The sampler to get the information from.
3067+ // / \param[in] param_name The name of the information to get.
3068+ // / \param[in] param_value_size The size of the param_value.
3069+ // / \param[out] param_value Set to information value.
3070+ // / \param[out] param_value_size_ret Set to the size of the information value.
3071+ // /
3072+ // / \return PI_SUCCESS on success.
30013073pi_result cuda_piSamplerGetInfo (pi_sampler sampler, cl_sampler_info param_name,
30023074 size_t param_value_size, void *param_value,
30033075 size_t *param_value_size_ret) {
3004- cl::sycl::detail::pi::die (" cuda_piSamplerGetInfo not implemented" );
3076+ assert (sampler != nullptr );
3077+
3078+ switch (param_name) {
3079+ case PI_SAMPLER_INFO_REFERENCE_COUNT:
3080+ return getInfo (param_value_size, param_value, param_value_size_ret,
3081+ sampler->get_reference_count ());
3082+ case PI_SAMPLER_INFO_CONTEXT:
3083+ return getInfo (param_value_size, param_value, param_value_size_ret,
3084+ sampler->context_ );
3085+ case PI_SAMPLER_INFO_NORMALIZED_COORDS: {
3086+ pi_bool norm_coords_prop = static_cast <pi_bool>(sampler->props_ & 0x1 );
3087+ return getInfo (param_value_size, param_value, param_value_size_ret,
3088+ norm_coords_prop);
3089+ }
3090+ case PI_SAMPLER_INFO_FILTER_MODE: {
3091+ pi_sampler_filter_mode filter_prop = static_cast <pi_sampler_filter_mode>(
3092+ ((sampler->props_ >> 1 ) & 0x1 ) + PI_SAMPLER_FILTER_MODE_NEAREST);
3093+ return getInfo (param_value_size, param_value, param_value_size_ret,
3094+ filter_prop);
3095+ }
3096+ case PI_SAMPLER_INFO_ADDRESSING_MODE: {
3097+ pi_sampler_addressing_mode addressing_prop =
3098+ static_cast <pi_sampler_addressing_mode>(
3099+ (sampler->props_ >> 2 ) + PI_SAMPLER_ADDRESSING_MODE_NONE);
3100+ return getInfo (param_value_size, param_value, param_value_size_ret,
3101+ addressing_prop);
3102+ }
3103+ default :
3104+ PI_HANDLE_UNKNOWN_PARAM_NAME (param_name);
3105+ }
30053106 return {};
30063107}
30073108
3008- // / \TODO Not implemented in CUDA, need untie from OpenCL
3109+ // / Retains a PI sampler object, incrementing its reference count.
3110+ // /
3111+ // / \param[in] sampler The sampler to increment the reference count of.
3112+ // /
3113+ // / \return PI_SUCCESS.
30093114pi_result cuda_piSamplerRetain (pi_sampler sampler) {
3010- cl::sycl::detail::pi::die (" cuda_piSamplerRetain not implemented" );
3011- return {};
3115+ assert (sampler != nullptr );
3116+ sampler->increment_reference_count ();
3117+ return PI_SUCCESS;
30123118}
30133119
3014- // / \TODO Not implemented in CUDA, need untie from OpenCL
3120+ // / Releases a PI sampler object, decrementing its reference count. If the
3121+ // / reference count reaches zero, the sampler object is destroyed.
3122+ // /
3123+ // / \param[in] sampler The sampler to decrement the reference count of.
3124+ // /
3125+ // / \return PI_SUCCESS.
30153126pi_result cuda_piSamplerRelease (pi_sampler sampler) {
3016- cl::sycl::detail::pi::die (" cuda_piSamplerRelease not implemented" );
3017- return {};
3127+ assert (sampler != nullptr );
3128+
3129+ // double delete or someone is messing with the ref count.
3130+ // either way, cannot safely proceed.
3131+ cl::sycl::detail::pi::assertion (
3132+ sampler->get_reference_count () != 0 ,
3133+ " Reference count overflow detected in cuda_piSamplerRelease." );
3134+
3135+ // decrement ref count. If it is 0, delete the sampler.
3136+ if (sampler->decrement_reference_count () == 0 ) {
3137+ delete sampler;
3138+ }
3139+
3140+ return PI_SUCCESS;
30183141}
30193142
30203143// / General 3D memory copy operation.
@@ -3934,6 +4057,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
39344057 _PI_CL (piextUSMGetMemAllocInfo, cuda_piextUSMGetMemAllocInfo)
39354058
39364059 _PI_CL (piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj)
4060+ _PI_CL (piextKernelSetArgSampler, cuda_piextKernelSetArgSampler)
39374061
39384062#undef _PI_CL
39394063
0 commit comments