-
Notifications
You must be signed in to change notification settings - Fork 493
UCP/DEVICE: Make memh and local_addr optional for counter elements #10945
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
06c380a
a494c52
60a1131
6a3eb78
205b01e
589b0b4
4573621
b0eff73
10646cd
534057b
dc11fd6
c718cec
2288f1b
b1f2ad0
5c3b023
06a662e
777d06a
62e6732
8219c47
025a1a0
e46a302
c70739c
af75f5c
c967767
c444251
a07231c
1f0d285
0d30594
e9c1382
2bbfa15
438346f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,9 +36,9 @@ BEGIN_C_DECLS | |
| enum ucp_device_mem_list_elem_field { | ||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_MEMH = UCS_BIT(0), /**< Source memory handle */ | ||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_RKEY = UCS_BIT(1), /**< Unpacked remote memory key */ | ||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_LOCAL_ADDR = UCS_BIT(2), /**< Local address */ | ||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_REMOTE_ADDR = UCS_BIT(3), /**< Remote address */ | ||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_LENGTH = UCS_BIT(4) /**< Length of the local buffer in bytes */ | ||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_LOCAL_ADDR = UCS_BIT(2), /**< Local address (optional for counter elements) */ | ||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_REMOTE_ADDR = UCS_BIT(3), /**< Remote address */ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Btw, it is also always required, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not always: check remote_offset in the partial API. Users can pass NULL as the remote address and then pass the address as remote_offset. |
||
| UCP_DEVICE_MEM_LIST_ELEM_FIELD_LENGTH = UCS_BIT(4) /**< Length of the local buffer in bytes (optional for counter elements) */ | ||
ofirfarjun7 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
|
|
||
|
|
@@ -48,6 +48,14 @@ enum ucp_device_mem_list_elem_field { | |
| * | ||
| * This describes a pair of local and remote memory for which a memory operation | ||
| * can later be performed multiple times, possibly with varying memory offsets. | ||
| * | ||
| * @note The @a memh and @a local_addr fields are optional for elements | ||
| * that are only used for remote addressing (e.g., counter elements): | ||
| * - @ref ucp_device_counter_inc: All elements may omit these fields | ||
| * - @ref ucp_device_put_multi: The last element (counter) may omit these | ||
| * fields | ||
| * - @ref ucp_device_put_multi_partial: The element at counter_index may | ||
| * omit these fields if not also in mem_list_indices | ||
| */ | ||
| typedef struct ucp_device_mem_list_elem { | ||
| /** | ||
|
|
@@ -60,11 +68,13 @@ typedef struct ucp_device_mem_list_elem { | |
|
|
||
| /** | ||
| * Local memory registration handle. | ||
| * Optional for elements used only for remote addressing (e.g., counters). | ||
| */ | ||
| ucp_mem_h memh; | ||
|
|
||
| /** | ||
| * Local memory address for the device transfer operations. | ||
| * Optional for elements used only for remote addressing (e.g., counters). | ||
| */ | ||
| void* local_addr; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,9 @@ KHASH_IMPL(ucp_device_handle_allocs, ucp_device_mem_list_handle_h, | |
| static khash_t(ucp_device_handle_allocs) ucp_device_handle_hash; | ||
| static ucs_spinlock_t ucp_device_handle_hash_lock; | ||
|
|
||
| /* Size of temporary allocation for local sys_dev detection */ | ||
| #define UCP_DEVICE_LOCAL_SYS_DEV_DETECT_SIZE 64 | ||
|
|
||
|
|
||
| void ucp_device_init(void) | ||
| { | ||
|
|
@@ -121,34 +124,46 @@ ucp_device_mem_list_params_check(const ucp_device_mem_list_params_t *params, | |
| RKEY, NULL); | ||
|
|
||
| /* TODO: Delegate most of checks below to proto selection */ | ||
| if ((rkey == NULL) || (memh == NULL)) { | ||
| ucs_error("element[%lu] rkey=%p, memh=%p", i, rkey, memh); | ||
| if (rkey == NULL) { | ||
| ucs_error("element[%lu] rkey is NULL", i); | ||
| return UCS_ERR_INVALID_PARAM; | ||
| } | ||
|
|
||
| if (i == 0) { | ||
| *local_sys_dev = memh->sys_dev; | ||
| *local_md_map = memh->md_map; | ||
| *mem_type = memh->mem_type; | ||
| if (memh != NULL) { | ||
| *local_sys_dev = memh->sys_dev; | ||
| *local_md_map = memh->md_map; | ||
| *mem_type = memh->mem_type; | ||
rakhmets marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } else { | ||
| *mem_type = rkey->mem_type; | ||
ofirfarjun7 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| *local_md_map = UINT64_MAX; | ||
| } | ||
| *rkey_cfg_index = rkey->cfg_index; | ||
| if (*rkey_cfg_index == UCP_WORKER_CFG_INDEX_NULL) { | ||
| ucs_debug("invalid first rkey: cfg_index=%d", *rkey_cfg_index); | ||
| return UCS_ERR_INVALID_PARAM; | ||
| } | ||
| } else { | ||
| *local_md_map &= memh->md_map; | ||
| if (rkey->cfg_index != *rkey_cfg_index) { | ||
| ucs_debug("mismatched rkey config index: " | ||
| "ucp_rkey[%lu]->cfg_index=%u cfg_index=%u", | ||
| i, rkey->cfg_index, *rkey_cfg_index); | ||
| return UCS_ERR_UNSUPPORTED; | ||
| } | ||
|
|
||
| if (memh->sys_dev != *local_sys_dev) { | ||
| ucs_debug("mismatched local sys_dev: ucp_memh[%zu].sys_dev=%u " | ||
| "first_sys_dev=%u", | ||
| i, memh->sys_dev, *local_sys_dev); | ||
| return UCS_ERR_UNSUPPORTED; | ||
| if (memh != NULL) { | ||
| if (*local_sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) { | ||
| *local_sys_dev = memh->sys_dev; | ||
| *local_md_map = memh->md_map; | ||
ofirfarjun7 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } else { | ||
| *local_md_map &= memh->md_map; | ||
| if (memh->sys_dev != *local_sys_dev) { | ||
ofirfarjun7 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ucs_debug("mismatched local sys_dev: ucp_memh[%zu].sys_dev=%u " | ||
| "first_sys_dev=%u", | ||
| i, memh->sys_dev, *local_sys_dev); | ||
| return UCS_ERR_UNSUPPORTED; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -230,6 +245,42 @@ static void ucp_device_mem_list_lane_lookup( | |
| } | ||
| } | ||
|
|
||
| static ucs_status_t | ||
| ucp_device_detect_local_sys_dev(ucp_context_h context, | ||
| ucs_memory_type_t mem_type, | ||
| ucs_sys_device_t *local_sys_dev_p) | ||
| { | ||
| ucs_memory_info_t mem_info; | ||
| uct_allocated_memory_t detect_mem; | ||
| ucs_status_t status; | ||
|
|
||
| status = ucp_mem_do_alloc(context, NULL, | ||
| UCP_DEVICE_LOCAL_SYS_DEV_DETECT_SIZE, | ||
| UCT_MD_MEM_ACCESS_LOCAL_READ | | ||
| UCT_MD_MEM_ACCESS_LOCAL_WRITE, | ||
| mem_type, UCS_SYS_DEVICE_ID_UNKNOWN, | ||
| "local_sys_dev_detect", &detect_mem); | ||
| if (status != UCS_OK) { | ||
| ucs_error("failed to allocate memory for sys_dev detection: %s", | ||
| ucs_status_string(status)); | ||
| return status; | ||
| } | ||
|
|
||
| ucp_memory_detect_internal(context, detect_mem.address, detect_mem.length, | ||
| &mem_info); | ||
| *local_sys_dev_p = mem_info.sys_dev; | ||
|
|
||
| uct_mem_free(&detect_mem); | ||
|
|
||
| if (*local_sys_dev_p == UCS_SYS_DEVICE_ID_UNKNOWN) { | ||
| ucs_error("detected unknown local_sys_dev"); | ||
| return UCS_ERR_UNSUPPORTED; | ||
| } | ||
|
|
||
| ucs_trace("detected local_sys_dev=%u", *local_sys_dev_p); | ||
| return UCS_OK; | ||
| } | ||
|
|
||
| static ucs_status_t ucp_device_mem_list_create_handle( | ||
| ucp_ep_h ep, ucs_sys_device_t local_sys_dev, | ||
| const ucp_device_mem_list_params_t *params, | ||
|
|
@@ -361,13 +412,17 @@ static ucs_status_t ucp_device_mem_list_create_handle( | |
| ucp_ep_get_rsc_index(ep, lanes[i])); | ||
| ucp_element = params->elements; | ||
| for (j = 0; j < params->num_elements; j++) { | ||
| /* Local registration */ | ||
| uct_memh = ucp_element->memh->uct[local_md_index]; | ||
| ucs_assertv((ucp_element->memh->md_map & UCS_BIT(local_md_index)) != | ||
| 0, | ||
| "uct_memh=%p md_map=0x%lx local_md_index=%u", uct_memh, | ||
| ucp_element->memh->md_map, local_md_index); | ||
| ucs_assert(uct_memh != UCT_MEM_HANDLE_NULL); | ||
| if (ucp_element->memh != NULL) { | ||
|
||
| /* Local registration */ | ||
| uct_memh = ucp_element->memh->uct[local_md_index]; | ||
| ucs_assertv( | ||
| (ucp_element->memh->md_map & UCS_BIT(local_md_index)) != 0, | ||
| "uct_memh=%p md_map=0x%lx local_md_index=%u", uct_memh, | ||
| ucp_element->memh->md_map, local_md_index); | ||
| ucs_assert(uct_memh != UCT_MEM_HANDLE_NULL); | ||
| } else { | ||
| uct_memh = UCT_MEM_HANDLE_NULL; | ||
| } | ||
|
|
||
| /* Remote registration */ | ||
| rkey_index = | ||
|
|
@@ -430,6 +485,15 @@ ucp_device_mem_list_create(ucp_ep_h ep, | |
| return status; | ||
| } | ||
|
|
||
| if (local_sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) { | ||
ofirfarjun7 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| status = ucp_device_detect_local_sys_dev(ep->worker->context, mem_type, | ||
| &local_sys_dev); | ||
| if (status != UCS_OK) { | ||
| ucs_error("failed to detect local_sys_dev: %s", ucs_status_string(status)); | ||
| return status; | ||
| } | ||
| } | ||
|
|
||
| /* Perform pseudo lane selection without size */ | ||
| rkey_config = &ep->worker->rkey_config[rkey_cfg_index]; | ||
| ep_config = ucp_worker_ep_config(ep->worker, rkey_config->key.ep_cfg_index); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.