diff --git a/oshmem/mca/memheap/base/base.h b/oshmem/mca/memheap/base/base.h index 92772ecb653..3dc5da77535 100644 --- a/oshmem/mca/memheap/base/base.h +++ b/oshmem/mca/memheap/base/base.h @@ -40,8 +40,8 @@ OSHMEM_DECLSPEC int mca_memheap_base_select(void); extern int mca_memheap_base_already_opened; extern int mca_memheap_base_key_exchange; +extern int mca_memheap_num_segments_warn; -#define MCA_MEMHEAP_MAX_SEGMENTS 32 #define HEAP_SEG_INDEX 0 #define MCA_MEMHEAP_SEG_COUNT 2 @@ -54,8 +54,9 @@ typedef struct mca_memheap_base_config { typedef struct mca_memheap_map { - map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */ + map_segment_t *mem_segs; int n_segments; + int capacity; int num_transports; } mca_memheap_map_t; @@ -70,6 +71,7 @@ int mca_memheap_base_reg(mca_memheap_map_t *); int mca_memheap_base_dereg(mca_memheap_map_t *); int memheap_oob_init(mca_memheap_map_t *); void memheap_oob_destruct(void); +map_segment_t *mca_memheap_base_allocate_segment(mca_memheap_map_t *map); OSHMEM_DECLSPEC int mca_memheap_base_is_symmetric_addr(const void* va); OSHMEM_DECLSPEC sshmem_mkey_t *mca_memheap_base_get_mkey(void* va, diff --git a/oshmem/mca/memheap/base/memheap_base_alloc.c b/oshmem/mca/memheap/base/memheap_base_alloc.c index 3354a187def..b90415bbf80 100644 --- a/oshmem/mca/memheap/base/memheap_base_alloc.c +++ b/oshmem/mca/memheap/base/memheap_base_alloc.c @@ -18,6 +18,7 @@ #include "oshmem/mca/memheap/memheap.h" #include "oshmem/mca/memheap/base/base.h" #include "ompi/util/timings.h" +#include "opal/util/minmax.h" int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size, long hint, @@ -35,7 +36,12 @@ int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size, long hint, assert(HEAP_SEG_INDEX < map->n_segments); } - map_segment_t *s = &map->mem_segs[map->n_segments]; + map_segment_t *s = mca_memheap_base_allocate_segment(map); + if (NULL == s) { + MEMHEAP_ERROR("failed to allocate segment"); + return
OSHMEM_ERR_OUT_OF_RESOURCE; + } + seg_filename = oshmem_get_unique_file_name(oshmem_my_proc_id()); OPAL_TIMING_ENV_NEXT(timing, "oshmem_get_unique_file_name()"); @@ -72,6 +78,11 @@ void mca_memheap_base_alloc_exit(mca_memheap_map_t *map) mca_sshmem_unlink(s); } } + + free(map->mem_segs); + map->n_segments = 0; + map->capacity = 0; + map->mem_segs = NULL; } int mca_memheap_alloc_with_hint(size_t size, long hint, void** ptr) @@ -90,3 +101,33 @@ int mca_memheap_alloc_with_hint(size_t size, long hint, void** ptr) return MCA_MEMHEAP_CALL(alloc(size, ptr)); } + +map_segment_t *mca_memheap_base_allocate_segment(mca_memheap_map_t *map) +{ + static int warned = 0; + map_segment_t *segments; + int capacity; + + assert(map->n_segments <= map->capacity); + + if (!warned && (map->n_segments > mca_memheap_num_segments_warn)) { + MEMHEAP_WARN("too many segments are registered: %d. This may cause " + "performance degradation. Pls try adding --mca " + "memheap_base_max_segments to mpirun/oshrun " + "command line to suppress this message", map->n_segments); + warned = 1; + } + + if (map->n_segments == map->capacity) { + capacity = opal_max(map->capacity * 2, 4); + segments = realloc(map->mem_segs, capacity * sizeof(*map->mem_segs)); + if (segments == NULL) { + return NULL; + } + + map->capacity = capacity; + map->mem_segs = segments; + } + + return &map->mem_segs[map->n_segments]; +} diff --git a/oshmem/mca/memheap/base/memheap_base_frame.c b/oshmem/mca/memheap/base/memheap_base_frame.c index 23ebf0860db..53a71b27a9e 100644 --- a/oshmem/mca/memheap/base/memheap_base_frame.c +++ b/oshmem/mca/memheap/base/memheap_base_frame.c @@ -36,6 +36,7 @@ int mca_memheap_base_key_exchange = 1; opal_list_t mca_memheap_base_components_opened = {{0}}; int mca_memheap_base_already_opened = 0; mca_memheap_map_t mca_memheap_base_map = {{{{0}}}}; +int mca_memheap_num_segments_warn = 32; static int mca_memheap_base_register(mca_base_register_flag_t flags) { @@ -59,6 +60,14 @@ static int
mca_memheap_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_LOCAL, &mca_memheap_base_config.device_nic_mem_seg_size); + mca_base_var_register("oshmem", "memheap", "base", "max_segments", + "Display a warning if the number of segments of the " + "shared memheap exceeds this value", + MCA_BASE_VAR_TYPE_INT, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &mca_memheap_num_segments_warn); + return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/memheap/base/memheap_base_mkey.c b/oshmem/mca/memheap/base/memheap_base_mkey.c index a765285a93f..a04389e5c48 100644 --- a/oshmem/mca/memheap/base/memheap_base_mkey.c +++ b/oshmem/mca/memheap/base/memheap_base_mkey.c @@ -766,10 +766,6 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno) { map_segment_t *s; - if (segno >= MCA_MEMHEAP_MAX_SEGMENTS) { - return; - } - s = memheap_find_seg(segno); assert(NULL != s); seg->super.va_base = s->super.va_base; diff --git a/oshmem/mca/memheap/base/memheap_base_static.c b/oshmem/mca/memheap/base/memheap_base_static.c index d59b710088c..99f613340a9 100644 --- a/oshmem/mca/memheap/base/memheap_base_static.c +++ b/oshmem/mca/memheap/base/memheap_base_static.c @@ -15,13 +15,22 @@ #include "oshmem/mca/memheap/memheap.h" #include "oshmem/mca/memheap/base/base.h" #include "oshmem/util/oshmem_util.h" +#include "opal/util/minmax.h" #include #include #include #include -struct map_segment_desc { +static int _check_perms(const char *perms); +static int _check_address(void *start, void **end); +static int _check_pathname(uint64_t inode, const char *pathname); + +int mca_memheap_base_static_init(mca_memheap_map_t *map) +{ + /* read and parse segments from /proc/self/maps */ + int ret = OSHMEM_SUCCESS; + uint64_t total_mem = 0; void* start; void* end; char perms[8]; @@ -29,56 +38,80 @@ struct map_segment_desc { char dev[8]; uint64_t inode; char pathname[MAXPATHLEN]; -}; - -typedef struct memheap_static_context { - struct { -
void* start; - void* end; - } mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; - int n_segments; -} memheap_static_context_t; + FILE *fp; + char line[1024]; + map_segment_t *s; -static memheap_static_context_t memheap_context; + assert(map); + assert(HEAP_SEG_INDEX < map->n_segments); -static int _load_segments(void); -static int _check_perms(struct map_segment_desc *seg); -static int _check_address(struct map_segment_desc *seg); -static int _check_pathname(struct map_segment_desc *seg); + /* FIXME!!! Linux specific code */ + fp = fopen("/proc/self/maps", "r"); + if (NULL == fp) { + MEMHEAP_ERROR("Failed to open /proc/self/maps"); + return OSHMEM_ERROR; + } -int mca_memheap_base_static_init(mca_memheap_map_t *map) -{ - /* read and parse segments from /proc/self/maps */ - int ret = OSHMEM_SUCCESS; + while (NULL != fgets(line, sizeof(line), fp)) { + if (6 > sscanf(line, /* pathname is optional; the six fields before it are mandatory */ + "%llx-%llx %s %llx %s %llx %s", + (unsigned long long *) &start, + (unsigned long long *) &end, + perms, + (unsigned long long *) &offset, + dev, + (unsigned long long *) &inode, + pathname)) { + MEMHEAP_ERROR("Failed to sscanf /proc/self/maps output %s", line); + ret = OSHMEM_ERROR; + goto out; + } - assert(map); - assert(HEAP_SEG_INDEX < map->n_segments); + if (OSHMEM_ERROR == _check_address(start, &end)) + continue; - ret = _load_segments(); + if (OSHMEM_ERROR == _check_pathname(inode, pathname)) + continue; - if (OSHMEM_SUCCESS == ret) { - int i; - size_t total_mem; + if (OSHMEM_ERROR == _check_perms(perms)) + continue; - for (i = 0, total_mem = 0; i < memheap_context.n_segments; i++) { - map_segment_t *s = &map->mem_segs[map->n_segments]; + MEMHEAP_VERBOSE(5, "add: %s", line); - memset(s, 0, sizeof(*s)); - MAP_SEGMENT_RESET_FLAGS(s); - s->seg_id = MAP_SEGMENT_SHM_INVALID; - s->super.va_base = memheap_context.mem_segs[i].start; - s->super.va_end = memheap_context.mem_segs[i].end; + if ((map->n_segments > 0) && + (start == 
map->mem_segs[map->n_segments - 1].super.va_end)) { + s =
&map->mem_segs[map->n_segments - 1]; + MEMHEAP_VERBOSE(5, "Coalescing segment"); + s->super.va_end = end; s->seg_size = ((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base); - s->type = MAP_SEGMENT_STATIC; - map->n_segments++; + continue; + } - total_mem += ((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base); + s = mca_memheap_base_allocate_segment(map); + if (NULL == s) { + MEMHEAP_ERROR("failed to allocate segment"); + ret = OSHMEM_ERR_OUT_OF_RESOURCE; + goto out; } - MEMHEAP_VERBOSE(1, - "Memheap static memory: %llu byte(s), %d segments", - (unsigned long long)total_mem, map->n_segments); + + memset(s, 0, sizeof(*s)); + MAP_SEGMENT_RESET_FLAGS(s); + s->seg_id = MAP_SEGMENT_SHM_INVALID; + s->super.va_base = start; + s->super.va_end = end; + s->seg_size = ((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base); + s->type = MAP_SEGMENT_STATIC; + map->n_segments++; + + total_mem += ((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base); } + MEMHEAP_VERBOSE(1, + "Memheap static memory: %llu byte(s), %d segments", + (unsigned long long)total_mem, map->n_segments); + +out: + fclose(fp); return ret; } @@ -87,20 +120,20 @@ void mca_memheap_base_static_exit(mca_memheap_map_t *map) assert(map); } -static int _check_perms(struct map_segment_desc *seg) +static int _check_perms(const char *perms) { - if (!strcmp(seg->perms, "rw-p") || !strcmp(seg->perms, "rwxp")) + if (!strcmp(perms, "rw-p") || !strcmp(perms, "rwxp")) return OSHMEM_SUCCESS; return OSHMEM_ERROR; } -static int _check_address(struct map_segment_desc *seg) +static int _check_address(void *start, void **end) { /* FIXME Linux specific code */ #ifdef __linux__ extern unsigned _end; - void* data_end = &_end; + uintptr_t data_end = (uintptr_t)&_end; /** * SGI shmem only supports globals&static in main program.
@@ -111,24 +144,24 @@ static int _check_address(struct map_segment_desc *seg) * FIXME: make sure we do not register symmetric heap twice * if we decide to allow shared objects */ - if ((uintptr_t)seg->start > (uintptr_t)data_end) { + if ((uintptr_t)start > data_end) { MEMHEAP_VERBOSE(100, "skip segment: data _end < segment start (%p < %p)", - data_end, seg->start); + (void *)data_end, start); return OSHMEM_ERROR; } - if ((uintptr_t)seg->end > (uintptr_t)data_end) { + if ((uintptr_t)*end > data_end) { MEMHEAP_VERBOSE(100, "adjust segment: data _end < segment end (%p < %p", - data_end, seg->end); - seg->end = data_end; + (void *)data_end, *end); + *end = (void*)data_end; } #endif return OSHMEM_SUCCESS; } -static int _check_pathname(struct map_segment_desc *seg) +static int _check_pathname(uint64_t inode, const char *pathname) { static const char *proc_self_exe = "/proc/self/exe"; static int warned = 0; @@ -136,7 +169,7 @@ static int _check_pathname(struct map_segment_desc *seg) char module_path[PATH_MAX]; char *path; - if (0 == seg->inode) { + if (0 == inode) { /* segment is not mapped to file, allow sharing it */ return OSHMEM_SUCCESS; } @@ -153,7 +186,7 @@ static int _check_pathname(struct map_segment_desc *seg) } /* for file-mapped segments allow segments from start process only */ - path = realpath(seg->pathname, module_path); + path = realpath(pathname, module_path); if (NULL == path) { return OSHMEM_ERROR; } @@ -205,66 +238,3 @@ static int _check_pathname(struct map_segment_desc *seg) return OSHMEM_SUCCESS; } -static int _load_segments(void) -{ - FILE *fp; - char line[1024]; - struct map_segment_desc seg; - - memheap_context.n_segments = 0; - /* FIXME!!!
Linux specific code */ - fp = fopen("/proc/self/maps", "r"); - if (NULL == fp) { - MEMHEAP_ERROR("Failed to open /proc/self/maps"); - return OSHMEM_ERROR; - } - - while (NULL != fgets(line, sizeof(line), fp)) { - memset(&seg, 0, sizeof(seg)); - if (3 > sscanf(line, - "%llx-%llx %s %llx %s %llx %s", - (unsigned long long *) &seg.start, - (unsigned long long *) &seg.end, - seg.perms, - (unsigned long long *) &seg.offset, - seg.dev, - (unsigned long long *) &seg.inode, - seg.pathname)) { - MEMHEAP_ERROR("Failed to sscanf /proc/self/maps output %s", line); - fclose(fp); - return OSHMEM_ERROR; - } - - if (OSHMEM_ERROR == _check_address(&seg)) - continue; - - if (OSHMEM_ERROR == _check_pathname(&seg)) - continue; - - if (OSHMEM_ERROR == _check_perms(&seg)) - continue; - - MEMHEAP_VERBOSE(5, "add: %s", line); - if (MCA_MEMHEAP_MAX_SEGMENTS <= memheap_context.n_segments) { - MEMHEAP_ERROR("too many segments (max = %d): skip %s", - MCA_MEMHEAP_MAX_SEGMENTS, line); - continue; - } - if (memheap_context.n_segments > 0 - && seg.start - == memheap_context.mem_segs[memheap_context.n_segments - - 1].end) { - MEMHEAP_VERBOSE(5, "Coalescing segment"); - memheap_context.mem_segs[memheap_context.n_segments - 1].end = - seg.end; - } else { - memheap_context.mem_segs[memheap_context.n_segments].start = - seg.start; - memheap_context.mem_segs[memheap_context.n_segments].end = seg.end; - memheap_context.n_segments++; - } - } - - fclose(fp); - return OSHMEM_SUCCESS; -} diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index fa9bedf03d7..29069e87a05 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -151,11 +151,6 @@ int mca_spml_ucx_peer_mkey_cache_add(ucp_peer_t *ucp_peer, int index) /* Allocate an array to hold the pointers to the ucx_cached_mkey */ if (index >= (int)ucp_peer->mkeys_cnt){ int old_size = ucp_peer->mkeys_cnt; - if (MCA_MEMHEAP_MAX_SEGMENTS <= (index + 1)) { - SPML_UCX_ERROR("Failed to get new mkey for segment:
max number (%d) of segment descriptor is exhausted", - MCA_MEMHEAP_MAX_SEGMENTS); - return OSHMEM_ERROR; - } ucp_peer->mkeys_cnt = index + 1; ucp_peer->mkeys = realloc(ucp_peer->mkeys, sizeof(ucp_peer->mkeys[0]) * ucp_peer->mkeys_cnt); if (NULL == ucp_peer->mkeys) { diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h index 35263b2d630..a93ff3756a3 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.h +++ b/oshmem/mca/spml/ucx/spml_ucx.h @@ -285,10 +285,9 @@ static inline int mca_spml_ucx_peer_mkey_get(ucp_peer_t *ucp_peer, int index, spml_ucx_cached_mkey_t **out_rmkey) { *out_rmkey = NULL; - if (OPAL_UNLIKELY((index >= (int)ucp_peer->mkeys_cnt) || - (MCA_MEMHEAP_MAX_SEGMENTS <= index) || (0 > index))) { - SPML_UCX_ERROR("Failed to get mkey for segment: bad index = %d, MAX = %d, cached mkeys count: %zu", - index, MCA_MEMHEAP_MAX_SEGMENTS, ucp_peer->mkeys_cnt); + if (OPAL_UNLIKELY((index >= (int)ucp_peer->mkeys_cnt) || (0 > index))) { + SPML_UCX_ERROR("Failed to get mkey for segment: bad index = %d, cached mkeys count: %zu", + index, ucp_peer->mkeys_cnt); return OSHMEM_ERR_BAD_PARAM; } *out_rmkey = ucp_peer->mkeys[index];