From 2c976ebabfe405600c67c599636a6e07d1024cbc Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 1 Oct 2014 10:51:22 -0700 Subject: [PATCH] fabric: Add AV support for symmetric addresses Symmetric address support allows an app to declare that the number of endpoints per node will be the same, and that the transport addresses of those endpoints will be the same. A further optimization allows the transport addresses to include all addresses within a range. This allows an AV to store the addresses of remote endpoints in a compressed fashion, to greatly reduce the needed memory footprint. Define an FI_SYMMETRIC flag for use with an AV. The flag will indicate that endpoints are distributed equally over each network address. In conjunction with FI_RANGE, an app can insert a large number of endpoint addresses into an AV with a single call. Introduce a new AV insert call that takes as input a node and service parameter, similar to those specified through fi_getinfo. This simplifies things for the app and allows the insertion of hostnames into the AV, for example. Remove the fi_av_map macro and have users call fi_av_insert as defined in the man pages. 
Signed-off-by: Sean Hefty --- Makefile.am | 1 + include/rdma/fabric.h | 1 + include/rdma/fi_domain.h | 25 +++++++++-- man/fi_av.3 | 96 +++++++++++++++++++++++++++++----------- man/fi_av_insertsvc.3 | 1 + 5 files changed, 93 insertions(+), 31 deletions(-) create mode 100644 man/fi_av_insertsvc.3 diff --git a/Makefile.am b/Makefile.am index b6e0c5293b3..a9d4c587f62 100644 --- a/Makefile.am +++ b/Makefile.am @@ -113,6 +113,7 @@ man_MANS = \ man/fi_av.3 \ man/fi_av_bind.3 \ man/fi_av_insert.3 \ + man/fi_av_insertsvc.3 \ man/fi_av_lookup.3 \ man/fi_av_open.3 \ man/fi_av_remove.3 \ diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 355b9523f8e..cda2121477b 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -88,6 +88,7 @@ uint32_t fi_version(void); #define FI_INJECT (1ULL << 11) #define FI_MULTI_RECV (1ULL << 12) #define FI_SOURCE (1ULL << 13) +#define FI_SYMMETRIC (1ULL << 14) #define FI_READ (1ULL << 16) #define FI_WRITE (1ULL << 17) diff --git a/include/rdma/fi_domain.h b/include/rdma/fi_domain.h index 79426f0fc2a..068db3ff311 100644 --- a/include/rdma/fi_domain.h +++ b/include/rdma/fi_domain.h @@ -47,8 +47,6 @@ extern "C" { * Maps and stores transport/network addresses. 
*/ -#define FI_RANGE (1ULL << 0) - enum fi_av_type { FI_AV_MAP, FI_AV_TABLE @@ -58,6 +56,7 @@ struct fi_av_attr { enum fi_av_type type; int rx_ctx_bits; size_t count; + size_t ep_per_node; const char *name; void *map_addr; uint64_t flags; @@ -67,6 +66,11 @@ struct fi_ops_av { size_t size; int (*insert)(struct fid_av *av, const void *addr, size_t count, fi_addr_t *fi_addr, uint64_t flags); + int (*insertsvc)(struct fid_av *av, const char *node, + const char *service, fi_addr_t *fi_addr, uint64_t flags); + int (*insertsym)(struct fid_av *av, const char *node, size_t nodecnt, + const char *service, size_t svccnt, fi_addr_t *fi_addr, + uint64_t flags); int (*remove)(struct fid_av *av, fi_addr_t *fi_addr, size_t count, uint64_t flags); int (*lookup)(struct fid_av *av, fi_addr_t fi_addr, void *addr, @@ -214,8 +218,21 @@ fi_av_insert(struct fid_av *av, const void *addr, size_t count, { return av->ops->insert(av, addr, count, fi_addr, flags); } -#define fi_av_map(av, addr, count, fi_addr, flags) \ - fi_av_insert(av, addr, count, fi_addr, flags) + +static inline int +fi_av_insertsvc(struct fid_av *av, const char *node, const char *service, + fi_addr_t *fi_addr, uint64_t flags) +{ + return av->ops->insertsvc(av, node, service, fi_addr, flags); +} + +static inline int +fi_av_insertsym(struct fid_av *av, const char *node, size_t nodecnt, + const char *service, size_t svccnt, + fi_addr_t *fi_addr, uint64_t flags) +{ + return av->ops->insertsym(av, node, nodecnt, service, svccnt, fi_addr, flags); +} static inline int fi_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, uint64_t flags) diff --git a/man/fi_av.3 b/man/fi_av.3 index 2131a76a221..977614086f3 100644 --- a/man/fi_av.3 +++ b/man/fi_av.3 @@ -12,7 +12,7 @@ fi_av_bind Associate an address vector with an event queue. .RE .PP -fi_av_insert / fi_av_remove +fi_av_insert / fi_av_insertsvc / fi_av_remove .RS Insert/remove an address into/from the address vector. 
.RE @@ -41,6 +41,13 @@ Convert an address into a printable string. .BI "int fi_av_insert(struct fid_av *" av ", void *" addr ", size_t " count ", " .BI "fi_addr_t *" fi_addr ", uint64_t " flags ");" .HP +.BI "int fi_av_insertsvc(struct fid_av *" av ", const char *" node ", " +.BI "const char *" service ", fi_addr_t *" fi_addr ", uint64_t " flags ");" +.HP +.BI "int fi_av_insertsym(struct fid_av *" av ", const char *" node ", " +.BI "size_t " nodecnt ", const char *" service ", size_t " svccnt ", " +.BI "fi_addr_t *" fi_addr ", uint64_t " flags ");" +.HP .BI "int fi_av_remove(struct fid_av *" av ", fi_addr_t " fi_addr ", size_t " count ", " .BI "uint64_t " flags ");" .HP @@ -74,7 +81,7 @@ will be written. .br For remove, one or more fabric addresses to remove. .IP "count" -Number of entries referenced by addr and/or fi_addr. +Number of addresses to insert/remove from an AV. .IP "flags" Additional flags to apply to the operation. .SH "DESCRIPTION" @@ -93,6 +100,7 @@ struct fi_av_attr { enum fi_av_type type; /* type of AV */ int rx_ctx_bits; /* address bits to identify rx ctx */ size_t count; /* # entries for AV */ + size_t ep_per_node; /* # endpoints per fabric address */ const char *name; /* system name of AV */ void *map_addr; /* base mmap address */ uint64_t flags; /* operation flags */ @@ -105,16 +113,24 @@ including how it may be accessed. Valid values are: .RS .IP "FI_AV_MAP" Addresses which are inserted into an AV are mapped to a native fabric -address for use by the application. Mapped addresses are usable with -data transfer operations, such that costly translations or lookups can -be avoided. Addresses are stored in the AV using a provider specific +address for use by the application. The use of FI_AV_MAP requires that +an application store the returned fi_addr_t value that is associated with +each inserted address. 
The advantage of using FI_AV_MAP is that the returned +fi_addr_t value may contain encoded address data, which is immediately +available when processing data transfer requests. This can eliminate +or reduce the number of memory lookups needed when initiating a transfer. +The disadvantage of FI_AV_MAP is the increase in memory usage needed to +store the returned addresses. +Addresses are stored in the AV using a provider specific mechanism, including, but not limited to a tree, hash table, or maintained -on the heap. FI_AV_MAP is often used with address format FI_ADDR. +on the heap. .IP "FI_AV_TABLE" -Addresses which are inserted into an AV are accessible using a simple -index. Conceptually, the AV may be treated as an array of addresses, -though the provider may implement the AV using a variety of mechanisms. -FI_AV_TABLE is often used with address formats FI_AV and FI_ADDR_INDEX. +Addresses which are inserted into an AV of type FI_AV_TABLE are accessible +using a simple index. Conceptually, the AV may be treated as an array +of addresses, though the provider may implement the AV using a variety +of mechanisms. When FI_AV_TABLE is used, the returned fi_addr_t is a +0-based index, with the index for an inserted address the same as its +insertion order into the table. .RE .IP "Receive Context Bits (rx_ctx_bits)" The receive context bits field is only for use with scalable endpoints. It @@ -126,6 +142,14 @@ rx_ctx_cnt for the endpoint. .IP "count" Indicates the expected number of addresses that will be inserted into the AV. The provider uses this to optimize resource allocations. +.IP "ep_per_node" +This field indicates the number of endpoints that will be associated +with a specific fabric, or network, address. If the number of endpoints +per node is unknown, this value should be set to 0. +The provider uses this value to optimize resource allocations. 
+For example, distributed, parallel applications may set this to the number +of processes allocated per node, times the number of endpoints each process +will open. .IP "name" An optional system name associated with the address vector to create or open. Address vectors may be shared across multiple processes which access @@ -157,6 +181,12 @@ The following flags may be used when opening an AV. .IP "FI_READ" Opens an AV for read-only access. An AV opened for read-only access must be named (name attribute specified), and the AV must exist. +.IP "FI_SYMMETRIC" +Indicates that each node will be associated with the +same number of endpoints, the same transport addresses will be allocated +on each node, and the transport addresses will be sequential. This feature +targets distributed applications on large fabrics and allows for +highly-optimized storage of remote endpoint addressing. .RE .SS "fi_close" The fi_close call is used to release all resources associated with an @@ -173,7 +203,7 @@ The fi_av_insert call inserts one or more addresses into an AV. The number of addresses is specified through the count parameter. The addr parameter references an array of addresses to insert into the AV. Addresses inserted into an address vector must be in the same format as specified -in struct fi_info:info_addr_format for the corresponding domain. A NULL +in struct fi_info:addr_format for the corresponding domain. A NULL value for an address may be used to indicate that an entry should be associated with 'any' address (similar to the IPv4 address of 0.0.0.0). .PP @@ -196,9 +226,35 @@ remain valid until the insertion operation completes. When addresses are inserted into an AV of type FI_AV_TABLE, the returned fi_addr values will be simple indices corresponding to the entry into the table where the address was inserted. Addresses are indexed in order of their insertion. 
+.SS "fi_av_insertsvc" +The fi_av_insertsvc call behaves similarly to fi_av_insert, but allows the +application to specify the node and service names, similar to the +fi_getinfo inputs, rather than an encoded address. The node and service +parameters are defined the same as fi_getinfo(3). Node should be a string +that corresponds to a hostname or network address. The service string +corresponds to a textual representation of a transport address. +.SS "fi_av_insertsym" +fi_av_insertsym performs a symmetric insert that inserts a sequential +range of nodes and/or service addresses into an AV. The svccnt parameter +indicates the number of transport (endpoint) addresses to insert into the AV +for each node address, with the service parameter specifying the starting +transport address. Inserted transport addresses will be of the range +{service, service + svccnt - 1}, inclusive. All service addresses for a +node will be inserted before the next node is inserted. .PP -The FI_RANGE flag may be used with fi_av_insert to indicate that a range -of addresses should be inserted. See the flags discussion below. +The nodecnt parameter indicates the number of node (network) addresses to +insert into the AV, with the node parameter specifying the starting +node address. Inserted node addresses will be of the range +{node, node + nodecnt - 1}, inclusive. If node is a non-numeric string, +such as a hostname, it must contain a numeric suffix if nodecnt > 1. +.PP +As an example, if node = "10.1.1.1", nodecnt = 2, service = "5000", and +svccnt = 2, the following addresses will be inserted into the AV in the +order shown: 10.1.1.1:5000, 10.1.1.1:5001, 10.1.1.2:5000, 10.1.1.2:5001. +If node were replaced by the hostname "host10", the addresses would be: +host10:5000, host10:5001, host11:5000, host11:5001. +.PP +The total number of inserted addresses will be nodecnt x svccnt. .SS "fi_av_remove" fi_av_remove removes a set of addresses from an address vector. 
All resources associated with the indicated addresses are released, and @@ -238,20 +294,6 @@ referenced by buf. On output, the actual size needed to write the entire string will be returned. This size may be larger than the input len. If the provided buffer is too small, the results will be truncated. fi_av_straddr returns a pointer to buf. -.SH "FLAGS" -The following flags are usable with fi_av_insert. -.IP "FI_RANGE" -FI_RANGE allows for multiple addresses to be inserted into an AV by -specifying only the starting and ending addresses, inclusive, for a -range of given addresses. When multiple addresses are inserted into -the AV with FI_RANGE enabled, the provided addresses are processed in pairs. -Each pair indicates the first and last address of a range of addresses that -the AV should store. If an odd number of addresses are inserted into -the AV, the final address is treated as a single address. -.sp -When FI_RANGE is in use, the AV will return one mapped address for -every address that is inserted, including those address specified -indirectly as part of a given range. .SH "NOTES" Providers may implement AV's using a variety of mechanisms. Specifically, a provider may begin resolving inserted addresses as soon as they have diff --git a/man/fi_av_insertsvc.3 b/man/fi_av_insertsvc.3 new file mode 100644 index 00000000000..cea770b2a13 --- /dev/null +++ b/man/fi_av_insertsvc.3 @@ -0,0 +1 @@ +.so man3/fi_av.3