diff --git a/include/freebsd/osd.h b/include/freebsd/osd.h index 81414d1d59c..fe1193cc9e1 100644 --- a/include/freebsd/osd.h +++ b/include/freebsd/osd.h @@ -76,6 +76,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa) return 0; } +static inline int ofi_ifaddr_get_mtu(struct ifaddrs *ifa) +{ + return -1; /* stub: ifa is ignored, -1 is always returned */ +} + static inline ssize_t ofi_process_vm_readv(pid_t pid, const struct iovec *local_iov, unsigned long liovcnt, @@ -185,5 +190,3 @@ ofi_recvv_socket(SOCKET fd, const struct iovec *iov, size_t cnt, int flags) } #endif /* _FREEBSD_OSD_H_ */ - - diff --git a/include/linux/osd.h b/include/linux/osd.h index 5b8d0fcd4ee..3053c5ae153 100644 --- a/include/linux/osd.h +++ b/include/linux/osd.h @@ -92,6 +92,8 @@ static inline int ofi_hugepage_enabled(void) size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa); +int ofi_ifaddr_get_mtu(struct ifaddrs *ifa); /* defined in src/linux/osd.c */ + #ifndef __NR_process_vm_readv # define __NR_process_vm_readv 310 #endif diff --git a/include/osx/osd.h b/include/osx/osd.h index 2f7494af6e1..98f674f503e 100644 --- a/include/osx/osd.h +++ b/include/osx/osd.h @@ -99,6 +99,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa) return 0; } +static inline int ofi_ifaddr_get_mtu(struct ifaddrs *ifa) +{ + return -1; /* stub: ifa is ignored, -1 is always returned */ +} + static inline int ofi_hugepage_enabled(void) { return 0; diff --git a/include/windows/ifaddrs.h b/include/windows/ifaddrs.h index 02e657cb480..6aad728995d 100644 --- a/include/windows/ifaddrs.h +++ b/include/windows/ifaddrs.h @@ -34,8 +34,8 @@ struct ifaddrs { char ad_name[16]; size_t speed; + int mtu; /* set by getifaddrs() in src/windows/osd.c from the adapter's Mtu field */ }; int getifaddrs(struct ifaddrs **ifap); void freeifaddrs(struct ifaddrs *ifa); - diff --git a/include/windows/osd.h b/include/windows/osd.h index d9698bd9724..e284c66ec08 100644 --- a/include/windows/osd.h +++ b/include/windows/osd.h @@ -1006,6 +1006,8 @@ static inline int ofi_is_loopback_addr(struct sockaddr *addr) { size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa); +int ofi_ifaddr_get_mtu(struct ifaddrs *ifa); /* NOTE(review): no definition is visible in this patch's src/windows/osd.c hunk - confirm one exists */ + #define 
file2unix_time 10000000i64 #define win2unix_epoch 116444736000000000i64 #define CLOCK_REALTIME 0 diff --git a/man/fi_udp.7.md b/man/fi_udp.7.md index cbe0a371012..27c518be42f 100644 --- a/man/fi_udp.7.md +++ b/man/fi_udp.7.md @@ -41,9 +41,8 @@ receiving datagram messages over an unreliable endpoint. # LIMITATIONS -The UDP provider has hard-coded maximums for supported queue sizes and data -transfers. These values are reflected in the related fabric attribute -structures +The UDP provider has a hard-coded maximum for supported queue sizes. +This value is reflected in the related fabric attribute structures. EPs must be bound to both RX and TX CQs. @@ -53,7 +52,10 @@ No support for counters. # RUNTIME PARAMETERS -No runtime parameters are currently defined. +The UDP provider checks for the following environment variables - + +*FI_UDP_IFACE* +: A string value that specifies the name of the interface. # SEE ALSO diff --git a/prov/udp/src/udpx.h b/prov/udp/src/udpx.h index a52ff392b99..f0767063097 100644 --- a/prov/udp/src/udpx.h +++ b/prov/udp/src/udpx.h @@ -63,22 +63,22 @@ #ifndef _UDPX_H_ #define _UDPX_H_ - extern struct fi_provider udpx_prov; extern struct util_prov udpx_util_prov; extern struct fi_info udpx_info; - int udpx_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context); int udpx_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **dom, void *context); int udpx_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, struct fid_eq **eq, void *context); - +int udpx_util_prov_init(uint32_t version, const char *node, const char *service, + uint64_t flags); #define UDPX_FLAG_MULTI_RECV 1 #define UDPX_IOV_LIMIT 4 +#define UDPX_MTU 1500 struct udpx_ep_entry { void *context; @@ -88,6 +88,8 @@ struct udpx_ep_entry { uint8_t resv[sizeof(size_t) - 2]; }; +#define UDPX_MAX_MSG_SIZE(mtu) ((mtu) - 28) + OFI_DECLARE_CIRQUE(struct udpx_ep_entry, udpx_rx_cirq); struct udpx_ep; diff --git a/prov/udp/src/udpx_attr.c 
b/prov/udp/src/udpx_attr.c index 7737efe86fc..6727f49a76b 100644 --- a/prov/udp/src/udpx_attr.c +++ b/prov/udp/src/udpx_attr.c @@ -31,6 +31,7 @@ */ #include "udpx.h" +#include "ofi_osd.h" #define UDPX_TX_CAPS (OFI_TX_MSG_CAPS | FI_MULTICAST) #define UDPX_RX_CAPS (FI_SOURCE | OFI_RX_MSG_CAPS) @@ -38,7 +39,7 @@ struct fi_tx_attr udpx_tx_attr = { .caps = UDPX_TX_CAPS, - .inject_size = 1472, + .inject_size = UDPX_MAX_MSG_SIZE(UDPX_MTU), .size = 1024, .iov_limit = UDPX_IOV_LIMIT }; @@ -53,7 +54,7 @@ struct fi_ep_attr udpx_ep_attr = { .type = FI_EP_DGRAM, .protocol = FI_PROTO_UDP, .protocol_version = 0, - .max_msg_size = 1472, + .max_msg_size = UDPX_MAX_MSG_SIZE(UDPX_MTU), .tx_ctx_cnt = 1, .rx_ctx_cnt = 1 }; @@ -93,6 +94,39 @@ struct fi_info udpx_info = { struct util_prov udpx_util_prov = { .prov = &udpx_prov, - .info = &udpx_info, - .flags = 0, + .info = NULL, + .flags = 0, }; + +static int detect_mtu(const struct fi_info* info) { + + struct ifaddrs ifaddrs; + ifaddrs.ifa_next = NULL; + ifaddrs.ifa_flags = 0; + ifaddrs.ifa_netmask = NULL; + ifaddrs.ifa_name = info->domain_attr->name; + ifaddrs.ifa_addr = info->src_addr; + return ofi_ifaddr_get_mtu(&ifaddrs); +} + +int udpx_util_prov_init(uint32_t version, const char *node, const char *service, + uint64_t flags) { + + struct fi_info* cur; + struct fi_info* info; + int max_msg_size; + if (udpx_util_prov.info == NULL) { + udpx_util_prov.info = &udpx_info; + info = fi_allocinfo(); + ofi_ip_getinfo(&udpx_util_prov, version, node, service, flags, + NULL, &info); + for (cur = info; cur; cur = cur->next) { + max_msg_size = UDPX_MAX_MSG_SIZE(detect_mtu(cur)); + if (max_msg_size > 0) { + cur->tx_attr->inject_size = max_msg_size; + cur->ep_attr->max_msg_size = max_msg_size; + } + } + udpx_util_prov.info = info; + } +} diff --git a/prov/udp/src/udpx_init.c b/prov/udp/src/udpx_init.c index 85ad70f749b..e711abebf98 100644 --- a/prov/udp/src/udpx_init.c +++ b/prov/udp/src/udpx_init.c @@ -37,20 +37,26 @@ #include +static ofi_mutex_t 
init_lock; static int udpx_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, const struct fi_info *hints, struct fi_info **info) { - return ofi_ip_getinfo(&udpx_util_prov, version, node, service, flags, - hints, info); + ofi_mutex_lock(&init_lock); + udpx_util_prov_init(version, node, service, flags); + ofi_mutex_unlock(&init_lock); + return util_getinfo(&udpx_util_prov, version, node, service, flags, + hints, info); } static void udpx_fini(void) { - /* yawn */ + if (udpx_util_prov.info != NULL) + fi_freeinfo(udpx_util_prov.info); } + struct fi_provider udpx_prov = { .name = "udp", .version = OFI_VERSION_DEF_PROV, @@ -65,5 +71,6 @@ UDP_INI fi_param_define(&udpx_prov, "iface", FI_PARAM_STRING, "Specify interface name"); + ofi_mutex_init(&init_lock); return &udpx_prov; } diff --git a/src/linux/osd.c b/src/linux/osd.c index cb848367506..97de8df8e60 100644 --- a/src/linux/osd.c +++ b/src/linux/osd.c @@ -257,3 +257,49 @@ size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa) } #endif /* HAVE_ETHTOOL */ + +int ofi_ifaddr_get_mtu(struct ifaddrs *ifa) +{ + FILE *fd; + char *line = NULL; + size_t len = 0; + char *mtu_filename_prefix = "/sys/class/net/"; + char *mtu_filename_suffix = "/mtu"; + char *mtu_filename; + size_t mtu; + /* IF_NAMESIZE includes NULL-terminated symbol */ + size_t filename_len = strlen(mtu_filename_prefix) + + strlen(mtu_filename_prefix) + + IF_NAMESIZE; + + mtu_filename = calloc(1, filename_len); + if (!mtu_filename) + return 0; + + snprintf(mtu_filename, filename_len, "%s%s%s", + mtu_filename_prefix, ifa->ifa_name, mtu_filename_suffix); + + fd = fopen(mtu_filename, "r"); + if (!fd) + goto err1; + + if (getline(&line, &len, fd) == -1) { + goto err2; + } + + if (sscanf(line, "%d", &mtu) != 1) + goto err3; + + free(line); + fclose(fd); + free(mtu_filename); + + return mtu; + err3: + free(line); + err2: + fclose(fd); + err1: + free(mtu_filename); + return 0; +} diff --git a/src/windows/osd.c b/src/windows/osd.c index 
6f693514c7f..c7c091a2826 100644 --- a/src/windows/osd.c +++ b/src/windows/osd.c @@ -477,6 +477,7 @@ int getifaddrs(struct ifaddrs **ifap) (*addr6) = *(struct sockaddr_in6 *) pSockAddr; } fa->speed = aa->TransmitLinkSpeed; + fa->mtu = aa->Mtu; /* Generate fake Unix-like device names */ sprintf_s(fa->ad_name, sizeof(fa->ad_name), "eth%d", i++); }