diff --git a/prov/efa/src/efa_cntr.c b/prov/efa/src/efa_cntr.c index c30a3d862d4..b119f134c0d 100644 --- a/prov/efa/src/efa_cntr.c +++ b/prov/efa/src/efa_cntr.c @@ -199,10 +199,40 @@ int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, { int ret; struct efa_cntr *cntr; + + cntr = calloc(1, sizeof(*cntr)); + if (!cntr) + return -FI_ENOMEM; + + dlist_init(&cntr->ibv_cq_poll_list); + cntr->need_to_scan_ep_list = false; + + ret = ofi_cntr_init(&efa_prov, domain, attr, &cntr->util_cntr, + efa_cntr_progress, context); + + if (ret) + goto free; + + *cntr_fid = &cntr->util_cntr.cntr_fid; + cntr->util_cntr.cntr_fid.ops = &efa_cntr_ops; + cntr->util_cntr.cntr_fid.fid.ops = &efa_cntr_fi_ops; + + return FI_SUCCESS; + +free: + free(cntr); + return ret; +} + + +int efa_rdm_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, + struct fid_cntr **cntr_fid, void *context) +{ + int ret; + struct efa_cntr *cntr; struct efa_domain *efa_domain; struct fi_cntr_attr shm_cntr_attr = {0}; struct fi_peer_cntr_context peer_cntr_context = {0}; - ofi_cntr_progress_func cntr_progress_func; cntr = calloc(1, sizeof(*cntr)); if (!cntr) @@ -213,11 +243,8 @@ int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, efa_domain = container_of(domain, struct efa_domain, util_domain.domain_fid); - cntr_progress_func = efa_domain->info->ep_attr->type == FI_EP_RDM - ? efa_rdm_cntr_progress - : efa_cntr_progress; ret = ofi_cntr_init(&efa_prov, domain, attr, &cntr->util_cntr, - cntr_progress_func, context); + efa_rdm_cntr_progress, context); if (ret) goto free; diff --git a/prov/efa/src/efa_cntr.h b/prov/efa/src/efa_cntr.h index bcfde8784a2..7b8588943c8 100644 --- a/prov/efa/src/efa_cntr.h +++ b/prov/efa/src/efa_cntr.h @@ -19,6 +19,9 @@ struct efa_cntr { int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, struct fid_cntr **cntr_fid, void *context); +int efa_rdm_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, + struct fid_cntr **cntr_fid, void *context); + void efa_cntr_report_tx_completion(struct util_ep *ep, uint64_t flags); void efa_cntr_report_rx_completion(struct util_ep *ep, uint64_t flags); diff --git a/prov/efa/src/efa_domain.c b/prov/efa/src/efa_domain.c index 1535af56664..441be368b35 100644 --- a/prov/efa/src/efa_domain.c +++ b/prov/efa/src/efa_domain.c @@ -48,7 +48,7 @@ static struct fi_ops_domain efa_domain_ops_rdm = { .cq_open = efa_rdm_cq_open, .endpoint = efa_rdm_ep_open, .scalable_ep = fi_no_scalable_ep, - .cntr_open = efa_cntr_open, + .cntr_open = efa_rdm_cntr_open, .poll_open = fi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, diff --git a/prov/efa/src/efa_ep.c b/prov/efa/src/efa_ep.c index 8aa3268adf2..91ac038b4b0 100644 --- a/prov/efa/src/efa_ep.c +++ b/prov/efa/src/efa_ep.c @@ -227,12 +227,6 @@ static int efa_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) switch (bfid->fclass) { case FI_CLASS_CQ: - if (flags & FI_SELECTIVE_COMPLETION) { - EFA_WARN(FI_LOG_EP_CTRL, - "Endpoint cannot be bound with selective completion.\n"); - return -FI_EBADFLAGS; - } - /* Must bind a CQ to either RECV or SEND completions */ if (!(flags & (FI_RECV | FI_TRANSMIT))) return -FI_EBADFLAGS; diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index 3b6f9a7f7f1..2dbc946542b 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -153,8 +153,14 @@ void efa_prov_info_set_ep_attr(struct fi_info *prov_info, prov_info->ep_attr->max_msg_size = device->ibv_port_attr.max_msg_sz; prov_info->ep_attr->type = ep_type; - if (ep_type == FI_EP_DGRAM) + if (ep_type == FI_EP_RDM) { + /* ep_attr->max_msg_size is the maximum of both MSG and RMA operations */ + if (prov_info->caps & FI_RMA) + prov_info->ep_attr->max_msg_size = MAX(device->ibv_port_attr.max_msg_sz, device->max_rdma_size); + } else { + assert(ep_type == FI_EP_DGRAM); prov_info->ep_attr->msg_prefix_size = 40; + } } /** diff --git a/prov/efa/src/efa_user_info.c b/prov/efa/src/efa_user_info.c index 5cbb9f51b98..9a1ccb1fd1c 100644 --- a/prov/efa/src/efa_user_info.c +++ b/prov/efa/src/efa_user_info.c @@ -380,11 +380,11 @@ int efa_user_info_alter_direct(int version, struct fi_info *info, const struct f EFA_INFO(FI_LOG_CORE, "FI_MSG_PREFIX size = %ld\n", info->ep_attr->msg_prefix_size); } - /* When user requests FI_RMA and it's supported, the max_msg_size should be returned - * as the maximum of both MSG and RMA operations + /* When user doesn't request FI_RMA, the max_msg_size should be returned + * as the MSG only as RMA will not be used. */ - if (hints->caps & FI_RMA) - info->ep_attr->max_msg_size = MAX(g_device_list[0].ibv_port_attr.max_msg_sz, g_device_list[0].max_rdma_size); + if (!(hints->caps & FI_RMA)) + info->ep_attr->max_msg_size = g_device_list[0].ibv_port_attr.max_msg_sz; } /* Print a warning and use FI_AV_TABLE if the app requests FI_AV_MAP */ diff --git a/prov/efa/test/efa_unit_test_info.c b/prov/efa/test/efa_unit_test_info.c index 448f0c8b286..34a0069cfcd 100644 --- a/prov/efa/test/efa_unit_test_info.c +++ b/prov/efa/test/efa_unit_test_info.c @@ -116,6 +116,11 @@ static void test_info_direct_attributes_impl(struct fi_info *hints, assert_false(info->tx_attr->msg_order & FI_ORDER_SAS); assert_int_equal(info->domain_attr->progress, FI_PROGRESS_AUTO); assert_int_equal(info->domain_attr->control_progress, FI_PROGRESS_AUTO); + assert_int_equal( + g_device_list[0].rdm_info->ep_attr->max_msg_size, + (info->caps & FI_RMA) ? + g_device_list[0].max_rdma_size : + g_device_list[0].ibv_port_attr.max_msg_sz); assert_int_equal( info->ep_attr->max_msg_size, (hints->caps & FI_RMA) ?