Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prov/efa: Fix the rnr cq read error test for efa-direct #10818

Merged
merged 2 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fabtests/prov/efa/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ endif BUILD_EFA_RDMA_CHECKER
endif HAVE_VERBS_DEVEL

# Header and source files grouped under efa_rnr_srcs (presumably reused by
# the EFA RNR test programs -- confirm against the targets that reference it).
efa_rnr_srcs = \
prov/efa/src/efa_shared.h \
prov/efa/src/efa_rnr_shared.h \
prov/efa/src/efa_rnr_shared.c

Expand Down
19 changes: 19 additions & 0 deletions fabtests/prov/efa/src/efa_shared.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be useful for general EFA-specific fabtests in the future, so I intend to keep it in a separate header file.

/* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All
* rights reserved. */

#ifndef _EFA_SHARED_H
#define _EFA_SHARED_H

/*
 * Helpers shared by EFA-specific fabtests.
 *
 * The macros below use FI_EP_RDM and dereference a fi_info-shaped pointer
 * (->ep_attr->type, ->fabric_attr->name), so the including file must have
 * those libfabric definitions in scope before using the macros.
 * NOTE(review): presumably supplied by <shared.h> / <rdma/fabric.h>; confirm.
 */

#include <strings.h> /* strcasecmp() */

/* Fabric names, as compared against fabric_attr->name by the macros below. */
#define EFA_FABRIC_NAME "efa"
#define EFA_DIRECT_FABRIC_NAME "efa-direct"

/*
 * EFA_INFO_TYPE_IS_RDM(_info)
 *
 * Evaluates to non-zero when _info is non-NULL, its endpoint type is
 * FI_EP_RDM and its fabric name equals EFA_FABRIC_NAME (case-insensitive);
 * evaluates to 0 otherwise, including when ep_attr, fabric_attr or
 * fabric_attr->name is NULL.
 *
 * The argument is expanded several times, so it must not have side effects.
 */
#define EFA_INFO_TYPE_IS_RDM(_info)                                   \
	((_info) && (_info)->ep_attr &&                               \
	 ((_info)->ep_attr->type == FI_EP_RDM) &&                     \
	 (_info)->fabric_attr && (_info)->fabric_attr->name &&        \
	 !strcasecmp((_info)->fabric_attr->name, EFA_FABRIC_NAME))

/*
 * EFA_INFO_TYPE_IS_DIRECT(_info)
 *
 * Same checks as EFA_INFO_TYPE_IS_RDM(), but the fabric name must equal
 * EFA_DIRECT_FABRIC_NAME (case-insensitive).
 *
 * The argument is expanded several times, so it must not have side effects.
 */
#define EFA_INFO_TYPE_IS_DIRECT(_info)                                \
	((_info) && (_info)->ep_attr &&                               \
	 ((_info)->ep_attr->type == FI_EP_RDM) &&                     \
	 (_info)->fabric_attr && (_info)->fabric_attr->name &&        \
	 !strcasecmp((_info)->fabric_attr->name, EFA_DIRECT_FABRIC_NAME))

#endif /* _EFA_SHARED_H */
15 changes: 11 additions & 4 deletions fabtests/prov/efa/src/rdm_rnr_read_cq_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <getopt.h>

#include <shared.h>
#include "efa_shared.h"
#include "efa_rnr_shared.h"


Expand All @@ -48,11 +49,17 @@ static int rnr_read_cq_error(void)
rnr_flag = 0;
/*
* In order for the sender to get RNR error, we need to first consume
* all pre-posted receive buffer (in efa provider, fi->rx_attr->size
* receiving buffer are pre-posted) on the receiver side, the subsequent
* sends (expected_rnr_error) will then get RNR errors.
* all pre-posted receive buffers.
* For efa-rdm, the provider pre-posts fi->rx_attr->size receive buffers during the 1st cq read.
* For efa-direct, the provider posts only what the application posts; ft_enable_ep_recv
* already posts 1.
*/
total_send = fi->rx_attr->size + expected_rnr_error;
if (EFA_INFO_TYPE_IS_RDM(fi)) {
total_send = fi->rx_attr->size + expected_rnr_error;
} else {
assert(EFA_INFO_TYPE_IS_DIRECT(fi));
total_send = expected_rnr_error + 1;
}

for (i = 0; i < total_send; i++) {
do {
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/src/efa_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ static inline ssize_t efa_post_send(struct efa_base_ep *base_ep, const struct fi

if (!(flags & FI_MORE)) {
ret = ibv_wr_complete(qp->ibv_qp_ex);
if (OFI_UNLIKELY(ret))
ret = (ret == ENOMEM) ? -FI_EAGAIN : -ret;
base_ep->is_wr_started = false;
}

Expand Down
4 changes: 4 additions & 0 deletions prov/efa/src/efa_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ static inline ssize_t efa_rma_post_read(struct efa_base_ep *base_ep,

if (!(flags & FI_MORE)) {
err = ibv_wr_complete(qp->ibv_qp_ex);
if (OFI_UNLIKELY(err))
err = (err == ENOMEM) ? -FI_EAGAIN : -err;
base_ep->is_wr_started = false;
}

Expand Down Expand Up @@ -265,6 +267,8 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep,

if (!(flags & FI_MORE)) {
err = ibv_wr_complete(qp->ibv_qp_ex);
if (OFI_UNLIKELY(err))
err = (err == ENOMEM) ? -FI_EAGAIN : -err;
base_ep->is_wr_started = false;
}

Expand Down
6 changes: 3 additions & 3 deletions prov/efa/src/rdm/efa_rdm_pke.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ ssize_t efa_rdm_pke_sendv(struct efa_rdm_pke **pkt_entry_vec,
}

if (OFI_UNLIKELY(ret)) {
return ret;
return (ret == ENOMEM) ? -FI_EAGAIN : -ret;
}

for (pkt_idx = 0; pkt_idx < pkt_entry_cnt; ++pkt_idx)
Expand Down Expand Up @@ -537,7 +537,7 @@ int efa_rdm_pke_read(struct efa_rdm_pke *pkt_entry,
err = ibv_wr_complete(qp->ibv_qp_ex);

if (OFI_UNLIKELY(err))
return err;
return (err == ENOMEM) ? -FI_EAGAIN : -err;

efa_rdm_ep_record_tx_op_submitted(ep, pkt_entry);
return 0;
Expand Down Expand Up @@ -632,7 +632,7 @@ int efa_rdm_pke_write(struct efa_rdm_pke *pkt_entry)
}

if (OFI_UNLIKELY(err))
return err;
return (err == ENOMEM) ? -FI_EAGAIN : -err;

efa_rdm_ep_record_tx_op_submitted(ep, pkt_entry);
return 0;
Expand Down