Skip to content

Commit

Permalink
fabtests/efa: Add remote exit early test with post recv
Browse files Browse the repository at this point in the history
When RDMA read is available and the message size is >= 1M, the long
read or runt read protocol is used, and the server is expected to get
a CQ entry or a CQ error.
Otherwise, if long CTS is used and the sender exits before sending the
CTS data, the receiver is expected to time out after sending the CTS
packet, without getting a CQ entry or a CQ error.

Signed-off-by: Jessie Yang <jiaxiyan@amazon.com>
  • Loading branch information
jiaxiyan authored and j-xiong committed Feb 28, 2025
1 parent 4672ade commit 9fd92a4
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 11 deletions.
41 changes: 32 additions & 9 deletions fabtests/prov/efa/src/rdm_remote_exit_early.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
#include <shared.h>
#include <stdio.h>
#include <stdlib.h>

#include <rdma/fi_tagged.h>
#include <rdma/fi_ext.h>

static bool post_rx = false;

Expand All @@ -45,13 +47,21 @@ enum {
static int run()
{
int ret;
bool use_emulated_read;

ret = ft_init_fabric();
if (ret) {
FT_PRINTERR("ft_init_fabric", -ret);
return ret;
}

ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_EMULATED_READ,
&use_emulated_read, &(size_t) {sizeof use_emulated_read});
if (ret) {
FT_PRINTERR("fi_getopt(FI_OPT_EFA_EMULATED_READ)", ret);
goto out;
}

/*
* The handshake procedure between server and client will happen in
* either ft_exchange_keys() or ft_sync()
Expand Down Expand Up @@ -125,18 +135,31 @@ static int run()
}

ft_stop();
/* When server posts a recv, we expect to
* get a cq entry or cq error.
* If no recv is posted, it should just
* poll some cq in the timeout range
* and exit.
*/
if ((end.tv_sec - start.tv_sec) > timeout) {
if (post_rx) {
fprintf(stderr, "%ds timeout expired\n",
timeout);
ret = -FI_ENODATA;
if (use_emulated_read || opts.transfer_size < 1048576) {
/*
* RDMA read is not available. If long CTS is used and
* sender exits before sending CTS data, receiver is
* expected to timeout after sending the CTS packet
* without getting a cq entry or cq error.
*/
printf("server timeout\n");
ret = 0;
} else {
/*
* RDMA read is available.
* When server posts a recv, it is expected
* to get a cq entry or cq error.
*/
fprintf(stderr, "%ds timeout expired\n", timeout);
ret = -FI_ENODATA;
}
} else {
/*
* If no recv is posted, it should just
* poll some cq in the timeout range and exit.
*/
printf("server polls cq and exits\n");
ret = 0;
}
Expand Down
9 changes: 7 additions & 2 deletions fabtests/pytest/efa/test_remote_exit_early.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def remote_exit_early_message_size(request):
# 64K use medium
# 128K use long CTS
# 1M use runtread or longread
# 1M use runtread or longread if rdma read is available
return request.param

@pytest.mark.functional
Expand All @@ -26,4 +26,9 @@ def test_remote_exit_early_post_writedata(cmdline_args, remote_exit_early_messag
message_size=remote_exit_early_message_size)
test.run()

# TODO: add test with --post-rx after fixing the leak in srx->rx_pool
# Functional test for the "remote exits early" scenario with a posted receive:
# builds a ClientServerTest that runs `fi_efa_rdm_remote_exit_early --post-rx`
# using the message size supplied by the remote_exit_early_message_size
# fixture, then runs it.
@pytest.mark.functional
def test_remote_exit_early_post_rx(cmdline_args, remote_exit_early_message_size):
test = ClientServerTest(cmdline_args,
"fi_efa_rdm_remote_exit_early --post-rx",
message_size=remote_exit_early_message_size)
test.run()

0 comments on commit 9fd92a4

Please sign in to comment.