diff --git a/fabtests/pytest/efa/conftest.py b/fabtests/pytest/efa/conftest.py index f541d091192..8a2cd09071b 100644 --- a/fabtests/pytest/efa/conftest.py +++ b/fabtests/pytest/efa/conftest.py @@ -74,11 +74,30 @@ def zcpy_recv_message_size(request): def zcpy_recv_max_msg_size(request): return 8192 -# TODO - add efa-direct tests -@pytest.fixture(scope="module", params=["efa"]) +@pytest.fixture(scope="module", params=["r:0,4,32", + "r:0,1024,8192",]) +def direct_message_size(request): + return request.param + +@pytest.fixture(scope="module", params=["r:1,4,32", + "r:1,1024,8192",]) +def direct_rma_size(request): + return request.param + +@pytest.fixture(scope="module", params=["efa", "efa-direct"]) def fabric(request): return request.param +@pytest.fixture(scope="function") +def rma_fabric(cmdline_args, fabric): + if fabric == 'efa-direct' and ( + not has_rdma(cmdline_args, 'read') or + not has_rdma(cmdline_args, 'write') or + not has_rdma(cmdline_args, 'writedata') + ): + pytest.skip("FI_RMA is not supported. Skip rma tests on efa-direct.") + return fabric + @pytest.hookimpl(hookwrapper=True) def pytest_collection_modifyitems(session, config, items): # Called after collection has been performed, may filter or re-order the items in-place diff --git a/fabtests/pytest/efa/test_av.py b/fabtests/pytest/efa/test_av.py index 09091bb569b..2b82bec17c6 100644 --- a/fabtests/pytest/efa/test_av.py +++ b/fabtests/pytest/efa/test_av.py @@ -1,7 +1,8 @@ import pytest +# This test skips efa-direct because it requests FI_TAGGED @pytest.mark.functional -def test_av_xfer(cmdline_args, fabric): +def test_av_xfer(cmdline_args): from common import ClientServerTest - test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm", fabric=fabric) + test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm", fabric="efa") test.run() diff --git a/fabtests/pytest/efa/test_cq.py b/fabtests/pytest/efa/test_cq.py index 2e68f2268f1..e42fb15520c 100644 --- a/fabtests/pytest/efa/test_cq.py +++ b/fabtests/pytest/efa/test_cq.py @@ -1,4 +1,5 @@ import pytest +from efa.efa_common import has_rdma # this test must be run in serial mode because it will open the maximal number # of cq that efa device can support @@ -13,5 +14,7 @@ def test_cq(cmdline_args, fabric): @pytest.mark.parametrize("operation_type", ["senddata", "writedata"]) def test_cq_data(cmdline_args, operation_type, fabric): from common import ClientServerTest - test = ClientServerTest(cmdline_args, f"fi_cq_data -e rdm -o" + operation_type, fabric=fabric) + if fabric == "efa-direct" and operation_type == "writedata" and not has_rdma(cmdline_args, operation_type): + pytest.skip("FI_RMA is not supported. Skip writedata test on efa-direct.") + test = ClientServerTest(cmdline_args, f"fi_cq_data -e rdm -o " + operation_type, fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_efa_device_selection.py b/fabtests/pytest/efa/test_efa_device_selection.py index 09c3a5c3db7..346e597df48 100644 --- a/fabtests/pytest/efa/test_efa_device_selection.py +++ b/fabtests/pytest/efa/test_efa_device_selection.py @@ -26,6 +26,8 @@ def test_efa_device_selection(cmdline_args, fabric): client_device_name = client_device_names[client_device_idx] for suffix in ["rdm", "dgrm"]: + if fabric == "efa-direct" and suffix == "dgrm": + continue server_tx_bytes_before_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", server_device_name) client_tx_bytes_before_test = efa_retrieve_hw_counter_value(cmdline_args.client_id, "tx_bytes", client_device_name) diff --git a/fabtests/pytest/efa/test_efa_protocol_selection.py b/fabtests/pytest/efa/test_efa_protocol_selection.py index bf390d6c3d7..76701f97dff 100644 --- a/fabtests/pytest/efa/test_efa_protocol_selection.py +++ b/fabtests/pytest/efa/test_efa_protocol_selection.py @@ -3,12 +3,13 @@ from efa.efa_common import has_gdrcopy, has_rdma +# This test skips efa-direct because it does not have the read protocol # TODO Expand this test to run on all memory types (and rename) @pytest.mark.serial @pytest.mark.functional @pytest.mark.cuda_memory @pytest.mark.parametrize("fabtest_name,cntrl_env_var", [("fi_rdm_tagged_bw", "FI_EFA_INTER_MIN_READ_MESSAGE_SIZE"), ("fi_rma_bw", "FI_EFA_INTER_MIN_READ_WRITE_SIZE")]) -def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var, fabric): +def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var): """ Verify that the read protocol is used for a 1024 byte message when the env variable switches are set to force the read protocol at 1000 bytes. @@ -51,7 +52,7 @@ def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_ memory_type="cuda_to_cuda", message_size=message_size, warmup_iteration_type="0", - fabric=fabric) + fabric="efa") server_read_wrs_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_wrs") server_read_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_bytes") diff --git a/fabtests/pytest/efa/test_fork_support.py b/fabtests/pytest/efa/test_fork_support.py index ab4c87531fe..a7c1db670c7 100644 --- a/fabtests/pytest/efa/test_fork_support.py +++ b/fabtests/pytest/efa/test_fork_support.py @@ -9,8 +9,7 @@ def test_fork_support(cmdline_args, completion_semantic, environment_variable, f cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.append_environ("{}=1".format(environment_variable)) - test = ClientServerTest(cmdline_args_copy, "fi_rdm_tagged_bw -K", + test = ClientServerTest(cmdline_args_copy, "fi_rdm_bw -K", completion_semantic=completion_semantic, datacheck_type="with_datacheck", fabric=fabric) test.run() - diff --git a/fabtests/pytest/efa/test_multi_ep.py b/fabtests/pytest/efa/test_multi_ep.py index 7f0130d9b8b..36e08130870 100644 --- a/fabtests/pytest/efa/test_multi_ep.py +++ b/fabtests/pytest/efa/test_multi_ep.py @@ -2,10 +2,11 @@ @pytest.mark.functional @pytest.mark.parametrize("shared_cq", [True, False]) -def test_multi_ep(cmdline_args, shared_cq, fabric): +def test_multi_ep(cmdline_args, shared_cq, rma_fabric): + # This test requests FI_RMA from common import ClientServerTest cmd = "fi_multi_ep -e rdm" if shared_cq: cmd += " -Q" - test = ClientServerTest(cmdline_args, cmd, fabric=fabric) + test = ClientServerTest(cmdline_args, cmd, message_size=256, fabric=rma_fabric) test.run() diff --git a/fabtests/pytest/efa/test_multi_recv.py b/fabtests/pytest/efa/test_multi_recv.py index 38acb3ce008..0d5b8d2c9ac 100644 --- a/fabtests/pytest/efa/test_multi_recv.py +++ b/fabtests/pytest/efa/test_multi_recv.py @@ -4,11 +4,12 @@ [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) @pytest.mark.parametrize("message_size", ["1024", "8192"]) -def test_multi_recv(cmdline_args, iteration_type, message_size, fabric): +# efa-direct does not support multi-recv +def test_multi_recv(cmdline_args, iteration_type, message_size): from common import ClientServerTest test = ClientServerTest(cmdline_args, "fi_multi_recv -e rdm", iteration_type, message_size=message_size, - fabric=fabric) + fabric="efa") test.run() diff --git a/fabtests/pytest/efa/test_rdm.py b/fabtests/pytest/efa/test_rdm.py index f7a3942019d..e00dba3dbb7 100644 --- a/fabtests/pytest/efa/test_rdm.py +++ b/fabtests/pytest/efa/test_rdm.py @@ -11,92 +11,101 @@ def test_rdm_efa(cmdline_args, completion_semantic, fabric): test = ClientServerTest(cmdline_args, "fi_rdm", completion_semantic=completion_semantic, fabric=fabric) test.run() +# This test skips efa-direct because it requests FI_ORDER_SAS @pytest.mark.functional -def test_rdm_bw_functional_efa(cmdline_args, completion_semantic, fabric): +def test_rdm_bw_functional_efa(cmdline_args, completion_semantic): from common import ClientServerTest - test = ClientServerTest(cmdline_args, "fi_flood -e rdm -v -T 1", completion_semantic=completion_semantic, fabric=fabric) + test = ClientServerTest(cmdline_args, "fi_flood -e rdm -v -T 1", completion_semantic=completion_semantic, fabric="efa") test.run() @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type, fabric): +def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, + memory_type_bi_dir, completion_type, direct_message_size, fabric): command = "fi_rdm_pingpong" + " " + perf_progress_model_cli efa_run_client_server_test(cmdline_args, command, iteration_type, - completion_semantic, memory_type_bi_dir, "all", + completion_semantic, memory_type_bi_dir, + direct_message_size if fabric == "efa-direct" else "all", completion_type=completion_type, fabric=fabric) +# This test skips efa-direct because efa-direct does not +# do memory registrations on behalf of the application @pytest.mark.functional @pytest.mark.serial -def test_mr_exhaustion_rdm_pingpong(cmdline_args, completion_semantic, fabric): +def test_mr_exhaustion_rdm_pingpong(cmdline_args, completion_semantic): efa_run_client_server_test(cmdline_args, "fi_efa_exhaust_mr_reg_rdm_pingpong", "short", completion_semantic, "host_to_host", "all", timeout=1000, - fabric=fabric) + fabric="efa") @pytest.mark.functional -def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size, fabric): +def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size, direct_message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong", "short", - completion_semantic, memory_type_bi_dir, message_size, fabric=fabric) + completion_semantic, memory_type_bi_dir, + direct_message_size if fabric == "efa-direct" else message_size, fabric=fabric) @pytest.mark.functional -def test_rdm_pingpong_no_inject_range(cmdline_args, completion_semantic, inject_message_size, fabric): +def test_rdm_pingpong_no_inject_range(cmdline_args, completion_semantic, inject_message_size, direct_message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong -j 0", "short", - completion_semantic, "host_to_host", inject_message_size, fabric=fabric) + completion_semantic, "host_to_host", + direct_message_size if fabric == "efa-direct" else inject_message_size, fabric=fabric) +# efa-direct does not support tagged @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_tagged_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type, fabric): +def test_rdm_tagged_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type): command = "fi_rdm_tagged_pingpong" + " " + perf_progress_model_cli efa_run_client_server_test(cmdline_args, command, iteration_type, completion_semantic, memory_type_bi_dir, "all", completion_type=completion_type, - fabric=fabric) + fabric="efa") @pytest.mark.functional -def test_rdm_tagged_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size, fabric): +def test_rdm_tagged_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_pingpong", "short", completion_semantic, memory_type_bi_dir, message_size, - fabric=fabric) + fabric="efa") @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_tagged_bw(cmdline_args, iteration_type, completion_semantic, memory_type, completion_type, fabric): +def test_rdm_tagged_bw(cmdline_args, iteration_type, completion_semantic, memory_type, completion_type): command = "fi_rdm_tagged_bw" + " " + perf_progress_model_cli efa_run_client_server_test(cmdline_args, command, iteration_type, completion_semantic, memory_type, "all", completion_type=completion_type, - fabric=fabric) + fabric="efa") @pytest.mark.functional -def test_rdm_tagged_bw_range(cmdline_args, completion_semantic, memory_type, message_size, fabric): +def test_rdm_tagged_bw_range(cmdline_args, completion_semantic, memory_type, message_size): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_bw", "short", - completion_semantic, memory_type, message_size, fabric=fabric) + completion_semantic, memory_type, message_size, fabric="efa") @pytest.mark.functional -def test_rdm_tagged_bw_no_inject_range(cmdline_args, completion_semantic, inject_message_size, fabric): +def test_rdm_tagged_bw_no_inject_range(cmdline_args, completion_semantic, inject_message_size): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_bw -j 0", "short", - completion_semantic, "host_to_host", inject_message_size, fabric=fabric) + completion_semantic, "host_to_host", inject_message_size, fabric="efa") @pytest.mark.functional @pytest.mark.parametrize("env_vars", [["FI_EFA_TX_SIZE=64"], ["FI_EFA_RX_SIZE=64"], ["FI_EFA_TX_SIZE=64", "FI_EFA_RX_SIZE=64"]]) -def test_rdm_tagged_bw_small_tx_rx(cmdline_args, completion_semantic, memory_type, completion_type, env_vars, fabric): +def test_rdm_tagged_bw_small_tx_rx(cmdline_args, completion_semantic, memory_type, completion_type, env_vars): cmdline_args_copy = copy.copy(cmdline_args) for env_var in env_vars: cmdline_args_copy.append_environ(env_var) # Use a window size larger than tx/rx size efa_run_client_server_test(cmdline_args_copy, "fi_rdm_tagged_bw -W 128", "short", completion_semantic, memory_type, "all", completion_type=completion_type, - fabric=fabric) + fabric="efa") @pytest.mark.functional -def test_rdm_tagged_bw_use_fi_more(cmdline_args, completion_semantic, memory_type, message_size, fabric): +def test_rdm_tagged_bw_use_fi_more(cmdline_args, completion_semantic, memory_type, message_size): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_bw --use-fi-more", - "short", completion_semantic, memory_type, message_size, fabric=fabric) + "short", completion_semantic, memory_type, message_size, fabric="efa") +# efa-direct does not support atomic @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_atomic(cmdline_args, iteration_type, completion_semantic, memory_type, fabric): +def test_rdm_atomic(cmdline_args, iteration_type, completion_semantic, memory_type): from copy import copy from common import ClientServerTest @@ -110,7 +119,7 @@ def test_rdm_atomic(cmdline_args, iteration_type, completion_semantic, memory_ty cmdline_args_copy = copy(cmdline_args) command = "fi_rdm_atomic" + " " + perf_progress_model_cli test = ClientServerTest(cmdline_args_copy, "fi_rdm_atomic", iteration_type, completion_semantic, - memory_type=memory_type, timeout=1800, fabric=fabric) + memory_type=memory_type, timeout=1800, fabric="efa") test.run() @pytest.mark.functional @@ -123,38 +132,39 @@ def test_rdm_tagged_peek(cmdline_args): test.run() # This test is run in serial mode because it takes a lot of memory +# It is skipped for efa-direct because 1GB exceeds device max msg size @pytest.mark.serial @pytest.mark.functional -def test_rdm_pingpong_1G(cmdline_args, completion_semantic, fabric): +def test_rdm_pingpong_1G(cmdline_args, completion_semantic): # Default window size is 64 resulting in 128GB being registered, which # exceeds max number of registered host pages efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong -W 1", 2, completion_semantic=completion_semantic, message_size=1073741824, - memory_type="host_to_host", warmup_iteration_type=0, fabric=fabric) + memory_type="host_to_host", warmup_iteration_type=0, fabric="efa") @pytest.mark.functional -def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type_bi_dir, zcpy_recv_max_msg_size, zcpy_recv_message_size, fabric): +def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type_bi_dir, zcpy_recv_max_msg_size, zcpy_recv_message_size): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}", - "short", "transmit_complete", memory_type_bi_dir, zcpy_recv_message_size, fabric=fabric) + "short", "transmit_complete", memory_type_bi_dir, zcpy_recv_message_size, fabric="efa") @pytest.mark.functional -def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size, fabric): +def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}", - "short", "transmit_complete", memory_type, zcpy_recv_message_size, fabric=fabric) + "short", "transmit_complete", memory_type, zcpy_recv_message_size, fabric="efa") @pytest.mark.functional -def test_rdm_bw_zcpy_recv_use_fi_more(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size, fabric): +def test_rdm_bw_zcpy_recv_use_fi_more(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_bw --use-fi-more --max-msg-size {zcpy_recv_max_msg_size}", - "short", "transmit_complete", memory_type, zcpy_recv_message_size, fabric=fabric) + "short", "transmit_complete", memory_type, zcpy_recv_message_size, fabric="efa") diff --git a/fabtests/pytest/efa/test_rma_bw.py b/fabtests/pytest/efa/test_rma_bw.py index 49b30058fbd..a82a9c51db6 100644 --- a/fabtests/pytest/efa/test_rma_bw.py +++ b/fabtests/pytest/efa/test_rma_bw.py @@ -7,16 +7,17 @@ @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rma_bw(cmdline_args, iteration_type, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type, fabric): +def test_rma_bw(cmdline_args, iteration_type, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type, direct_rma_size, rma_fabric): command = "fi_rma_bw -e rdm" command = command + " -o " + rma_operation_type + " " + perf_progress_model_cli # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args.timeout) efa_run_client_server_test(cmdline_args, command, iteration_type, rma_bw_completion_semantic, - rma_bw_memory_type, "all", timeout=timeout, fabric=fabric) + rma_bw_memory_type, direct_rma_size if rma_fabric == "efa-direct" else "all", + timeout=timeout, fabric=rma_fabric) @pytest.mark.parametrize("env_vars", [["FI_EFA_TX_SIZE=64"], ["FI_EFA_RX_SIZE=64"], ["FI_EFA_TX_SIZE=64", "FI_EFA_RX_SIZE=64"]]) -def test_rma_bw_small_tx_rx(cmdline_args, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type, env_vars, fabric): +def test_rma_bw_small_tx_rx(cmdline_args, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type, env_vars, direct_rma_size, rma_fabric): cmdline_args_copy = copy.copy(cmdline_args) for env_var in env_vars: cmdline_args_copy.append_environ(env_var) @@ -26,34 +27,39 @@ def test_rma_bw_small_tx_rx(cmdline_args, rma_operation_type, rma_bw_completion_ # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args_copy.timeout) efa_run_client_server_test(cmdline_args_copy, command, "short", rma_bw_completion_semantic, - rma_bw_memory_type, "all", timeout=timeout, fabric=fabric) + rma_bw_memory_type, direct_rma_size if rma_fabric == "efa-direct" else "all", + timeout=timeout, fabric=rma_fabric) @pytest.mark.functional -def test_rma_bw_range(cmdline_args, rma_operation_type, rma_bw_completion_semantic, message_size, rma_bw_memory_type, fabric): +def test_rma_bw_range(cmdline_args, rma_operation_type, rma_bw_completion_semantic, message_size, direct_rma_size, rma_bw_memory_type, rma_fabric): command = "fi_rma_bw -e rdm" command = command + " -o " + rma_operation_type # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args.timeout) efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, - rma_bw_memory_type, message_size, timeout=timeout, fabric=fabric) + rma_bw_memory_type, direct_rma_size if rma_fabric == "efa-direct" else message_size, + timeout=timeout, fabric=rma_fabric) @pytest.mark.functional -def test_rma_bw_range_no_inject(cmdline_args, rma_operation_type, rma_bw_completion_semantic, inject_message_size, fabric): +def test_rma_bw_range_no_inject(cmdline_args, rma_operation_type, rma_bw_completion_semantic, inject_message_size, rma_fabric): + if rma_fabric == "efa-direct": + pytest.skip("Duplicate test. efa-direct has inject size = 0") command = "fi_rma_bw -e rdm -j 0" command = command + " -o " + rma_operation_type # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args.timeout) efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, - "host_to_host", inject_message_size, timeout=timeout, fabric=fabric) + "host_to_host", inject_message_size, timeout=timeout, fabric=rma_fabric) # This test is run in serial mode because it takes a lot of memory @pytest.mark.serial @pytest.mark.functional # TODO Add "writedata", "write" back in when EFA firmware bug is fixed +# TODO enable efa-direct test after fixing fabtests to post recv within device max msg size. @pytest.mark.parametrize("operation_type", ["read"]) -def test_rma_bw_1G(cmdline_args, operation_type, rma_bw_completion_semantic, fabric): +def test_rma_bw_1G(cmdline_args, operation_type, rma_bw_completion_semantic): # Default window size is 64 resulting in 128GB being registered, which # exceeds max number of registered host pages timeout = max(540, cmdline_args.timeout) @@ -61,14 +67,15 @@ def test_rma_bw_1G(cmdline_args, operation_type, rma_bw_completion_semantic, fab command = command + " -o " + operation_type efa_run_client_server_test(cmdline_args, command, 2, completion_semantic=rma_bw_completion_semantic, message_size=1073741824, - memory_type="host_to_host", warmup_iteration_type=0, timeout=timeout, fabric=fabric) + memory_type="host_to_host", warmup_iteration_type=0, timeout=timeout, fabric="efa") @pytest.mark.functional @pytest.mark.parametrize("operation_type", ["writedata", "write"]) -def test_rma_bw_use_fi_more(cmdline_args, operation_type, rma_bw_completion_semantic, inject_message_size, fabric): +def test_rma_bw_use_fi_more(cmdline_args, operation_type, rma_bw_completion_semantic, inject_message_size, direct_rma_size, rma_fabric): command = "fi_rma_bw -e rdm -j 0 --use-fi-more" command = command + " -o " + operation_type # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args.timeout) efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, - "host_to_host", inject_message_size, timeout=timeout, fabric=fabric) + "host_to_host", direct_rma_size if rma_fabric == "efa-direct" else inject_message_size, + timeout=timeout, fabric=rma_fabric) diff --git a/fabtests/pytest/efa/test_rma_pingpong.py b/fabtests/pytest/efa/test_rma_pingpong.py index 0c1869614e7..e68bb33fe65 100644 --- a/fabtests/pytest/efa/test_rma_pingpong.py +++ b/fabtests/pytest/efa/test_rma_pingpong.py @@ -14,26 +14,29 @@ def rma_pingpong_message_size(request): @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rma_pingpong(cmdline_args, iteration_type, operation_type, rma_bw_completion_semantic, memory_type_bi_dir, fabric): +def test_rma_pingpong(cmdline_args, iteration_type, operation_type, rma_bw_completion_semantic, memory_type_bi_dir, direct_rma_size, rma_fabric): command = "fi_rma_pingpong -e rdm" command = command + " -o " + operation_type + " " + perf_progress_model_cli efa_run_client_server_test(cmdline_args, command, iteration_type, rma_bw_completion_semantic, - memory_type_bi_dir, "all", fabric=fabric) + memory_type_bi_dir, direct_rma_size if rma_fabric == "efa-direct" else "all", fabric=rma_fabric) @pytest.mark.functional @pytest.mark.parametrize("operation_type", ["writedata"]) -def test_rma_pingpong_range(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, memory_type_bi_dir, fabric): +def test_rma_pingpong_range(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, + direct_rma_size, memory_type_bi_dir, rma_fabric): command = "fi_rma_pingpong -e rdm" command = command + " -o " + operation_type efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, - memory_type_bi_dir, rma_pingpong_message_size, fabric=fabric) + memory_type_bi_dir, direct_rma_size if rma_fabric == "efa-direct" else rma_pingpong_message_size, fabric=rma_fabric) @pytest.mark.functional @pytest.mark.parametrize("operation_type", ["writedata"]) -def test_rma_pingpong_range_no_inject(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, memory_type_bi_dir, fabric): +def test_rma_pingpong_range_no_inject(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, memory_type_bi_dir, rma_fabric): + if rma_fabric == "efa-direct": + pytest.skip("Duplicate test. efa-direct has inject size = 0") command = "fi_rma_pingpong -e rdm -j 0" command = command + " -o " + operation_type efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, - memory_type_bi_dir, rma_pingpong_message_size, fabric=fabric) + memory_type_bi_dir, rma_pingpong_message_size, fabric=rma_fabric) diff --git a/fabtests/pytest/efa/test_rnr.py b/fabtests/pytest/efa/test_rnr.py index 2d5e2f67b49..bc934a97819 100644 --- a/fabtests/pytest/efa/test_rnr.py +++ b/fabtests/pytest/efa/test_rnr.py @@ -48,9 +48,10 @@ def test_rnr_read_cq_error(cmdline_args, fabric): "writedata": "-c 1 -o writedata -S 4" } +# This test skips efa-direct because it does not have these protocols @pytest.mark.functional @pytest.mark.parametrize("packet_type", packet_type_option_map.keys()) -def test_rnr_queue_resend(cmdline_args, packet_type, fabric): +def test_rnr_queue_resend(cmdline_args, packet_type): from common import ClientServerTest if cmdline_args.server_id == cmdline_args.client_id: @@ -64,5 +65,5 @@ def test_rnr_queue_resend(cmdline_args, packet_type, fabric): cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.strict_fabtests_mode = False test = ClientServerTest(cmdline_args_copy, - "fi_efa_rnr_queue_resend " + packet_type_option_map[packet_type], fabric=fabric) + "fi_efa_rnr_queue_resend " + packet_type_option_map[packet_type], fabric="efa") test.run() diff --git a/fabtests/pytest/efa/test_runt.py b/fabtests/pytest/efa/test_runt.py index e451809e032..cc6ebaec4a5 100644 --- a/fabtests/pytest/efa/test_runt.py +++ b/fabtests/pytest/efa/test_runt.py @@ -5,13 +5,14 @@ # this test must be run in serial mode because it check hw counter +# efa-direct does not have runt read so skip this test @pytest.mark.serial @pytest.mark.functional @pytest.mark.parametrize("memory_type,copy_method", [ pytest.param("cuda_to_cuda", "gdrcopy", marks=pytest.mark.cuda_memory), pytest.param("cuda_to_cuda", "localread", marks=pytest.mark.cuda_memory), pytest.param("neuron_to_neuron", None, marks=pytest.mark.neuron_memory)]) -def test_runt_read_functional(cmdline_args, memory_type, copy_method, fabric): +def test_runt_read_functional(cmdline_args, memory_type, copy_method): """ Verify runt reading protocol is working as expected by sending 1 message of 256 KB. 64 KB of the message will be transfered using EFA device's send capability @@ -46,7 +47,7 @@ def test_runt_read_functional(cmdline_args, memory_type, copy_method, fabric): memory_type=memory_type, message_size="262144", warmup_iteration_type="0", - fabric=fabric) + fabric="efa") server_read_wrs_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_wrs") server_read_bytes_after_test =efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_bytes") diff --git a/fabtests/pytest/efa/test_unexpected_msg.py b/fabtests/pytest/efa/test_unexpected_msg.py index 2e307c0be6c..6e8bc05f0ff 100644 --- a/fabtests/pytest/efa/test_unexpected_msg.py +++ b/fabtests/pytest/efa/test_unexpected_msg.py @@ -6,10 +6,11 @@ SHM_DEFAULT_RX_SIZE = 1024 +# This test skips efa-direct because it does not have unexpected message @pytest.mark.functional @pytest.mark.parametrize("msg_size", [1, 512, 9000, 1048576]) # cover various switch points of shm/efa protocols @pytest.mark.parametrize("msg_count", [1, 1024, 2048]) # below and above shm's default rx size -def test_unexpected_msg(cmdline_args, msg_size, msg_count, memory_type, completion_semantic, fabric): +def test_unexpected_msg(cmdline_args, msg_size, msg_count, memory_type, completion_semantic): from common import ClientServerTest if cmdline_args.server_id == cmdline_args.client_id: if (msg_size > SHM_DEFAULT_MAX_INJECT_SIZE or memory_type != "host_to_host" or completion_semantic == "delivery_complete") and msg_count > SHM_DEFAULT_RX_SIZE: @@ -28,4 +29,4 @@ def test_unexpected_msg(cmdline_args, msg_size, msg_count, memory_type, completi efa_run_client_server_test(cmdline_args, f"fi_unexpected_msg -e rdm -M {msg_count}", iteration_type="short", completion_semantic=completion_semantic, memory_type=memory_type, - message_size=msg_size, completion_type="queue", timeout=1800, fabric=fabric) + message_size=msg_size, completion_type="queue", timeout=1800, fabric="efa")