UCT: Removed XGVMI BF2 support (umem)
iyastreb committed Feb 25, 2025
1 parent 224b217 commit d46cf59
Showing 5 changed files with 65 additions and 177 deletions.
3 changes: 1 addition & 2 deletions src/uct/ib/configure.m4
@@ -170,8 +170,7 @@ AS_IF([test "x$with_ib" = "xyes"],
MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE,
MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE,
MLX5DV_UAR_ALLOC_TYPE_BF,
MLX5DV_UAR_ALLOC_TYPE_NC_DEDICATED,
mlx5dv_devx_umem_reg_ex],
MLX5DV_UAR_ALLOC_TYPE_NC_DEDICATED],
[], [], [[#include <infiniband/mlx5dv.h>]])
AC_CHECK_MEMBERS([struct mlx5dv_cq.cq_uar],
[], [], [[#include <infiniband/mlx5dv.h>]])
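For reference, this is how an AC_CHECK_DECLS entry is consumed on the C side, assuming autoconf's usual HAVE_DECL_<SYMBOL> convention (defined to 1 or 0). The removed umem registration path in ib_mlx5dv_md.c below was the only consumer of HAVE_DECL_MLX5DV_DEVX_UMEM_REG_EX, so the declaration check goes with it. A minimal, self-contained sketch:

#include <stdio.h>

/* Not checked by configure.m4 anymore; default to 0 so this sketch compiles
 * standalone. */
#ifndef HAVE_DECL_MLX5DV_DEVX_UMEM_REG_EX
#define HAVE_DECL_MLX5DV_DEVX_UMEM_REG_EX 0
#endif

int main(void)
{
#if HAVE_DECL_MLX5DV_DEVX_UMEM_REG_EX
    puts("umem-based XGVMI registration would be compiled in");
#else
    puts("umem path compiled out; the function would return UCS_ERR_UNSUPPORTED");
#endif
    return 0;
}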
193 changes: 40 additions & 153 deletions src/uct/ib/mlx5/dv/ib_mlx5dv_md.c
@@ -1215,10 +1215,8 @@ uct_ib_mlx5_devx_umr_mkey_create(uct_ib_mlx5_md_t *md)
umr_mkey->mkey->lkey |= UCT_IB_MLX5_MKEY_TAG_UMR;
umr_mkey->mkey->rkey |= UCT_IB_MLX5_MKEY_TAG_UMR;

status = uct_ib_mlx5_devx_allow_xgvmi_access(md, umr_mkey->mkey->lkey, 1);
status = uct_ib_mlx5_devx_allow_xgvmi_access(md, umr_mkey->mkey->lkey, 0);
if (status != UCS_OK) {
/* Reset XGVMI capability flag */
md->flags &= ~UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI;
uct_ib_mlx5_devx_umr_mkey_destroy(md, umr_mkey);
return NULL;
}
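The third argument of uct_ib_mlx5_devx_allow_xgvmi_access() is a silent flag (its definition below forwards it to the ALLOW_OTHER_VHCA_ACCESS command). The diff suggests the contract changes from a silent capability probe to a loud error path, since XGVMI support is now probed once at MD open. A toy model of that distinction, under the assumption that the flag only selects the log level of a failed command:

#include <stdio.h>

/* Stand-in for uct_ib_mlx5_devx_allow_xgvmi_access(); 'failed' simulates the
 * firmware rejecting the ALLOW_OTHER_VHCA_ACCESS command. */
static int allow_xgvmi_access(unsigned lkey, int silent, int failed)
{
    if (failed) {
        if (silent) {
            /* old call sites (probe): failure is expected, keep the log quiet */
            fprintf(stderr, "debug: cross-VHCA access denied for lkey 0x%x\n",
                    lkey);
        } else {
            /* new call sites: support was already verified at MD open, so a
             * failure here is unexpected and is reported loudly */
            fprintf(stderr, "error: cross-VHCA access denied for lkey 0x%x\n",
                    lkey);
        }
        return -1;
    }
    return 0;
}

int main(void)
{
    allow_xgvmi_access(0x1234, 0, 1);
    return 0;
}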
@@ -1493,7 +1491,6 @@ uct_ib_mlx5_devx_mem_dereg(uct_md_h uct_md,
uct_ib_mlx5_md_t *md = ucs_derived_of(uct_md, uct_ib_mlx5_md_t);
uct_ib_mlx5_devx_mem_t *memh;
ucs_status_t status;
int ret;

UCT_MD_MEM_DEREG_CHECK_PARAMS(params, 1);
if (ENABLE_PARAMS_CHECK) {
@@ -1538,14 +1535,6 @@ uct_ib_mlx5_devx_mem_dereg(uct_md_h uct_md,
}
}

if (memh->umem != NULL) {
ret = mlx5dv_devx_umem_dereg(memh->umem);
if (ret < 0) {
ucs_error("mlx5dv_devx_umem_dereg(crossmr) failed: %m");
return UCS_ERR_IO_ERROR;
}
}

if (!(memh->super.flags & UCT_IB_MEM_IMPORTED)) {
if (uct_ib_mlx5_devx_memh_has_ro(md, memh)) {
status = uct_ib_mlx5_devx_dereg_mr(md, memh,
@@ -1883,7 +1872,7 @@ ucs_status_t uct_ib_mlx5_devx_query_cap_2(struct ibv_context *ctx,
"QUERY_HCA_CAP, CAP2", 1);
}

int uct_ib_mlx5_devx_check_xgvmi(void *cap_2, const char *dev_name)
int uct_ib_mlx5_devx_check_xgvmi(void *cap_2)
{
uint64_t object_for_other_vhca;
uint32_t object_to_object;
@@ -1893,16 +1882,10 @@ int uct_ib_mlx5_devx_check_xgvmi(void *cap_2, const char *dev_name)
object_for_other_vhca = UCT_IB_MLX5DV_GET64(
cmd_hca_cap_2, cap_2, allowed_object_for_other_vhca_access);

if ((object_to_object &
UCT_IB_MLX5_HCA_CAPS_2_CROSS_VHCA_OBJ_TO_OBJ_LOCAL_MKEY_TO_REMOTE_MKEY) &&
(object_for_other_vhca &
UCT_IB_MLX5_HCA_CAPS_2_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_MKEY)) {
ucs_debug("%s: cross gvmi alias mkey is supported", dev_name);
return 1;
} else {
ucs_debug("%s: crossing_vhca_mkey is not supported", dev_name);
return 0;
}
return ((object_to_object &
UCT_IB_MLX5_HCA_CAPS_2_CROSS_VHCA_OBJ_TO_OBJ_LOCAL_MKEY_TO_REMOTE_MKEY) &&
(object_for_other_vhca &
UCT_IB_MLX5_HCA_CAPS_2_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_MKEY));
}

static void uct_ib_mlx5_devx_check_dp_ordering(uct_ib_mlx5_md_t *md, void *cap,
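Stripped of the per-device logging (which moves to uct_ib_mlx5_devx_md_open(), see the ucs_debug added further down), the simplified check is a pure predicate over two HCA capability fields. A self-contained restatement with placeholder bit values:

#include <stdint.h>
#include <stdio.h>

/* Placeholder values; the real bits are the PRM constants named in the diff
 * (..._LOCAL_MKEY_TO_REMOTE_MKEY and ..._OTHER_VHCA_ACCESS_MKEY). */
#define CAP_LOCAL_MKEY_TO_REMOTE_MKEY (1u << 0)
#define CAP_OTHER_VHCA_ACCESS_MKEY    (1u << 0)

static int check_xgvmi(uint32_t object_to_object, uint64_t object_for_other_vhca)
{
    /* Both directions are required: aliasing a local mkey to a remote one,
     * and exposing an mkey to another VHCA. */
    return (object_to_object & CAP_LOCAL_MKEY_TO_REMOTE_MKEY) &&
           (object_for_other_vhca & CAP_OTHER_VHCA_ACCESS_MKEY);
}

int main(void)
{
    printf("%d %d\n", check_xgvmi(1, 1), check_xgvmi(1, 0)); /* prints: 1 0 */
    return 0;
}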
@@ -2357,11 +2340,6 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
status = uct_ib_mlx5_devx_query_cap_2(ctx, cap_2_out, out_len);
if (status == UCS_OK) {
cap_2 = UCT_IB_MLX5DV_ADDR_OF(query_hca_cap_out, cap_2_out, capability);
if (uct_ib_mlx5_devx_check_xgvmi(cap_2, uct_ib_device_name(dev))) {
md->flags |= UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI;
md->super.cap_flags |= UCT_MD_FLAG_EXPORTED_MKEY;
}

uct_ib_mlx5_devx_check_mkey_by_name(md, cap_2, dev);
} else {
cap_2 = NULL;
@@ -2452,11 +2430,6 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,
md->super.vhca_id = vhca_id;
md->super.uuid = ucs_generate_uuid((uintptr_t)md);

if ((md->flags & UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI) &&
(md_config->xgvmi_umr_enable == UCS_YES)) {
md->flags |= UCT_IB_MLX5_MD_FLAG_XGVMI_UMR;
}

/* Zero init UMR related fields, will lazy init on first use */
md->umr.cq = NULL;
md->umr.qp = NULL;
@@ -2482,6 +2455,27 @@ ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device,

uct_ib_mlx5_devx_init_flush_mr(md);

/*
* Device capabilities do not allow a reliable check of whether XGVMI for
* indirect mkeys is actually supported. Therefore we do this check by
* allowing XGVMI on the indirect KSM flush_rkey.
*/
if ((cap_2 != NULL) && (md->flush_mr != NULL) &&
uct_ib_mlx5_devx_check_xgvmi(cap_2)) {
ucs_assert(md->super.flush_rkey != UCT_IB_MD_INVALID_FLUSH_RKEY);
status = uct_ib_mlx5_devx_allow_xgvmi_access(md, md->super.flush_rkey, 1);
if (status == UCS_OK) {
md->super.cap_flags |= UCT_MD_FLAG_EXPORTED_MKEY;

if (md_config->xgvmi_umr_enable == UCS_YES) {
md->flags |= UCT_IB_MLX5_MD_FLAG_XGVMI_UMR;
}
}
}

ucs_debug("%s: XGVMI is %ssupported", uct_ib_device_name(dev),
(md->super.cap_flags & UCT_MD_FLAG_EXPORTED_MKEY) ? "" : "not ");

*p_md = &md->super;
ucs_free(buf);
return UCS_OK;
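Taken on its own, the probe added above follows the control flow modeled below. This is a stub sketch, not the real DEVX calls: allow_xgvmi_access() returns a canned answer, while the real uct_ib_mlx5_devx_allow_xgvmi_access() issues ALLOW_OTHER_VHCA_ACCESS on the KSM flush rkey.

#include <stdbool.h>
#include <stdio.h>

#define MD_FLAG_XGVMI_UMR    (1u << 15) /* matches the renumbered UCS_BIT(15) below */
#define MD_CAP_EXPORTED_MKEY (1u << 0)  /* placeholder for UCT_MD_FLAG_EXPORTED_MKEY */

typedef struct {
    unsigned flags;
    unsigned cap_flags;
    unsigned flush_rkey;
    bool     have_flush_mr;
} md_t;

/* Stand-in for uct_ib_mlx5_devx_allow_xgvmi_access(md, rkey, silent): pretend
 * the ALLOW_OTHER_VHCA_ACCESS command succeeded. */
static bool allow_xgvmi_access(md_t *md, unsigned rkey, int silent)
{
    (void)md;
    (void)rkey;
    (void)silent;
    return true;
}

static void probe_xgvmi(md_t *md, bool caps_declare_xgvmi, bool umr_enabled)
{
    /* Capability bits alone cannot tell whether indirect mkeys may be
     * exported, so try to enable cross-VHCA access on the KSM flush rkey and
     * advertise EXPORTED_MKEY only if the firmware accepts it. */
    if (caps_declare_xgvmi && md->have_flush_mr &&
        allow_xgvmi_access(md, md->flush_rkey, 1)) {
        md->cap_flags |= MD_CAP_EXPORTED_MKEY;
        if (umr_enabled) {
            md->flags |= MD_FLAG_XGVMI_UMR;
        }
    }

    printf("XGVMI is %ssupported\n",
           (md->cap_flags & MD_CAP_EXPORTED_MKEY) ? "" : "not ");
}

int main(void)
{
    md_t md = {.flush_rkey = 0x1900, .have_flush_mr = true};

    probe_xgvmi(&md, true, true);
    return 0;
}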
@@ -2675,97 +2669,6 @@ uct_ib_mlx5_devx_allow_xgvmi_access(uct_ib_mlx5_md_t *md,
"ALLOW_OTHER_VHCA_ACCESS", silent);
}

static ucs_status_t uct_ib_mlx5_devx_xgvmi_umem_mr(uct_ib_mlx5_md_t *md,
uct_ib_mlx5_devx_mem_t *memh)
{
#if HAVE_DECL_MLX5DV_DEVX_UMEM_REG_EX
char in[UCT_IB_MLX5DV_ST_SZ_BYTES(create_mkey_in)] = {0};
char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_mkey_out)] = {0};
struct mlx5dv_devx_umem_in umem_in;
struct mlx5dv_devx_obj *cross_mr;
struct mlx5dv_devx_umem *umem;
uint32_t exported_lkey;
ucs_status_t status;
void *aligned_address;
size_t length;
void *mkc;

if (uct_ib_mlx5_devx_has_dm(memh)) {
return UCS_ERR_UNSUPPORTED;
}

length = memh->mrs[UCT_IB_MR_DEFAULT].super.ib->length;

/* register umem */
umem_in.addr = memh->address;
umem_in.size = length;
umem_in.access = UCT_IB_MLX5_MD_UMEM_ACCESS;
aligned_address = ucs_align_down_pow2_ptr(memh->address,
ucs_get_page_size());
umem_in.pgsz_bitmap = UCS_MASK(ucs_ffs64((uint64_t)aligned_address) + 1);
umem_in.comp_mask = 0;

umem = mlx5dv_devx_umem_reg_ex(md->super.dev.ibv_context, &umem_in);
if (umem == NULL) {
uct_ib_md_log_mem_reg_error(&md->super, 0,
"mlx5dv_devx_umem_reg_ex() failed: %m");
status = UCS_ERR_NO_MEMORY;
goto err_out;
}

/* create mkey */
mkc = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
UCT_IB_MLX5DV_SET(create_mkey_in, in, opcode,
UCT_IB_MLX5_CMD_OP_CREATE_MKEY);
UCT_IB_MLX5DV_SET(create_mkey_in, in, translations_octword_actual_size, 1);
UCT_IB_MLX5DV_SET(create_mkey_in, in, mkey_umem_id, umem->umem_id);
UCT_IB_MLX5DV_SET64(create_mkey_in, in, mkey_umem_offset, 0);
UCT_IB_MLX5DV_SET(mkc, mkc, access_mode_1_0,
UCT_IB_MLX5_MKC_ACCESS_MODE_MTT);
UCT_IB_MLX5DV_SET(mkc, mkc, a, 1);
UCT_IB_MLX5DV_SET(mkc, mkc, rw, 1);
UCT_IB_MLX5DV_SET(mkc, mkc, rr, 1);
UCT_IB_MLX5DV_SET(mkc, mkc, lw, 1);
UCT_IB_MLX5DV_SET(mkc, mkc, lr, 1);
UCT_IB_MLX5DV_SET(mkc, mkc, crossing_target_mkey, 1);
UCT_IB_MLX5DV_SET(mkc, mkc, qpn, 0xffffff);
UCT_IB_MLX5DV_SET(mkc, mkc, pd, uct_ib_mlx5_devx_md_get_pdn(md));
UCT_IB_MLX5DV_SET(mkc, mkc, mkey_7_0, md->mkey_tag);
UCT_IB_MLX5DV_SET64(mkc, mkc, start_addr, (uintptr_t)memh->address);
UCT_IB_MLX5DV_SET64(mkc, mkc, len, length);

cross_mr = uct_ib_mlx5_devx_obj_create(md->super.dev.ibv_context, in,
sizeof(in), out, sizeof(out), "MKEY",
uct_md_reg_log_lvl(0));
if (cross_mr == NULL) {
status = UCS_ERR_IO_ERROR;
goto err_umem_dereg;
}

exported_lkey = (UCT_IB_MLX5DV_GET(create_mkey_out, out, mkey_index) << 8) |
md->mkey_tag;

status = uct_ib_mlx5_devx_allow_xgvmi_access(md, exported_lkey, 0);
if (status != UCS_OK) {
goto err_cross_mr_destroy;
}

memh->umem = umem;
memh->cross_mr = cross_mr;
memh->exported_lkey = exported_lkey;
return UCS_OK;

err_cross_mr_destroy:
mlx5dv_devx_obj_destroy(cross_mr);
err_umem_dereg:
mlx5dv_devx_umem_dereg(umem);
err_out:
return status;
#else
return UCS_ERR_UNSUPPORTED;
#endif
}

static ucs_status_t
uct_ib_mlx5_devx_reg_xgvmi_ksm_mr(uct_ib_mlx5_md_t *md,
uct_ib_mlx5_devx_mem_t *memh)
@@ -2781,10 +2684,8 @@ uct_ib_mlx5_devx_reg_xgvmi_ksm_mr(uct_ib_mlx5_md_t *md,
return status;
}

status = uct_ib_mlx5_devx_allow_xgvmi_access(md, exported_lkey, 1);
status = uct_ib_mlx5_devx_allow_xgvmi_access(md, exported_lkey, 0);
if (status != UCS_OK) {
/* Reset XGVMI capability flag */
md->flags &= ~UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI;
mlx5dv_devx_obj_destroy(cross_mr);
return status;
}
@@ -2814,9 +2715,9 @@ UCS_PROFILE_FUNC_ALWAYS(ucs_status_t, uct_ib_mlx5_devx_reg_exported_key,
uct_ib_mlx5_devx_mem_t *memh)
{
size_t length = memh->mrs[UCT_IB_MR_DEFAULT].super.ib->length;
ucs_status_t status;

if (uct_ib_mlx5_devx_has_dm(memh)) {
if (uct_ib_mlx5_devx_has_dm(memh) ||
!(md->super.cap_flags & UCT_MD_FLAG_EXPORTED_MKEY)) {
return UCS_ERR_UNSUPPORTED;
}

@@ -2825,31 +2726,17 @@ UCS_PROFILE_FUNC_ALWAYS(ucs_status_t, uct_ib_mlx5_devx_reg_exported_key,
memh, memh->exported_umr_mkey, memh->cross_mr,
memh->exported_lkey);

if (md->flags & UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI) {
/* UMR bind impl (IBV_WR_BIND_MW) attaches a single KLM segment, so:
* - IBV_WR_BIND_MW supports the maximum region length of 2GB
* - IBV_WR_BIND_MW does not support multi-segment (multi-threaded) MRs
* For these use cases we fallback to KSM */
if (!(md->flags & UCT_IB_MLX5_MD_FLAG_XGVMI_UMR) ||
(length > UCT_IB_MD_MAX_MR_SIZE) ||
(memh->super.flags & UCT_IB_MEM_MULTITHREADED)) {
status = uct_ib_mlx5_devx_reg_xgvmi_ksm_mr(md, memh);
} else {
status = uct_ib_mlx5_devx_reg_xgvmi_umr_mr(md, memh);
}

/* If KSM or UMR implementation fail to enable XGVMI, this capability
* flag is removed by impl, and then we fallback to UMEM impl */
if (md->flags & UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI) {
/* No XGVMI error, return impl status as is */
return status;
}

ucs_debug("%s: indirect xgvmi not supported, fallback to DEVX UMEM",
uct_ib_mlx5_dev_name(md));
/* UMR bind impl (IBV_WR_BIND_MW) attaches a single KLM segment, so:
* - IBV_WR_BIND_MW supports the maximum region length of 2GB
* - IBV_WR_BIND_MW does not support multi-segment (multi-threaded) MRs
* For these use cases we fallback to KSM */
if (!(md->flags & UCT_IB_MLX5_MD_FLAG_XGVMI_UMR) ||
(length > UCT_IB_MD_MAX_MR_SIZE) ||
(memh->super.flags & UCT_IB_MEM_MULTITHREADED)) {
return uct_ib_mlx5_devx_reg_xgvmi_ksm_mr(md, memh);
} else {
return uct_ib_mlx5_devx_reg_xgvmi_umr_mr(md, memh);
}

return uct_ib_mlx5_devx_xgvmi_umem_mr(md, memh);
}

static UCS_F_ALWAYS_INLINE int
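With the DEVX-UMEM path removed, exported-key registration is now a two-way choice between KSM and UMR rather than a three-way fallback chain. A self-contained restatement of the selection rule; MAX_MR_SIZE models UCT_IB_MD_MAX_MR_SIZE (the 2GB IBV_WR_BIND_MW limit from the comment above) and the flag values are placeholders:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_MR_SIZE       (2UL * 1024 * 1024 * 1024) /* models UCT_IB_MD_MAX_MR_SIZE */
#define MD_FLAG_XGVMI_UMR (1u << 15)                 /* matches UCS_BIT(15) below */
#define MEM_MULTITHREADED (1u << 0)                  /* placeholder for UCT_IB_MEM_MULTITHREADED */

static bool use_ksm(unsigned md_flags, unsigned memh_flags, size_t length)
{
    /* UMR bind (IBV_WR_BIND_MW) attaches a single KLM segment, so it cannot
     * cover regions larger than 2GB or multi-threaded (multi-segment) MRs. */
    return !(md_flags & MD_FLAG_XGVMI_UMR) ||
           (length > MAX_MR_SIZE) ||
           (memh_flags & MEM_MULTITHREADED);
}

int main(void)
{
    printf("%s\n", use_ksm(MD_FLAG_XGVMI_UMR, 0, 1024) ? "KSM" : "UMR");      /* UMR */
    printf("%s\n", use_ksm(MD_FLAG_XGVMI_UMR, 0, 3UL << 30) ? "KSM" : "UMR"); /* KSM */
    return 0;
}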
10 changes: 8 additions & 2 deletions src/uct/ib/mlx5/gga/gga_mlx5.c
@@ -19,7 +19,6 @@

#if ENABLE_ASSERT
#define UCT_GGA_MLX5_MD_CAPS (UCT_IB_MLX5_MD_FLAG_DEVX | \
UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI | \
UCT_IB_MLX5_MD_FLAG_MMO_DMA)
#endif /* ENABLE_ASSERT */

@@ -204,7 +203,11 @@ static int uct_ib_mlx5_gga_check_device(struct ibv_device *device)
goto out_close_ctx;
}

result = uct_ib_mlx5_devx_check_xgvmi(cap, ibv_get_device_name(device));
/*
* This check does not guarantee that XGVMI works with indirect mkeys, but it
* lets us filter out devices that do not support XGVMI at all.
*/
result = uct_ib_mlx5_devx_check_xgvmi(cap);

out_close_ctx:
uct_ib_md_device_context_close(ctx);
@@ -825,6 +828,9 @@ uct_gga_mlx5_query_tl_devices(uct_md_h md,
return UCS_ERR_NO_DEVICE;
}

ucs_assertv(mlx5_md->super.cap_flags & UCT_MD_FLAG_EXPORTED_MKEY,
"md %p: cap_flags do not have EXPORTED_MKEY flag", mlx5_md);

ucs_assertv(ucs_test_all_flags(mlx5_md->flags, UCT_GGA_MLX5_MD_CAPS),
"md %p: flags=0x%x do not have mandatory capabilities 0x%x",
mlx5_md, mlx5_md->flags, UCT_GGA_MLX5_MD_CAPS);
27 changes: 15 additions & 12 deletions src/uct/ib/mlx5/ib_mlx5.h
@@ -195,21 +195,18 @@ enum {
UCT_IB_MLX5_MD_FLAG_CQE128_ZIP = UCS_BIT(11),
/* Device performance is optimized when RDMA_WRITE is not used */
UCT_IB_MLX5_MD_FLAG_NO_RDMA_WR_OPTIMIZED = UCS_BIT(12),
/* Device supports indirect xgvmi MR. This flag is removed if xgvmi access
* command fails */
UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI = UCS_BIT(13),
/* Device supports symmetric key creation */
UCT_IB_MLX5_MD_FLAG_MKEY_BY_NAME_RESERVE = UCS_BIT(14),
UCT_IB_MLX5_MD_FLAG_MKEY_BY_NAME_RESERVE = UCS_BIT(13),
/* Device supports DMA MMO */
UCT_IB_MLX5_MD_FLAG_MMO_DMA = UCS_BIT(15),
UCT_IB_MLX5_MD_FLAG_MMO_DMA = UCS_BIT(14),
/* Device supports XGVMI UMR workflow */
UCT_IB_MLX5_MD_FLAG_XGVMI_UMR = UCS_BIT(16),
UCT_IB_MLX5_MD_FLAG_XGVMI_UMR = UCS_BIT(15),
/* Device supports UAR WC allocation type */
UCT_IB_MLX5_MD_FLAG_UAR_USE_WC = UCS_BIT(17),
UCT_IB_MLX5_MD_FLAG_UAR_USE_WC = UCS_BIT(16),
/* Device supports implicit ODP with PCI relaxed order */
UCT_IB_MLX5_MD_FLAG_GVA_RO = UCS_BIT(18),
UCT_IB_MLX5_MD_FLAG_GVA_RO = UCS_BIT(17),
/* Device supports forcing ordering configuration */
UCT_IB_MLX5_MD_FLAG_DP_ORDERING_FORCE = UCS_BIT(19),
UCT_IB_MLX5_MD_FLAG_DP_ORDERING_FORCE = UCS_BIT(18),

/* Object to be created by DevX */
UCT_IB_MLX5_MD_FLAG_DEVX_OBJS_SHIFT = 20,
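Dropping UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI (previously UCS_BIT(13)) shifts each later capability flag down by one bit, while UCT_IB_MLX5_MD_FLAG_DEVX_OBJS_SHIFT stays at 20, leaving bit 19 unused in this hunk. A tiny self-contained check of the renumbered values, assuming UCS_BIT(n) is the usual (1ul << n):

#include <stdio.h>

#define UCS_BIT(n) (1ul << (n))

enum {
    MD_FLAG_MKEY_BY_NAME_RESERVE = UCS_BIT(13), /* was UCS_BIT(14) */
    MD_FLAG_MMO_DMA              = UCS_BIT(14), /* was UCS_BIT(15) */
    MD_FLAG_XGVMI_UMR            = UCS_BIT(15), /* was UCS_BIT(16) */
    MD_FLAG_UAR_USE_WC           = UCS_BIT(16), /* was UCS_BIT(17) */
    MD_FLAG_GVA_RO               = UCS_BIT(17), /* was UCS_BIT(18) */
    MD_FLAG_DP_ORDERING_FORCE    = UCS_BIT(18), /* was UCS_BIT(19) */
    MD_FLAG_DEVX_OBJS_SHIFT      = 20           /* unchanged */
};

int main(void)
{
    printf("XGVMI_UMR=0x%lx DP_ORDERING_FORCE=0x%lx DEVX objects start at bit %d\n",
           (unsigned long)MD_FLAG_XGVMI_UMR,
           (unsigned long)MD_FLAG_DP_ORDERING_FORCE, MD_FLAG_DEVX_OBJS_SHIFT);
    return 0;
}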
@@ -320,7 +317,6 @@ typedef struct {
void *address;
struct mlx5dv_devx_obj *atomic_dvmr;
struct mlx5dv_devx_obj *indirect_dvmr;
struct mlx5dv_devx_umem *umem;
struct mlx5dv_devx_obj *cross_mr;
uct_ib_mlx5_devx_umr_mkey_t *exported_umr_mkey;
struct mlx5dv_devx_obj *smkey_mr;
@@ -1170,12 +1166,19 @@ uct_ib_mlx5_devx_mem_reg(uct_md_h uct_md, void *address, size_t length,
const uct_md_mem_reg_params_t *params,
uct_mem_h *memh_p);

/**
* Check if the device capabilities declare XGVMI support.
* This function detects whether the device supports XGVMI, but there is no
* way to detect whether XGVMI works with indirect mkeys. Currently we only
* support XGVMI with indirect mkeys.
* TODO: FW should expose this capability
*/
int uct_ib_mlx5_devx_check_xgvmi(void *cap_2);

ucs_status_t
uct_ib_mlx5_devx_mem_dereg(uct_md_h uct_md,
const uct_md_mem_dereg_params_t *params);

int uct_ib_mlx5_devx_check_xgvmi(void *cap_2, const char *dev_name);

ucs_status_t uct_ib_mlx5_devx_query_cap(struct ibv_context *ctx, uint32_t opmod,
void *out, size_t size, char *msg_arg,
int silent);
9 changes: 1 addition & 8 deletions test/gtest/uct/ib/test_devx.cc
@@ -58,14 +58,7 @@ class test_devx_umr_mkey : public test_devx {
}

bool check_xgvmi() const {
if (md()->flags & UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI) {
uct_ib_mlx5_devx_mem_t *memh = create_memh(1);
ASSERT_UCS_OK(uct_ib_mlx5_devx_reg_exported_key(md(), memh));
/* XGVMI capability might be dropped by this point! */
destroy_memh(memh);
}

return md()->flags & UCT_IB_MLX5_MD_FLAG_INDIRECT_XGVMI;
return md()->super.cap_flags & UCT_MD_FLAG_EXPORTED_MKEY;
}

void skip_no_xgvmi() const {
