Skip to content

Commit

Permalink
prov/efa: Do not allocate rdm_peer struct for efa-direct and dgram paths
Browse files: browse the repository at this point in the history
Use a separate bufpool to allocate the peer struct in the rdm path

Signed-off-by: Sai Sunku <sunkusa@amazon.com>
  • Loading branch information
sunkuamzn committed Mar 3, 2025
1 parent 37f4902 commit fddf018
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 13 deletions.
49 changes: 38 additions & 11 deletions prov/efa/src/efa_av.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,13 @@ int efa_conn_rdm_init(struct efa_av *av, struct efa_conn *conn, bool insert_shm_
assert(!dlist_empty(&av->util_av.ep_list));
efa_rdm_ep = container_of(av->util_av.ep_list.next, struct efa_rdm_ep, base_ep.util_ep.av_entry);

peer = &conn->rdm_peer;
efa_rdm_peer_construct(peer, efa_rdm_ep, conn);
peer = (struct efa_rdm_peer *) ofi_buf_alloc(av->rdm_peer_pool);
if (!peer) {
EFA_WARN(FI_LOG_AV, "Unable to allocate memory for peer struct");
return -FI_ENOMEM;
}
conn->rdm_peer = peer;
efa_rdm_peer_construct(conn->rdm_peer, efa_rdm_ep, conn);

/*
* The efa_conn_rdm_init() call can be made in two situations:
Expand Down Expand Up @@ -349,7 +354,7 @@ void efa_conn_rdm_deinit(struct efa_av *av, struct efa_conn *conn)

assert(av->domain->info_type == EFA_INFO_RDM);

peer = &conn->rdm_peer;
peer = conn->rdm_peer;
if (peer->is_local && av->shm_rdm_av) {
err = fi_av_remove(av->shm_rdm_av, &peer->shm_fiaddr, 1, 0);
if (err) {
Expand All @@ -366,6 +371,7 @@ void efa_conn_rdm_deinit(struct efa_av *av, struct efa_conn *conn)
*/
ep = dlist_empty(&av->util_av.ep_list) ? NULL : container_of(av->util_av.ep_list.next, struct efa_rdm_ep, base_ep.util_ep.av_entry);
efa_rdm_peer_destruct(peer, ep);
ofi_buf_free(peer);
}

/*
Expand Down Expand Up @@ -819,6 +825,8 @@ static int efa_av_close(struct fid *fid)
fi_strerror(err));
}
}
if (av->rdm_peer_pool)
ofi_bufpool_destroy(av->rdm_peer_pool);
}
free(av);
return err;
Expand Down Expand Up @@ -848,11 +856,6 @@ int efa_av_init_util_av(struct efa_domain *efa_domain,
void *context)
{
struct util_av_attr util_attr;
size_t universe_size;

if (fi_param_get_size_t(NULL, "universe_size",
&universe_size) == FI_SUCCESS)
attr->count = MAX(attr->count, universe_size);

util_attr.addrlen = EFA_EP_ADDR_LEN;
util_attr.context_len = sizeof(struct efa_av_entry) - EFA_EP_ADDR_LEN;
Expand All @@ -868,6 +871,7 @@ int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,
struct efa_av *av;
struct fi_av_attr av_attr = { 0 };
int ret, retv;
size_t universe_size;

if (!attr)
return -FI_EINVAL;
Expand Down Expand Up @@ -900,12 +904,32 @@ int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,

efa_domain = container_of(domain_fid, struct efa_domain, util_domain.domain_fid);

if (fi_param_get_size_t(NULL, "universe_size",
&universe_size) == FI_SUCCESS)
attr->count = MAX(attr->count, universe_size);

ret = efa_av_init_util_av(efa_domain, attr, &av->util_av, context);
if (ret)
goto err;

if (efa_domain->info_type == EFA_INFO_RDM) {
av_attr = *attr;
/* In the rdm path, we need a bufpool for the rdm peer entries */
struct ofi_bufpool_attr rdm_peer_pool_attr = {
.size = sizeof(struct efa_rdm_peer),
.alignment = 16,
.chunk_cnt = attr->count,
.max_cnt = 0,
/* Don't track buffer use, because the user can close
* the AV without first removing its addresses */
.flags = OFI_BUFPOOL_NO_TRACK,
};

ret = ofi_bufpool_create_attr(&rdm_peer_pool_attr,
&av->rdm_peer_pool);
if (ret)
goto err_close_util_av;

if (efa_domain->fabric && efa_domain->fabric->shm_fabric) {
/*
* shm av supports maximum 256 entries
Expand All @@ -917,14 +941,14 @@ int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,
EFA_WARN(FI_LOG_AV, "The requested av size is beyond"
" shm supported maximum av size: %s\n",
fi_strerror(-ret));
goto err_close_util_av;
goto err_destroy_peer_bufpool;
}
av_attr.count = efa_env.shm_av_size;
assert(av_attr.type == FI_AV_TABLE);
ret = fi_av_open(efa_domain->shm_domain, &av_attr,
&av->shm_rdm_av, context);
if (ret)
goto err_close_util_av;
goto err_destroy_peer_bufpool;
}
}

Expand All @@ -944,6 +968,10 @@ int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,

return 0;

err_destroy_peer_bufpool:
if (av->rdm_peer_pool)
ofi_bufpool_destroy(av->rdm_peer_pool);

err_close_util_av:
retv = ofi_av_close(&av->util_av);
if (retv)
Expand All @@ -953,4 +981,3 @@ int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,
free(av);
return ret;
}

3 changes: 2 additions & 1 deletion prov/efa/src/efa_av.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ struct efa_conn {
struct efa_ah *ah;
struct efa_ep_addr *ep_addr;
fi_addr_t fi_addr;
struct efa_rdm_peer rdm_peer;
struct efa_rdm_peer *rdm_peer;
};

struct efa_av_entry {
Expand Down Expand Up @@ -69,6 +69,7 @@ struct efa_av {
struct efa_prv_reverse_av *prv_reverse_av;
struct efa_ah *ah_map;
struct util_av util_av;
struct ofi_bufpool *rdm_peer_pool;
};

int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,
Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_ep_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ struct efa_rdm_peer *efa_rdm_ep_get_peer(struct efa_rdm_ep *ep, fi_addr_t addr)
util_av_entry = ofi_bufpool_get_ibuf(ep->base_ep.util_ep.av->av_entry_pool,
addr);
av_entry = (struct efa_av_entry *)util_av_entry->data;
return av_entry->conn.ep_addr ? &av_entry->conn.rdm_peer : NULL;
return av_entry->conn.ep_addr ? av_entry->conn.rdm_peer : NULL;
}

/**
Expand Down

0 comments on commit fddf018

Please sign in to comment.