From e9624f4140b5e084ad197c61a38d0fa9cfed0ba3 Mon Sep 17 00:00:00 2001 From: Yonatan Goldhirsh Date: Wed, 20 Mar 2024 11:45:14 +0000 Subject: [PATCH] prov/efa: Make the inflight read msg per domain Make the in-flight read message counter per domain rather than per peer. This counter is used to avoid the runting read protocol while EFA is busy with a read, because in that case runting read performs worse than a plain read. Since any ongoing read, regardless of peer, makes EFA busy, the counter should be domain-scoped rather than peer-scoped. Signed-off-by: Yonatan Goldhirsh Signed-off-by: Shi Jin --- prov/efa/src/efa_domain.c | 1 + prov/efa/src/efa_domain.h | 1 + prov/efa/src/rdm/efa_rdm_peer.c | 4 ++-- prov/efa/src/rdm/efa_rdm_peer.h | 5 ----- prov/efa/src/rdm/efa_rdm_pke_nonreq.c | 10 ++-------- prov/efa/src/rdm/efa_rdm_pke_rtm.c | 8 ++------ 6 files changed, 8 insertions(+), 21 deletions(-) diff --git a/prov/efa/src/efa_domain.c b/prov/efa/src/efa_domain.c index da7819ecb99..f1a81c89780 100644 --- a/prov/efa/src/efa_domain.c +++ b/prov/efa/src/efa_domain.c @@ -159,6 +159,7 @@ static int efa_domain_init_rdm(struct efa_domain *efa_domain, struct fi_info *in efa_domain->addrlen = (info->src_addr) ? 
info->src_addrlen : info->dest_addrlen; efa_domain->rdm_cq_size = MAX(info->rx_attr->size + info->tx_attr->size, efa_env.cq_size); + efa_domain->num_read_msg_in_flight = 0; return 0; } diff --git a/prov/efa/src/efa_domain.h b/prov/efa/src/efa_domain.h index d91cd1ade79..1d74a9aa2ed 100644 --- a/prov/efa/src/efa_domain.h +++ b/prov/efa/src/efa_domain.h @@ -30,6 +30,7 @@ struct efa_domain { size_t rdm_cq_size; struct dlist_entry list_entry; /* linked to g_efa_domain_list */ struct ofi_genlock srx_lock; /* shared among peer providers */ + uint64_t num_read_msg_in_flight; }; extern struct dlist_entry g_efa_domain_list; diff --git a/prov/efa/src/rdm/efa_rdm_peer.c b/prov/efa/src/rdm/efa_rdm_peer.c index 8f20b6aa170..4a4d526ce4e 100644 --- a/prov/efa/src/rdm/efa_rdm_peer.c +++ b/prov/efa/src/rdm/efa_rdm_peer.c @@ -24,7 +24,6 @@ void efa_rdm_peer_construct(struct efa_rdm_peer *peer, struct efa_rdm_ep *ep, st peer->efa_fiaddr = conn->fi_addr; peer->is_self = efa_is_same_addr(&ep->base_ep.src_addr, conn->ep_addr); peer->host_id = peer->is_self ? ep->host_id : 0; /* Peer host id is exchanged via handshake */ - peer->num_read_msg_in_flight = 0; peer->num_runt_bytes_in_flight = 0; ofi_recvwin_buf_alloc(&peer->robuf, efa_env.recvwin_size); dlist_init(&peer->outstanding_tx_pkts); @@ -272,7 +271,8 @@ int efa_rdm_peer_select_readbase_rtm(struct efa_rdm_peer *peer, int op = ope->op; assert(op == ofi_op_tagged || op == ofi_op_msg); - if (peer->num_read_msg_in_flight == 0 && + + if (efa_rdm_ep_domain(ep)->num_read_msg_in_flight == 0 && efa_rdm_peer_get_runt_size(peer, ep, ope) > 0 && !(ope->fi_flags & FI_DELIVERY_COMPLETE)) { return (op == ofi_op_tagged) ? 
EFA_RDM_RUNTREAD_TAGRTM_PKT diff --git a/prov/efa/src/rdm/efa_rdm_peer.h b/prov/efa/src/rdm/efa_rdm_peer.h index 35815074d84..26d07298a08 100644 --- a/prov/efa/src/rdm/efa_rdm_peer.h +++ b/prov/efa/src/rdm/efa_rdm_peer.h @@ -58,11 +58,6 @@ struct efa_rdm_peer { * @details this value is capped by efa_env.efa_runt_size */ int64_t num_runt_bytes_in_flight; - - /** - * @brief number of messages that are using read based protocol - */ - int64_t num_read_msg_in_flight; }; /** diff --git a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c index d0b7d9bd6cc..e5d735eb28d 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c +++ b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c @@ -646,11 +646,8 @@ void efa_rdm_pke_handle_eor_recv(struct efa_rdm_pke *pkt_entry) { struct efa_rdm_eor_hdr *eor_hdr; struct efa_rdm_ope *txe; - struct efa_rdm_peer *peer; - peer = efa_rdm_ep_get_peer(pkt_entry->ep, pkt_entry->addr); - assert(peer); - peer->num_read_msg_in_flight -= 1; + efa_rdm_ep_domain(pkt_entry->ep)->num_read_msg_in_flight -= 1; eor_hdr = (struct efa_rdm_eor_hdr *)pkt_entry->wiredata; @@ -674,11 +671,8 @@ void efa_rdm_pke_handle_read_nack_recv(struct efa_rdm_pke *pkt_entry) { struct efa_rdm_read_nack_hdr *nack_hdr; struct efa_rdm_ope *txe; - struct efa_rdm_peer *peer; - peer = efa_rdm_ep_get_peer(pkt_entry->ep, pkt_entry->addr); - assert(peer); - peer->num_read_msg_in_flight -= 1; + efa_rdm_ep_domain(pkt_entry->ep)->num_read_msg_in_flight -= 1; nack_hdr = (struct efa_rdm_read_nack_hdr *) pkt_entry->wiredata; diff --git a/prov/efa/src/rdm/efa_rdm_pke_rtm.c b/prov/efa/src/rdm/efa_rdm_pke_rtm.c index a4ed52e6264..cd9939d85d7 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_rtm.c +++ b/prov/efa/src/rdm/efa_rdm_pke_rtm.c @@ -1172,11 +1172,7 @@ ssize_t efa_rdm_pke_init_longread_tagrtm(struct efa_rdm_pke *pkt_entry, */ void efa_rdm_pke_handle_longread_rtm_sent(struct efa_rdm_pke *pkt_entry) { - struct efa_rdm_peer *peer; - - peer = efa_rdm_ep_get_peer(pkt_entry->ep, 
pkt_entry->addr); - assert(peer); - peer->num_read_msg_in_flight += 1; + efa_rdm_ep_domain(pkt_entry->ep)->num_read_msg_in_flight += 1; } /** @@ -1357,7 +1353,7 @@ void efa_rdm_pke_handle_runtread_rtm_sent(struct efa_rdm_pke *pkt_entry) if (efa_rdm_pke_get_runtread_rtm_base_hdr(pkt_entry)->seg_offset == 0 && txe->total_len > txe->bytes_runt) - peer->num_read_msg_in_flight += 1; + efa_rdm_ep_domain(pkt_entry->ep)->num_read_msg_in_flight += 1; } /**