Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CL/HIER: 2step reduce algorithm #854

Merged
merged 2 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/components/cl/hier/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ bcast = \
bcast/bcast.c \
bcast/bcast_2step.c

reduce = \
reduce/reduce.h \
reduce/reduce.c \
reduce/reduce_2step.c

sources = \
cl_hier.h \
cl_hier.c \
Expand All @@ -37,7 +42,8 @@ sources = \
$(alltoallv) \
$(alltoall) \
$(barrier) \
$(bcast)
$(bcast) \
$(reduce)

module_LTLIBRARIES = libucc_cl_hier.la
libucc_cl_hier_la_SOURCES = $(sources)
Expand Down
5 changes: 5 additions & 0 deletions src/components/cl/hier/cl_hier.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ static ucc_config_field_t ucc_cl_hier_lib_config_table[] = {
ucc_offsetof(ucc_cl_hier_lib_config_t, bcast_2step_pipeline),
UCC_CONFIG_TYPE_PIPELINE_PARAMS},

{"REDUCE_2STEP_PIPELINE", "n",
"Pipelining settings for RAB reduce algorithm",
ucc_offsetof(ucc_cl_hier_lib_config_t, reduce_2step_pipeline),
UCC_CONFIG_TYPE_PIPELINE_PARAMS},

{NULL}};

static ucs_config_field_t ucc_cl_hier_context_config_table[] = {
Expand Down
1 change: 1 addition & 0 deletions src/components/cl/hier/cl_hier.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ typedef struct ucc_cl_hier_lib_config {
ucc_pipeline_params_t allreduce_split_rail_pipeline;
ucc_pipeline_params_t allreduce_rab_pipeline;
ucc_pipeline_params_t bcast_2step_pipeline;
ucc_pipeline_params_t reduce_2step_pipeline;
} ucc_cl_hier_lib_config_t;

typedef struct ucc_cl_hier_context_config {
Expand Down
49 changes: 32 additions & 17 deletions src/components/cl/hier/cl_hier_coll.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -13,7 +13,8 @@
const char *
ucc_cl_hier_default_alg_select_str[UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR] = {
UCC_CL_HIER_ALLREDUCE_DEFAULT_ALG_SELECT_STR,
UCC_CL_HIER_BCAST_DEFAULT_ALG_SELECT_STR};
UCC_CL_HIER_BCAST_DEFAULT_ALG_SELECT_STR,
UCC_CL_HIER_REDUCE_DEFAULT_ALG_SELECT_STR};

ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
Expand All @@ -22,14 +23,16 @@ ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
switch (coll_args->args.coll_type) {
case UCC_COLL_TYPE_ALLREDUCE:
return ucc_cl_hier_allreduce_rab_init(coll_args, team, task);
case UCC_COLL_TYPE_BARRIER:
return ucc_cl_hier_barrier_init(coll_args, team, task);
case UCC_COLL_TYPE_ALLTOALL:
return ucc_cl_hier_alltoall_init(coll_args, team, task);
case UCC_COLL_TYPE_ALLTOALLV:
return ucc_cl_hier_alltoallv_init(coll_args, team, task);
case UCC_COLL_TYPE_BARRIER:
return ucc_cl_hier_barrier_init(coll_args, team, task);
case UCC_COLL_TYPE_BCAST:
return ucc_cl_hier_bcast_2step_init(coll_args, team, task);
case UCC_COLL_TYPE_REDUCE:
return ucc_cl_hier_reduce_2step_init(coll_args, team, task);
default:
cl_error(team->context->lib, "coll_type %s is not supported",
ucc_coll_type_str(coll_args->args.coll_type));
Expand All @@ -41,14 +44,16 @@ ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
static inline int alg_id_from_str(ucc_coll_type_t coll_type, const char *str)
{
switch (coll_type) {
case UCC_COLL_TYPE_ALLREDUCE:
return ucc_cl_hier_allreduce_alg_from_str(str);
case UCC_COLL_TYPE_ALLTOALLV:
return ucc_cl_hier_alltoallv_alg_from_str(str);
case UCC_COLL_TYPE_ALLTOALL:
return ucc_cl_hier_alltoall_alg_from_str(str);
case UCC_COLL_TYPE_ALLREDUCE:
return ucc_cl_hier_allreduce_alg_from_str(str);
case UCC_COLL_TYPE_BCAST:
return ucc_cl_hier_bcast_alg_from_str(str);
case UCC_COLL_TYPE_REDUCE:
return ucc_cl_hier_reduce_alg_from_str(str);
default:
break;
}
Expand All @@ -66,6 +71,19 @@ ucc_status_t ucc_cl_hier_alg_id_to_init(int alg_id, const char *alg_id_str,
}

switch (coll_type) {
case UCC_COLL_TYPE_ALLREDUCE:
switch (alg_id) {
case UCC_CL_HIER_ALLREDUCE_ALG_RAB:
*init = ucc_cl_hier_allreduce_rab_init;
break;
case UCC_CL_HIER_ALLREDUCE_ALG_SPLIT_RAIL:
*init = ucc_cl_hier_allreduce_split_rail_init;
break;
default:
status = UCC_ERR_INVALID_PARAM;
break;
};
break;
case UCC_COLL_TYPE_ALLTOALLV:
switch (alg_id) {
case UCC_CL_HIER_ALLTOALLV_ALG_NODE_SPLIT:
Expand All @@ -86,28 +104,25 @@ ucc_status_t ucc_cl_hier_alg_id_to_init(int alg_id, const char *alg_id_str,
break;
};
break;
case UCC_COLL_TYPE_ALLREDUCE:
case UCC_COLL_TYPE_BCAST:
switch (alg_id) {
case UCC_CL_HIER_ALLREDUCE_ALG_RAB:
*init = ucc_cl_hier_allreduce_rab_init;
break;
case UCC_CL_HIER_ALLREDUCE_ALG_SPLIT_RAIL:
*init = ucc_cl_hier_allreduce_split_rail_init;
case UCC_CL_HIER_BCAST_ALG_2STEP:
*init = ucc_cl_hier_bcast_2step_init;
break;
default:
status = UCC_ERR_INVALID_PARAM;
break;
};
break;
case UCC_COLL_TYPE_BCAST:
switch (alg_id) {
case UCC_CL_HIER_BCAST_ALG_2STEP:
*init = ucc_cl_hier_bcast_2step_init;
case UCC_COLL_TYPE_REDUCE:
switch(alg_id) {
case UCC_CL_HIER_REDUCE_ALG_2STEP:
*init = ucc_cl_hier_reduce_2step_init;
break;
default:
status = UCC_ERR_INVALID_PARAM;
break;
};
}
break;
default:
status = UCC_ERR_NOT_SUPPORTED;
Expand Down
5 changes: 3 additions & 2 deletions src/components/cl/hier/cl_hier_coll.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -14,8 +14,9 @@
#include "alltoall/alltoall.h"
#include "barrier/barrier.h"
#include "bcast/bcast.h"
#include "reduce/reduce.h"

#define UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR 2
#define UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR 3

extern const char
*ucc_cl_hier_default_alg_select_str[UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR];
Expand Down
17 changes: 17 additions & 0 deletions src/components/cl/hier/reduce/reduce.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#include "reduce.h"
#include "../reduce/reduce.h"

/*
 * Reduce algorithms exposed by CL/HIER, indexed by algorithm id.
 * The table has UCC_CL_HIER_REDUCE_ALG_LAST + 1 slots: one per algorithm
 * plus a trailing entry whose name and desc are NULL.
 */
ucc_base_coll_alg_info_t
    ucc_cl_hier_reduce_algs[UCC_CL_HIER_REDUCE_ALG_LAST + 1] = {
        [UCC_CL_HIER_REDUCE_ALG_2STEP] = {
            .id   = UCC_CL_HIER_REDUCE_ALG_2STEP,
            .name = "2step",
            .desc = "intra-node and inter-node reduces executed in parallel",
        },
        /* Terminating entry: no name, no description. */
        [UCC_CL_HIER_REDUCE_ALG_LAST] = {
            .id   = 0,
            .name = NULL,
            .desc = NULL,
        },
};
38 changes: 38 additions & 0 deletions src/components/cl/hier/reduce/reduce.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#ifndef REDUCE_H_
#define REDUCE_H_
#include "../cl_hier.h"

/* Identifiers of the reduce algorithms implemented by CL/HIER. */
enum {
    /* Two-step reduce; first (and currently only) algorithm id. */
    UCC_CL_HIER_REDUCE_ALG_2STEP = 0,
    /* One past the last valid algorithm id; also the algorithm count. */
    UCC_CL_HIER_REDUCE_ALG_LAST
};

/*
 * Table describing the CL/HIER reduce algorithms, one entry per algorithm id
 * plus one extra slot at index UCC_CL_HIER_REDUCE_ALG_LAST.
 * Declared here; the definition lives in a .c file.
 */
extern ucc_base_coll_alg_info_t
ucc_cl_hier_reduce_algs[UCC_CL_HIER_REDUCE_ALG_LAST + 1];

/*
 * Default algorithm-selection string for reduce.
 * NOTE(review): presumably reads as "<coll>:<msg size range>:@<algorithm>",
 * i.e. use "2step" for reduce messages in the 0-4k range - confirm against
 * the UCC algorithm-selection string grammar.
 */
#define UCC_CL_HIER_REDUCE_DEFAULT_ALG_SELECT_STR "reduce:0-4k:@2step"

/*
 * Init routine for the "2step" reduce algorithm (implementation is in a
 * separate translation unit and not visible from this header).
 *
 * NOTE(review): parameter roles below are inferred from names and types only;
 * confirm against the implementation.
 *   coll_args - collective arguments for the operation
 *   team      - team on which the collective is initialized
 *   task      - output location for the created collective task
 *
 * Returns a ucc_status_t.
 */
ucc_status_t ucc_cl_hier_reduce_2step_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
ucc_coll_task_t **task);

/*
 * Map an algorithm name to its reduce algorithm id.
 *
 * Compares str case-insensitively against the name of every entry of
 * ucc_cl_hier_reduce_algs with index below UCC_CL_HIER_REDUCE_ALG_LAST.
 *
 * Returns the index of the first matching entry, or
 * UCC_CL_HIER_REDUCE_ALG_LAST when no name matches.
 */
static inline int ucc_cl_hier_reduce_alg_from_str(const char *str)
{
    int alg_id = 0;

    /* Advance until a name matches or all real entries are exhausted. */
    while (alg_id < UCC_CL_HIER_REDUCE_ALG_LAST &&
           strcasecmp(str, ucc_cl_hier_reduce_algs[alg_id].name) != 0) {
        alg_id++;
    }
    return alg_id;
}

#endif
Loading
Loading