@@ -63,7 +63,7 @@ def trainer(a, b, c):
63
63
"""
64
64
65
65
import os
66
- from typing import Callable , Dict , Optional , Tuple , Union , Set
66
+ from typing import Callable , Dict , Optional , Tuple , Union
67
67
68
68
from torch .distributed .elastic .multiprocessing .api import ( # noqa: F401
69
69
_validate_full_rank ,
@@ -103,7 +103,6 @@ def start_processes(
103
103
start_method : str = "spawn" ,
104
104
redirects : Union [Std , Dict [int , Std ]] = Std .NONE ,
105
105
tee : Union [Std , Dict [int , Std ]] = Std .NONE ,
106
- filter_local_ranks : Optional [Set [int ]] = None ,
107
106
) -> PContext :
108
107
"""
109
108
Start ``n`` copies of ``entrypoint`` processes with the provided options.
@@ -195,7 +194,6 @@ def start_processes(
195
194
ignored for binaries
196
195
redirects: which std streams to redirect to a log file
197
196
tee: which std streams to redirect + print to console
198
- filter_local_ranks: which ranks' logs to print to console
199
197
200
198
"""
201
199
# listdir raises FileNotFoundError or NotADirectoryError so no need to check manually
@@ -225,9 +223,8 @@ def start_processes(
225
223
redirect_std = redirs [local_rank ]
226
224
redirs [local_rank ] = redirect_std | tee_std
227
225
228
- SYS_STREAM = "" # special case to indicate to output to console
229
- stdouts = dict .fromkeys (range (nprocs ), SYS_STREAM )
230
- stderrs = dict .fromkeys (range (nprocs ), SYS_STREAM )
226
+ stdouts = dict .fromkeys (range (nprocs ), "" )
227
+ stderrs = dict .fromkeys (range (nprocs ), "" )
231
228
tee_stdouts : Dict [int , str ] = {}
232
229
tee_stderrs : Dict [int , str ] = {}
233
230
error_files = {}
@@ -254,19 +251,6 @@ def start_processes(
254
251
if t & Std .ERR == Std .ERR :
255
252
tee_stderrs [local_rank ] = stderrs [local_rank ]
256
253
257
- if filter_local_ranks and local_rank not in filter_local_ranks :
258
- # If stream is tee'd, only write to file, but don't tail
259
- if local_rank in tee_stdouts :
260
- tee_stdouts .pop (local_rank , None )
261
- if local_rank in tee_stderrs :
262
- tee_stderrs .pop (local_rank , None )
263
-
264
- # If stream is not redirected, don't print
265
- if stdouts [local_rank ] == SYS_STREAM :
266
- stdouts [local_rank ] = os .devnull
267
- if stderrs [local_rank ] == SYS_STREAM :
268
- stderrs [local_rank ] = os .devnull
269
-
270
254
error_file = os .path .join (clogdir , "error.json" )
271
255
error_files [local_rank ] = error_file
272
256
log .info ("Setting worker%s reply file to: %s" , local_rank , error_file )
0 commit comments