From 507ce174b0839309987e88f7920478597e226e4d Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 11 Mar 2021 22:35:26 -0800 Subject: [PATCH 1/2] propagate the error --- deepspeed/launcher/runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deepspeed/launcher/runner.py b/deepspeed/launcher/runner.py index 6ce482060358..582f2a8873a5 100755 --- a/deepspeed/launcher/runner.py +++ b/deepspeed/launcher/runner.py @@ -359,6 +359,11 @@ def main(args=None): result = subprocess.Popen(cmd, env=env) result.wait() + # In case of failure must propagate the error-condition back to the caller (usually shell). The + # actual error and traceback should have been printed in the subprocess, so in order to avoid + # unnecessary noise we just quietly exit here with the same code as the subprocess + if result.returncode > 0: + sys.exit(result.returncode) if __name__ == "__main__": main() From 8eb0ceefee13209f0d51f3ffcfda22315cab83bf Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 11 Mar 2021 22:47:26 -0800 Subject: [PATCH 2/2] style --- deepspeed/launcher/runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepspeed/launcher/runner.py b/deepspeed/launcher/runner.py index 582f2a8873a5..ac873f4ca3f4 100755 --- a/deepspeed/launcher/runner.py +++ b/deepspeed/launcher/runner.py @@ -365,5 +365,6 @@ def main(args=None): if result.returncode > 0: sys.exit(result.returncode) + if __name__ == "__main__": main()