GPU UT - enable for torchrec example (#527)
Summary:
Pull Request resolved: #527

Replaces how we launch distributed UTs for the torchrec example, swapping the hand-rolled torch.distributed elastic-launch setup for torchtnt's spawn_multi_process test helper, and removes a pyre-fixme annotation that is no longer needed.

Reviewed By: anshulverma

Differential Revision: D48912943

fbshipit-source-id: 96955f1aa64891fdf9ffc671a99bc5aa472034d7
galrotem authored and facebook-github-bot committed Sep 1, 2023
1 parent 23f9d18 commit 1f703d5
Showing 1 changed file with 6 additions and 23 deletions: examples/torchrec/tests/torchrec_example_test.py
@@ -6,38 +6,21 @@
 # LICENSE file in the root directory of this source tree.
 
 import unittest
-import uuid
 
 import torch
-from torch.distributed import launcher
-from torchtnt.utils.test_utils import skip_if_asan
+from torchtnt.utils.test_utils import skip_if_asan, spawn_multi_process
 
 from ..main import main
 
 
-MIN_NODES = 1
-MAX_NODES = 1
-PROC_PER_NODE = 2
+class TorchrecExampleTest(unittest.TestCase):
 
+    cuda_available: bool = torch.cuda.is_available()
 
-class TorchrecExampleTest(unittest.TestCase):
     @skip_if_asan
-    # pyre-fixme[56]: Pyre was not able to infer the type of argument `not
-    #  torch.cuda.is_available()` to decorator factory `unittest.skipIf`.
-    @unittest.skipIf(
-        not torch.cuda.is_available(),
+    @unittest.skipUnless(
+        cuda_available,
         "Skip when CUDA is not available",
     )
     def test_torchrec_example(self) -> None:
-        lc = launcher.LaunchConfig(
-            min_nodes=MIN_NODES,
-            max_nodes=MAX_NODES,
-            nproc_per_node=PROC_PER_NODE,
-            run_id=str(uuid.uuid4()),
-            rdzv_backend="c10d",
-            rdzv_endpoint="localhost:0",
-            max_restarts=0,
-            monitor_interval=1,
-        )
-
-        launcher.elastic_launch(config=lc, entrypoint=main)([])
+        spawn_multi_process(2, "nccl", main, [])
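
For readers unfamiliar with the helper, here is a minimal, self-contained sketch of the new launch pattern. This is an illustration, not code from the commit: _entrypoint is a hypothetical stand-in for the example's main, and it assumes spawn_multi_process(world_size, backend, method, *method_args) spawns the requested number of ranks and sets up the process group before invoking method, as its use in the diff implies. It also shows why the pyre-fixme could be dropped: Pyre could not infer the type of the inline expression not torch.cuda.is_available() passed to the skipIf decorator factory, whereas skipUnless now reads an explicitly annotated bool class attribute.

import unittest

import torch
from torchtnt.utils.test_utils import skip_if_asan, spawn_multi_process


def _entrypoint(argv: list) -> None:
    # Hypothetical stand-in for the torchrec example's main().
    # Each spawned rank runs this with the process group already
    # initialized, so the test needs no LaunchConfig/rendezvous setup.
    pass


class ExampleDistributedTest(unittest.TestCase):
    # Annotated bool attribute: Pyre can type-check the decorator
    # argument, unlike the inline expression it replaces.
    cuda_available: bool = torch.cuda.is_available()

    @skip_if_asan
    @unittest.skipUnless(cuda_available, "Skip when CUDA is not available")
    def test_distributed(self) -> None:
        # Two ranks over the NCCL backend, mirroring the call in the diff.
        spawn_multi_process(2, "nccl", _entrypoint, [])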
