Merged
Changes from 13 commits
4 changes: 3 additions & 1 deletion examples/start_here/chatbot_example.py
@@ -1,5 +1,4 @@
import os
import sys
import gzip
import shutil
import nemo
@@ -32,6 +31,9 @@

# instantiate neural factory
nf = nemo.core.NeuralModuleFactory()
# To use CPU-only do:
# from nemo.core import DeviceType
# nf = nemo.core.NeuralModuleFactory(placement=DeviceType.CPU)

# instantiate neural modules
dl = nemo.tutorials.DialogDataLayer(**config)
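As a quick aside (not part of this diff): instead of editing the commented-out lines by hand, the example scripts could pick the placement automatically. A minimal sketch, assuming the `DeviceType` enum referenced in the comments also exposes a `GPU` member:

```python
import torch
import nemo
from nemo.core import DeviceType

# Fall back to CPU-only execution when no CUDA device is visible.
# CPU-only mode is slow and intended mainly for trying the examples out.
placement = DeviceType.GPU if torch.cuda.is_available() else DeviceType.CPU
nf = nemo.core.NeuralModuleFactory(placement=placement)
```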
5 changes: 3 additions & 2 deletions examples/start_here/simplest_example.py
@@ -1,8 +1,9 @@
# Copyright (c) 2019 NVIDIA Corporation
import nemo

# instantiate Neural Factory with supported backend
nf = nemo.core.NeuralModuleFactory()
# To use CPU-only do:
# from nemo.core import DeviceType
# nf = nemo.core.NeuralModuleFactory(placement=DeviceType.CPU)

# instantiate necessary neural modules
# RealFunctionDataLayer defaults to f=torch.sin, sampling from x=[-4, 4]
57 changes: 41 additions & 16 deletions nemo/nemo/backends/pytorch/actions.py
@@ -1,4 +1,5 @@
# Copyright (c) 2019 NVIDIA Corporation
import importlib
import itertools
import logging
import os
@@ -21,14 +22,10 @@
from ...core.neural_factory import Actions, ModelMode, Optimization
from ...utils.helpers import get_checkpoint_from_dir

try:
import apex
from apex.parallel import DistributedDataParallel as DDP
from apex.parallel.LARC import LARC
from apex import amp
except ImportError:
raise ImportError(
"Please install apex from https://www.github.com/nvidia/apex")
# these imports will happen on an as-needed basis
amp = None
DDP = None
LARC = None

AmpOptimizations = {
Optimization.mxprO0: "O0",
@@ -45,6 +42,28 @@
class PtActions(Actions):
def __init__(self, local_rank=None, tb_writer=None,
optimization_level=Optimization.mxprO0):
need_apex = local_rank is not None or \
optimization_level != Optimization.mxprO0
if need_apex:
try:
apex = importlib.import_module('apex')
if optimization_level != Optimization.mxprO0:
global amp
amp = importlib.import_module('apex.amp')
if local_rank is not None:
global DDP
global LARC
parallel = importlib.import_module('apex.parallel')
DDP = parallel.DistributedDataParallel
LARC = importlib.import_module('apex.parallel.LARC').LARC

except ImportError:
raise ImportError(
"NVIDIA Apex is necessary for distributed training and "
"mixed precision training. It only works on GPUs. "
"Please install Apex from "
"https://www.github.com/nvidia/apex")

super(PtActions, self).__init__(
local_rank=local_rank,
optimization_level=optimization_level)
@@ -340,8 +359,12 @@ def __initialize_amp(
self, optimizer, optim_level, amp_min_loss_scale=1.0
):
if optim_level not in AmpOptimizations:
raise ValueError("__initialize_amp() was called but optim_level "
"was set to float32.")
raise ValueError(f"__initialize_amp() was called with unknown "
"optim_level={optim_level}")
# in this case, nothing to do here
if optim_level == Optimization.mxprO0:
return optimizer

if len(self.modules) < 1:
raise ValueError("There were no modules to initialize")
pt_modules = []
@@ -371,11 +394,12 @@ def __nm_graph_forward_pass(self,
m_id = call_chain[ind][0].unique_instance_id
pmodule = self.module_reference_table[m_id][1]

if isinstance(pmodule, DDP):
if disable_allreduce:
pmodule.disable_allreduce()
else:
pmodule.enable_allreduce()
if self._local_rank is not None:
if isinstance(pmodule, DDP):
if disable_allreduce:
pmodule.disable_allreduce()
else:
pmodule.enable_allreduce()

if mode == ModelMode.train:
# if module.is_trainable():
@@ -1164,7 +1188,8 @@ def train(self,
final_loss += registered_tensors[tensor.unique_name]
if nan:
continue
if self._optim_level in AmpOptimizations:
if self._optim_level in AmpOptimizations \
and self._optim_level != Optimization.mxprO0:
with amp.scale_loss(
final_loss,
curr_optimizer,
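The deferred-import pattern introduced above (module-level placeholders rebound via `importlib` only when Apex is actually needed) can be shown in isolation. A generic sketch using a hypothetical `heavy_dep` package, not NeMo code:

```python
import importlib

# Module-level placeholder, analogous to amp/DDP/LARC above; it stays None
# until the optional dependency is genuinely required.
heavy_dep = None


def enable_optional_feature():
    """Import the optional dependency on first use, with a clear error message."""
    global heavy_dep
    if heavy_dep is None:
        try:
            # 'heavy_dep' is a hypothetical package name used only for illustration.
            heavy_dep = importlib.import_module('heavy_dep')
        except ImportError:
            raise ImportError(
                "The optional feature requires the 'heavy_dep' package; "
                "install it or run without this feature.")
    return heavy_dep
```

Callers that never touch the optional feature never pay the import cost, and a missing dependency only fails at the point where it would actually be used.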
10 changes: 10 additions & 0 deletions nemo/nemo/core/neural_factory.py
@@ -292,6 +292,16 @@ def __init__(
if backend == Backend.PyTorch:
# TODO: Move all framework specific code from this file
import torch
if self._placement != DeviceType.CPU:
if not torch.cuda.is_available():
raise ValueError("You requested to use GPUs but CUDA is "
"not installed. You can try running using"
" CPU-only. To do this, instantiate your"
" factory with placement=DeviceType.CPU"
"\n"
"Note that this is slow and is not "
"well supported.")

torch.backends.cudnn.benchmark = cudnn_benchmark
if random_seed is not None and cudnn_benchmark:
raise ValueError("cudnn_benchmark can not be set to True"
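A rough usage sketch of the new check (assuming the constructor behaviour added in this diff, where the default placement expects a GPU): on a machine without CUDA the default factory now fails fast with a ValueError, while an explicit CPU placement keeps working.

```python
import torch
import nemo
from nemo.core import DeviceType

if not torch.cuda.is_available():
    try:
        nf = nemo.core.NeuralModuleFactory()  # default placement wants a GPU
    except ValueError as err:
        print(f"GPU placement rejected: {err}")
    # Explicit CPU placement still works (slow, but useful for smoke tests).
    nf = nemo.core.NeuralModuleFactory(placement=DeviceType.CPU)
else:
    nf = nemo.core.NeuralModuleFactory()
```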