4 changes: 4 additions & 0 deletions tests/unit/test_adam_acuracy.py
@@ -6,8 +6,12 @@
import pytest
import copy

import deepspeed
from deepspeed.ops.adam import DeepSpeedCPUAdam

if not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed", allow_module_level=True)


def check_equal(first, second, atol=1e-2, verbose=False):
x = first.detach().numpy()
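Note on the guard added in test_adam_acuracy.py: skipping at module level keeps pytest from collecting any test in the file when the optional op was never built, instead of failing at import time. Below is a minimal sketch of the pattern, assuming (as this diff suggests) that `deepspeed.ops.__installed_ops__` is a dict mapping op names to booleans; the test body is a hypothetical placeholder, not code from this PR:

```python
# Minimal sketch of the module-level skip pattern, assuming
# deepspeed.ops.__installed_ops__ maps op names (e.g. 'cpu-adam') to
# booleans indicating whether that extension was built/installed.
import pytest

import deepspeed

if not deepspeed.ops.__installed_ops__['cpu-adam']:
    # allow_module_level=True lets pytest.skip() be called outside a test
    # function, so the whole module is skipped during collection.
    pytest.skip("cpu-adam is not installed", allow_module_level=True)

from deepspeed.ops.adam import DeepSpeedCPUAdam  # only reached when the op exists


def test_cpu_adam_import_placeholder():
    # Hypothetical test body, shown only to mark where the real tests go.
    assert DeepSpeedCPUAdam is not None
```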
15 changes: 15 additions & 0 deletions tests/unit/test_checkpointing.py
@@ -1,4 +1,5 @@
import torch

import torch.distributed as dist

import deepspeed
@@ -151,6 +152,8 @@ def checkpoint_correctness_verification(args,
compare_lr_scheduler_states(trained_model, loaded_model)


@pytest.mark.skipif(not deepspeed.ops.__installed_ops__['lamb'],
                    reason="lamb is not installed")
def test_checkpoint_unfused_optimizer(tmpdir):
config_dict = {
"train_batch_size": 2,
@@ -264,6 +267,9 @@ def _test_checkpoint_fused_optimizer(args, model, hidden_dim, load_optimizer_sta
'deepspeed_adam'),
])
def test_checkpoint_zero_optimizer(tmpdir, zero_stage, use_cpu_offload, adam_optimizer):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")

config_dict = {
"train_batch_size": 2,
"steps_per_print": 1,
@@ -320,6 +326,9 @@ def test_checkpoint_zero_no_optimizer(tmpdir,
zero_stage,
use_cpu_offload,
adam_optimizer):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")

config_dict = {
"train_batch_size": 2,
"steps_per_print": 1,
@@ -379,6 +388,9 @@ def _test_checkpoint_zero_no_optimizer(args,
'deepspeed_adam'),
])
def test_checkpoint_lr_scheduler(tmpdir, zero_stage, use_cpu_offload, adam_optimizer):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")

config_dict = {
"train_batch_size": 2,
"steps_per_print": 1,
@@ -450,6 +462,9 @@ def _test_checkpoint_lr_scheduler(args,
'deepspeed_adam'),
])
def test_checkpoint_no_lr_scheduler(tmpdir, zero_stage, use_cpu_offload, adam_optimizer):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")

config_dict = {
"train_batch_size": 2,
"steps_per_print": 1,
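For the parametrized ZeRO checkpoint tests above, the skip has to live inside the test body, because only the `use_cpu_offload=True` variants need the cpu-adam extension. A sketch of that pattern follows; the test name, parameter values, and body are placeholders rather than code from this PR:

```python
# Sketch of the per-parametrization skip: only cpu-offload variants are
# skipped when the cpu-adam op is unavailable; the other cases still run.
import pytest

import deepspeed


@pytest.mark.parametrize('zero_stage, use_cpu_offload',
                         [(1, False), (2, False), (2, True)])
def test_zero_checkpoint_placeholder(zero_stage, use_cpu_offload):
    if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
        pytest.skip("cpu-adam is not installed")
    # The real tests build a config_dict here and then call
    # checkpoint_correctness_verification(...) under distributed_test.
    assert zero_stage in (1, 2)
```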
4 changes: 4 additions & 0 deletions tests/unit/test_cuda_backward.py
@@ -12,9 +12,13 @@
from modeling import BertEncoder as BertEncoderPostln
from modeling import BertConfig, BertLayerNorm
from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig
import deepspeed

import sys

if not deepspeed.ops.__installed_ops__['transformer']:
    pytest.skip("transformer kernels are not installed", allow_module_level=True)


def check_equal(first, second, atol=1e-2, verbose=False):
diction_x = {}
4 changes: 4 additions & 0 deletions tests/unit/test_cuda_forward.py
@@ -12,9 +12,13 @@
from modeling import BertEncoder as BertEncoderPostln
from modeling import BertLayerNorm, BertConfig
from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig
import deepspeed

import sys

if not deepspeed.ops.__installed_ops__['transformer']:
    pytest.skip("transformer kernels are not installed", allow_module_level=True)


def check_equal(first, second, atol=1e-2, verbose=False):
if verbose:
6 changes: 6 additions & 0 deletions tests/unit/test_dynamic_loss_scale.py
@@ -8,6 +8,9 @@
from common import distributed_test
from simple_model import SimpleModel, args_from_dict

lamb_available = pytest.mark.skipif(not deepspeed.ops.__installed_ops__['lamb'],
                                    reason="lamb is not installed")


def run_model_step(model, gradient_list):
for value in gradient_list:
@@ -165,6 +168,7 @@ def _test_fused_some_overflow(args):
_test_fused_some_overflow(args)


@lamb_available
def test_unfused_no_overflow(tmpdir):
config_dict = {
"train_batch_size": 1,
@@ -208,6 +212,7 @@ def _test_unfused_no_overflow(args):
_test_unfused_no_overflow(args)


@lamb_available
def test_unfused_all_overflow(tmpdir):
config_dict = {
"train_batch_size": 1,
@@ -253,6 +258,7 @@ def _test_unfused_all_overflow(args):
_test_unfused_all_overflow(args)


@lamb_available
def test_unfused_some_overflow(tmpdir):
config_dict = {
"train_batch_size": 1,
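The `lamb_available` marker defined above is the third guard style in this PR: a reusable `pytest.mark.skipif` whose condition is evaluated once at import time and then attached to individual tests. A rough sketch, with the decorated test as a placeholder:

```python
# Sketch of the reusable skipif marker: the condition is checked when the
# module is imported, and pytest reports each decorated test as skipped
# with the given reason if the lamb op is not installed.
import pytest

import deepspeed

lamb_available = pytest.mark.skipif(not deepspeed.ops.__installed_ops__['lamb'],
                                    reason="lamb is not installed")


@lamb_available
def test_lamb_placeholder():
    # Hypothetical body; only runs when the fused lamb op is available.
    assert deepspeed.ops.__installed_ops__['lamb']
```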
15 changes: 15 additions & 0 deletions tests/unit/test_fp16.py
@@ -8,7 +8,11 @@
from common import distributed_test
from simple_model import SimpleModel, SimpleOptimizer, random_dataloader, args_from_dict

lamb_available = pytest.mark.skipif(not deepspeed.ops.__installed_ops__['lamb'],
                                    reason="lamb is not installed")


@lamb_available
def test_lamb_fp32_grad_clip(tmpdir):
config_dict = {
"train_batch_size": 2,
@@ -44,6 +48,7 @@ def _test_lamb_fp32_grad_clip(args, model, hidden_dim):
_test_lamb_fp32_grad_clip(args=args, model=model, hidden_dim=hidden_dim)


@lamb_available
def test_lamb_fp16_basic(tmpdir):
config_dict = {
"train_batch_size": 2,
@@ -81,6 +86,7 @@ def _test_lamb_fp16_basic(args, model, hidden_dim):
_test_lamb_fp16_basic(args=args, model=model, hidden_dim=hidden_dim)


@lamb_available
def test_lamb_fp16_empty_grad(tmpdir):
config_dict = {
"train_batch_size": 2,
@@ -228,6 +234,8 @@ def _test_adamw_fp16_empty_grad(args, model, hidden_dim):
True),
])
def test_adam_fp16_zero_onecycle_compatibility(tmpdir, zero_stage, use_cpu_offload):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")
config_dict = {
"train_batch_size": 1,
"steps_per_print": 1,
@@ -294,6 +302,8 @@ def _test_adam_fp16_zero_onecycle_compatibility(args, model, hidden_dim):
True),
])
def test_zero_static_scale(tmpdir, zero_stage, use_cpu_offload):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")
config_dict = {
"train_batch_size": 4,
"steps_per_print": 1,
@@ -392,6 +402,8 @@ def _test_zero_static_scale(args):
True),
])
def test_zero_allow_untested_optimizer(tmpdir, zero_stage, use_cpu_offload):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")
config_dict = {
"train_batch_size": 4,
"steps_per_print": 1,
@@ -430,6 +442,8 @@ def _test_zero_allow_untested_optimizer(args):
True),
])
def test_zero_empty_partition(tmpdir, zero_stage, use_cpu_offload):
if use_cpu_offload and not deepspeed.ops.__installed_ops__['cpu-adam']:
    pytest.skip("cpu-adam is not installed")
config_dict = {
"train_micro_batch_size_per_gpu": 1,
"gradient_accumulation_steps": 1,
@@ -500,6 +514,7 @@ def _test_adam_amp_basic(args, model, hidden_dim):
_test_adam_amp_basic(args=args, model=model, hidden_dim=hidden_dim)


@lamb_available
def test_lamb_amp_basic(tmpdir):
config_dict = {
"train_batch_size": 2,
4 changes: 4 additions & 0 deletions tests/unit/test_sparse_attention.py
@@ -5,6 +5,10 @@

import pytest
import torch
import deepspeed

if not deepspeed.ops.__installed_ops__['sparse-attn']:
    pytest.skip("sparse-attn is not installed", allow_module_level=True)


def test_sparse_attention_module_availability():