11 changes: 6 additions & 5 deletions deepspeed/autotuning/autotuner.py
@@ -11,7 +11,7 @@

from ..runtime.zero.config import ZERO_OPTIMIZATION, ZeroStageEnum
from ..utils import logger
from .config import DeepSpeedAutotuningConfig
from .config import AUTOTUNING, DeepSpeedAutotuningConfig, TunerTypeEnum
from .constants import *
from .scheduler import ResourceManager
from .tuner import GridSearchTuner, RandomTuner, ModelBasedTuner
@@ -621,9 +621,9 @@ def tune_space(self,
exps = self._generate_experiments(tuning_space, max_train_batch_size_per_gpu)

logger.info(f'Tuner type is {self.autotuning_config.tuner_type}')
if self.autotuning_config.tuner_type == AUTOTUNING_TUNER_MODELBASED:
if self.autotuning_config.tuner_type == TunerTypeEnum.model_based:
t = ModelBasedTuner(exps, self.rm, self.metric(), tuning_space)
elif self.autotuning_config.tuner_type == AUTOTUNING_TUNER_RANDOM:
elif self.autotuning_config.tuner_type == TunerTypeEnum.random:
t = RandomTuner(exps, self.rm, self.metric())
else:
t = GridSearchTuner(exps, self.rm, self.metric())
@@ -695,13 +695,14 @@ def model_info_profile_run(self):
model_info_path = os.path.join(self.results_dir,
"profile_model_info",
"model_info.json")
ds_config[AUTOTUNING] = {
at_config_dict = {
"enabled": True,
"model_info_path": model_info_path,
"model_info": {
"profile": True
}
}
ds_config[AUTOTUNING] = DeepSpeedAutotuningConfig(**at_config_dict)

exp_config = {}
exp_name = "profile_model_info"
@@ -804,7 +805,7 @@ def run_tuning_micro_batch_sizes(self,
self.rm.run()
for exp_id, (exp, err) in self.rm.finished_experiments.items():
if exp:
metric_file = exp[DS_CONFIG][AUTOTUNING][AUTOTUNING_METRIC_PATH]
metric_file = exp[DS_CONFIG][AUTOTUNING].metric_path

if os.path.exists(metric_file):
with open(metric_file, 'r') as f:
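Note: after this change, the autotuning section of a ds_config is carried as a DeepSpeedAutotuningConfig object rather than a raw dict, so callers read fields such as metric_path as attributes instead of constant-keyed lookups. A minimal sketch of the new pattern, assuming only the field names visible in this diff (enabled, model_info_path, model_info, metric_path) and that AUTOTUNING is the "autotuning" section key:

```python
# Sketch only: mirrors the diff above; field names are taken from the
# DeepSpeedAutotuningConfig usages shown in this PR, not a full definition.
from deepspeed.autotuning.config import AUTOTUNING, DeepSpeedAutotuningConfig

at_config_dict = {
    "enabled": True,
    "model_info_path": "autotuning_results/profile_model_info/model_info.json",
    "model_info": {"profile": True},
}

ds_config = {}
# Old style stored a plain dict: ds_config[AUTOTUNING] = at_config_dict
ds_config[AUTOTUNING] = DeepSpeedAutotuningConfig(**at_config_dict)

# Downstream code now uses attribute access instead of
# ds_config[AUTOTUNING][AUTOTUNING_METRIC_PATH]:
metric_file = ds_config[AUTOTUNING].metric_path
```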
3 changes: 2 additions & 1 deletion deepspeed/autotuning/config.py
@@ -3,6 +3,7 @@
Licensed under the MIT license.
"""


from deepspeed.runtime.config_utils import get_scalar_param, get_dict_param, DeepSpeedConfigObject
from deepspeed.autotuning.constants import *

@@ -133,4 +134,4 @@ def get_model_info_config(param_dict):


def get_default_model_info_config():
return MODEL_INFO_KEY_DEFAULT_DICT
return MODEL_INFO_KEY_DEFAULT_DICT
1 change: 0 additions & 1 deletion deepspeed/autotuning/constants.py
@@ -204,5 +204,4 @@
}

GLOBAL_TUNING_SPACE = 'global'
# TUNING_MICRO_BATCH_SIZE_PREFIX="tune_micro_batch_size_z"
TUNING_MICRO_BATCH_SIZE_PREFIX = "z"
9 changes: 4 additions & 5 deletions deepspeed/autotuning/scheduler.py
@@ -3,6 +3,7 @@
from numpy import BUFSIZE
import json
import subprocess
import os
import sys
import threading
import time
@@ -11,8 +12,7 @@
from tqdm import tqdm

from ..utils import logger
from .constants import *
from .constants import AUTOTUNING, AUTOTUNING_METRIC_PATH
from .config import AUTOTUNING
from .utils import get_val_by_key, search_error, was_interruptted
"""
thread-0: loop over experiment queue dispatching experiments if they become available
@@ -71,8 +71,7 @@ def schedule_experiments(self, exp_paths):
exp['name'])
if AUTOTUNING in exp["ds_config"]:
metric_file = os.path.join(result_dir, "metrics.json")
exp["ds_config"][AUTOTUNING][
AUTOTUNING_METRIC_PATH] = metric_file
exp["ds_config"][AUTOTUNING].metric_path = metric_file
stderr_file = os.path.join(result_dir, "stderr.log")
model_info_file = os.path.join(result_dir, "model_info.json")
metric_file = os.path.join(result_dir, "metrics.json")
@@ -237,7 +236,7 @@ def parse_results(self, metric):
)
continue

metric_file = exp["ds_config"][AUTOTUNING][AUTOTUNING_METRIC_PATH]
metric_file = exp["ds_config"][AUTOTUNING].metric_path

if os.path.exists(metric_file):
with open(metric_file, 'r') as f:
4 changes: 2 additions & 2 deletions deepspeed/autotuning/tuner/base_tuner.py
@@ -1,6 +1,6 @@
import sys

from deepspeed.autotuning.constants import *
from deepspeed.autotuning.config import DeepSpeedAutotuningConfig
from deepspeed.autotuning.utils import write_experiments
from deepspeed.utils import logger

@@ -12,7 +12,7 @@ def __init__(self, exps, resource_manager, metric):
self.best_iter = 0
self.best_exp = None
self.best_metric_val = None
self.metric = metric if metric else AUTOTUNING_METRIC_DEFAULT
self.metric = metric if metric else DeepSpeedAutotuningConfig().metric
logger.info(f"total number of exps = {len(self.all_exps)}")

def has_next(self):
9 changes: 3 additions & 6 deletions deepspeed/autotuning/tuner/model_based_tuner.py
@@ -1,12 +1,11 @@
import hjson

from ..constants import AUTOTUNING, AUTOTUNING_METRIC_PATH
from ..config import AUTOTUNING, MetricEnum
from .base_tuner import BaseTuner
from .cost_model import XGBoostCostModel
from .utils import *
from ..utils import *
import numbers
from ..constants import AUTOTUNING_METRIC_LATENCY

INIT_NUM = 2

@@ -69,9 +68,7 @@ def find_estimated_top_configs(self):

n = len(estimates)
top_idx = np.argsort(estimates)
top_idx_ret = top_idx if self.metric == AUTOTUNING_METRIC_LATENCY else top_idx[::
-1][:
n]
top_idx_ret = top_idx if self.metric == MetricEnum.latency else top_idx[::-1][:n]

# top_configs = [self.all_configs[i] for i in top_idx]

@@ -129,7 +126,7 @@ def update(self):
self.evaluated_perf.append(0.0)
continue

p = exp["ds_config"][AUTOTUNING][AUTOTUNING_METRIC_PATH]
p = exp["ds_config"][AUTOTUNING].metric_path
with open(p, 'r') as f:
results = hjson.load(f)
curr_iter = results[self.metric]
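Note on the rewrapped line above: np.argsort returns indices in ascending order of the estimates, so for a latency-style metric (lower is better) the order is kept, while for throughput-style metrics it is reversed. A small standalone illustration of that conditional (MetricEnum itself lives in the autotuning config, per the import in this diff):

```python
# Illustrates the conditional reversal used in find_estimated_top_configs().
import numpy as np

estimates = np.array([3.2, 1.1, 2.5])      # e.g. predicted latency or throughput
top_idx = np.argsort(estimates)            # ascending: [1, 2, 0]

best_first_for_latency = top_idx           # lowest estimate first
best_first_for_throughput = top_idx[::-1]  # highest estimate first
print(best_first_for_latency, best_first_for_throughput)
```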
3 changes: 1 addition & 2 deletions deepspeed/comm/comm.py
@@ -30,7 +30,6 @@
import os

from ..constants import TORCH_DISTRIBUTED_DEFAULT_PORT, default_pg_timeout
from .constants import *


class ReduceOp(Enum):
@@ -75,7 +74,7 @@


def _configure_using_config_file(config):
if config.comms_logger_enabled:
if config.enabled:
comms_logger.configure(config)


32 changes: 11 additions & 21 deletions deepspeed/comm/config.py
@@ -3,29 +3,19 @@
Licensed under the MIT license.
"""

from pydantic import BaseModel
from .constants import *
from deepspeed.runtime.config_utils import DeepSpeedConfigModel

COMMS_LOGGER = "comms_logger"

class CommsConfig(BaseModel):
class Config:
validate_all = True
validate_assignment = True
use_enum_values = True
extra = 'forbid'

def get_comms_config(param_dict):
comms_config_dict = param_dict.get(COMMS_LOGGER, {})
return DeepSpeedCommsConfig(**comms_config_dict)

class CommsLoggerConfig(CommsConfig):
enabled: bool = COMMS_LOGGER_ENABLED_DEFAULT
prof_all: bool = COMMS_LOGGER_PROF_ALL_DEFAULT
prof_ops: list = COMMS_LOGGER_PROF_OPS_DEFAULT
verbose: bool = COMMS_LOGGER_VERBOSE_DEFAULT
debug: bool = COMMS_LOGGER_DEBUG_DEFAULT


class DeepSpeedCommsConfig:
def __init__(self, ds_config):
self.comms_logger_enabled = 'comms_logger' in ds_config

if self.comms_logger_enabled:
self.comms_logger = CommsLoggerConfig(**ds_config['comms_logger'])
class DeepSpeedCommsConfig(DeepSpeedConfigModel):
enabled: bool = False
prof_all: bool = True
prof_ops: list = []
verbose: bool = False
debug: bool = False
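
Note: with deepspeed/comm/constants.py removed, the comms-logger defaults now live directly on the config model, and comm.py gates on config.enabled instead of the old comms_logger_enabled wrapper flag. A hedged usage sketch, assuming DeepSpeedConfigModel behaves like a pydantic BaseModel:

```python
# Sketch based only on the fields and helper shown in this diff.
from deepspeed.comm.config import DeepSpeedCommsConfig, get_comms_config

param_dict = {
    "comms_logger": {          # the COMMS_LOGGER section key
        "enabled": True,
        "prof_ops": ["all_reduce"],
    }
}

comms_config = get_comms_config(param_dict)  # -> DeepSpeedCommsConfig
assert isinstance(comms_config, DeepSpeedCommsConfig)

# _configure_using_config_file() in comm.py now checks the model's own flag:
if comms_config.enabled:
    print(comms_config.prof_all, comms_config.verbose, comms_config.debug)
```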
44 changes: 0 additions & 44 deletions deepspeed/comm/constants.py

This file was deleted.

31 changes: 15 additions & 16 deletions deepspeed/compression/compress.py
@@ -1,8 +1,8 @@
import re
from .helper import compression_preparation, fix_compression, recursive_getattr, is_module_compressible
from .config import get_compression_config
from ..runtime.config_utils import dict_raise_error_on_duplicate_keys
from .constants import *
from .config import get_compression_config
import os
import json

@@ -49,21 +49,20 @@ def get_module_name(group_name,
def get_compress_methods(model, compress_methods, mpu=None):
# extract the compression module for each method in compress_methods
layer_added_compress_methods = []
for method, method_content in compress_methods.items():
for method, method_content in compress_methods:
if LAYER_REDUCTION in method:
continue
# for loop different methods, i.e., weight quantization, activation quantization etc
exist_module_name = set()
shared_parameters = method_content[
SHARED_PARAMETERS] # get all the shared parameters
for group_name, method_parameters in method_content[DIFFERENT_GROUPS].items():
shared_parameters = method_content.shared_parameters # get all the shared parameters
for group_name, method_parameters in method_content.different_groups.items():
# for loop different groups, i.e., weight quantization group 1, weight quantization group 2 etc
module_name_list = []
related_module_name_list = []
if method_parameters[DIFFERENT_GROUPS_RELATED_MODULE_SCOPE]:
if method_parameters.related_modules:
# this is used for head/row/channel pruning, if users provide the related module scope, we can shrink the layer dim for them
# otherwise we just mask those as zeros
for key_word, related_key_words in zip(method_parameters[DIFFERENT_GROUPS_MODULE_SCOPE], method_parameters[DIFFERENT_GROUPS_RELATED_MODULE_SCOPE]):
for key_word, related_key_words in zip(method_parameters.modules, method_parameters.related_modules):
module_name, exist_module_name = get_module_name(group_name, model, key_word, exist_module_name, mpu=mpu)
module_name_list.append(module_name)
tmp_related_module_name_list = []
@@ -73,15 +72,15 @@ def get_compress_methods(model, compress_methods, mpu=None):
tmp_related_module_name_list.append(module_name)
related_module_name_list.append(tmp_related_module_name_list)
else:
for key_word in method_parameters[DIFFERENT_GROUPS_MODULE_SCOPE]:
for key_word in method_parameters.modules:
module_name, exist_module_name = get_module_name(group_name, model, key_word, exist_module_name, mpu=mpu)
module_name_list.append(module_name)

if module_name_list:
# combine shared parameters with each group
combined_method_parameters = {
**(method_parameters.copy().pop(DIFFERENT_GROUPS_PARAMETERS)),
**shared_parameters
**method_parameters.dict().pop(DIFFERENT_GROUPS_PARAMETERS),
**shared_parameters.dict()
}
compression_item = [
module_name_list,
@@ -112,7 +111,7 @@ def init_compression(model, deepspeed_config, teacher_model=None, mpu=None):
c_model = model

# For layer reduction
if compress_methods[LAYER_REDUCTION][LAYER_REDUCTION_ENABLED]:
if compress_methods.layer_reduction.enabled:
assert teacher_model is not None, "Teacher model is required for layer reduction"
student_initialization(c_model, teacher_model, deepspeed_config)

@@ -135,7 +134,7 @@ def redundancy_clean(model, deepspeed_config, mpu=None):
mpu
The mpu module for Row/Column parallelism
"""
compress_methods = get_compression_config(check_deepspeed_config(deepspeed_config))
compress_methods = deepspeed_config.compression_config
if hasattr(model, 'module'):
c_model = model.module
else:
@@ -191,12 +190,12 @@ def student_initialization(student_model, teacher_model, deepspeed_config):
The path of ds_config
'''
config = get_compression_config(check_deepspeed_config(deepspeed_config))
compress_methods = config[LAYER_REDUCTION]
compress_methods = config.layer_reduction

module_name_prefix = compress_methods[MODULE_NAME_PREFIX]
teacher_layer = compress_methods[TEACHER_LAYER]
module_name_prefix = compress_methods.module_name_prefix
teacher_layer = compress_methods.teacher_layer
student_layer = [i for i in range(len(teacher_layer))]
other_module_name = compress_methods[OTHER_MODULE_NAME]
other_module_name = compress_methods.other_module_name
'''
name_prefix (`str`)
The prefix name before the layer #.
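Note: the compression config is likewise consumed as nested config objects here; attribute access (method_content.shared_parameters, method_parameters.modules, compress_methods.layer_reduction.enabled, and so on) replaces the constant-keyed dict lookups, and the top-level loop now iterates the model directly instead of calling .items(). A hedged, self-contained illustration of why that loop works, using hypothetical stand-in models (pydantic models iterate as (field_name, value) pairs):

```python
# Hypothetical stand-ins: the real compression config models live in
# deepspeed/compression/config.py; this only demonstrates the iteration
# and attribute-access pattern the new compress.py code relies on.
from pydantic import BaseModel

class MethodConfig(BaseModel):
    enabled: bool = False
    modules: list = ["*"]

class CompressMethods(BaseModel):
    weight_quantization: MethodConfig = MethodConfig()
    activation_quantization: MethodConfig = MethodConfig(enabled=True)

compress_methods = CompressMethods()
for method, method_content in compress_methods:   # yields (name, value) pairs
    print(method, method_content.enabled, method_content.modules)
```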