11 changes: 6 additions & 5 deletions deepspeed/autotuning/autotuner.py
@@ -11,7 +11,7 @@

from ..runtime.zero.config import ZERO_OPTIMIZATION, ZeroStageEnum
from ..utils import logger
from .config import DeepSpeedAutotuningConfig
from .config import AUTOTUNING, DeepSpeedAutotuningConfig, TunerTypeEnum
from .constants import *
from .scheduler import ResourceManager
from .tuner import GridSearchTuner, RandomTuner, ModelBasedTuner
@@ -621,9 +621,9 @@ def tune_space(self,
exps = self._generate_experiments(tuning_space, max_train_batch_size_per_gpu)

logger.info(f'Tuner type is {self.autotuning_config.tuner_type}')
if self.autotuning_config.tuner_type == AUTOTUNING_TUNER_MODELBASED:
if self.autotuning_config.tuner_type == TunerTypeEnum.model_based:
t = ModelBasedTuner(exps, self.rm, self.metric(), tuning_space)
elif self.autotuning_config.tuner_type == AUTOTUNING_TUNER_RANDOM:
elif self.autotuning_config.tuner_type == TunerTypeEnum.random:
t = RandomTuner(exps, self.rm, self.metric())
else:
t = GridSearchTuner(exps, self.rm, self.metric())
@@ -695,13 +695,14 @@ def model_info_profile_run(self):
model_info_path = os.path.join(self.results_dir,
"profile_model_info",
"model_info.json")
ds_config[AUTOTUNING] = {
at_config_dict = {
"enabled": True,
"model_info_path": model_info_path,
"model_info": {
"profile": True
}
}
ds_config[AUTOTUNING] = DeepSpeedAutotuningConfig(**at_config_dict)

exp_config = {}
exp_name = "profile_model_info"
@@ -804,7 +805,7 @@ def run_tuning_micro_batch_sizes(self,
self.rm.run()
for exp_id, (exp, err) in self.rm.finished_experiments.items():
if exp:
metric_file = exp[DS_CONFIG][AUTOTUNING][AUTOTUNING_METRIC_PATH]
metric_file = exp[DS_CONFIG][AUTOTUNING].metric_path

if os.path.exists(metric_file):
with open(metric_file, 'r') as f:
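Note: after this change, the autotuning section of a ds_config is carried as a DeepSpeedAutotuningConfig object rather than a raw dict, so callers read fields such as metric_path as attributes instead of constant-keyed lookups. A minimal sketch of the new pattern, assuming only the field names visible in this diff (enabled, model_info_path, model_info, metric_path) and that AUTOTUNING is the "autotuning" section key:

```python
# Sketch only: mirrors the diff above; field names are taken from the
# DeepSpeedAutotuningConfig usages shown in this PR, not a full definition.
from deepspeed.autotuning.config import AUTOTUNING, DeepSpeedAutotuningConfig

at_config_dict = {
    "enabled": True,
    "model_info_path": "autotuning_results/profile_model_info/model_info.json",
    "model_info": {"profile": True},
}

ds_config = {}
# Old style stored a plain dict: ds_config[AUTOTUNING] = at_config_dict
ds_config[AUTOTUNING] = DeepSpeedAutotuningConfig(**at_config_dict)

# Downstream code now uses attribute access instead of
# ds_config[AUTOTUNING][AUTOTUNING_METRIC_PATH]:
metric_file = ds_config[AUTOTUNING].metric_path
```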
3 changes: 2 additions & 1 deletion deepspeed/autotuning/config.py
@@ -3,6 +3,7 @@
Licensed under the MIT license.
"""


from deepspeed.runtime.config_utils import get_scalar_param, get_dict_param, DeepSpeedConfigObject
from deepspeed.autotuning.constants import *

@@ -133,4 +134,4 @@ def get_model_info_config(param_dict):


def get_default_model_info_config():
return MODEL_INFO_KEY_DEFAULT_DICT
return MODEL_INFO_KEY_DEFAULT_DICT
1 change: 0 additions & 1 deletion deepspeed/autotuning/constants.py
@@ -204,5 +204,4 @@
}

GLOBAL_TUNING_SPACE = 'global'
# TUNING_MICRO_BATCH_SIZE_PREFIX="tune_micro_batch_size_z"
TUNING_MICRO_BATCH_SIZE_PREFIX = "z"
9 changes: 4 additions & 5 deletions deepspeed/autotuning/scheduler.py
@@ -3,6 +3,7 @@
from numpy import BUFSIZE
import json
import subprocess
import os
import sys
import threading
import time
@@ -11,8 +12,7 @@
from tqdm import tqdm

from ..utils import logger
from .constants import *
from .constants import AUTOTUNING, AUTOTUNING_METRIC_PATH
from .config import AUTOTUNING
from .utils import get_val_by_key, search_error, was_interruptted
"""
thread-0: loop over experiment queue dispatching experiments if they become available
@@ -71,8 +71,7 @@ def schedule_experiments(self, exp_paths):
exp['name'])
if AUTOTUNING in exp["ds_config"]:
metric_file = os.path.join(result_dir, "metrics.json")
exp["ds_config"][AUTOTUNING][
AUTOTUNING_METRIC_PATH] = metric_file
exp["ds_config"][AUTOTUNING].metric_path = metric_file
stderr_file = os.path.join(result_dir, "stderr.log")
model_info_file = os.path.join(result_dir, "model_info.json")
metric_file = os.path.join(result_dir, "metrics.json")
@@ -237,7 +236,7 @@ def parse_results(self, metric):
)
continue

metric_file = exp["ds_config"][AUTOTUNING][AUTOTUNING_METRIC_PATH]
metric_file = exp["ds_config"][AUTOTUNING].metric_path

if os.path.exists(metric_file):
with open(metric_file, 'r') as f:
4 changes: 2 additions & 2 deletions deepspeed/autotuning/tuner/base_tuner.py
@@ -1,6 +1,6 @@
import sys

from deepspeed.autotuning.constants import *
from deepspeed.autotuning.config import DeepSpeedAutotuningConfig
from deepspeed.autotuning.utils import write_experiments
from deepspeed.utils import logger

@@ -12,7 +12,7 @@ def __init__(self, exps, resource_manager, metric):
self.best_iter = 0
self.best_exp = None
self.best_metric_val = None
self.metric = metric if metric else AUTOTUNING_METRIC_DEFAULT
self.metric = metric if metric else DeepSpeedAutotuningConfig().metric
logger.info(f"total number of exps = {len(self.all_exps)}")

def has_next(self):
9 changes: 3 additions & 6 deletions deepspeed/autotuning/tuner/model_based_tuner.py
@@ -1,12 +1,11 @@
import hjson

from ..constants import AUTOTUNING, AUTOTUNING_METRIC_PATH
from ..config import AUTOTUNING, MetricEnum
from .base_tuner import BaseTuner
from .cost_model import XGBoostCostModel
from .utils import *
from ..utils import *
import numbers
from ..constants import AUTOTUNING_METRIC_LATENCY

INIT_NUM = 2

@@ -69,9 +68,7 @@ def find_estimated_top_configs(self):

n = len(estimates)
top_idx = np.argsort(estimates)
top_idx_ret = top_idx if self.metric == AUTOTUNING_METRIC_LATENCY else top_idx[::
-1][:
n]
top_idx_ret = top_idx if self.metric == MetricEnum.latency else top_idx[::-1][:n]

# top_configs = [self.all_configs[i] for i in top_idx]

@@ -129,7 +126,7 @@ def update(self):
self.evaluated_perf.append(0.0)
continue

p = exp["ds_config"][AUTOTUNING][AUTOTUNING_METRIC_PATH]
p = exp["ds_config"][AUTOTUNING].metric_path
with open(p, 'r') as f:
results = hjson.load(f)
curr_iter = results[self.metric]
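Note on the rewrapped line above: np.argsort returns indices in ascending order of the estimates, so for a latency-style metric (lower is better) the order is kept, while for throughput-style metrics it is reversed. A small standalone illustration of that conditional (MetricEnum itself lives in the autotuning config, per the import in this diff):

```python
# Illustrates the conditional reversal used in find_estimated_top_configs().
import numpy as np

estimates = np.array([3.2, 1.1, 2.5])      # e.g. predicted latency or throughput
top_idx = np.argsort(estimates)            # ascending: [1, 2, 0]

best_first_for_latency = top_idx           # lowest estimate first
best_first_for_throughput = top_idx[::-1]  # highest estimate first
print(best_first_for_latency, best_first_for_throughput)
```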
3 changes: 1 addition & 2 deletions deepspeed/comm/comm.py
@@ -30,7 +30,6 @@
import os

from ..constants import TORCH_DISTRIBUTED_DEFAULT_PORT, default_pg_timeout
from .constants import *


class ReduceOp(Enum):
@@ -75,7 +74,7 @@


def _configure_using_config_file(config):
if config.comms_logger_enabled:
if config.enabled:
comms_logger.configure(config)


32 changes: 11 additions & 21 deletions deepspeed/comm/config.py
@@ -3,29 +3,19 @@
Licensed under the MIT license.
"""

from pydantic import BaseModel
from .constants import *
from deepspeed.runtime.config_utils import DeepSpeedConfigModel

COMMS_LOGGER = "comms_logger"

class CommsConfig(BaseModel):
class Config:
validate_all = True
validate_assignment = True
use_enum_values = True
extra = 'forbid'

def get_comms_config(param_dict):
comms_config_dict = param_dict.get(COMMS_LOGGER, {})
return DeepSpeedCommsConfig(**comms_config_dict)

class CommsLoggerConfig(CommsConfig):
enabled: bool = COMMS_LOGGER_ENABLED_DEFAULT
prof_all: bool = COMMS_LOGGER_PROF_ALL_DEFAULT
prof_ops: list = COMMS_LOGGER_PROF_OPS_DEFAULT
verbose: bool = COMMS_LOGGER_VERBOSE_DEFAULT
debug: bool = COMMS_LOGGER_DEBUG_DEFAULT


class DeepSpeedCommsConfig:
def __init__(self, ds_config):
self.comms_logger_enabled = 'comms_logger' in ds_config

if self.comms_logger_enabled:
self.comms_logger = CommsLoggerConfig(**ds_config['comms_logger'])
class DeepSpeedCommsConfig(DeepSpeedConfigModel):
enabled: bool = False
prof_all: bool = True
prof_ops: list = []
verbose: bool = False
debug: bool = False
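
Note: with deepspeed/comm/constants.py removed, the comms-logger defaults now live directly on the config model, and comm.py gates on config.enabled instead of the old comms_logger_enabled wrapper flag. A hedged usage sketch, assuming DeepSpeedConfigModel behaves like a pydantic BaseModel:

```python
# Sketch based only on the fields and helper shown in this diff.
from deepspeed.comm.config import DeepSpeedCommsConfig, get_comms_config

param_dict = {
    "comms_logger": {          # the COMMS_LOGGER section key
        "enabled": True,
        "prof_ops": ["all_reduce"],
    }
}

comms_config = get_comms_config(param_dict)  # -> DeepSpeedCommsConfig
assert isinstance(comms_config, DeepSpeedCommsConfig)

# _configure_using_config_file() in comm.py now checks the model's own flag:
if comms_config.enabled:
    print(comms_config.prof_all, comms_config.verbose, comms_config.debug)
```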
44 changes: 0 additions & 44 deletions deepspeed/comm/constants.py

This file was deleted.

31 changes: 15 additions & 16 deletions deepspeed/compression/compress.py
@@ -1,8 +1,8 @@
import re
from .helper import compression_preparation, fix_compression, recursive_getattr, is_module_compressible
from .config import get_compression_config
from ..runtime.config_utils import dict_raise_error_on_duplicate_keys
from .constants import *
from .config import get_compression_config
import os
import json

@@ -49,21 +49,20 @@ def get_module_name(group_name,
def get_compress_methods(model, compress_methods, mpu=None):
# extract the compression module for each method in compress_methods
layer_added_compress_methods = []
for method, method_content in compress_methods.items():
for method, method_content in compress_methods:
if LAYER_REDUCTION in method:
continue
# for loop different methods, i.e., weight quantization, activation quantization etc
exist_module_name = set()
shared_parameters = method_content[
SHARED_PARAMETERS] # get all the shared parameters
for group_name, method_parameters in method_content[DIFFERENT_GROUPS].items():
shared_parameters = method_content.shared_parameters # get all the shared parameters
for group_name, method_parameters in method_content.different_groups.items():
# for loop different groups, i.e., weight quantization group 1, weight quantization group 2 etc
module_name_list = []
related_module_name_list = []
if method_parameters[DIFFERENT_GROUPS_RELATED_MODULE_SCOPE]:
if method_parameters.related_modules:
# this is used for head/row/channel pruning, if users provide the related module scope, we can shrink the layer dim for them
# otherwise we just mask those as zeros
for key_word, related_key_words in zip(method_parameters[DIFFERENT_GROUPS_MODULE_SCOPE], method_parameters[DIFFERENT_GROUPS_RELATED_MODULE_SCOPE]):
for key_word, related_key_words in zip(method_parameters.modules, method_parameters.related_modules):
module_name, exist_module_name = get_module_name(group_name, model, key_word, exist_module_name, mpu=mpu)
module_name_list.append(module_name)
tmp_related_module_name_list = []
@@ -73,15 +72,15 @@ def get_compress_methods(model, compress_methods, mpu=None):
tmp_related_module_name_list.append(module_name)
related_module_name_list.append(tmp_related_module_name_list)
else:
for key_word in method_parameters[DIFFERENT_GROUPS_MODULE_SCOPE]:
for key_word in method_parameters.modules:
module_name, exist_module_name = get_module_name(group_name, model, key_word, exist_module_name, mpu=mpu)
module_name_list.append(module_name)

if module_name_list:
# combine shared parameters with each group
combined_method_parameters = {
**(method_parameters.copy().pop(DIFFERENT_GROUPS_PARAMETERS)),
**shared_parameters
**method_parameters.dict().pop(DIFFERENT_GROUPS_PARAMETERS),
**shared_parameters.dict()
}
compression_item = [
module_name_list,
@@ -112,7 +111,7 @@ def init_compression(model, deepspeed_config, teacher_model=None, mpu=None):
c_model = model

# For layer reduction
if compress_methods[LAYER_REDUCTION][LAYER_REDUCTION_ENABLED]:
if compress_methods.layer_reduction.enabled:
assert teacher_model is not None, "Teacher model is required for layer reduction"
student_initialization(c_model, teacher_model, deepspeed_config)

@@ -135,7 +134,7 @@ def redundancy_clean(model, deepspeed_config, mpu=None):
mpu
The mpu module for Row/Column parallelism
"""
compress_methods = get_compression_config(check_deepspeed_config(deepspeed_config))
compress_methods = deepspeed_config.compression_config
if hasattr(model, 'module'):
c_model = model.module
else:
@@ -191,12 +190,12 @@ def student_initialization(student_model, teacher_model, deepspeed_config):
The path of ds_config
'''
config = get_compression_config(check_deepspeed_config(deepspeed_config))
compress_methods = config[LAYER_REDUCTION]
compress_methods = config.layer_reduction

module_name_prefix = compress_methods[MODULE_NAME_PREFIX]
teacher_layer = compress_methods[TEACHER_LAYER]
module_name_prefix = compress_methods.module_name_prefix
teacher_layer = compress_methods.teacher_layer
student_layer = [i for i in range(len(teacher_layer))]
other_module_name = compress_methods[OTHER_MODULE_NAME]
other_module_name = compress_methods.other_module_name
'''
name_prefix (`str`)
The prefix name before the layer #.
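Note: the compression config is likewise consumed as nested config objects here; attribute access (method_content.shared_parameters, method_parameters.modules, compress_methods.layer_reduction.enabled, and so on) replaces the constant-keyed dict lookups, and the top-level loop now iterates the model directly instead of calling .items(). A hedged, self-contained illustration of why that loop works, using hypothetical stand-in models (pydantic models iterate as (field_name, value) pairs):

```python
# Hypothetical stand-ins: the real compression config models live in
# deepspeed/compression/config.py; this only demonstrates the iteration
# and attribute-access pattern the new compress.py code relies on.
from pydantic import BaseModel

class MethodConfig(BaseModel):
    enabled: bool = False
    modules: list = ["*"]

class CompressMethods(BaseModel):
    weight_quantization: MethodConfig = MethodConfig()
    activation_quantization: MethodConfig = MethodConfig(enabled=True)

compress_methods = CompressMethods()
for method, method_content in compress_methods:   # yields (name, value) pairs
    print(method, method_content.enabled, method_content.modules)
```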