From 2b4ce8d2c071fe3afc69ff230c09e9b3bc33b3d6 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Mon, 29 Jul 2024 22:17:43 -0400 Subject: [PATCH 01/23] Removal of transformers logger and addition of python logger Signed-off-by: Abhishek --- tuning/config/configs.py | 7 +++ tuning/sft_trainer.py | 58 +++++++++++-------- tuning/trackers/aimstack_tracker.py | 7 ++- tuning/trackers/filelogging_tracker.py | 7 ++- tuning/trackers/tracker_factory.py | 7 ++- tuning/trainercontroller/callback.py | 7 ++- .../controllermetrics/eval_metrics.py | 7 ++- .../history_based_metrics.py | 7 ++- .../operations/hfcontrols.py | 7 ++- tuning/trainercontroller/patience.py | 7 ++- tuning/utils/data_type_utils.py | 7 ++- tuning/utils/preprocessing_utils.py | 7 ++- 12 files changed, 92 insertions(+), 43 deletions(-) diff --git a/tuning/config/configs.py b/tuning/config/configs.py index 92fb4f8f8..30d342b72 100644 --- a/tuning/config/configs.py +++ b/tuning/config/configs.py @@ -136,6 +136,13 @@ class TrainingArguments(transformers.TrainingArguments): + "Requires additional configs, see tuning.configs/tracker_configs.py" }, ) + log_level: str = field( + default="warning", + metadata={ + "help": "The log level to adopt during training. \ + Possible values are 'debug', 'info', 'warning', 'error' and 'critical'" + }, + ) @dataclass diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 0e360ad4f..4c3946646 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -33,7 +33,8 @@ LlamaTokenizerFast, TrainerCallback, ) -from transformers.utils import is_accelerate_available, logging +from transformers.utils import is_accelerate_available +import logging, os from trl import SFTConfig, SFTTrainer import fire import transformers @@ -111,8 +112,6 @@ def train( fused_lora and fast_kernels must used together (may change in future). \ """ - logger = logging.get_logger("sft_trainer") - # Validate parameters if (not isinstance(train_args.num_train_epochs, (float, int))) or ( train_args.num_train_epochs <= 0 @@ -218,9 +217,9 @@ def train( ) max_seq_length = min(train_args.max_seq_length, tokenizer.model_max_length) - logger.info("Max sequence length is %s", max_seq_length) + logging.info("Max sequence length is %s", max_seq_length) if train_args.max_seq_length > tokenizer.model_max_length: - logger.warning( + logging.warning( "max_seq_length %s exceeds tokenizer.model_max_length \ %s, using tokenizer.model_max_length %s", train_args.max_seq_length, @@ -231,16 +230,16 @@ def train( # TODO: we need to change this, perhaps follow what open instruct does? 
special_tokens_dict = {} if tokenizer.pad_token is None: - logger.warning("PAD token set to default, missing in tokenizer") + logging.warning("PAD token set to default, missing in tokenizer") special_tokens_dict["pad_token"] = configs.DEFAULT_PAD_TOKEN if tokenizer.eos_token is None: - logger.warning("EOS token set to default, missing in tokenizer") + logging.warning("EOS token set to default, missing in tokenizer") special_tokens_dict["eos_token"] = configs.DEFAULT_EOS_TOKEN if tokenizer.bos_token is None: - logger.warning("BOS token set to default, missing in tokenizer") + logging.warning("BOS token set to default, missing in tokenizer") special_tokens_dict["bos_token"] = configs.DEFAULT_BOS_TOKEN if tokenizer.unk_token is None: - logger.warning("UNK token set to default, missing in tokenizer") + logging.warning("UNK token set to default, missing in tokenizer") special_tokens_dict["unk_token"] = configs.DEFAULT_UNK_TOKEN # TODO: lower priority but understand if resizing impacts inference quality and why its needed. @@ -254,11 +253,11 @@ def train( # Configure the collator and validate args related to packing prior to formatting the dataset if train_args.packing: - logger.info("Packing is set to True") + logging.info("Packing is set to True") data_collator = None packing = True else: - logger.info("Packing is set to False") + logging.info("Packing is set to False") packing = False # Validate if data args are set properly @@ -317,7 +316,7 @@ def train( tracker.track(metric=v, name=k, stage="additional_metrics") tracker.set_params(params=exp_metadata, name="experiment_metadata") except ValueError as e: - logger.error( + logging.error( "Exception while saving additional metrics and metadata %s", repr(e), ) @@ -456,11 +455,8 @@ def parse_arguments(parser, json_config=None): def main(**kwargs): # pylint: disable=unused-argument - logger = logging.get_logger("__main__") - parser = get_parser() job_config = get_json_config() - logger.debug("Input args parsed: %s", job_config) # accept arguments via command-line or JSON try: ( @@ -475,7 +471,23 @@ def main(**kwargs): # pylint: disable=unused-argument fusedops_kernels_config, exp_metadata, ) = parse_arguments(parser, job_config) - logger.debug( + + # Configure log level: + # If log_level is "passive" (not set by cli), then check environment variable. + if training_args.log_level == "passive": + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + + # If log_level is set by environment variable, assign the transformers log_level value + # along with log level value of python logger + # OR else set both as default value ("warning") + logging.basicConfig(level=LOGLEVEL) + training_args.log_level=LOGLEVEL.lower() + + # If log_level is set using cli argument, set same value to log level of python logger + else: + logging.basicConfig(level=training_args.log_level.upper()) + + logging.debug( "Input args parsed: \ model_args %s, data_args %s, training_args %s, trainer_controller_args %s, \ tune_config %s, file_logger_config, %s aim_config %s, \ @@ -505,12 +517,12 @@ def main(**kwargs): # pylint: disable=unused-argument try: metadata = json.loads(exp_metadata) if metadata is None or not isinstance(metadata, Dict): - logger.warning( + logging.warning( "metadata cannot be converted to simple k:v dict ignoring" ) metadata = None except ValueError as e: - logger.error( + logging.error( "failed while parsing extra metadata. 
pass a valid json %s", repr(e) ) @@ -533,27 +545,27 @@ def main(**kwargs): # pylint: disable=unused-argument fusedops_kernels_config=fusedops_kernels_config, ) except (MemoryError, OutOfMemoryError) as e: - logger.error(traceback.format_exc()) + logging.error(traceback.format_exc()) write_termination_log(f"OOM error during training. {e}") sys.exit(INTERNAL_ERROR_EXIT_CODE) except FileNotFoundError as e: - logger.error(traceback.format_exc()) + logging.error(traceback.format_exc()) write_termination_log("Unable to load file: {}".format(e)) sys.exit(USER_ERROR_EXIT_CODE) except HFValidationError as e: - logger.error(traceback.format_exc()) + logging.error(traceback.format_exc()) write_termination_log( f"There may be a problem with loading the model. Exception: {e}" ) sys.exit(USER_ERROR_EXIT_CODE) except (TypeError, ValueError, EnvironmentError) as e: - logger.error(traceback.format_exc()) + logging.error(traceback.format_exc()) write_termination_log( f"Exception raised during training. This may be a problem with your input: {e}" ) sys.exit(USER_ERROR_EXIT_CODE) except Exception as e: # pylint: disable=broad-except - logger.error(traceback.format_exc()) + logging.error(traceback.format_exc()) write_termination_log(f"Unhandled exception during training: {e}") sys.exit(INTERNAL_ERROR_EXIT_CODE) diff --git a/tuning/trackers/aimstack_tracker.py b/tuning/trackers/aimstack_tracker.py index bc2f8364d..3e2ca03a4 100644 --- a/tuning/trackers/aimstack_tracker.py +++ b/tuning/trackers/aimstack_tracker.py @@ -18,7 +18,7 @@ # Third Party from aim.hugging_face import AimCallback # pylint: disable=import-error -from transformers.utils import logging +import logging # Local from .tracker import Tracker @@ -97,7 +97,10 @@ def __init__(self, tracker_config: AimConfig): information about the repo or the server and port where aim db is present. """ super().__init__(name="aim", tracker_config=tracker_config) - self.logger = logging.get_logger("aimstack_tracker") + # Configure log level + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + logging.basicConfig(level=LOGLEVEL) + self.logger = logging.getLogger("aimstack_tracker") def get_hf_callback(self): """Returns the aim.hugging_face.AimCallback object associated with this tracker. diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index 213377d96..8cacda932 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -19,7 +19,7 @@ # Third Party from transformers import TrainerCallback -from transformers.utils import logging +import logging # Local from .tracker import Tracker @@ -80,7 +80,10 @@ def __init__(self, tracker_config: FileLoggingTrackerConfig): which contains the location of file where logs are recorded. """ super().__init__(name="file_logger", tracker_config=tracker_config) - self.logger = logging.get_logger("file_logging_tracker") + # Configure log level + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + logging.basicConfig(level=LOGLEVEL) + self.logger = logging.getLogger("file_logging_tracker") def get_hf_callback(self): """Returns the FileLoggingCallback object associated with this tracker. 
diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index 98771c143..e894dac07 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -16,14 +16,17 @@ import dataclasses # Third Party -from transformers.utils import logging +import logging, os from transformers.utils.import_utils import _is_package_available # Local from .filelogging_tracker import FileLoggingTracker from tuning.config.tracker_configs import FileLoggingTrackerConfig, TrackerConfigFactory -logger = logging.get_logger("tracker_factory") +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger("tracker_factory") # Information about all registered trackers diff --git a/tuning/trainercontroller/callback.py b/tuning/trainercontroller/callback.py index b7cd005b5..6d581fdfd 100644 --- a/tuning/trainercontroller/callback.py +++ b/tuning/trainercontroller/callback.py @@ -29,7 +29,7 @@ TrainerState, TrainingArguments, ) -from transformers.utils import logging +import logging import yaml # Local @@ -45,7 +45,10 @@ from tuning.trainercontroller.patience import PatienceControl from tuning.utils.evaluator import MetricUnavailableError, RuleEvaluator -logger = logging.get_logger(__name__) +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger(__name__) # Configuration keys CONTROLLER_METRICS_KEY = "controller_metrics" diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index 696714437..1dfb83c8a 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -19,12 +19,15 @@ from typing import Any # Third Party -from transformers.utils import logging +import logging, os # Local from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler -logger = logging.get_logger(__name__) +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger(__name__) class EvalMetrics(MetricHandler): diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index ae547d3c6..d5eaf39ee 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -21,12 +21,15 @@ # Third Party from transformers import TrainerState -from transformers.utils import logging +import logging, os # Local from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler -logger = logging.get_logger(__name__) +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger(__name__) METRICS_KEY = "metrics" LOG_LOSS_KEY = "loss" TRAINING_LOSS_KEY = "training_loss" diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index 2bba9a1d2..c3abb9acf 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -5,12 +5,15 @@ # Third Party from transformers import TrainerControl -from transformers.utils import logging +import logging, os # Local from .operation import Operation -logger 
= logging.get_logger(__name__) +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger(__name__) class HFControls(Operation): diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index b8098fdf0..2697df151 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -16,7 +16,7 @@ # https://spdx.dev/learn/handling-license-info/ # Third Party -from transformers.utils import logging +import logging, os # Resets the patience if the rule outcome happens to be false. # Here, the expectation is to have unbroken "True"s for patience @@ -31,7 +31,10 @@ # will be exceeded afer the fifth event. MODE_NO_RESET_ON_FAILURE = "no_reset_on_failure" -logger = logging.get_logger(__name__) +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger(__name__) class PatienceControl: diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index cefebb100..7e6da6c52 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -16,10 +16,13 @@ from typing import Union # Third Party -from transformers.utils import logging +import logging, os import torch -logger = logging.get_logger("data_utils") +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger("data_utils") def str_to_torch_dtype(dtype_str: str) -> torch.dtype: diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index 88db911a6..a2dfc0040 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -18,7 +18,7 @@ # Third Party from datasets import Dataset from transformers import AutoTokenizer, DataCollatorForSeq2Seq -from transformers.utils import logging +import logging, os from trl import DataCollatorForCompletionOnlyLM import datasets @@ -26,7 +26,10 @@ from tuning.config import configs from tuning.utils.data_utils import apply_custom_formatting_template -logger = logging.get_logger("sft_trainer_preprocessing") +# Configure log level +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +logging.basicConfig(level=LOGLEVEL) +logger = logging.getLogger("sft_trainer_preprocessing") def validate_data_args(data_args: configs.DataArguments, packing: bool): From 76fffc521e91a5773d95d2576bdd4018161192ca Mon Sep 17 00:00:00 2001 From: Abhishek Date: Mon, 29 Jul 2024 22:54:07 -0400 Subject: [PATCH 02/23] FMT and lint check: Removal of transformers logger and addition of python logger Signed-off-by: Abhishek --- tuning/sft_trainer.py | 11 ++++++----- tuning/trackers/aimstack_tracker.py | 2 +- tuning/trackers/filelogging_tracker.py | 2 +- tuning/trackers/tracker_factory.py | 3 ++- tuning/trainercontroller/callback.py | 2 +- .../controllermetrics/eval_metrics.py | 5 ++--- .../controllermetrics/history_based_metrics.py | 3 ++- tuning/trainercontroller/operations/hfcontrols.py | 3 ++- tuning/trainercontroller/patience.py | 5 +++-- tuning/utils/data_type_utils.py | 3 ++- tuning/utils/preprocessing_utils.py | 3 ++- 11 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 4c3946646..ed1991f8e 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -16,6 +16,8 @@ from typing import Dict, List, Optional, Union import dataclasses import json +import 
logging +import os import sys import time import traceback @@ -34,7 +36,6 @@ TrainerCallback, ) from transformers.utils import is_accelerate_available -import logging, os from trl import SFTConfig, SFTTrainer import fire import transformers @@ -472,16 +473,16 @@ def main(**kwargs): # pylint: disable=unused-argument exp_metadata, ) = parse_arguments(parser, job_config) - # Configure log level: + # Configure log level: # If log_level is "passive" (not set by cli), then check environment variable. if training_args.log_level == "passive": LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() - # If log_level is set by environment variable, assign the transformers log_level value - # along with log level value of python logger + # If log_level is set by environment variable, assign the transformers log_level value + # along with log level value of python logger # OR else set both as default value ("warning") logging.basicConfig(level=LOGLEVEL) - training_args.log_level=LOGLEVEL.lower() + training_args.log_level = LOGLEVEL.lower() # If log_level is set using cli argument, set same value to log level of python logger else: diff --git a/tuning/trackers/aimstack_tracker.py b/tuning/trackers/aimstack_tracker.py index 3e2ca03a4..7c807b2ad 100644 --- a/tuning/trackers/aimstack_tracker.py +++ b/tuning/trackers/aimstack_tracker.py @@ -14,11 +14,11 @@ # Standard import json +import logging import os # Third Party from aim.hugging_face import AimCallback # pylint: disable=import-error -import logging # Local from .tracker import Tracker diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index 8cacda932..55ad5edf7 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -15,11 +15,11 @@ # Standard from datetime import datetime import json +import logging import os # Third Party from transformers import TrainerCallback -import logging # Local from .tracker import Tracker diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index e894dac07..df70a2ad4 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -14,9 +14,10 @@ # Standard import dataclasses +import logging +import os # Third Party -import logging, os from transformers.utils.import_utils import _is_package_available # Local diff --git a/tuning/trainercontroller/callback.py b/tuning/trainercontroller/callback.py index 6d581fdfd..c4d055039 100644 --- a/tuning/trainercontroller/callback.py +++ b/tuning/trainercontroller/callback.py @@ -18,6 +18,7 @@ # Standard from typing import Dict, List, Union import inspect +import logging import os import re @@ -29,7 +30,6 @@ TrainerState, TrainingArguments, ) -import logging import yaml # Local diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index 1dfb83c8a..79de526cb 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -17,9 +17,8 @@ # Standard from typing import Any - -# Third Party -import logging, os +import logging +import os # Local from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index d5eaf39ee..e434523f7 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ 
b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -18,10 +18,11 @@ # Standard from collections import deque from typing import Any +import logging +import os # Third Party from transformers import TrainerState -import logging, os # Local from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index c3abb9acf..88c12cbba 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -1,11 +1,12 @@ # Standard from dataclasses import fields import inspect +import logging +import os import re # Third Party from transformers import TrainerControl -import logging, os # Local from .operation import Operation diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index 2697df151..e396dc78f 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -15,8 +15,9 @@ # SPDX-License-Identifier: Apache-2.0 # https://spdx.dev/learn/handling-license-info/ -# Third Party -import logging, os +# Standard +import logging +import os # Resets the patience if the rule outcome happens to be false. # Here, the expectation is to have unbroken "True"s for patience diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index 7e6da6c52..40ec77538 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -14,9 +14,10 @@ # Standard from typing import Union +import logging +import os # Third Party -import logging, os import torch # Configure log level diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index a2dfc0040..3272e3b30 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -14,11 +14,12 @@ # Standard from typing import Any, Callable, Dict, Optional import json +import logging +import os # Third Party from datasets import Dataset from transformers import AutoTokenizer, DataCollatorForSeq2Seq -import logging, os from trl import DataCollatorForCompletionOnlyLM import datasets From 773f0d1f727dc7fd62cbc7a3a832010032740ed6 Mon Sep 17 00:00:00 2001 From: Sukriti Sharma Date: Mon, 29 Jul 2024 14:20:02 -0600 Subject: [PATCH 03/23] fix: remove lm_head for granite with llama arch models (#258) * initial code for deleting lm_head Signed-off-by: Anh-Uong * fix logic for copying checkpoint Signed-off-by: Anh-Uong * fix check that embed_tokens and lm_head weights are the same Signed-off-by: Anh-Uong * fix warning assertion Signed-off-by: Anh-Uong * fix lm_head check, remove test Signed-off-by: Anh-Uong * small fixes from code review Signed-off-by: Anh-Uong * fmt Signed-off-by: Anh-Uong --------- Signed-off-by: Anh-Uong Co-authored-by: Anh-Uong Signed-off-by: Abhishek --- build/accelerate_launch.py | 103 +++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 9 deletions(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index 9af5ad809..ee8718b5d 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -26,9 +26,13 @@ import tempfile import shutil from pathlib import Path +import json # Third Party from accelerate.commands.launch import launch_command +from transformers import AutoModelForCausalLM, AutoTokenizer +from peft import PeftModel +from torch import bfloat16 # Local from build.utils import ( @@ -44,10 +48,18 @@ USER_ERROR_EXIT_CODE, 
INTERNAL_ERROR_EXIT_CODE, ) +from tuning.data import tokenizer_data_utils ERROR_LOG = "/dev/termination-log" +def get_base_model_from_adapter_config(adapter_config): + """Given path to adapter_config.json file, returns the base model name""" + with open(adapter_config, "r", encoding="utf-8") as config_file: + adapter_config = json.load(config_file) + return adapter_config.get("base_model_name_or_path") + + def main(): LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) @@ -118,16 +130,89 @@ def main(): sys.exit(INTERNAL_ERROR_EXIT_CODE) try: - # copy last checkpoint into mounted output dir - pt_checkpoint_dir = get_highest_checkpoint(tempdir) - logging.info( - "Copying last checkpoint %s into output dir %s", - pt_checkpoint_dir, - original_output_dir, - ) - copy_checkpoint( - os.path.join(tempdir, pt_checkpoint_dir), original_output_dir + last_checkpoint_dir = get_highest_checkpoint(tempdir) + last_checkpoint_path = os.path.join(tempdir, last_checkpoint_dir) + + use_flash_attn = job_config.get("use_flash_attn", True) + adapter_config_path = os.path.join( + last_checkpoint_path, "adapter_config.json" ) + tokenizer = AutoTokenizer.from_pretrained(last_checkpoint_path) + + if os.path.exists(adapter_config_path): + base_model_path = get_base_model_from_adapter_config( + adapter_config_path + ) + base_model = AutoModelForCausalLM.from_pretrained( + base_model_path, + attn_implementation="flash_attention_2" if use_flash_attn else None, + torch_dtype=bfloat16 if use_flash_attn else None, + ) + + # since the peft library (PEFTModelForCausalLM) does not handle cases + # where the model's layers are modified, in our case the embedding layer + # is modified, so we resize the backbone model's embedding layer with our own + # utility before passing it along to load the PEFT model. + tokenizer_data_utils.tokenizer_and_embedding_resize( + {}, tokenizer=tokenizer, model=base_model + ) + model = PeftModel.from_pretrained( + base_model, + last_checkpoint_path, + attn_implementation="flash_attention_2" if use_flash_attn else None, + torch_dtype=bfloat16 if use_flash_attn else None, + ) + else: + model = AutoModelForCausalLM.from_pretrained( + last_checkpoint_path, + attn_implementation="flash_attention_2" if use_flash_attn else None, + torch_dtype=bfloat16 if use_flash_attn else None, + ) + + model_arch = model.config.model_type + # check that it is a granite model with llama architecture with tied weights + # ie. 
lm_head is duplicate of embeddings + + # a fine tuned model will have params_dict.get("model.embed_tokens.weight") + # a prompt adapter has params_dict.get("base_model.model.embed_tokens.weight") + # a lora adapter has params_dict.get("base_model.model.model.embed_tokens.weight") + copy_checkpoint_bool = True + if model_arch == "llama" and hasattr(model, "lm_head"): + if ( + # lora tuned model has an addt model layer + ( + hasattr(model.model, "model") + and model.lm_head.weight.untyped_storage().data_ptr() + == model.model.model.embed_tokens.weight.untyped_storage().data_ptr() + ) + # prompt tuned model or fine tuned model + or ( + hasattr(model.model, "embed_tokens") + and model.lm_head.weight.untyped_storage().data_ptr() + == model.model.embed_tokens.weight.untyped_storage().data_ptr() + ) + ): + + copy_checkpoint_bool = False + logging.info("Removing lm_head from checkpoint") + del model.lm_head.weight + + if hasattr(model, "lm_head.weight"): + logging.warning("Failed to delete lm_head.weight from model") + + logging.info("Saving checkpoint to %s", original_output_dir) + model.save_pretrained(original_output_dir) + # save tokenizer with model + tokenizer.save_pretrained(original_output_dir) + + # copy last checkpoint into mounted output dir + if copy_checkpoint_bool: + logging.info( + "Copying last checkpoint %s into output dir %s", + last_checkpoint_dir, + original_output_dir, + ) + copy_checkpoint(last_checkpoint_path, original_output_dir) except Exception as e: # pylint: disable=broad-except logging.error(traceback.format_exc()) write_termination_log( From 8846bc27188ef04e3be560cd19175376dbeb0d9b Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 30 Jul 2024 13:21:38 -0400 Subject: [PATCH 04/23] Fix: Addition of env var TRANSFORMERS_VERBOSITY check Signed-off-by: Abhishek --- build/accelerate_launch.py | 2 +- tuning/sft_trainer.py | 4 +++- tuning/trackers/aimstack_tracker.py | 2 +- tuning/trackers/filelogging_tracker.py | 2 +- tuning/trackers/tracker_factory.py | 2 +- tuning/trainercontroller/callback.py | 2 +- tuning/trainercontroller/controllermetrics/eval_metrics.py | 2 +- .../controllermetrics/history_based_metrics.py | 2 +- tuning/trainercontroller/operations/hfcontrols.py | 2 +- tuning/trainercontroller/patience.py | 2 +- tuning/utils/data_type_utils.py | 2 +- tuning/utils/preprocessing_utils.py | 2 +- 12 files changed, 14 insertions(+), 12 deletions(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index ee8718b5d..2c7ab289c 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -61,7 +61,7 @@ def get_base_model_from_adapter_config(adapter_config): def main(): - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) if not os.getenv("TERMINATION_LOG_FILE"): diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index ed1991f8e..409b76abd 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -475,8 +475,10 @@ def main(**kwargs): # pylint: disable=unused-argument # Configure log level: # If log_level is "passive" (not set by cli), then check environment variable. + # First check TRANSFORMERS_VERBOSITY and then Fallback to check LOG_LEVEL. + # If both env var is not set, then assign "WARNING" as default. 
if training_args.log_level == "passive": - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() # If log_level is set by environment variable, assign the transformers log_level value # along with log level value of python logger diff --git a/tuning/trackers/aimstack_tracker.py b/tuning/trackers/aimstack_tracker.py index 7c807b2ad..799eaa927 100644 --- a/tuning/trackers/aimstack_tracker.py +++ b/tuning/trackers/aimstack_tracker.py @@ -98,7 +98,7 @@ def __init__(self, tracker_config: AimConfig): """ super().__init__(name="aim", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) self.logger = logging.getLogger("aimstack_tracker") diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index 55ad5edf7..acfd3d380 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -81,7 +81,7 @@ def __init__(self, tracker_config: FileLoggingTrackerConfig): """ super().__init__(name="file_logger", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) self.logger = logging.getLogger("file_logging_tracker") diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index df70a2ad4..46c974778 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -25,7 +25,7 @@ from tuning.config.tracker_configs import FileLoggingTrackerConfig, TrackerConfigFactory # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("tracker_factory") diff --git a/tuning/trainercontroller/callback.py b/tuning/trainercontroller/callback.py index c4d055039..5e82147ac 100644 --- a/tuning/trainercontroller/callback.py +++ b/tuning/trainercontroller/callback.py @@ -46,7 +46,7 @@ from tuning.utils.evaluator import MetricUnavailableError, RuleEvaluator # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index 79de526cb..5e4e97f56 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -24,7 +24,7 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index e434523f7..3aa43674a 100644 
--- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -28,7 +28,7 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) METRICS_KEY = "metrics" diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index 88c12cbba..d33438517 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -12,7 +12,7 @@ from .operation import Operation # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index e396dc78f..8b4fa38d7 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -33,7 +33,7 @@ MODE_NO_RESET_ON_FAILURE = "no_reset_on_failure" # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index 40ec77538..17673d8c7 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -21,7 +21,7 @@ import torch # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("data_utils") diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index 3272e3b30..4d26fc124 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -28,7 +28,7 @@ from tuning.utils.data_utils import apply_custom_formatting_template # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() +LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("sft_trainer_preprocessing") From 4ed38783f0a80537985068d352aa771e46271228 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 30 Jul 2024 13:28:22 -0400 Subject: [PATCH 05/23] FMT Fix: Addition of env var TRANSFORMERS_VERBOSITY check Signed-off-by: Abhishek --- build/accelerate_launch.py | 4 +++- tuning/sft_trainer.py | 6 ++++-- tuning/trackers/aimstack_tracker.py | 4 +++- tuning/trackers/filelogging_tracker.py | 4 +++- tuning/trackers/tracker_factory.py | 4 +++- tuning/trainercontroller/callback.py | 4 +++- tuning/trainercontroller/controllermetrics/eval_metrics.py | 4 +++- .../controllermetrics/history_based_metrics.py | 4 +++- tuning/trainercontroller/operations/hfcontrols.py | 4 +++- tuning/trainercontroller/patience.py | 4 +++- tuning/utils/data_type_utils.py | 4 +++- tuning/utils/preprocessing_utils.py | 4 +++- 12 files changed, 37 insertions(+), 13 deletions(-) diff --git 
a/build/accelerate_launch.py b/build/accelerate_launch.py index 2c7ab289c..f57f5c439 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -61,7 +61,9 @@ def get_base_model_from_adapter_config(adapter_config): def main(): - LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() + LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") + ).upper() logging.basicConfig(level=LOGLEVEL) if not os.getenv("TERMINATION_LOG_FILE"): diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 409b76abd..66e61a8b6 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -475,10 +475,12 @@ def main(**kwargs): # pylint: disable=unused-argument # Configure log level: # If log_level is "passive" (not set by cli), then check environment variable. - # First check TRANSFORMERS_VERBOSITY and then Fallback to check LOG_LEVEL. + # First check TRANSFORMERS_VERBOSITY and then Fallback to check LOG_LEVEL. # If both env var is not set, then assign "WARNING" as default. if training_args.log_level == "passive": - LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() + LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") + ).upper() # If log_level is set by environment variable, assign the transformers log_level value # along with log level value of python logger diff --git a/tuning/trackers/aimstack_tracker.py b/tuning/trackers/aimstack_tracker.py index 799eaa927..334b391c1 100644 --- a/tuning/trackers/aimstack_tracker.py +++ b/tuning/trackers/aimstack_tracker.py @@ -98,7 +98,9 @@ def __init__(self, tracker_config: AimConfig): """ super().__init__(name="aim", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() + LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") + ).upper() logging.basicConfig(level=LOGLEVEL) self.logger = logging.getLogger("aimstack_tracker") diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index acfd3d380..7a5a5e591 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -81,7 +81,9 @@ def __init__(self, tracker_config: FileLoggingTrackerConfig): """ super().__init__(name="file_logger", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() + LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") + ).upper() logging.basicConfig(level=LOGLEVEL) self.logger = logging.getLogger("file_logging_tracker") diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index 46c974778..73f58c641 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -25,7 +25,9 @@ from tuning.config.tracker_configs import FileLoggingTrackerConfig, TrackerConfigFactory # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("tracker_factory") diff --git a/tuning/trainercontroller/callback.py b/tuning/trainercontroller/callback.py index 5e82147ac..ecb92eb82 100644 --- 
a/tuning/trainercontroller/callback.py +++ b/tuning/trainercontroller/callback.py @@ -46,7 +46,9 @@ from tuning.utils.evaluator import MetricUnavailableError, RuleEvaluator # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index 5e4e97f56..c6615479d 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -24,7 +24,9 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index 3aa43674a..daabba06e 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -28,7 +28,9 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) METRICS_KEY = "metrics" diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index d33438517..6f1f93dd9 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -12,7 +12,9 @@ from .operation import Operation # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index 8b4fa38d7..8203867b5 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -33,7 +33,9 @@ MODE_NO_RESET_ON_FAILURE = "no_reset_on_failure" # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index 17673d8c7..5a4509c69 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -21,7 +21,9 @@ import torch # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", 
os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("data_utils") diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index 4d26fc124..e9f7990bc 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -28,7 +28,9 @@ from tuning.utils.data_utils import apply_custom_formatting_template # Configure log level -LOGLEVEL = os.environ.get("TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING")).upper() +LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") +).upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("sft_trainer_preprocessing") From efb83631301d29401d0229ddcbb779cb7b958bbc Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 30 Jul 2024 16:22:35 -0400 Subject: [PATCH 06/23] Adding logging support to accelerate launch Signed-off-by: Abhishek --- build/accelerate_launch.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index f57f5c439..fdb373d65 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -60,12 +60,7 @@ def get_base_model_from_adapter_config(adapter_config): return adapter_config.get("base_model_name_or_path") -def main(): - LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") - ).upper() - logging.basicConfig(level=LOGLEVEL) - +def main(): if not os.getenv("TERMINATION_LOG_FILE"): os.environ["TERMINATION_LOG_FILE"] = ERROR_LOG @@ -81,7 +76,17 @@ def main(): "Must set environment variable 'SFT_TRAINER_CONFIG_JSON_PATH' \ or 'SFT_TRAINER_CONFIG_JSON_ENV_VAR'." ) - + + # Get log_level to be applied and Configure it. + LOGLEVEL=None + if "log_level" in job_config and job_config["log_level"]: + LOGLEVEL=job_config["log_level"].upper() + logging.basicConfig(level=LOGLEVEL) + else: + LOGLEVEL = os.environ.get( + "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") + ).upper() + logging.basicConfig(level=LOGLEVEL) args = process_accelerate_launch_args(job_config) logging.debug("accelerate launch parsed args: %s", args) except FileNotFoundError as e: @@ -112,6 +117,14 @@ def main(): updated_args = serialize_args(job_config) os.environ["SFT_TRAINER_CONFIG_JSON_ENV_VAR"] = updated_args + # LAUNCH COMMAND ALWAYS TAKES LOG LEVEL FROM ENV VAR EVEN THOUGH, \ + # CODE LOGIC IN SFTTRAINER.PY TELLS TO TAKE FROM CLI. 
+ # HENCE ITS BETTER TO MODIFY ENV VAR HERE AND ASSIGN THE VALUE BASED ON CODE LOGIC HERE, + # THEREFORE APPLYING THIS LOG LEVEL TO WHOLE TUNING JOB AND PYTHON NATIVE LOGGING + if os.environ.get("TRANSFORMERS_VERBOSITY"): + os.environ['TRANSFORMERS_VERBOSITY']=LOGLEVEL.lower() + else: + os.environ['LOG_LEVEL']=LOGLEVEL launch_command(args) except subprocess.CalledProcessError as e: # If the subprocess throws an exception, the base exception is hidden in the From 2ecfaf790faf0ab015f8c4f72d10814207c53d7f Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 30 Jul 2024 16:23:59 -0400 Subject: [PATCH 07/23] FMT_FIX: Adding logging support to accelerate launch Signed-off-by: Abhishek --- build/accelerate_launch.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index fdb373d65..312e53e7e 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -60,7 +60,7 @@ def get_base_model_from_adapter_config(adapter_config): return adapter_config.get("base_model_name_or_path") -def main(): +def main(): if not os.getenv("TERMINATION_LOG_FILE"): os.environ["TERMINATION_LOG_FILE"] = ERROR_LOG @@ -76,13 +76,13 @@ def main(): "Must set environment variable 'SFT_TRAINER_CONFIG_JSON_PATH' \ or 'SFT_TRAINER_CONFIG_JSON_ENV_VAR'." ) - + # Get log_level to be applied and Configure it. - LOGLEVEL=None + LOGLEVEL = None if "log_level" in job_config and job_config["log_level"]: - LOGLEVEL=job_config["log_level"].upper() + LOGLEVEL = job_config["log_level"].upper() logging.basicConfig(level=LOGLEVEL) - else: + else: LOGLEVEL = os.environ.get( "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") ).upper() @@ -118,13 +118,13 @@ def main(): os.environ["SFT_TRAINER_CONFIG_JSON_ENV_VAR"] = updated_args # LAUNCH COMMAND ALWAYS TAKES LOG LEVEL FROM ENV VAR EVEN THOUGH, \ - # CODE LOGIC IN SFTTRAINER.PY TELLS TO TAKE FROM CLI. - # HENCE ITS BETTER TO MODIFY ENV VAR HERE AND ASSIGN THE VALUE BASED ON CODE LOGIC HERE, - # THEREFORE APPLYING THIS LOG LEVEL TO WHOLE TUNING JOB AND PYTHON NATIVE LOGGING + # CODE LOGIC IN SFTTRAINER.PY TELLS TO TAKE FROM CLI. 
+ # HENCE ITS BETTER TO MODIFY ENV VAR HERE AND ASSIGN THE VALUE BASED ON CODE LOGIC HERE, + # THEREFORE APPLYING THIS LOG LEVEL TO WHOLE TUNING JOB AND PYTHON NATIVE LOGGING if os.environ.get("TRANSFORMERS_VERBOSITY"): - os.environ['TRANSFORMERS_VERBOSITY']=LOGLEVEL.lower() + os.environ["TRANSFORMERS_VERBOSITY"] = LOGLEVEL.lower() else: - os.environ['LOG_LEVEL']=LOGLEVEL + os.environ["LOG_LEVEL"] = LOGLEVEL launch_command(args) except subprocess.CalledProcessError as e: # If the subprocess throws an exception, the base exception is hidden in the From 5d08efba654c58c9940add8963ffc28ff475da2b Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 1 Aug 2024 14:08:45 -0400 Subject: [PATCH 08/23] Logging changes and unit tests added Signed-off-by: Abhishek --- build/accelerate_launch.py | 18 ++---- tests/test_sft_trainer.py | 60 +++++++++++++++++++ tuning/config/configs.py | 2 +- tuning/sft_trainer.py | 57 ++++++++++++------ tuning/trackers/aimstack_tracker.py | 4 +- tuning/trackers/filelogging_tracker.py | 4 +- tuning/trackers/tracker_factory.py | 4 +- tuning/trainercontroller/callback.py | 4 +- .../controllermetrics/eval_metrics.py | 4 +- .../history_based_metrics.py | 4 +- .../operations/hfcontrols.py | 4 +- tuning/trainercontroller/patience.py | 4 +- tuning/utils/data_type_utils.py | 4 +- tuning/utils/preprocessing_utils.py | 4 +- 14 files changed, 116 insertions(+), 61 deletions(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index 312e53e7e..1fc3eee68 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -77,15 +77,13 @@ def main(): or 'SFT_TRAINER_CONFIG_JSON_ENV_VAR'." ) - # Get log_level to be applied and Configure it. + # Configure log_level of python native logger. LOGLEVEL = None if "log_level" in job_config and job_config["log_level"]: LOGLEVEL = job_config["log_level"].upper() logging.basicConfig(level=LOGLEVEL) else: - LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") - ).upper() + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) args = process_accelerate_launch_args(job_config) logging.debug("accelerate launch parsed args: %s", args) @@ -117,14 +115,10 @@ def main(): updated_args = serialize_args(job_config) os.environ["SFT_TRAINER_CONFIG_JSON_ENV_VAR"] = updated_args - # LAUNCH COMMAND ALWAYS TAKES LOG LEVEL FROM ENV VAR EVEN THOUGH, \ - # CODE LOGIC IN SFTTRAINER.PY TELLS TO TAKE FROM CLI. 
- # HENCE ITS BETTER TO MODIFY ENV VAR HERE AND ASSIGN THE VALUE BASED ON CODE LOGIC HERE, - # THEREFORE APPLYING THIS LOG LEVEL TO WHOLE TUNING JOB AND PYTHON NATIVE LOGGING - if os.environ.get("TRANSFORMERS_VERBOSITY"): - os.environ["TRANSFORMERS_VERBOSITY"] = LOGLEVEL.lower() - else: - os.environ["LOG_LEVEL"] = LOGLEVEL + # Configure for Image to get log level as the code after + # launch_command only takes log level from env var LOG_LEVEL and not CLI + os.environ["LOG_LEVEL"] = LOGLEVEL + launch_command(args) except subprocess.CalledProcessError as e: # If the subprocess throws an exception, the base exception is hidden in the diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index b01c216c4..e17f06176 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -18,6 +18,7 @@ # Standard import copy import json +import logging import os import tempfile @@ -727,6 +728,7 @@ def test_run_with_good_experimental_metadata(): ) +<<<<<<< Updated upstream ### Tests for pretokenized data def test_pretokenized_dataset(): """Ensure that we can provide a pretokenized dataset with input/output format.""" @@ -780,3 +782,61 @@ def test_pretokenized_dataset_wrong_format(): # is essentially swallowing a KeyError here. with pytest.raises(ValueError): sft_trainer.train(MODEL_ARGS, data_args, train_args, PEFT_PT_ARGS) +======= +def test_set_log_level_for_logger_default(): + """ + Ensure that the correct log level is being set + for python native logger and transformers logger + """ + + # Set env var TRANSFORMERS_VERBOSITY as None and test + os.unsetenv("TRANSFORMERS_VERBOSITY") + os.unsetenv("LOG_LEVEL") + os.unsetenv("SFT_TRAINER_CONFIG_JSON_ENV_VAR") + os.unsetenv("SFT_TRAINER_CONFIG_JSON_PATH") + train_args = copy.deepcopy(TRAIN_ARGS) + + # TEST IF NO ENV VAR ARE SET AND NO CLI ARGUMENT IS PASSED + training_args, logger = sft_trainer.set_log_level(train_args) + assert logger.level == logging.WARNING + assert training_args.log_level == "warning" + + +def test_set_log_level_for_logger_with_env_var(): + """ + Ensure that the correct log level is being set + for python native logger and transformers logger + """ + + os.unsetenv("TRANSFORMERS_VERBOSITY") + os.unsetenv("LOG_LEVEL") + os.unsetenv("SFT_TRAINER_CONFIG_JSON_ENV_VAR") + os.unsetenv("SFT_TRAINER_CONFIG_JSON_PATH") + train_args = copy.deepcopy(TRAIN_ARGS) + + # TEST IF LOG_LEVEL ENV VAR IS SET AND NO CLI ARGUMENT IS PASSED + os.environ["LOG_LEVEL"] = "info" + training_args, logger = sft_trainer.set_log_level(train_args) + assert logger.level == logging.INFO + assert training_args.log_level == "info" + + +def test_set_log_level_for_logger_with_env_var_and_cli(): + """ + Ensure that the correct log level is being set + for python native logger and transformers logger + """ + + os.unsetenv("TRANSFORMERS_VERBOSITY") + os.unsetenv("LOG_LEVEL") + os.unsetenv("SFT_TRAINER_CONFIG_JSON_ENV_VAR") + os.unsetenv("SFT_TRAINER_CONFIG_JSON_PATH") + train_args = copy.deepcopy(TRAIN_ARGS) + + # TEST IF LOG_LEVEL ENV VAR IS SET AND --log_level CLI ARGUMENT IS PASSED + os.environ["LOG_LEVEL"] = "info" + train_args.log_level = "error" + training_args, logger = sft_trainer.set_log_level(train_args) + assert logger.level == logging.ERROR + assert training_args.log_level == "error" +>>>>>>> Stashed changes diff --git a/tuning/config/configs.py b/tuning/config/configs.py index 30d342b72..d4b6b5f1c 100644 --- a/tuning/config/configs.py +++ b/tuning/config/configs.py @@ -137,7 +137,7 @@ class TrainingArguments(transformers.TrainingArguments): }, 
) log_level: str = field( - default="warning", + default="passive", metadata={ "help": "The log level to adopt during training. \ Possible values are 'debug', 'info', 'warning', 'error' and 'critical'" diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 15128b18f..598d1e65e 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -461,6 +461,43 @@ def parse_arguments(parser, json_config=None): ) +def set_log_level(parsed_training_args): + """Set log level of python native logger and TF logger via argument from CLI or env variable. + + Args: + parsed_training_args + Training arguments for training model. + """ + + # Clear any existing handlers if necessary + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + # Configure Python native logger log level + # If CLI arg is passed, assign same log level to python native logger + if parsed_training_args.log_level != "passive": + logging.basicConfig(level=parsed_training_args.log_level.upper()) + else: + # Assign value of either env var LOG_LEVEL or warning + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + logging.basicConfig(level=LOGLEVEL) + train_logger = logging.getLogger() + + # Check if env var TRANSFORMERS_VERBOSITY is not set. + # Else if env var is already set then, log level of transformers is automatically set. + if os.environ.get("TRANSFORMERS_VERBOSITY") is None: + + # Check if "--log_level" CLI argument is not used (passive/warning is the default log level) + if parsed_training_args.log_level == "passive": + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + + # Set log_level in TrainingArguments + parsed_training_args.log_level = LOGLEVEL.lower() + + train_logger = logging.getLogger() + return parsed_training_args, train_logger + + def main(**kwargs): # pylint: disable=unused-argument parser = get_parser() job_config = get_json_config() @@ -479,24 +516,8 @@ def main(**kwargs): # pylint: disable=unused-argument exp_metadata, ) = parse_arguments(parser, job_config) - # Configure log level: - # If log_level is "passive" (not set by cli), then check environment variable. - # First check TRANSFORMERS_VERBOSITY and then Fallback to check LOG_LEVEL. - # If both env var is not set, then assign "WARNING" as default. 
- if training_args.log_level == "passive": - LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") - ).upper() - - # If log_level is set by environment variable, assign the transformers log_level value - # along with log level value of python logger - # OR else set both as default value ("warning") - logging.basicConfig(level=LOGLEVEL) - training_args.log_level = LOGLEVEL.lower() - - # If log_level is set using cli argument, set same value to log level of python logger - else: - logging.basicConfig(level=training_args.log_level.upper()) + # Function to set log level for python native logger and transformers training logger + training_args, _ = set_log_level(training_args) logging.debug( "Input args parsed: \ diff --git a/tuning/trackers/aimstack_tracker.py b/tuning/trackers/aimstack_tracker.py index 334b391c1..7c807b2ad 100644 --- a/tuning/trackers/aimstack_tracker.py +++ b/tuning/trackers/aimstack_tracker.py @@ -98,9 +98,7 @@ def __init__(self, tracker_config: AimConfig): """ super().__init__(name="aim", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") - ).upper() + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) self.logger = logging.getLogger("aimstack_tracker") diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index 7a5a5e591..55ad5edf7 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -81,9 +81,7 @@ def __init__(self, tracker_config: FileLoggingTrackerConfig): """ super().__init__(name="file_logger", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") - ).upper() + LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) self.logger = logging.getLogger("file_logging_tracker") diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index 73f58c641..df70a2ad4 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -25,9 +25,7 @@ from tuning.config.tracker_configs import FileLoggingTrackerConfig, TrackerConfigFactory # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("tracker_factory") diff --git a/tuning/trainercontroller/callback.py b/tuning/trainercontroller/callback.py index 4c59ac31d..f319acbb2 100644 --- a/tuning/trainercontroller/callback.py +++ b/tuning/trainercontroller/callback.py @@ -46,9 +46,7 @@ from tuning.utils.evaluator import MetricUnavailableError, RuleEvaluator # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index c6615479d..79de526cb 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -24,9 +24,7 @@ from tuning.trainercontroller.controllermetrics.metricshandler import 
MetricHandler # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index daabba06e..e434523f7 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -28,9 +28,7 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) METRICS_KEY = "metrics" diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index 12110e87a..3fcb14487 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -12,9 +12,7 @@ from .operation import Operation # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index 8203867b5..e396dc78f 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -33,9 +33,7 @@ MODE_NO_RESET_ON_FAILURE = "no_reset_on_failure" # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index 5a4509c69..40ec77538 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -21,9 +21,7 @@ import torch # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("data_utils") diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index 8fedd5f6c..062988578 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -28,9 +28,7 @@ from tuning.utils.data_utils import apply_custom_formatting_template # Configure log level -LOGLEVEL = os.environ.get( - "TRANSFORMERS_VERBOSITY", os.environ.get("LOG_LEVEL", "WARNING") -).upper() +LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger("sft_trainer_preprocessing") From 7fffda7cf25e4725e2efb210eac9fb81dbb92753 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 1 Aug 2024 15:31:53 -0400 Subject: [PATCH 09/23] Solved conflict with main Signed-off-by: Abhishek --- tests/test_sft_trainer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index e17f06176..4a98bf26c 100644 --- a/tests/test_sft_trainer.py 
+++ b/tests/test_sft_trainer.py @@ -728,7 +728,6 @@ def test_run_with_good_experimental_metadata(): ) -<<<<<<< Updated upstream ### Tests for pretokenized data def test_pretokenized_dataset(): """Ensure that we can provide a pretokenized dataset with input/output format.""" @@ -782,7 +781,6 @@ def test_pretokenized_dataset_wrong_format(): # is essentially swallowing a KeyError here. with pytest.raises(ValueError): sft_trainer.train(MODEL_ARGS, data_args, train_args, PEFT_PT_ARGS) -======= def test_set_log_level_for_logger_default(): """ Ensure that the correct log level is being set @@ -839,4 +837,3 @@ def test_set_log_level_for_logger_with_env_var_and_cli(): training_args, logger = sft_trainer.set_log_level(train_args) assert logger.level == logging.ERROR assert training_args.log_level == "error" ->>>>>>> Stashed changes From ba8a972254bb7bf639b91a129a0a3f66b5c6f492 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 1 Aug 2024 15:53:23 -0400 Subject: [PATCH 10/23] FMT:Fix Solved conflict with main Signed-off-by: Abhishek --- tests/test_sft_trainer.py | 2 ++ tuning/trainercontroller/callback.py | 7 ++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 4a98bf26c..63f2c83fd 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -781,6 +781,8 @@ def test_pretokenized_dataset_wrong_format(): # is essentially swallowing a KeyError here. with pytest.raises(ValueError): sft_trainer.train(MODEL_ARGS, data_args, train_args, PEFT_PT_ARGS) + + def test_set_log_level_for_logger_default(): """ Ensure that the correct log level is being set diff --git a/tuning/trainercontroller/callback.py b/tuning/trainercontroller/callback.py index f319acbb2..2c5ddfcdf 100644 --- a/tuning/trainercontroller/callback.py +++ b/tuning/trainercontroller/callback.py @@ -18,7 +18,6 @@ # Standard from typing import Dict, List, Union import inspect -import logging import os import re @@ -30,6 +29,7 @@ TrainerState, TrainingArguments, ) +from transformers.utils import logging import yaml # Local @@ -45,10 +45,7 @@ from tuning.trainercontroller.patience import PatienceControl from tuning.utils.evaluator import MetricUnavailableError, RuleEvaluator -# Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) -logger = logging.getLogger(__name__) +logger = logging.get_logger(__name__) # Configuration keys CONTROLLER_METRICS_KEY = "controller_metrics" From f1159f96e58cc4a72d31a212d3df4a75822e7b1e Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 1 Aug 2024 16:35:37 -0400 Subject: [PATCH 11/23] enabling tests for prompt tuning Signed-off-by: Abhishek --- tests/test_sft_trainer.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index b01c216c4..a52bc35fc 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -163,9 +163,6 @@ def test_parse_arguments_peft_method(job_config): ############################# Prompt Tuning Tests ############################# -@pytest.mark.skip( - reason="currently inference doesn't work with transformer version 4.42.4" -) def test_run_causallm_pt_and_inference(): """Check if we can bootstrap and peft tune causallm models""" with tempfile.TemporaryDirectory() as tempdir: @@ -196,9 +193,6 @@ def test_run_causallm_pt_and_inference(): assert "### Text: @NortonSupport Thanks much.\n\n### Label:" in output_inference -@pytest.mark.skip( - reason="currently inference doesn't work 
with transformer version 4.42.4" -) def test_run_causallm_pt_and_inference_with_formatting_data(): """Check if we can bootstrap and peft tune causallm models This test needs the trainer to format data to a single sequence internally. @@ -237,9 +231,6 @@ def test_run_causallm_pt_and_inference_with_formatting_data(): assert "### Text: @NortonSupport Thanks much.\n\n### Label:" in output_inference -@pytest.mark.skip( - reason="currently inference doesn't work with transformer version 4.42.4" -) def test_run_causallm_pt_and_inference_JSON_file_formatter(): """Check if we can bootstrap and peft tune causallm models with JSON train file format""" with tempfile.TemporaryDirectory() as tempdir: From 8af579201c6370671697317c4819b84ebe2824d6 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Mon, 5 Aug 2024 15:41:42 -0400 Subject: [PATCH 12/23] PR changes for changing logger Signed-off-by: Abhishek --- build/accelerate_launch.py | 26 +++---- tuning/config/configs.py | 4 +- tuning/sft_trainer.py | 68 +++++++++---------- tuning/trackers/aimstack_tracker.py | 4 +- tuning/trackers/filelogging_tracker.py | 4 +- tuning/trackers/tracker_factory.py | 4 +- .../controllermetrics/eval_metrics.py | 2 - .../history_based_metrics.py | 2 - .../operations/hfcontrols.py | 2 - tuning/trainercontroller/patience.py | 2 - tuning/utils/data_type_utils.py | 4 +- tuning/utils/preprocessing_utils.py | 4 +- 12 files changed, 56 insertions(+), 70 deletions(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index 1fc3eee68..0767af344 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -78,13 +78,20 @@ def main(): ) # Configure log_level of python native logger. - LOGLEVEL = None - if "log_level" in job_config and job_config["log_level"]: - LOGLEVEL = job_config["log_level"].upper() - logging.basicConfig(level=LOGLEVEL) - else: - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() - logging.basicConfig(level=LOGLEVEL) + log_level = job_config.get( + "log_level" + ) # this will be set to either the value found or None + if ( + not log_level + ): # if log level not set by job_config aka by JSON, set it via env var or set default + log_level = os.environ.get("LOG_LEVEL", "WARNING") + log_level = log_level.upper() + logging.basicConfig(level=log_level) + + # Configure for Image to get log level as the code after + # launch_command only takes log level from env var LOG_LEVEL and not CLI + os.environ["LOG_LEVEL"] = log_level + args = process_accelerate_launch_args(job_config) logging.debug("accelerate launch parsed args: %s", args) except FileNotFoundError as e: @@ -114,11 +121,6 @@ def main(): job_config["output_dir"] = tempdir updated_args = serialize_args(job_config) os.environ["SFT_TRAINER_CONFIG_JSON_ENV_VAR"] = updated_args - - # Configure for Image to get log level as the code after - # launch_command only takes log level from env var LOG_LEVEL and not CLI - os.environ["LOG_LEVEL"] = LOGLEVEL - launch_command(args) except subprocess.CalledProcessError as e: # If the subprocess throws an exception, the base exception is hidden in the diff --git a/tuning/config/configs.py b/tuning/config/configs.py index 4ccbf7086..69d94e72e 100644 --- a/tuning/config/configs.py +++ b/tuning/config/configs.py @@ -140,7 +140,9 @@ class TrainingArguments(transformers.TrainingArguments): default="passive", metadata={ "help": "The log level to adopt during training. 
\ - Possible values are 'debug', 'info', 'warning', 'error' and 'critical'" + 'passive' level which doesn't set anything and keeps the \ + current log level for the Transformers library (which will be 'warning` by default) \ + Other possible values are 'debug', 'info', 'warning', 'error' and 'critical'" }, ) diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 11a7cc4bc..acde18260 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -113,6 +113,8 @@ def train( fused_lora and fast_kernels must used together (may change in future). \ """ + train_args, logger = set_log_level(train_args, "sft_trainer_train") + # Validate parameters if (not isinstance(train_args.num_train_epochs, (float, int))) or ( train_args.num_train_epochs <= 0 @@ -232,9 +234,9 @@ def train( ) max_seq_length = min(train_args.max_seq_length, tokenizer.model_max_length) - logging.info("Max sequence length is %s", max_seq_length) + logger.info("Max sequence length is %s", max_seq_length) if train_args.max_seq_length > tokenizer.model_max_length: - logging.warning( + logger.warning( "max_seq_length %s exceeds tokenizer.model_max_length \ %s, using tokenizer.model_max_length %s", train_args.max_seq_length, @@ -270,11 +272,11 @@ def train( # Configure the collator and validate args related to packing prior to formatting the dataset if train_args.packing: - logging.info("Packing is set to True") + logger.info("Packing is set to True") data_collator = None packing = True else: - logging.info("Packing is set to False") + logger.info("Packing is set to False") packing = False # Validate if data args are set properly @@ -339,7 +341,7 @@ def train( tracker.track(metric=v, name=k, stage="additional_metrics") tracker.set_params(params=exp_metadata, name="experiment_metadata") except ValueError as e: - logging.error( + logger.error( "Exception while saving additional metrics and metadata %s", repr(e), ) @@ -477,7 +479,7 @@ def parse_arguments(parser, json_config=None): ) -def set_log_level(parsed_training_args): +def set_log_level(parsed_training_args, logger_name=__name__): """Set log level of python native logger and TF logger via argument from CLI or env variable. Args: @@ -489,28 +491,25 @@ def set_log_level(parsed_training_args): for handler in logging.root.handlers[:]: logging.root.removeHandler(handler) - # Configure Python native logger log level + # Configure Python native logger and transformers log level # If CLI arg is passed, assign same log level to python native logger + log_level = "WARNING" if parsed_training_args.log_level != "passive": - logging.basicConfig(level=parsed_training_args.log_level.upper()) - else: - # Assign value of either env var LOG_LEVEL or warning - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() - logging.basicConfig(level=LOGLEVEL) - train_logger = logging.getLogger() - - # Check if env var TRANSFORMERS_VERBOSITY is not set. - # Else if env var is already set then, log level of transformers is automatically set. 
- if os.environ.get("TRANSFORMERS_VERBOSITY") is None: - - # Check if "--log_level" CLI argument is not used (passive/warning is the default log level) - if parsed_training_args.log_level == "passive": - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() + log_level = parsed_training_args.log_level + + # If CLI arg not is passed and env var LOG_LEVEL is set, + # assign same log level to both logger + elif os.environ.get("LOG_LEVEL"): # AND parsed_training_args.log_level == "passive" + log_level = os.environ.get("LOG_LEVEL") + parsed_training_args.log_level = ( + log_level.lower() + if not os.environ.get("TRANSFORMERS_VERBOSITY") + else os.environ.get("TRANSFORMERS_VERBOSITY") + ) - # Set log_level in TrainingArguments - parsed_training_args.log_level = LOGLEVEL.lower() + logging.basicConfig(level=log_level.upper()) - train_logger = logging.getLogger() + train_logger = logging.getLogger(logger_name) return parsed_training_args, train_logger @@ -533,9 +532,10 @@ def main(**kwargs): # pylint: disable=unused-argument ) = parse_arguments(parser, job_config) # Function to set log level for python native logger and transformers training logger - training_args, _ = set_log_level(training_args) + # training_args, logger = set_log_level(training_args, "sft_trainer_main") + training_args, logger = set_log_level(training_args) - logging.debug( + logger.debug( "Input args parsed: \ model_args %s, data_args %s, training_args %s, trainer_controller_args %s, \ tune_config %s, file_logger_config, %s aim_config %s, \ @@ -553,7 +553,7 @@ def main(**kwargs): # pylint: disable=unused-argument exp_metadata, ) except Exception as e: # pylint: disable=broad-except - logging.error(traceback.format_exc()) + logger.error(traceback.format_exc()) write_termination_log( f"Exception raised during training. This may be a problem with your input: {e}" ) @@ -565,12 +565,12 @@ def main(**kwargs): # pylint: disable=unused-argument try: metadata = json.loads(exp_metadata) if metadata is None or not isinstance(metadata, Dict): - logging.warning( + logger.warning( "metadata cannot be converted to simple k:v dict ignoring" ) metadata = None except ValueError as e: - logging.error( + logger.error( "failed while parsing extra metadata. pass a valid json %s", repr(e) ) @@ -593,27 +593,27 @@ def main(**kwargs): # pylint: disable=unused-argument fusedops_kernels_config=fusedops_kernels_config, ) except (MemoryError, OutOfMemoryError) as e: - logging.error(traceback.format_exc()) + logger.error(traceback.format_exc()) write_termination_log(f"OOM error during training. {e}") sys.exit(INTERNAL_ERROR_EXIT_CODE) except FileNotFoundError as e: - logging.error(traceback.format_exc()) + logger.error(traceback.format_exc()) write_termination_log("Unable to load file: {}".format(e)) sys.exit(USER_ERROR_EXIT_CODE) except HFValidationError as e: - logging.error(traceback.format_exc()) + logger.error(traceback.format_exc()) write_termination_log( f"There may be a problem with loading the model. Exception: {e}" ) sys.exit(USER_ERROR_EXIT_CODE) except (TypeError, ValueError, EnvironmentError) as e: - logging.error(traceback.format_exc()) + logger.error(traceback.format_exc()) write_termination_log( f"Exception raised during training. 
This may be a problem with your input: {e}" ) sys.exit(USER_ERROR_EXIT_CODE) except Exception as e: # pylint: disable=broad-except - logging.error(traceback.format_exc()) + logger.error(traceback.format_exc()) write_termination_log(f"Unhandled exception during training: {e}") sys.exit(INTERNAL_ERROR_EXIT_CODE) diff --git a/tuning/trackers/aimstack_tracker.py b/tuning/trackers/aimstack_tracker.py index 7c807b2ad..3ee7673b1 100644 --- a/tuning/trackers/aimstack_tracker.py +++ b/tuning/trackers/aimstack_tracker.py @@ -98,9 +98,7 @@ def __init__(self, tracker_config: AimConfig): """ super().__init__(name="aim", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() - logging.basicConfig(level=LOGLEVEL) - self.logger = logging.getLogger("aimstack_tracker") + self.logger = logging.getLogger(__name__) def get_hf_callback(self): """Returns the aim.hugging_face.AimCallback object associated with this tracker. diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index 55ad5edf7..c76f0dea0 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -81,9 +81,7 @@ def __init__(self, tracker_config: FileLoggingTrackerConfig): """ super().__init__(name="file_logger", tracker_config=tracker_config) # Configure log level - LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() - logging.basicConfig(level=LOGLEVEL) - self.logger = logging.getLogger("file_logging_tracker") + self.logger = logging.getLogger(__name__) def get_hf_callback(self): """Returns the FileLoggingCallback object associated with this tracker. diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index df70a2ad4..2cbc3743d 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -25,9 +25,7 @@ from tuning.config.tracker_configs import FileLoggingTrackerConfig, TrackerConfigFactory # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) -logger = logging.getLogger("tracker_factory") +logger = logging.getLogger(__name__) # Information about all registered trackers diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index 79de526cb..54ad467ae 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -24,8 +24,6 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index e434523f7..a66396b5d 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -28,8 +28,6 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) METRICS_KEY = "metrics" LOG_LOSS_KEY = "loss" diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index 3fcb14487..1da5fef9d 
100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -12,8 +12,6 @@ from .operation import Operation # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index e396dc78f..a3d0a01bb 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -33,8 +33,6 @@ MODE_NO_RESET_ON_FAILURE = "no_reset_on_failure" # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) logger = logging.getLogger(__name__) diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index 40ec77538..a76e9ab81 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -21,9 +21,7 @@ import torch # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) -logger = logging.getLogger("data_utils") +logger = logging.getLogger(__name__) def str_to_torch_dtype(dtype_str: str) -> torch.dtype: diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index e40c6ce66..4982d6c9a 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -29,9 +29,7 @@ from tuning.utils.data_utils import apply_custom_formatting_template # Configure log level -LOGLEVEL = os.environ.get("LOG_LEVEL", "WARNING").upper() -logging.basicConfig(level=LOGLEVEL) -logger = logging.getLogger("sft_trainer_preprocessing") +logger = logging.getLogger(__name__) # In future we may make the fields configurable JSON_INPUT_KEY = "input" From ba489b56bdac9e4faff1cb89c1c6a5ee41a3cddf Mon Sep 17 00:00:00 2001 From: Abhishek Date: Mon, 5 Aug 2024 17:31:05 -0400 Subject: [PATCH 13/23] Unit Tests changes Signed-off-by: Abhishek --- tests/test_sft_trainer.py | 13 +++++++------ tuning/sft_trainer.py | 1 - tuning/trackers/tracker_factory.py | 1 - .../controllermetrics/eval_metrics.py | 1 - .../controllermetrics/history_based_metrics.py | 1 - tuning/trainercontroller/operations/hfcontrols.py | 1 - tuning/trainercontroller/patience.py | 1 - tuning/utils/data_type_utils.py | 1 - tuning/utils/preprocessing_utils.py | 1 - 9 files changed, 7 insertions(+), 14 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index c960b4f82..94b6887ea 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -826,8 +826,8 @@ def test_set_log_level_for_logger_default(): # TEST IF NO ENV VAR ARE SET AND NO CLI ARGUMENT IS PASSED training_args, logger = sft_trainer.set_log_level(train_args) - assert logger.level == logging.WARNING - assert training_args.log_level == "warning" + assert logger.getEffectiveLevel() == logging.WARNING + assert training_args.log_level in ["passive", "warning"] def test_set_log_level_for_logger_with_env_var(): @@ -840,12 +840,13 @@ def test_set_log_level_for_logger_with_env_var(): os.unsetenv("LOG_LEVEL") os.unsetenv("SFT_TRAINER_CONFIG_JSON_ENV_VAR") os.unsetenv("SFT_TRAINER_CONFIG_JSON_PATH") - train_args = copy.deepcopy(TRAIN_ARGS) + train_args_env = copy.deepcopy(TRAIN_ARGS) # TEST IF LOG_LEVEL ENV VAR IS SET AND NO CLI ARGUMENT IS PASSED os.environ["LOG_LEVEL"] = "info" - training_args, logger = sft_trainer.set_log_level(train_args) - assert logger.level == logging.INFO + train_args_env.log_level = "passive" 
# Default + training_args, logger = sft_trainer.set_log_level(train_args_env) + assert logger.getEffectiveLevel() == logging.INFO assert training_args.log_level == "info" @@ -865,5 +866,5 @@ def test_set_log_level_for_logger_with_env_var_and_cli(): os.environ["LOG_LEVEL"] = "info" train_args.log_level = "error" training_args, logger = sft_trainer.set_log_level(train_args) - assert logger.level == logging.ERROR + assert logger.getEffectiveLevel() == logging.ERROR assert training_args.log_level == "error" diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index acde18260..9e8136063 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -532,7 +532,6 @@ def main(**kwargs): # pylint: disable=unused-argument ) = parse_arguments(parser, job_config) # Function to set log level for python native logger and transformers training logger - # training_args, logger = set_log_level(training_args, "sft_trainer_main") training_args, logger = set_log_level(training_args) logger.debug( diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index 2cbc3743d..9e140b784 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -15,7 +15,6 @@ # Standard import dataclasses import logging -import os # Third Party from transformers.utils.import_utils import _is_package_available diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index 54ad467ae..03f44de5f 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -18,7 +18,6 @@ # Standard from typing import Any import logging -import os # Local from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index a66396b5d..682223fbf 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -19,7 +19,6 @@ from collections import deque from typing import Any import logging -import os # Third Party from transformers import TrainerState diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index 1da5fef9d..f14342023 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -2,7 +2,6 @@ from dataclasses import fields import inspect import logging -import os import re # Third Party diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index a3d0a01bb..b14b75e7e 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -17,7 +17,6 @@ # Standard import logging -import os # Resets the patience if the rule outcome happens to be false. 
# Here, the expectation is to have unbroken "True"s for patience diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index a76e9ab81..0012fc89c 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -15,7 +15,6 @@ # Standard from typing import Union import logging -import os # Third Party import torch diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index 4982d6c9a..ca4618493 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -15,7 +15,6 @@ from typing import Any, Callable, Dict, Optional, Union import json import logging -import os # Third Party from datasets import Dataset, IterableDataset From dc9a52147c32b7d4bef85b9bc9e56b7a849433e0 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Mon, 5 Aug 2024 20:10:59 -0400 Subject: [PATCH 14/23] commented os.environ[LOG_LEVEL] in accelerate.py for testing Signed-off-by: Abhishek --- build/accelerate_launch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index 0767af344..99abb8e0c 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -90,7 +90,7 @@ def main(): # Configure for Image to get log level as the code after # launch_command only takes log level from env var LOG_LEVEL and not CLI - os.environ["LOG_LEVEL"] = log_level + # os.environ["LOG_LEVEL"] = log_level args = process_accelerate_launch_args(job_config) logging.debug("accelerate launch parsed args: %s", args) From cfeb709f3d04a430a9c4bb7ea0fdca19b0a1884c Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 6 Aug 2024 16:43:09 -0400 Subject: [PATCH 15/23] PR changes Signed-off-by: Abhishek --- build/accelerate_launch.py | 4 - tests/test_sft_trainer.py | 119 +++++++++--------- tuning/config/configs.py | 2 +- tuning/sft_trainer.py | 39 +----- tuning/trackers/aimstack_tracker.py | 4 +- tuning/trackers/filelogging_tracker.py | 4 +- tuning/trackers/tracker_factory.py | 14 +-- .../controllermetrics/eval_metrics.py | 4 - .../history_based_metrics.py | 2 - .../operations/hfcontrols.py | 5 +- tuning/trainercontroller/patience.py | 7 +- tuning/utils/data_type_utils.py | 5 +- tuning/utils/logging.py | 45 +++++++ tuning/utils/preprocessing_utils.py | 7 +- 14 files changed, 123 insertions(+), 138 deletions(-) create mode 100644 tuning/utils/logging.py diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index 99abb8e0c..526130b36 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -88,10 +88,6 @@ def main(): log_level = log_level.upper() logging.basicConfig(level=log_level) - # Configure for Image to get log level as the code after - # launch_command only takes log level from env var LOG_LEVEL and not CLI - # os.environ["LOG_LEVEL"] = log_level - args = process_accelerate_launch_args(job_config) logging.debug("accelerate launch parsed args: %s", args) except FileNotFoundError as e: diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 94b6887ea..f811c6bb2 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -16,6 +16,7 @@ """ # Standard +from unittest import mock import copy import json import logging @@ -44,6 +45,7 @@ # Local from tuning import sft_trainer from tuning.config import configs, peft_config +from tuning.utils.logging import set_log_level MODEL_ARGS = configs.ModelArguments( model_name_or_path=MODEL_NAME, use_flash_attn=False, torch_dtype="float32" @@ -78,6 +80,64 @@ PEFT_LORA_ARGS = 
peft_config.LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05) +@mock.patch.dict(os.environ, {}, clear=True) +def test_set_log_level_for_logger_default(): + """ + Ensure that the correct log level is being set for python native logger and + transformers logger when no env var or CLI flag is passed + """ + + train_args = copy.deepcopy(TRAIN_ARGS) + training_args, logger = set_log_level(train_args) + assert logger.getEffectiveLevel() == logging.WARNING + assert training_args.log_level == "passive" + + +@mock.patch.dict(os.environ, {}, clear=True) +def test_set_log_level_for_logger_with_env_var(): + """ + Ensure that the correct log level is being set for python native logger and + transformers logger when env var LOG_LEVEL is used + """ + + train_args_env = copy.deepcopy(TRAIN_ARGS) + os.environ["LOG_LEVEL"] = "info" + training_args, logger = set_log_level(train_args_env) + assert logger.getEffectiveLevel() == logging.INFO + assert training_args.log_level == "info" + + +@mock.patch.dict(os.environ, {}, clear=True) +def test_set_log_level_for_logger_with_set_verbosity_and_cli(): + """ + Ensure that the correct log level is being set for python native logger and + log_level of transformers logger is unchanged when env var TRANSFORMERS_VERBOSITY is used + and CLI flag is passed + """ + + train_args = copy.deepcopy(TRAIN_ARGS) + os.environ["TRANSFORMERS_VERBOSITY"] = "info" + train_args.log_level = "error" + training_args, logger = set_log_level(train_args) + assert logger.getEffectiveLevel() == logging.ERROR + assert training_args.log_level == "error" + + +@mock.patch.dict(os.environ, {}, clear=True) +def test_set_log_level_for_logger_with_env_var_and_cli(): + """ + Ensure that the correct log level is being set for python native logger and + transformers logger when env var LOG_LEVEL is used and CLI flag is passed + """ + + train_args = copy.deepcopy(TRAIN_ARGS) + os.environ["LOG_LEVEL"] = "info" + train_args.log_level = "error" + training_args, logger = set_log_level(train_args) + assert logger.getEffectiveLevel() == logging.ERROR + assert training_args.log_level == "error" + + def test_run_train_requires_output_dir(): """Check fails when output dir not provided.""" updated_output_dir_train_args = copy.deepcopy(TRAIN_ARGS) @@ -809,62 +869,3 @@ def test_pretokenized_dataset_wrong_format(): # is essentially swallowing a KeyError here. 
with pytest.raises(ValueError): sft_trainer.train(MODEL_ARGS, data_args, train_args, PEFT_PT_ARGS) - - -def test_set_log_level_for_logger_default(): - """ - Ensure that the correct log level is being set - for python native logger and transformers logger - """ - - # Set env var TRANSFORMERS_VERBOSITY as None and test - os.unsetenv("TRANSFORMERS_VERBOSITY") - os.unsetenv("LOG_LEVEL") - os.unsetenv("SFT_TRAINER_CONFIG_JSON_ENV_VAR") - os.unsetenv("SFT_TRAINER_CONFIG_JSON_PATH") - train_args = copy.deepcopy(TRAIN_ARGS) - - # TEST IF NO ENV VAR ARE SET AND NO CLI ARGUMENT IS PASSED - training_args, logger = sft_trainer.set_log_level(train_args) - assert logger.getEffectiveLevel() == logging.WARNING - assert training_args.log_level in ["passive", "warning"] - - -def test_set_log_level_for_logger_with_env_var(): - """ - Ensure that the correct log level is being set - for python native logger and transformers logger - """ - - os.unsetenv("TRANSFORMERS_VERBOSITY") - os.unsetenv("LOG_LEVEL") - os.unsetenv("SFT_TRAINER_CONFIG_JSON_ENV_VAR") - os.unsetenv("SFT_TRAINER_CONFIG_JSON_PATH") - train_args_env = copy.deepcopy(TRAIN_ARGS) - - # TEST IF LOG_LEVEL ENV VAR IS SET AND NO CLI ARGUMENT IS PASSED - os.environ["LOG_LEVEL"] = "info" - train_args_env.log_level = "passive" # Default - training_args, logger = sft_trainer.set_log_level(train_args_env) - assert logger.getEffectiveLevel() == logging.INFO - assert training_args.log_level == "info" - - -def test_set_log_level_for_logger_with_env_var_and_cli(): - """ - Ensure that the correct log level is being set - for python native logger and transformers logger - """ - - os.unsetenv("TRANSFORMERS_VERBOSITY") - os.unsetenv("LOG_LEVEL") - os.unsetenv("SFT_TRAINER_CONFIG_JSON_ENV_VAR") - os.unsetenv("SFT_TRAINER_CONFIG_JSON_PATH") - train_args = copy.deepcopy(TRAIN_ARGS) - - # TEST IF LOG_LEVEL ENV VAR IS SET AND --log_level CLI ARGUMENT IS PASSED - os.environ["LOG_LEVEL"] = "info" - train_args.log_level = "error" - training_args, logger = sft_trainer.set_log_level(train_args) - assert logger.getEffectiveLevel() == logging.ERROR - assert training_args.log_level == "error" diff --git a/tuning/config/configs.py b/tuning/config/configs.py index 69d94e72e..2990ef801 100644 --- a/tuning/config/configs.py +++ b/tuning/config/configs.py @@ -140,7 +140,7 @@ class TrainingArguments(transformers.TrainingArguments): default="passive", metadata={ "help": "The log level to adopt during training. \ - 'passive' level which doesn't set anything and keeps the \ + By default, 'passive' level is set which keeps the \ current log level for the Transformers library (which will be 'warning` by default) \ Other possible values are 'debug', 'info', 'warning', 'error' and 'critical'" }, diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 9e8136063..2c161987f 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -16,8 +16,6 @@ from typing import Dict, List, Optional, Union import dataclasses import json -import logging -import os import sys import time import traceback @@ -62,6 +60,7 @@ USER_ERROR_EXIT_CODE, write_termination_log, ) +from tuning.utils.logging import set_log_level from tuning.utils.preprocessing_utils import ( format_dataset, get_data_collator, @@ -479,40 +478,6 @@ def parse_arguments(parser, json_config=None): ) -def set_log_level(parsed_training_args, logger_name=__name__): - """Set log level of python native logger and TF logger via argument from CLI or env variable. - - Args: - parsed_training_args - Training arguments for training model. 
- """ - - # Clear any existing handlers if necessary - for handler in logging.root.handlers[:]: - logging.root.removeHandler(handler) - - # Configure Python native logger and transformers log level - # If CLI arg is passed, assign same log level to python native logger - log_level = "WARNING" - if parsed_training_args.log_level != "passive": - log_level = parsed_training_args.log_level - - # If CLI arg not is passed and env var LOG_LEVEL is set, - # assign same log level to both logger - elif os.environ.get("LOG_LEVEL"): # AND parsed_training_args.log_level == "passive" - log_level = os.environ.get("LOG_LEVEL") - parsed_training_args.log_level = ( - log_level.lower() - if not os.environ.get("TRANSFORMERS_VERBOSITY") - else os.environ.get("TRANSFORMERS_VERBOSITY") - ) - - logging.basicConfig(level=log_level.upper()) - - train_logger = logging.getLogger(logger_name) - return parsed_training_args, train_logger - - def main(**kwargs): # pylint: disable=unused-argument parser = get_parser() job_config = get_json_config() @@ -532,7 +497,7 @@ def main(**kwargs): # pylint: disable=unused-argument ) = parse_arguments(parser, job_config) # Function to set log level for python native logger and transformers training logger - training_args, logger = set_log_level(training_args) + training_args, logger = set_log_level(training_args, __name__) logger.debug( "Input args parsed: \ diff --git a/tuning/trackers/aimstack_tracker.py b/tuning/trackers/aimstack_tracker.py index 3ee7673b1..b0fc9652d 100644 --- a/tuning/trackers/aimstack_tracker.py +++ b/tuning/trackers/aimstack_tracker.py @@ -97,8 +97,8 @@ def __init__(self, tracker_config: AimConfig): information about the repo or the server and port where aim db is present. """ super().__init__(name="aim", tracker_config=tracker_config) - # Configure log level - self.logger = logging.getLogger(__name__) + # Get logger with root log level + self.logger = logging.getLogger() def get_hf_callback(self): """Returns the aim.hugging_face.AimCallback object associated with this tracker. diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index c76f0dea0..133687866 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -80,8 +80,8 @@ def __init__(self, tracker_config: FileLoggingTrackerConfig): which contains the location of file where logs are recorded. """ super().__init__(name="file_logger", tracker_config=tracker_config) - # Configure log level - self.logger = logging.getLogger(__name__) + # Get logger with root log level + self.logger = logging.getLogger() def get_hf_callback(self): """Returns the FileLoggingCallback object associated with this tracker. 
diff --git a/tuning/trackers/tracker_factory.py b/tuning/trackers/tracker_factory.py index 9e140b784..096099306 100644 --- a/tuning/trackers/tracker_factory.py +++ b/tuning/trackers/tracker_factory.py @@ -23,10 +23,6 @@ from .filelogging_tracker import FileLoggingTracker from tuning.config.tracker_configs import FileLoggingTrackerConfig, TrackerConfigFactory -# Configure log level -logger = logging.getLogger(__name__) - - # Information about all registered trackers AIMSTACK_TRACKER = "aim" FILE_LOGGING_TRACKER = "file_logger" @@ -55,9 +51,9 @@ def _register_aim_tracker(): AimTracker = _get_tracker_class(AimStackTracker, AimConfig) REGISTERED_TRACKERS[AIMSTACK_TRACKER] = AimTracker - logger.info("Registered aimstack tracker") + logging.info("Registered aimstack tracker") else: - logger.info( + logging.info( "Not registering Aimstack tracker due to unavailablity of package.\n" "Please install aim if you intend to use it.\n" "\t pip install aim" @@ -73,14 +69,14 @@ def _is_tracker_installed(name): def _register_file_logging_tracker(): FileTracker = _get_tracker_class(FileLoggingTracker, FileLoggingTrackerConfig) REGISTERED_TRACKERS[FILE_LOGGING_TRACKER] = FileTracker - logger.info("Registered file logging tracker") + logging.info("Registered file logging tracker") # List of Available Trackers # file_logger - Logs loss to a file # aim - Aimstack Tracker def _register_trackers(): - logger.info("Registering trackers") + logging.info("Registering trackers") if AIMSTACK_TRACKER not in REGISTERED_TRACKERS: _register_aim_tracker() if FILE_LOGGING_TRACKER not in REGISTERED_TRACKERS: @@ -143,7 +139,7 @@ def get_tracker(name: str, tracker_configs: TrackerConfigFactory): e = "Requested Tracker {} not found. List trackers available for use is - {} ".format( name, available ) - logger.error(e) + logging.error(e) raise ValueError(e) meta = REGISTERED_TRACKERS[name] diff --git a/tuning/trainercontroller/controllermetrics/eval_metrics.py b/tuning/trainercontroller/controllermetrics/eval_metrics.py index 03f44de5f..a87772674 100644 --- a/tuning/trainercontroller/controllermetrics/eval_metrics.py +++ b/tuning/trainercontroller/controllermetrics/eval_metrics.py @@ -17,14 +17,10 @@ # Standard from typing import Any -import logging # Local from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler -# Configure log level -logger = logging.getLogger(__name__) - class EvalMetrics(MetricHandler): """Implements the controller metric which exposes the evaluation metrics""" diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index 682223fbf..afb0e0cac 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -18,7 +18,6 @@ # Standard from collections import deque from typing import Any -import logging # Third Party from transformers import TrainerState @@ -27,7 +26,6 @@ from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler # Configure log level -logger = logging.getLogger(__name__) METRICS_KEY = "metrics" LOG_LOSS_KEY = "loss" TRAINING_LOSS_KEY = "training_loss" diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index f14342023..0548b4c12 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -10,9 +10,6 @@ # Local from .operation import 
Operation -# Configure log level -logger = logging.getLogger(__name__) - class HFControls(Operation): """Implements the control actions for the HuggingFace controls in @@ -40,7 +37,7 @@ def control_action(self, control: TrainerControl, **kwargs): control: TrainerControl. Data class for controls. kwargs: List of arguments (key, value)-pairs """ - logger.debug("Arguments passed to control_action: %s", repr(kwargs)) + logging.debug("Arguments passed to control_action: %s", repr(kwargs)) frame_info = inspect.currentframe().f_back arg_values = inspect.getargvalues(frame_info) setattr(control, arg_values.locals["action"], True) diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index b14b75e7e..ecdb0699a 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -31,9 +31,6 @@ # will be exceeded afer the fifth event. MODE_NO_RESET_ON_FAILURE = "no_reset_on_failure" -# Configure log level -logger = logging.getLogger(__name__) - class PatienceControl: """Implements the patience control for every rule""" @@ -52,7 +49,7 @@ def should_tolerate( elif self._mode == MODE_RESET_ON_FAILURE: self._patience_counter = 0 if self._patience_counter <= self._patience_threshold: - logger.debug( + logging.debug( "Control {} triggered on event {}: " "Enforcing patience [patience_counter = {:.2f}, " "patience_threshold = {:.2f}]".format( @@ -63,7 +60,7 @@ def should_tolerate( ) ) return True - logger.debug( + logging.debug( "Control {} triggered on event {}: " "Exceeded patience [patience_counter = {:.2f}, " "patience_threshold = {:.2f}]".format( diff --git a/tuning/utils/data_type_utils.py b/tuning/utils/data_type_utils.py index 0012fc89c..52bae6d77 100644 --- a/tuning/utils/data_type_utils.py +++ b/tuning/utils/data_type_utils.py @@ -19,9 +19,6 @@ # Third Party import torch -# Configure log level -logger = logging.getLogger(__name__) - def str_to_torch_dtype(dtype_str: str) -> torch.dtype: """Given a string representation of a Torch data type, convert it to the actual torch dtype. @@ -36,7 +33,7 @@ def str_to_torch_dtype(dtype_str: str) -> torch.dtype: """ dt = getattr(torch, dtype_str, None) if not isinstance(dt, torch.dtype): - logger.error(" ValueError: Unrecognized data type of a torch.Tensor") + logging.error(" ValueError: Unrecognized data type of a torch.Tensor") raise ValueError("Unrecognized data type of a torch.Tensor") return dt diff --git a/tuning/utils/logging.py b/tuning/utils/logging.py new file mode 100644 index 000000000..efeb1a338 --- /dev/null +++ b/tuning/utils/logging.py @@ -0,0 +1,45 @@ +# Standard +import os +import logging + +def set_log_level(train_args, logger_name=None): + """Set log level of python native logger and TF logger via argument from CLI or env variable. + + Args: + train_args + Training arguments for training model. + + Returns: + train_args + Updated training arguments for training model. 
+ train_logger + Logger with updated effective log level + """ + + # Clear any existing handlers if necessary + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + # Configure Python native logger and transformers log level + # If CLI arg is passed, assign same log level to python native logger + log_level = "WARNING" + if train_args.log_level != "passive": + log_level = train_args.log_level + + # If CLI arg not is passed and env var LOG_LEVEL is set, + # assign same log level to both logger + elif os.environ.get("LOG_LEVEL"): + log_level = os.environ.get("LOG_LEVEL") + train_args.log_level = ( + log_level.lower() + if not os.environ.get("TRANSFORMERS_VERBOSITY") + else os.environ.get("TRANSFORMERS_VERBOSITY") + ) + + logging.basicConfig(level=log_level.upper()) + + if logger_name: + train_logger = logging.getLogger(logger_name) + else: + train_logger = logging.getLogger() + return train_args, train_logger diff --git a/tuning/utils/preprocessing_utils.py b/tuning/utils/preprocessing_utils.py index ca4618493..68b2755d8 100644 --- a/tuning/utils/preprocessing_utils.py +++ b/tuning/utils/preprocessing_utils.py @@ -27,9 +27,6 @@ from tuning.config import configs from tuning.utils.data_utils import apply_custom_formatting_template -# Configure log level -logger = logging.getLogger(__name__) - # In future we may make the fields configurable JSON_INPUT_KEY = "input" JSON_OUTPUT_KEY = "output" @@ -221,7 +218,7 @@ def format_dataset( tokenizer, data_args.data_formatter_template, ) - logger.info("Training dataset length is %s", len(train_dataset)) + logging.info("Training dataset length is %s", len(train_dataset)) if data_args.validation_data_path: (eval_dataset) = get_formatted_dataset_with_single_sequence( data_args.validation_data_path, @@ -229,7 +226,7 @@ def format_dataset( tokenizer, data_args.data_formatter_template, ) - logger.info("Validation dataset length is %s", len(eval_dataset)) + logging.info("Validation dataset length is %s", len(eval_dataset)) else: # This is for JSON containing input/output fields train_dataset = get_preprocessed_dataset( From bf36b36dac859747e0e04b01c9264d4212b49022 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 6 Aug 2024 16:50:33 -0400 Subject: [PATCH 16/23] FIX:FMT Signed-off-by: Abhishek --- tuning/utils/logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tuning/utils/logging.py b/tuning/utils/logging.py index efeb1a338..819a065cc 100644 --- a/tuning/utils/logging.py +++ b/tuning/utils/logging.py @@ -1,6 +1,7 @@ # Standard -import os import logging +import os + def set_log_level(train_args, logger_name=None): """Set log level of python native logger and TF logger via argument from CLI or env variable. 
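At this point in the series the log-level resolution has settled into tuning/utils/logging.py. For reference, the precedence it implements (a --log_level CLI value other than "passive" wins, then the LOG_LEVEL env var, then a WARNING default, with TRANSFORMERS_VERBOSITY still honoured for the transformers side) can be sketched as a standalone snippet. This is illustrative only: resolve_log_level and the sample values below are not part of these patches.

    # Standalone sketch of the precedence implemented by set_log_level();
    # resolve_log_level is a hypothetical helper used only for illustration.
    import logging
    import os

    def resolve_log_level(cli_log_level: str = "passive"):
        """Return (native_logger_level, transformers_log_level)."""
        native_level = "WARNING"      # default when neither CLI flag nor env var is set
        hf_level = cli_log_level      # "passive" keeps transformers' own default (warning)

        if cli_log_level != "passive":
            # CLI flag wins for both loggers
            native_level = cli_log_level
        elif os.environ.get("LOG_LEVEL"):
            # No CLI flag: LOG_LEVEL drives the native logger
            native_level = os.environ["LOG_LEVEL"]
            # TRANSFORMERS_VERBOSITY, if set, still governs the transformers logger
            hf_level = os.environ.get("TRANSFORMERS_VERBOSITY", native_level).lower()

        return native_level.upper(), hf_level

    if __name__ == "__main__":
        os.environ["LOG_LEVEL"] = "info"
        print(resolve_log_level())         # ('INFO', 'info')   -> env var applies
        print(resolve_log_level("error"))  # ('ERROR', 'error') -> CLI flag wins
        logging.basicConfig(level=resolve_log_level()[0])

The next patch in the series moves the corresponding checks into tests/utils/test_logging.py, which asserts the same combinations by patching os.environ with mock.patch.dict.
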
From fe4b6d5fb98d5733e6c3c5815291361e18dac851 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 6 Aug 2024 17:39:04 -0400 Subject: [PATCH 17/23] PR Changes Signed-off-by: Abhishek --- tests/test_sft_trainer.py | 61 ------------ tests/utils/test_logging.py | 97 +++++++++++++++++++ .../history_based_metrics.py | 1 - 3 files changed, 97 insertions(+), 62 deletions(-) create mode 100644 tests/utils/test_logging.py diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index f811c6bb2..26067124a 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -16,10 +16,8 @@ """ # Standard -from unittest import mock import copy import json -import logging import os import tempfile @@ -45,7 +43,6 @@ # Local from tuning import sft_trainer from tuning.config import configs, peft_config -from tuning.utils.logging import set_log_level MODEL_ARGS = configs.ModelArguments( model_name_or_path=MODEL_NAME, use_flash_attn=False, torch_dtype="float32" @@ -80,64 +77,6 @@ PEFT_LORA_ARGS = peft_config.LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05) -@mock.patch.dict(os.environ, {}, clear=True) -def test_set_log_level_for_logger_default(): - """ - Ensure that the correct log level is being set for python native logger and - transformers logger when no env var or CLI flag is passed - """ - - train_args = copy.deepcopy(TRAIN_ARGS) - training_args, logger = set_log_level(train_args) - assert logger.getEffectiveLevel() == logging.WARNING - assert training_args.log_level == "passive" - - -@mock.patch.dict(os.environ, {}, clear=True) -def test_set_log_level_for_logger_with_env_var(): - """ - Ensure that the correct log level is being set for python native logger and - transformers logger when env var LOG_LEVEL is used - """ - - train_args_env = copy.deepcopy(TRAIN_ARGS) - os.environ["LOG_LEVEL"] = "info" - training_args, logger = set_log_level(train_args_env) - assert logger.getEffectiveLevel() == logging.INFO - assert training_args.log_level == "info" - - -@mock.patch.dict(os.environ, {}, clear=True) -def test_set_log_level_for_logger_with_set_verbosity_and_cli(): - """ - Ensure that the correct log level is being set for python native logger and - log_level of transformers logger is unchanged when env var TRANSFORMERS_VERBOSITY is used - and CLI flag is passed - """ - - train_args = copy.deepcopy(TRAIN_ARGS) - os.environ["TRANSFORMERS_VERBOSITY"] = "info" - train_args.log_level = "error" - training_args, logger = set_log_level(train_args) - assert logger.getEffectiveLevel() == logging.ERROR - assert training_args.log_level == "error" - - -@mock.patch.dict(os.environ, {}, clear=True) -def test_set_log_level_for_logger_with_env_var_and_cli(): - """ - Ensure that the correct log level is being set for python native logger and - transformers logger when env var LOG_LEVEL is used and CLI flag is passed - """ - - train_args = copy.deepcopy(TRAIN_ARGS) - os.environ["LOG_LEVEL"] = "info" - train_args.log_level = "error" - training_args, logger = set_log_level(train_args) - assert logger.getEffectiveLevel() == logging.ERROR - assert training_args.log_level == "error" - - def test_run_train_requires_output_dir(): """Check fails when output dir not provided.""" updated_output_dir_train_args = copy.deepcopy(TRAIN_ARGS) diff --git a/tests/utils/test_logging.py b/tests/utils/test_logging.py new file mode 100644 index 000000000..a70047960 --- /dev/null +++ b/tests/utils/test_logging.py @@ -0,0 +1,97 @@ +# Copyright The FMS HF Tuning Authors +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# SPDX-License-Identifier: Apache-2.0 +# https://spdx.dev/learn/handling-license-info/ + +# Standard +from unittest import mock +import logging +import os +import copy + +# Local +from tuning.config import configs +from tuning.utils.logging import set_log_level + +TRAIN_ARGS = configs.TrainingArguments( + num_train_epochs=5, + per_device_train_batch_size=4, + per_device_eval_batch_size=4, + gradient_accumulation_steps=4, + learning_rate=0.00001, + weight_decay=0, + warmup_ratio=0.03, + lr_scheduler_type="cosine", + logging_steps=1, + include_tokens_per_second=True, + packing=False, + max_seq_length=4096, + save_strategy="epoch", + output_dir="tmp", +) + +@mock.patch.dict(os.environ, {}, clear=True) +def test_set_log_level_for_logger_default(): + """ + Ensure that the correct log level is being set for python native logger and + transformers logger when no env var or CLI flag is passed + """ + + train_args = copy.deepcopy(TRAIN_ARGS) + training_args, logger = set_log_level(train_args) + assert logger.getEffectiveLevel() == logging.WARNING + assert training_args.log_level == "passive" + + +@mock.patch.dict(os.environ, {"LOG_LEVEL": "info"}, clear=True) +def test_set_log_level_for_logger_with_env_var(): + """ + Ensure that the correct log level is being set for python native logger and + transformers logger when env var LOG_LEVEL is used + """ + + train_args_env = copy.deepcopy(TRAIN_ARGS) + training_args, logger = set_log_level(train_args_env) + assert logger.getEffectiveLevel() == logging.INFO + assert training_args.log_level == "info" + + +@mock.patch.dict(os.environ, {"TRANSFORMERS_VERBOSITY": "info"}, clear=True) +def test_set_log_level_for_logger_with_set_verbosity_and_cli(): + """ + Ensure that the correct log level is being set for python native logger and + log_level of transformers logger is unchanged when env var TRANSFORMERS_VERBOSITY is used + and CLI flag is passed + """ + + train_args = copy.deepcopy(TRAIN_ARGS) + train_args.log_level = "error" + training_args, logger = set_log_level(train_args) + assert logger.getEffectiveLevel() == logging.ERROR + assert training_args.log_level == "error" + + +@mock.patch.dict(os.environ, {"LOG_LEVEL": "info"}, clear=True) +def test_set_log_level_for_logger_with_env_var_and_cli(): + """ + Ensure that the correct log level is being set for python native logger and + transformers logger when env var LOG_LEVEL is used and CLI flag is passed + """ + + train_args = copy.deepcopy(TRAIN_ARGS) + train_args.log_level = "error" + training_args, logger = set_log_level(train_args) + assert logger.getEffectiveLevel() == logging.ERROR + assert training_args.log_level == "error" diff --git a/tuning/trainercontroller/controllermetrics/history_based_metrics.py b/tuning/trainercontroller/controllermetrics/history_based_metrics.py index afb0e0cac..f66d634e5 100644 --- a/tuning/trainercontroller/controllermetrics/history_based_metrics.py +++ b/tuning/trainercontroller/controllermetrics/history_based_metrics.py @@ -25,7 +25,6 @@ # Local from 
tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler -# Configure log level METRICS_KEY = "metrics" LOG_LOSS_KEY = "loss" TRAINING_LOSS_KEY = "training_loss" From c544f4769612d17b200b70b2186ac2e1419adf74 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Tue, 6 Aug 2024 17:44:26 -0400 Subject: [PATCH 18/23] PR Changes Signed-off-by: Abhishek --- tests/utils/test_logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/utils/test_logging.py b/tests/utils/test_logging.py index a70047960..708376c66 100644 --- a/tests/utils/test_logging.py +++ b/tests/utils/test_logging.py @@ -17,9 +17,9 @@ # Standard from unittest import mock +import copy import logging import os -import copy # Local from tuning.config import configs @@ -42,6 +42,7 @@ output_dir="tmp", ) + @mock.patch.dict(os.environ, {}, clear=True) def test_set_log_level_for_logger_default(): """ From 4841119ddcdb16f89e831d8d9156e5381c2df986 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Wed, 7 Aug 2024 10:39:09 -0400 Subject: [PATCH 19/23] PR Changes Signed-off-by: Abhishek --- tests/utils/test_logging.py | 21 +++------------------ tuning/utils/logging.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/utils/test_logging.py b/tests/utils/test_logging.py index 708376c66..e142f4514 100644 --- a/tests/utils/test_logging.py +++ b/tests/utils/test_logging.py @@ -21,27 +21,12 @@ import logging import os +# First Party +from tests.test_sft_trainer import TRAIN_ARGS + # Local -from tuning.config import configs from tuning.utils.logging import set_log_level -TRAIN_ARGS = configs.TrainingArguments( - num_train_epochs=5, - per_device_train_batch_size=4, - per_device_eval_batch_size=4, - gradient_accumulation_steps=4, - learning_rate=0.00001, - weight_decay=0, - warmup_ratio=0.03, - lr_scheduler_type="cosine", - logging_steps=1, - include_tokens_per_second=True, - packing=False, - max_seq_length=4096, - save_strategy="epoch", - output_dir="tmp", -) - @mock.patch.dict(os.environ, {}, clear=True) def test_set_log_level_for_logger_default(): diff --git a/tuning/utils/logging.py b/tuning/utils/logging.py index 819a065cc..229fa2aac 100644 --- a/tuning/utils/logging.py +++ b/tuning/utils/logging.py @@ -1,3 +1,17 @@ +# Copyright The FMS HF Tuning Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # Standard import logging import os From 5ecf4ddf4e95944f6f80dbf48932295ea7c59af8 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 8 Aug 2024 11:22:17 -0400 Subject: [PATCH 20/23] Metrics file epoch indexing from 0 Signed-off-by: Abhishek --- tuning/trackers/filelogging_tracker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index 213377d96..3df473867 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -56,7 +56,9 @@ def _track_loss(self, loss_key, log_name, log_file, logs, state): log_obj = { "name": log_name, "data": { - "epoch": round(logs["epoch"], 2), + "epoch": round((logs["epoch"] - 1), 2) + if logs["epoch"] != 1.0 + else 0, "step": state.global_step, "value": logs[loss_key], "timestamp": datetime.isoformat(datetime.now()), From 89124ac9358b0b290c91d7e1a829664d76776853 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 8 Aug 2024 17:03:44 -0400 Subject: [PATCH 21/23] Revert last commit Signed-off-by: Abhishek --- tuning/trackers/filelogging_tracker.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tuning/trackers/filelogging_tracker.py b/tuning/trackers/filelogging_tracker.py index 3df473867..213377d96 100644 --- a/tuning/trackers/filelogging_tracker.py +++ b/tuning/trackers/filelogging_tracker.py @@ -56,9 +56,7 @@ def _track_loss(self, loss_key, log_name, log_file, logs, state): log_obj = { "name": log_name, "data": { - "epoch": round((logs["epoch"] - 1), 2) - if logs["epoch"] != 1.0 - else 0, + "epoch": round(logs["epoch"], 2), "step": state.global_step, "value": logs[loss_key], "timestamp": datetime.isoformat(datetime.now()), From 3068f5187c243eadb901c4668dfda5643b2da930 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 8 Aug 2024 19:39:02 -0400 Subject: [PATCH 22/23] PR Changes Signed-off-by: Abhishek --- build/accelerate_launch.py | 1 + tests/utils/test_logging.py | 3 ++- tuning/utils/logging.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index 526130b36..2cfc9069f 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -78,6 +78,7 @@ def main(): ) # Configure log_level of python native logger. + # CLI arg takes precedence over env var. And if neither is set, we use default "WARNING" log_level = job_config.get( "log_level" ) # this will be set to either the value found or None diff --git a/tests/utils/test_logging.py b/tests/utils/test_logging.py index e142f4514..7b7aa1a2a 100644 --- a/tests/utils/test_logging.py +++ b/tests/utils/test_logging.py @@ -73,7 +73,8 @@ def test_set_log_level_for_logger_with_set_verbosity_and_cli(): def test_set_log_level_for_logger_with_env_var_and_cli(): """ Ensure that the correct log level is being set for python native logger and - transformers logger when env var LOG_LEVEL is used and CLI flag is passed + transformers logger when env var LOG_LEVEL is used and CLI flag is passed. + In this case, CLI arg takes precedence over the set env var LOG_LEVEL. """ train_args = copy.deepcopy(TRAIN_ARGS) diff --git a/tuning/utils/logging.py b/tuning/utils/logging.py index 229fa2aac..752651ceb 100644 --- a/tuning/utils/logging.py +++ b/tuning/utils/logging.py @@ -23,6 +23,8 @@ def set_log_level(train_args, logger_name=None): Args: train_args Training arguments for training model. + logger_name + Logger name with which the logger is instantiated. 
Returns: train_args From 0866bcea86be3a8e7bc07c526b5f6fca50ccd8d5 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 8 Aug 2024 20:24:40 -0400 Subject: [PATCH 23/23] PR Changes Signed-off-by: Abhishek --- tuning/utils/logging.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tuning/utils/logging.py b/tuning/utils/logging.py index 752651ceb..1f1b6c73e 100644 --- a/tuning/utils/logging.py +++ b/tuning/utils/logging.py @@ -53,7 +53,9 @@ def set_log_level(train_args, logger_name=None): else os.environ.get("TRANSFORMERS_VERBOSITY") ) - logging.basicConfig(level=log_level.upper()) + logging.basicConfig( + format="%(levelname)s:%(filename)s:%(message)s", level=log_level.upper() + ) if logger_name: train_logger = logging.getLogger(logger_name)
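
The hunks above only expose fragments of tuning/utils/logging.py, so the following is a minimal sketch of what set_log_level ends up doing, reconstructed from the visible diff context and from the behaviour asserted in tests/utils/test_logging.py. It is not a verbatim copy of the merged file; in particular, the exact handling of TRANSFORMERS_VERBOSITY is an assumption inferred from the "else os.environ.get("TRANSFORMERS_VERBOSITY")" context line in the last patch.

# Sketch only: reconstructed from the hunks and tests in this series,
# not the authoritative tuning/utils/logging.py implementation.
# Standard
import logging
import os


def set_log_level(train_args, logger_name=None):
    """Resolve the training log level and configure the Python logger.

    Precedence: an explicit --log_level CLI value wins, then the LOG_LEVEL
    environment variable, then TRANSFORMERS_VERBOSITY, then "WARNING".
    """
    log_level = "WARNING"  # default when neither CLI flag nor env var is set

    if train_args.log_level == "passive":
        # "passive" is the transformers default, i.e. no CLI flag was passed
        if "LOG_LEVEL" in os.environ:
            log_level = os.environ["LOG_LEVEL"]
            # keep the transformers-side level in sync with the env var
            train_args.log_level = log_level.lower()
        elif "TRANSFORMERS_VERBOSITY" in os.environ:
            # assumption: transformers already honours this variable itself,
            # so only the Python logger picks it up and train_args is untouched
            log_level = os.environ["TRANSFORMERS_VERBOSITY"]
    else:
        # CLI argument takes precedence over any environment variable
        log_level = train_args.log_level

    logging.basicConfig(
        format="%(levelname)s:%(filename)s:%(message)s", level=log_level.upper()
    )

    train_logger = (
        logging.getLogger(logger_name) if logger_name else logging.getLogger()
    )
    return train_args, train_logger

Callers then obtain the updated arguments and a configured logger in a single call, e.g. training_args, logger = set_log_level(training_args), which is exactly the pattern the four tests above exercise.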
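
The launcher in build/accelerate_launch.py follows the same precedence rule described by the comment added in PATCH 22 ("CLI arg takes precedence over env var"). The lines after the job_config.get call are not part of this series, so the snippet below is only an illustrative stand-in for how that resolution could look; job_config here is a hypothetical dict standing in for the parsed JSON job config.

# Illustrative only: not taken from build/accelerate_launch.py.
import logging
import os

job_config = {"log_level": None}  # stand-in for the parsed JSON job config
log_level = job_config.get("log_level")  # CLI/JSON value, or None when absent
log_level = log_level or os.environ.get("LOG_LEVEL", "WARNING")  # env var, then default
logging.basicConfig(level=log_level.upper())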