diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py
index 6c71d0357..4243491f4 100644
--- a/optimum_benchmark/backends/base.py
+++ b/optimum_benchmark/backends/base.py
@@ -3,7 +3,6 @@
 import random
 import shutil
 from abc import ABC
-from dataclasses import dataclass
 from logging import getLogger
 from multiprocessing import Process
 from typing import (
@@ -15,14 +14,12 @@
     Generic,
     List,
     Optional,
-    TypeVar,
     Union,
 )
 
 import numpy as np
 import torch
 from optimum.exporters import TasksManager
-from psutil import cpu_count
 from transformers import AutoConfig, AutoProcessor
 
 if TYPE_CHECKING:
@@ -39,6 +36,7 @@
     from .utils import PreTrainedProcessor
 
 from ..task_utils import DIFFUSION_TASKS, TEXT_GENERATION_TASKS
+from .config import BackendConfigT
 from .utils import (
     check_no_process_is_running_on_cuda_device,
     check_only_this_process_is_running_on_cuda_device,
@@ -46,39 +44,8 @@
     extract_shapes_from_model_artifacts,
 )
 
-
-@dataclass
-class BackendConfig(ABC):
-    name: str
-    version: str
-    _target_: str
-
-    # backend options
-    seed: int = 42
-    inter_op_num_threads: Optional[int] = None
-    intra_op_num_threads: Optional[int] = None
-
-    # isolation options
-    initial_isolation_check: bool = True
-    continous_isolation_check: bool = True
-
-    # clean up options
-    delete_cache: bool = False
-
-    def __post_init__(self):
-        if self.inter_op_num_threads is not None:
-            if self.inter_op_num_threads == -1:
-                self.inter_op_num_threads = cpu_count()
-
-        if self.intra_op_num_threads is not None:
-            if self.intra_op_num_threads == -1:
-                self.intra_op_num_threads = cpu_count()
-
-
 LOGGER = getLogger("backend")
 
-BackendConfigT = TypeVar("BackendConfigT", bound=BackendConfig)
-
 
 class Backend(Generic[BackendConfigT], ABC):
     NAME: ClassVar[str]
diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py
new file mode 100644
index 000000000..4943f3955
--- /dev/null
+++ b/optimum_benchmark/backends/config.py
@@ -0,0 +1,36 @@
+from abc import ABC
+from dataclasses import dataclass
+from typing import Optional, TypeVar
+
+from psutil import cpu_count
+
+
+@dataclass
+class BackendConfig(ABC):
+    name: str
+    version: str
+    _target_: str
+
+    # backend options
+    seed: int = 42
+    inter_op_num_threads: Optional[int] = None
+    intra_op_num_threads: Optional[int] = None
+
+    # isolation options
+    initial_isolation_check: bool = True
+    continous_isolation_check: bool = True
+
+    # clean up options
+    delete_cache: bool = False
+
+    def __post_init__(self):
+        if self.inter_op_num_threads is not None:
+            if self.inter_op_num_threads == -1:
+                self.inter_op_num_threads = cpu_count()
+
+        if self.intra_op_num_threads is not None:
+            if self.intra_op_num_threads == -1:
+                self.intra_op_num_threads = cpu_count()
+
+
+BackendConfigT = TypeVar("BackendConfigT", bound=BackendConfig)
diff --git a/optimum_benchmark/backends/neural_compressor/config.py b/optimum_benchmark/backends/neural_compressor/config.py
index 1108a000c..c0b9754f3 100644
--- a/optimum_benchmark/backends/neural_compressor/config.py
+++ b/optimum_benchmark/backends/neural_compressor/config.py
@@ -1,15 +1,12 @@
-import importlib.metadata
 from dataclasses import dataclass, field
 from typing import Any, Dict
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
+from ...import_utils import neural_compressor_version
+from ..config import BackendConfig
 
-OmegaConf.register_new_resolver(
-    "neural_compressor_version",
-    lambda: importlib.metadata.version("neural_compressor"),
-)
+OmegaConf.register_new_resolver("neural_compressor_version", neural_compressor_version)
 
 # https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L490
 ACCURACY_CRITERION_CONFIG = {
diff --git a/optimum_benchmark/backends/onnxruntime/config.py b/optimum_benchmark/backends/onnxruntime/config.py
index 9ae25e927..4da85c2e6 100644
--- a/optimum_benchmark/backends/onnxruntime/config.py
+++ b/optimum_benchmark/backends/onnxruntime/config.py
@@ -1,42 +1,36 @@
-import importlib.metadata
-import importlib.util
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
-from .utils import infer_device_id
+from ...import_utils import onnxruntime_version
+from ..config import BackendConfig
 
-def onnxruntime_version():
-    try:
-        return "ort:" + importlib.metadata.version("onnxruntime")
-    except importlib.metadata.PackageNotFoundError:
-        try:
-            return "ort-gpu:" + importlib.metadata.version("onnxruntime-gpu")
-        except importlib.metadata.PackageNotFoundError:
-            return "ort:unknown"
+def infer_device_id(device: str) -> int:
+    """Infer the device id from the given device string."""
+    if device == "cuda":
+        # torch.cuda.current_device() will always return 0
+        # unless torch.cuda.set_device() is called somewhere
+        return 0
+    elif "cuda" in device:
+        return int(device.split(":")[1])
+    elif device == "cpu":
+        return -1
+    else:
+        raise ValueError(f"Unknown device: {device}")
 
+OmegaConf.register_new_resolver("onnxruntime_version", onnxruntime_version)
+
 OmegaConf.register_new_resolver("is_gpu", lambda device: "cuda" in device)
+OmegaConf.register_new_resolver("infer_device_id", lambda device: infer_device_id(device))
+OmegaConf.register_new_resolver("is_profiling", lambda benchmark_name: benchmark_name == "profiling")
 OmegaConf.register_new_resolver(
-    "is_profiling",
-    lambda benchmark_name: benchmark_name == "profiling",
-)
-OmegaConf.register_new_resolver(
-    "infer_provider",
-    lambda device: "CPUExecutionProvider" if device == "cpu" else "CUDAExecutionProvider",
-)
-OmegaConf.register_new_resolver(
-    "infer_device_id",
-    lambda device: infer_device_id(device),
-)
-OmegaConf.register_new_resolver(
-    "onnxruntime_version",
-    lambda: onnxruntime_version(),
+    "infer_provider", lambda device: "CPUExecutionProvider" if device == "cpu" else "CUDAExecutionProvider"
 )
 
+
 OPTIMIZATION_CONFIG = {
     "optimization_level": 1,  # 0, 1, 2, 99
     "fp16": False,
diff --git a/optimum_benchmark/backends/onnxruntime/utils.py b/optimum_benchmark/backends/onnxruntime/utils.py
index be63fef8a..5b4a3147c 100644
--- a/optimum_benchmark/backends/onnxruntime/utils.py
+++ b/optimum_benchmark/backends/onnxruntime/utils.py
@@ -11,20 +11,6 @@
 TASKS_TO_ORTMODELS = {task: task_dict["class"][0] for task, task_dict in ORT_SUPPORTED_TASKS.items()}
 
 
-def infer_device_id(device: str) -> int:
-    """Infer the device id from the given device string."""
-    if device == "cuda":
-        # torch.cuda.current_device() will always return 0
-        # unless torch.cuda.set_device() is called somewhere
-        return 0
-    elif "cuda" in device:
-        return int(device.split(":")[1])
-    elif device == "cpu":
-        return -1
-    else:
-        raise ValueError(f"Unknown device: {device}")
-
-
 def format_quantization_config(quantization_config: Dict[str, Any]) -> None:
     """Format the quantization dictionary for onnxruntime."""
     # the conditionals are here because some quantization strategies don't have all the options
diff --git a/optimum_benchmark/backends/openvino/config.py b/optimum_benchmark/backends/openvino/config.py
index 1f6a49aea..f4a05ce56 100644
--- a/optimum_benchmark/backends/openvino/config.py
+++ b/optimum_benchmark/backends/openvino/config.py
@@ -1,15 +1,12 @@
-import importlib.metadata
 from dataclasses import dataclass, field
 from typing import Any, Dict
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
+from ...import_utils import openvino_version
+from ..config import BackendConfig
 
-OmegaConf.register_new_resolver(
-    "openvino_version",
-    lambda: importlib.metadata.version("openvino"),
-)
+OmegaConf.register_new_resolver("openvino_version", openvino_version)
 
 # https://github.com/huggingface/optimum-intel/blob/main/optimum/intel/openvino/configuration.py#L81
 QUANTIZATION_CONFIG = {
diff --git a/optimum_benchmark/backends/pytorch/backned.py b/optimum_benchmark/backends/pytorch/backned.py
index 6be3dfec7..cb3771f35 100644
--- a/optimum_benchmark/backends/pytorch/backned.py
+++ b/optimum_benchmark/backends/pytorch/backned.py
@@ -4,8 +4,6 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, List
 
 import torch
-from accelerate import init_empty_weights
-from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model
 from optimum.bettertransformer import BetterTransformer
 from torch.distributed.elastic.multiprocessing.errors import record
 from torch.distributed.launcher.api import LaunchConfig, elastic_launch
@@ -143,6 +141,8 @@ def load_model_from_pretrained(self) -> None:
     def load_model_from_config(self) -> None:
         # TODO: create no_weights tests
         LOGGER.info("\t+ Initializing empty weights model on device: meta")
+        from accelerate import init_empty_weights
+
         with init_empty_weights():
             self.pretrained_model = self.automodel_class.from_config(
                 config=self.pretrained_config,
@@ -156,6 +156,8 @@ def load_model_from_config(self) -> None:
             LOGGER.info("\t+ Randomizing model weights")
             randomize_weights(self.pretrained_model)
         LOGGER.info("\t+ Processing BnB config")
+        from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model
+
         bnb_quantization_config = BnbQuantizationConfig(
             **self.config.quantization_config,
             torch_dtype=self.config.torch_dtype,
diff --git a/optimum_benchmark/backends/pytorch/config.py b/optimum_benchmark/backends/pytorch/config.py
index ad8884c1b..574e7290b 100644
--- a/optimum_benchmark/backends/pytorch/config.py
+++ b/optimum_benchmark/backends/pytorch/config.py
@@ -1,24 +1,15 @@
-import importlib.metadata
 import os
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
-
-OmegaConf.register_new_resolver(
-    "device_count",
-    lambda: len(os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",")),
-)
-OmegaConf.register_new_resolver(
-    "is_inference",
-    lambda benchmark_name: benchmark_name == "inference",
-)
-OmegaConf.register_new_resolver(
-    "pytorch_version",
-    lambda: importlib.metadata.version("torch"),
-)
+from ...import_utils import torch_version
+from ..config import BackendConfig
+
+OmegaConf.register_new_resolver("device_count", lambda: len(os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",")))
+OmegaConf.register_new_resolver("is_inference", lambda benchmark_name: benchmark_name == "inference")
+OmegaConf.register_new_resolver("pytorch_version", torch_version)
 
 DEVICE_MAPS = ["auto", "sequential"]
 AMP_DTYPES = ["bfloat16", "float16"]
diff --git a/optimum_benchmark/benchmarks/base.py b/optimum_benchmark/benchmarks/base.py
index 24cc27961..4c33ed787 100644
--- a/optimum_benchmark/benchmarks/base.py
+++ b/optimum_benchmark/benchmarks/base.py
@@ -1,9 +1,10 @@
 from abc import ABC
 from dataclasses import dataclass
 from logging import getLogger
-from typing import ClassVar, Generic, TypeVar
+from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar
 
-from optimum_benchmark.backends.base import Backend
+if TYPE_CHECKING:
+    from ..backends.base import Backend
 
 LOGGER = getLogger("benchmark")
 
@@ -29,7 +30,7 @@ def configure(self, config: BenchmarkConfigT) -> None:
         LOGGER.info(f"Configuring {self.NAME} benchmark")
         self.config = config
 
-    def run(self, backend: Backend) -> None:
+    def run(self, backend: "Backend") -> None:
         raise NotImplementedError("Benchmark must implement run method")
 
     def save(self) -> None:
diff --git a/optimum_benchmark/benchmarks/inference.py b/optimum_benchmark/benchmarks/inference/benchmark.py
similarity index 61%
rename from optimum_benchmark/benchmarks/inference.py
rename to optimum_benchmark/benchmarks/inference/benchmark.py
index 2b603cf10..1816660a2 100644
--- a/optimum_benchmark/benchmarks/inference.py
+++ b/optimum_benchmark/benchmarks/inference/benchmark.py
@@ -1,107 +1,20 @@
 import statistics
-from dataclasses import dataclass, field
 from logging import getLogger
-from typing import Any, Dict, List, Optional
+from typing import TYPE_CHECKING, List
 
-from omegaconf import OmegaConf
 from pandas import DataFrame
 
-from ..backends.base import Backend
-from ..generators.input_generator import InputGenerator
-from ..task_utils import DIFFUSION_TASKS, TEXT_GENERATION_TASKS
-from ..trackers.latency import latency_tracker_class_for_backend
-from ..trackers.memory import memory_tracker_class_for_backend
-from .base import Benchmark, BenchmarkConfig
-from .utils import three_significant_digits_wrapper
+from ...generators.input_generator import InputGenerator
+from ...trackers.latency import latency_tracker_class_for_backend
+from ...trackers.memory import memory_tracker_class_for_backend
+from ..base import Benchmark
+from ..utils import three_significant_digits_wrapper
+from .config import InferenceConfig
 
-LOGGER = getLogger("inference")
-
-OmegaConf.register_new_resolver(
-    "can_generate",
-    lambda task: task in TEXT_GENERATION_TASKS,
-)
-OmegaConf.register_new_resolver(
-    "can_diffuse",
-    lambda task: task in DIFFUSION_TASKS,
-)
-
-GENERATE_CONFIG = {
-    "max_new_tokens": 100,
-    "min_new_tokens": 100,
-    "do_sample": False,
-    "use_cache": True,
-    "pad_token_id": 0,
-    "num_beams": 1,
-}
-
-DIFUSION_CONFIG = {
-    "num_images_per_prompt": 1,
-}
-
-
-@dataclass
-class InferenceConfig(BenchmarkConfig):
-    name: str = "inference"
-    _target_: str = "optimum_benchmark.benchmarks.inference.InferenceBenchmark"
-
-    # benchmark options
-    memory: bool = False
-    duration: int = 10
-    warmup_runs: int = 10
-    benchmark_duration: Optional[int] = None
-
-    # input options
-    input_shapes: Dict = field(
-        default_factory=lambda: {
-            # used with all tasks
-            "batch_size": 2,
-            # used with text input tasks
-            "sequence_length": 16,
-            # used with multiple choice tasks where input
-            # is of shape (batch_size, num_choices, sequence_length)
-            "num_choices": 1,
-            # used with audio input tasks
-            "feature_size": 80,
-            "nb_max_frames": 3000,
-            "audio_sequence_length": 16000,
-        },
-    )
-
-    # TODO: deprecate this and use `benchamrk.generate_kwargs`
-    new_tokens: Optional[int] = None
-
-    # forward options
-    can_diffuse: bool = "${can_diffuse:${task}}"
-    forward_kwargs: Dict[str, Any] = field(default_factory=dict)
-
-    # generation options
-    can_generate: bool = "${can_generate:${task}}"
-    generate_kwargs: Dict[str, Any] = field(default_factory=dict)
-
-    def __post_init__(self):
-        if self.can_diffuse:
-            self.forward_kwargs = OmegaConf.to_object(OmegaConf.merge(self.forward_kwargs, DIFUSION_CONFIG))
-
-        if self.can_generate:
-            self.generate_kwargs = OmegaConf.to_object(OmegaConf.merge(self.generate_kwargs, GENERATE_CONFIG))
-
-            if self.generate_kwargs["max_new_tokens"] != self.generate_kwargs["min_new_tokens"]:
-                raise ValueError("`max_new_tokens` and `min_new_tokens` must be equal for fixed length output.")
-
-        if self.new_tokens is not None:
-            LOGGER.warning(
-                "The `new_tokens` option is deprecated, please use `generate_kwargs` instead. "
-                "`generate_kwargs.max_new_tokens` and `generate_kwargs.min_new_tokens` will be set to the value of `new_tokens`."
-            )
-            self.generate_kwargs["max_new_tokens"] = self.new_tokens
-            self.generate_kwargs["min_new_tokens"] = self.new_tokens
+if TYPE_CHECKING:
+    from ...backends.base import Backend
 
-        if self.benchmark_duration:
-            LOGGER.warning(
-                "The `benchmark_duration` option is deprecated, please use `duration` instead. "
-                "`duration` will be set to the value of `benchmark_duration`."
-            )
-            self.duration = self.benchmark_duration
+LOGGER = getLogger("inference")
 
 
 class InferenceBenchmark(Benchmark[InferenceConfig]):
@@ -113,10 +26,10 @@ def __init__(self):
         self.forward_latencies: List[float] = []
         self.generate_latencies: List[float] = []
 
-    def configure(self, config: InferenceConfig):
+    def configure(self, config: "InferenceConfig"):
         super().configure(config)
 
-    def run(self, backend: Backend) -> None:
+    def run(self, backend: "Backend") -> None:
         LOGGER.info("Running inference benchmark")
         self.config.input_shapes.update(backend.model_shapes)
 
@@ -133,7 +46,7 @@ def run(self, backend: Backend) -> None:
         # if possible, run generation pass tracking
         self.run_generate_tracking(backend)
 
-    def run_forward_tracking(self, backend: Backend) -> None:
+    def run_forward_tracking(self, backend: "Backend") -> None:
         forward_input = self.input_generator.generate(
             mode="forward",
         )
@@ -170,7 +83,7 @@ def run_forward_tracking(self, backend: Backend) -> None:
         self.forward_peak_memory = memory_tracker.get_peak_memory()
         LOGGER.info(f"\t+ Forward pass peak memory: {self.forward_peak_memory} (MB)")
 
-    def run_generate_tracking(self, backend: Backend) -> None:
+    def run_generate_tracking(self, backend: "Backend") -> None:
         generate_input = self.input_generator.generate(
             mode="generate",
         )
diff --git a/optimum_benchmark/benchmarks/inference/config.py b/optimum_benchmark/benchmarks/inference/config.py
new file mode 100644
index 000000000..7f5e4d9c0
--- /dev/null
+++ b/optimum_benchmark/benchmarks/inference/config.py
@@ -0,0 +1,97 @@
+from dataclasses import dataclass, field
+from logging import getLogger
+from typing import Any, Dict, Optional
+
+from omegaconf import OmegaConf
+
+from ...task_utils import DIFFUSION_TASKS, TEXT_GENERATION_TASKS
+from ..base import BenchmarkConfig
+
+LOGGER = getLogger("inference")
+
+OmegaConf.register_new_resolver(
+    "can_generate",
+    lambda task: task in TEXT_GENERATION_TASKS,
+)
+OmegaConf.register_new_resolver(
+    "can_diffuse",
+    lambda task: task in DIFFUSION_TASKS,
+)
+
+GENERATE_CONFIG = {
+    "max_new_tokens": 100,
+    "min_new_tokens": 100,
+    "do_sample": False,
+    "use_cache": True,
+    "pad_token_id": 0,
+    "num_beams": 1,
+}
+
+DIFUSION_CONFIG = {
+    "num_images_per_prompt": 1,
+}
+
+
+@dataclass
+class InferenceConfig(BenchmarkConfig):
+    name: str = "inference"
+    _target_: str = "optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark"
+
+    # benchmark options
+    memory: bool = False
+    duration: int = 10
+    warmup_runs: int = 10
+    benchmark_duration: Optional[int] = None
+
+    # input options
+    input_shapes: Dict = field(
+        default_factory=lambda: {
+            # used with all tasks
+            "batch_size": 2,
+            # used with text input tasks
+            "sequence_length": 16,
+            # used with multiple choice tasks where input
+            # is of shape (batch_size, num_choices, sequence_length)
+            "num_choices": 1,
+            # used with audio input tasks
+            "feature_size": 80,
+            "nb_max_frames": 3000,
+            "audio_sequence_length": 16000,
+        },
+    )
+
+    # TODO: deprecate this and use `benchamrk.generate_kwargs`
+    new_tokens: Optional[int] = None
+
+    # forward options
+    can_diffuse: bool = "${can_diffuse:${task}}"
+    forward_kwargs: Dict[str, Any] = field(default_factory=dict)
+
+    # generation options
+    can_generate: bool = "${can_generate:${task}}"
+    generate_kwargs: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self):
+        if self.can_diffuse:
+            self.forward_kwargs = OmegaConf.to_object(OmegaConf.merge(self.forward_kwargs, DIFUSION_CONFIG))
+
+        if self.can_generate:
+            self.generate_kwargs = OmegaConf.to_object(OmegaConf.merge(self.generate_kwargs, GENERATE_CONFIG))
+
+            if self.generate_kwargs["max_new_tokens"] != self.generate_kwargs["min_new_tokens"]:
+                raise ValueError("`max_new_tokens` and `min_new_tokens` must be equal for fixed length output.")
+
+        if self.new_tokens is not None:
+            LOGGER.warning(
+                "The `new_tokens` option is deprecated, please use `generate_kwargs` instead. "
+                "`generate_kwargs.max_new_tokens` and `generate_kwargs.min_new_tokens` will be set to the value of `new_tokens`."
+            )
+            self.generate_kwargs["max_new_tokens"] = self.new_tokens
+            self.generate_kwargs["min_new_tokens"] = self.new_tokens
+
+        if self.benchmark_duration:
+            LOGGER.warning(
+                "The `benchmark_duration` option is deprecated, please use `duration` instead. "
+                "`duration` will be set to the value of `benchmark_duration`."
+            )
+            self.duration = self.benchmark_duration
diff --git a/optimum_benchmark/benchmarks/training.py b/optimum_benchmark/benchmarks/training/benchmark.py
similarity index 53%
rename from optimum_benchmark/benchmarks/training.py
rename to optimum_benchmark/benchmarks/training/benchmark.py
index fd8d6e5ed..f415f815b 100644
--- a/optimum_benchmark/benchmarks/training.py
+++ b/optimum_benchmark/benchmarks/training/benchmark.py
@@ -1,61 +1,17 @@
-from dataclasses import dataclass, field
 from logging import getLogger
-from typing import Any, Dict
+from typing import TYPE_CHECKING, Any, Dict
 
-from omegaconf import OmegaConf
 from pandas import DataFrame
 
-from ..backends.base import Backend
-from ..generators.dataset_generator import DatasetGenerator
-from .base import Benchmark, BenchmarkConfig
-from .utils import MeasurementCallback, get_data_collator
+from ...generators.dataset_generator import DatasetGenerator
+from ..base import Benchmark
+from ..utils import MeasurementCallback, get_data_collator
+from .config import TrainingConfig
 
-LOGGER = getLogger("training")
-
-# resolvers
-OmegaConf.register_new_resolver("is_cpu", lambda device: device == "cpu")
-
-
-@dataclass
-class TrainingConfig(BenchmarkConfig):
-    name: str = "training"
-    _target_: str = "optimum_benchmark.benchmarks.training.TrainingBenchmark"
-
-    # training options
-    warmup_steps: int = 40  # still thinks this too high
+if TYPE_CHECKING:
+    from ...backends.base import Backend
 
-    # dataset options
-    dataset_shapes: Dict = field(
-        default_factory=lambda: {
-            # used with all tasks
-            "dataset_size": 500,
-            # used with text input tasks
-            "sequence_length": 16,
-            # used with multiple choice tasks where input
-            # is of shape (batch_size, num_choices, sequence_length)
-            "num_choices": 1,
-            # used with audio input tasks
-            "feature_size": 80,
-            "nb_max_frames": 3000,
-            "audio_sequence_length": 16000,
-        }
-    )
-
-    # training options
-    training_arguments: Dict = field(
-        default_factory=lambda: {
-            # these are arguments that we set by default
-            # but can be overwritten by the user
-            "skip_memory_metrics": True,
-            # memory metrics are wrong when using multiple processes
-            "output_dir": "./trainer_output",
-            "use_cpu": "${is_cpu:${device}}",
-            "ddp_find_unused_parameters": False,
-            "do_train": True,
-            "do_eval": False,
-            "do_predict": False,
-        }
-    )
+LOGGER = getLogger("training")
 
 
 class TrainingBenchmark(Benchmark[TrainingConfig]):
diff --git a/optimum_benchmark/benchmarks/training/config.py b/optimum_benchmark/benchmarks/training/config.py
new file mode 100644
index 000000000..88d735e52
--- /dev/null
+++ b/optimum_benchmark/benchmarks/training/config.py
@@ -0,0 +1,54 @@
+from dataclasses import dataclass, field
+from logging import getLogger
+from typing import Dict
+
+from omegaconf import OmegaConf
+
+from ..base import BenchmarkConfig
+
+LOGGER = getLogger("training")
+
+# resolvers
+OmegaConf.register_new_resolver("is_cpu", lambda device: device == "cpu")
+
+
+@dataclass
+class TrainingConfig(BenchmarkConfig):
+    name: str = "training"
+    _target_: str = "optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark"
+
+    # training options
+    warmup_steps: int = 40  # still thinks this too high
+
+    # dataset options
+    dataset_shapes: Dict = field(
+        default_factory=lambda: {
+            # used with all tasks
+            "dataset_size": 500,
+            # used with text input tasks
+            "sequence_length": 16,
+            # used with multiple choice tasks where input
+            # is of shape (batch_size, num_choices, sequence_length)
+            "num_choices": 1,
+            # used with audio input tasks
+            "feature_size": 80,
+            "nb_max_frames": 3000,
+            "audio_sequence_length": 16000,
+        }
+    )
+
+    # training options
+    training_arguments: Dict = field(
+        default_factory=lambda: {
+            # these are arguments that we set by default
+            # but can be overwritten by the user
+            "skip_memory_metrics": True,
+            # memory metrics are wrong when using multiple processes
+            "output_dir": "./trainer_output",
+            "use_cpu": "${is_cpu:${device}}",
+            "ddp_find_unused_parameters": False,
+            "do_train": True,
+            "do_eval": False,
+            "do_predict": False,
+        }
+    )
diff --git a/optimum_benchmark/experiment.py b/optimum_benchmark/experiment.py
index 3859e0319..8d2baf998 100644
--- a/optimum_benchmark/experiment.py
+++ b/optimum_benchmark/experiment.py
@@ -2,39 +2,33 @@
 import platform
 from dataclasses import dataclass, field
 from logging import getLogger
-from typing import Any, Dict, Type
+from typing import TYPE_CHECKING, Any, Dict, Optional, Type
 
 import hydra
-from accelerate import __version__ as accelerate_version
-from diffusers import __version__ as diffusers_version
 from hydra.core.config_store import ConfigStore
 from hydra.utils import get_class
 from omegaconf import DictConfig, OmegaConf
-from optimum.exporters import TasksManager
-from optimum.version import __version__ as optimum_version
-from transformers import __version__ as transformers_version
 
-from .backends.base import Backend
 from .backends.neural_compressor.config import INCConfig
 from .backends.onnxruntime.config import ORTConfig
 from .backends.openvino.config import OVConfig
 from .backends.pytorch.config import PyTorchConfig
-from .benchmarks.base import Benchmark
-from .benchmarks.inference import InferenceConfig
-from .benchmarks.training import TrainingConfig
+from .benchmarks.inference.config import InferenceConfig
+from .benchmarks.training.config import TrainingConfig
 from .env_utils import get_cpu, get_cpu_ram_mb
+from .import_utils import (
+    accelerate_version,
+    diffusers_version,
+    optimum_version,
+    transformers_version,
+)
+from .task_utils import infer_task
 
-LOGGER = getLogger("experiment")
+if TYPE_CHECKING:
+    from .backends.base import Backend
+    from .benchmarks.base import Benchmark
 
-OmegaConf.register_new_resolver(
-    "infer_task",
-    # TODO: find a better way for this; it doesn't
-    # always work because it relies on hub metadata
-    lambda model, revision: TasksManager.infer_task_from_model(
-        model=model,
-        revision=revision,
-    ),
-)
+LOGGER = getLogger("experiment")
 
 
 @dataclass
@@ -52,7 +46,7 @@ class ExperimentConfig:
     # Device name or path (cpu, cuda, cuda:0, ...)
     device: str
     # Task name (text-classification, image-classification, ...)
-    task: str = "${infer_task:${model},${hub_kwargs.revision}}"
+    task: Optional[str] = None
 
     # ADDITIONAL MODEL CONFIGURATION: Model revision, use_auth_token, trust_remote_code
     hub_kwargs: Dict = field(
@@ -68,10 +62,10 @@ class ExperimentConfig:
     # TODO: add gpu info when available
     environment: Dict = field(
         default_factory=lambda: {
-            "optimum_version": optimum_version,
-            "transformers_version": transformers_version,
-            "accelerate_version": accelerate_version,
-            "diffusers_version": diffusers_version,
+            "optimum_version": optimum_version(),
+            "transformers_version": transformers_version(),
+            "accelerate_version": accelerate_version(),
+            "diffusers_version": diffusers_version(),
             "python_version": platform.python_version(),
             "system": platform.system(),
             "cpu": get_cpu(),
@@ -80,6 +74,12 @@
         }
     )
 
+    def __post_init__(self) -> None:
+        # Infer task if not provided
+        if self.task is None:
+            LOGGER.warning("Task not provided, will try to infer it from the model's metadata")
+            self.task = infer_task(self.model, self.hub_kwargs.get("revision", "main"))
+
 
 # Register configurations
 cs = ConfigStore.instance()
@@ -101,8 +101,8 @@ def run_experiment(experiment: DictConfig) -> None:
     OmegaConf.save(experiment, "hydra_config.yaml", resolve=True)
 
     # Allocate requested backend
-    backend_factory: Type[Backend] = get_class(experiment.backend._target_)
-    backend: Backend = backend_factory(
+    backend_factory: Type["Backend"] = get_class(experiment.backend._target_)
+    backend: "Backend" = backend_factory(
         task=experiment.task,
         model=experiment.model,
         device=experiment.device,
@@ -116,8 +116,8 @@ def run_experiment(experiment: DictConfig) -> None:
         raise e
 
     # Allocate requested benchmark
-    benchmark_factory: Type[Benchmark] = get_class(experiment.benchmark._target_)
-    benchmark: Benchmark = benchmark_factory()
+    benchmark_factory: Type["Benchmark"] = get_class(experiment.benchmark._target_)
+    benchmark: "Benchmark" = benchmark_factory()
     try:
         benchmark.configure(experiment.benchmark)
     except Exception as e:
diff --git a/optimum_benchmark/generators/input_generator.py b/optimum_benchmark/generators/input_generator.py
index f9858dac3..9b2a1b007 100644
--- a/optimum_benchmark/generators/input_generator.py
+++ b/optimum_benchmark/generators/input_generator.py
@@ -1,8 +1,7 @@
 from logging import getLogger
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional
 
 if TYPE_CHECKING:
-    import torch
     from transformers import PretrainedConfig
 
 from optimum_benchmark.generators.model_type_generator import (
@@ -54,7 +53,7 @@ def __init__(
 
     # TODO: we can drop the torch dependency here by returning a dict of numpy arrays
     # and then converting them to torch tensors in backend.prepare_for_inference
-    def generate(self, mode: str) -> Dict[str, Union["torch.Tensor", List[str]]]:
+    def generate(self, mode: str) -> Dict[str, Any]:
         if self.used_generator == "model_type":
             dummy_input = self.model_type_generator.generate()
         elif self.used_generator == "task":
diff --git a/optimum_benchmark/import_utils.py b/optimum_benchmark/import_utils.py
index 13dee6171..2f8847482 100644
--- a/optimum_benchmark/import_utils.py
+++ b/optimum_benchmark/import_utils.py
@@ -1,22 +1,59 @@
+import importlib.metadata
 import importlib.util
 
+_transformers_available = importlib.util.find_spec("transformers") is not None
+_accelerate_available = importlib.util.find_spec("accelerate") is not None
+_diffusers_available = importlib.util.find_spec("diffusers") is not None
+_optimum_available = importlib.util.find_spec("optimum") is not None
 _torch_available = importlib.util.find_spec("torch") is not None
 _onnxruntime_available = importlib.util.find_spec("onnxruntime") is not None
-_is_openvino_available = importlib.util.find_spec("openvino") is not None
-_is_neural_compressor_available = importlib.util.find_spec("neural_compressor") is not None
+_openvino_available = importlib.util.find_spec("openvino") is not None
+_neural_compressor_available = importlib.util.find_spec("neural_compressor") is not None
 
 
-def is_torch_available():
-    return _torch_available
+def torch_version():
+    if _torch_available:
+        return importlib.metadata.version("torch")
 
 
-def is_onnxruntime_available():
-    return _onnxruntime_available
+def onnxruntime_version():
+    try:
+        return "ort:" + importlib.metadata.version("onnxruntime")
+    except importlib.metadata.PackageNotFoundError:
+        try:
+            return "ort-gpu:" + importlib.metadata.version("onnxruntime-gpu")
+        except importlib.metadata.PackageNotFoundError:
+            try:
+                return "ort-training:" + importlib.metadata.version("onnxruntime-training")
+            except importlib.metadata.PackageNotFoundError:
+                return None
 
 
-def is_openvino_available():
-    return _is_openvino_available
+def openvino_version():
+    if _openvino_available:
+        return importlib.metadata.version("openvino")
 
 
-def is_neural_compressor_available():
-    return _is_neural_compressor_available
+def neural_compressor_version():
+    if _neural_compressor_available:
+        return importlib.metadata.version("neural_compressor")
+
+
+def optimum_version():
+    if _optimum_available:
+        return importlib.metadata.version("optimum")
+
+
+def transformers_version():
+    if _transformers_available:
+        return importlib.metadata.version("transformers")
+
+
+def accelerate_version():
+    if _accelerate_available:
+        return importlib.metadata.version("accelerate")
+
+
+def diffusers_version():
+    if _diffusers_available:
+        return importlib.metadata.version("diffusers")
diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py
index de2c95f45..7fc9da853 100644
--- a/optimum_benchmark/task_utils.py
+++ b/optimum_benchmark/task_utils.py
@@ -37,3 +37,12 @@
     "zero-shot-image-classification",
     "zero-shot-object-detection",
 ]
+
+
+def infer_task(model: str, revision: str) -> str:
+    from optimum.exporters import TasksManager
+
+    return TasksManager.infer_task_from_model(
+        model=model,
+        revision=revision,
+    )
diff --git a/tests/configs/cuda_onnxruntime_training_bert.yaml b/tests/configs/cuda_onnxruntime_training_bert.yaml
index 7fd0ac899..7db912864 100644
--- a/tests/configs/cuda_onnxruntime_training_bert.yaml
+++ b/tests/configs/cuda_onnxruntime_training_bert.yaml
@@ -9,3 +9,8 @@ experiment_name: cuda_onnxruntime_training_bert
 model: hf-internal-testing/tiny-random-bert
 task: text-classification
 device: cuda
+
+benchmark:
+  dataset_shapes:
+    dataset_size: 1600
+    sequence_length: 256
diff --git a/tests/configs/cuda_onnxruntime_training_gpt2.yaml b/tests/configs/cuda_onnxruntime_training_gpt2.yaml
index 52f56cd46..96827a967 100644
--- a/tests/configs/cuda_onnxruntime_training_gpt2.yaml
+++ b/tests/configs/cuda_onnxruntime_training_gpt2.yaml
@@ -9,3 +9,8 @@ experiment_name: cuda_onnxruntime_training_gpt2
 model: hf-internal-testing/tiny-random-gpt2
 task: text-generation
 device: cuda
+
+benchmark:
+  dataset_shapes:
+    dataset_size: 1600
+    sequence_length: 256
diff --git a/tests/configs/cuda_pytorch_training_bert.yaml b/tests/configs/cuda_pytorch_training_bert.yaml
index 08ec37520..a76f6f890 100644
--- a/tests/configs/cuda_pytorch_training_bert.yaml
+++ b/tests/configs/cuda_pytorch_training_bert.yaml
@@ -11,4 +11,4 @@ device: cuda
 
 benchmark:
   dataset_shapes:
-    dataset_size: 1200
\ No newline at end of file
+    dataset_size: 1600
diff --git a/tests/configs/cuda_pytorch_training_gpt2.yaml b/tests/configs/cuda_pytorch_training_gpt2.yaml
index 0f03b8495..6dfb7f278 100644
--- a/tests/configs/cuda_pytorch_training_gpt2.yaml
+++ b/tests/configs/cuda_pytorch_training_gpt2.yaml
@@ -11,4 +11,4 @@ device: cuda
 
 benchmark:
   dataset_shapes:
-    dataset_size: 1200
\ No newline at end of file
+    dataset_size: 1600
diff --git a/tests/configs/distributed_cuda_pytorch_inference_gpt2.yaml b/tests/configs/distributed_cuda_pytorch_inference_gpt2.yaml
index 9524d0aec..bb57008a3 100644
--- a/tests/configs/distributed_cuda_pytorch_inference_gpt2.yaml
+++ b/tests/configs/distributed_cuda_pytorch_inference_gpt2.yaml
@@ -12,6 +12,13 @@ device: cuda
 backend:
   device_map: auto
 
+benchmark:
+  dataset_shapes:
+    dataset_size: 1600
+    sequence_length: 256
+  training_arguments:
+    per_device_train_batch_size: 8
+
 hydra:
   job:
     env_set:
diff --git a/tests/configs/distributed_cuda_pytorch_training_bert_ddp.yaml b/tests/configs/distributed_cuda_pytorch_training_bert_ddp.yaml
index c2ea41614..ea0e768d3 100644
--- a/tests/configs/distributed_cuda_pytorch_training_bert_ddp.yaml
+++ b/tests/configs/distributed_cuda_pytorch_training_bert_ddp.yaml
@@ -17,10 +17,10 @@ backend:
 
 benchmark:
   dataset_shapes:
-    dataset_size: 1200
+    dataset_size: 1600
     sequence_length: 256
   training_arguments:
-    per_device_train_batch_size: 32
+    per_device_train_batch_size: 8
 
 hydra:
   job:
diff --git a/tests/configs/distributed_cuda_pytorch_training_bert_dp.yaml b/tests/configs/distributed_cuda_pytorch_training_bert_dp.yaml
index a1996f235..f806fd14f 100644
--- a/tests/configs/distributed_cuda_pytorch_training_bert_dp.yaml
+++ b/tests/configs/distributed_cuda_pytorch_training_bert_dp.yaml
@@ -11,10 +11,10 @@ device: cuda
 
 benchmark:
   dataset_shapes:
-    dataset_size: 1200
+    dataset_size: 1600
     sequence_length: 256
   training_arguments:
-    per_device_train_batch_size: 32
+    per_device_train_batch_size: 8
 
 hydra:
   job: