35 changes: 1 addition & 34 deletions optimum_benchmark/backends/base.py
@@ -3,7 +3,6 @@
 import random
 import shutil
 from abc import ABC
-from dataclasses import dataclass
 from logging import getLogger
 from multiprocessing import Process
 from typing import (
@@ -15,14 +14,12 @@
     Generic,
     List,
     Optional,
-    TypeVar,
     Union,
 )
 
 import numpy as np
 import torch
 from optimum.exporters import TasksManager
-from psutil import cpu_count
 from transformers import AutoConfig, AutoProcessor
 
 if TYPE_CHECKING:
@@ -39,46 +36,16 @@
     from .utils import PreTrainedProcessor
 
 from ..task_utils import DIFFUSION_TASKS, TEXT_GENERATION_TASKS
+from .config import BackendConfigT
 from .utils import (
     check_no_process_is_running_on_cuda_device,
     check_only_this_process_is_running_on_cuda_device,
     extract_shapes_from_diffusion_pipeline,
     extract_shapes_from_model_artifacts,
 )
 
 
-@dataclass
-class BackendConfig(ABC):
-    name: str
-    version: str
-    _target_: str
-
-    # backend options
-    seed: int = 42
-    inter_op_num_threads: Optional[int] = None
-    intra_op_num_threads: Optional[int] = None
-
-    # isolation options
-    initial_isolation_check: bool = True
-    continous_isolation_check: bool = True
-
-    # clean up options
-    delete_cache: bool = False
-
-    def __post_init__(self):
-        if self.inter_op_num_threads is not None:
-            if self.inter_op_num_threads == -1:
-                self.inter_op_num_threads = cpu_count()
-
-        if self.intra_op_num_threads is not None:
-            if self.intra_op_num_threads == -1:
-                self.intra_op_num_threads = cpu_count()
-
-
 LOGGER = getLogger("backend")
 
-BackendConfigT = TypeVar("BackendConfigT", bound=BackendConfig)
-
-
 class Backend(Generic[BackendConfigT], ABC):
     NAME: ClassVar[str]
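
Reviewer note: after this change, BackendConfig and its BackendConfigT TypeVar live in the new config.py, and base.py only imports the TypeVar — presumably so that per-backend config modules no longer pull in base.py's heavy imports (torch, optimum.exporters) just to subclass the config. A minimal sketch of why the Generic[BackendConfigT] pattern matters, using simplified stand-ins rather than the project's actual classes:

from abc import ABC
from dataclasses import dataclass
from typing import Generic, TypeVar


@dataclass
class BackendConfig(ABC):
    name: str


BackendConfigT = TypeVar("BackendConfigT", bound=BackendConfig)


class Backend(Generic[BackendConfigT], ABC):
    config: BackendConfigT

    def configure(self, config: BackendConfigT) -> None:
        self.config = config


@dataclass
class PyTorchConfig(BackendConfig):
    torch_dtype: str = "float32"


class PyTorchBackend(Backend[PyTorchConfig]):
    def report(self) -> None:
        # type checkers narrow self.config to PyTorchConfig here
        print(self.config.torch_dtype)


backend = PyTorchBackend()
backend.configure(PyTorchConfig(name="pytorch"))
backend.report()  # float32

Declaring the bound once in config.py gives every subclass a precisely typed self.config without each config module having to import the backend base class.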
36 changes: 36 additions & 0 deletions optimum_benchmark/backends/config.py
@@ -0,0 +1,36 @@
+from abc import ABC
+from dataclasses import dataclass
+from typing import Optional, TypeVar
+
+from psutil import cpu_count
+
+
+@dataclass
+class BackendConfig(ABC):
+    name: str
+    version: str
+    _target_: str
+
+    # backend options
+    seed: int = 42
+    inter_op_num_threads: Optional[int] = None
+    intra_op_num_threads: Optional[int] = None
+
+    # isolation options
+    initial_isolation_check: bool = True
+    continous_isolation_check: bool = True
+
+    # clean up options
+    delete_cache: bool = False
+
+    def __post_init__(self):
+        if self.inter_op_num_threads is not None:
+            if self.inter_op_num_threads == -1:
+                self.inter_op_num_threads = cpu_count()
+
+        if self.intra_op_num_threads is not None:
+            if self.intra_op_num_threads == -1:
+                self.intra_op_num_threads = cpu_count()
+
+
+BackendConfigT = TypeVar("BackendConfigT", bound=BackendConfig)
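
A quick illustration of the -1 sentinel that __post_init__ resolves: passing -1 for a thread count means "use all cores". The sketch below uses a hypothetical DummyConfig subclass, assuming the module path introduced by this PR:

from dataclasses import dataclass

from optimum_benchmark.backends.config import BackendConfig


@dataclass
class DummyConfig(BackendConfig):
    pass


cfg = DummyConfig(name="dummy", version="0", _target_="dummy.Target", intra_op_num_threads=-1)
print(cfg.intra_op_num_threads)  # resolved to psutil.cpu_count() at construction time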
9 changes: 3 additions & 6 deletions optimum_benchmark/backends/neural_compressor/config.py
@@ -1,15 +1,12 @@
-import importlib.metadata
 from dataclasses import dataclass, field
 from typing import Any, Dict
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
+from ...import_utils import neural_compressor_version
+from ..config import BackendConfig
 
-OmegaConf.register_new_resolver(
-    "neural_compressor_version",
-    lambda: importlib.metadata.version("neural_compressor"),
-)
+OmegaConf.register_new_resolver("neural_compressor_version", neural_compressor_version)
 
 # https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L490
 ACCURACY_CRITERION_CONFIG = {
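
For context, register_new_resolver makes a function callable from config interpolations, and values are resolved lazily on access. A self-contained sketch with a stand-in version function (not the real import_utils helper):

from omegaconf import OmegaConf

OmegaConf.register_new_resolver("demo_version", lambda: "2.3.1")

cfg = OmegaConf.create({"backend": {"version": "${demo_version:}"}})
print(cfg.backend.version)  # "2.3.1", resolved when the key is accessed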
46 changes: 20 additions & 26 deletions optimum_benchmark/backends/onnxruntime/config.py
@@ -1,42 +1,36 @@
-import importlib.metadata
-import importlib.util
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
-from .utils import infer_device_id
+from ...import_utils import onnxruntime_version
+from ..config import BackendConfig
 
 
-def onnxruntime_version():
-    try:
-        return "ort:" + importlib.metadata.version("onnxruntime")
-    except importlib.metadata.PackageNotFoundError:
-        try:
-            return "ort-gpu:" + importlib.metadata.version("onnxruntime-gpu")
-        except importlib.metadata.PackageNotFoundError:
-            return "ort:unknown"
+def infer_device_id(device: str) -> int:
+    """Infer the device id from the given device string."""
+    if device == "cuda":
+        # torch.cuda.current_device() will always return 0
+        # unless torch.cuda.set_device() is called somewhere
+        return 0
+    elif "cuda" in device:
+        return int(device.split(":")[1])
+    elif device == "cpu":
+        return -1
+    else:
+        raise ValueError(f"Unknown device: {device}")
 
 
+OmegaConf.register_new_resolver("onnxruntime_version", onnxruntime_version)
+
 OmegaConf.register_new_resolver("is_gpu", lambda device: "cuda" in device)
-OmegaConf.register_new_resolver(
-    "is_profiling",
-    lambda benchmark_name: benchmark_name == "profiling",
-)
-OmegaConf.register_new_resolver(
-    "infer_provider",
-    lambda device: "CPUExecutionProvider" if device == "cpu" else "CUDAExecutionProvider",
-)
-OmegaConf.register_new_resolver(
-    "infer_device_id",
-    lambda device: infer_device_id(device),
-)
+OmegaConf.register_new_resolver("infer_device_id", lambda device: infer_device_id(device))
+OmegaConf.register_new_resolver("is_profiling", lambda benchmark_name: benchmark_name == "profiling")
 OmegaConf.register_new_resolver(
-    "onnxruntime_version",
-    lambda: onnxruntime_version(),
+    "infer_provider", lambda device: "CPUExecutionProvider" if device == "cpu" else "CUDAExecutionProvider"
 )
 
 
 OPTIMIZATION_CONFIG = {
     "optimization_level": 1,  # 0, 1, 2, 99
     "fp16": False,
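
The relocated infer_device_id maps device strings as follows (behavior read straight from the function above, import path per the new file location):

from optimum_benchmark.backends.onnxruntime.config import infer_device_id

assert infer_device_id("cuda") == 0    # bare "cuda" defaults to device 0
assert infer_device_id("cuda:1") == 1  # explicit index is parsed out of the string
assert infer_device_id("cpu") == -1    # CPU maps to -1 (per the function above)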
14 changes: 0 additions & 14 deletions optimum_benchmark/backends/onnxruntime/utils.py
@@ -11,20 +11,6 @@
 TASKS_TO_ORTMODELS = {task: task_dict["class"][0] for task, task_dict in ORT_SUPPORTED_TASKS.items()}
 
 
-def infer_device_id(device: str) -> int:
-    """Infer the device id from the given device string."""
-    if device == "cuda":
-        # torch.cuda.current_device() will always return 0
-        # unless torch.cuda.set_device() is called somewhere
-        return 0
-    elif "cuda" in device:
-        return int(device.split(":")[1])
-    elif device == "cpu":
-        return -1
-    else:
-        raise ValueError(f"Unknown device: {device}")
-
-
 def format_quantization_config(quantization_config: Dict[str, Any]) -> None:
     """Format the quantization dictionary for onnxruntime."""
     # the conditionals are here because some quantization strategies don't have all the options
9 changes: 3 additions & 6 deletions optimum_benchmark/backends/openvino/config.py
@@ -1,15 +1,12 @@
-import importlib.metadata
 from dataclasses import dataclass, field
 from typing import Any, Dict
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
+from ...import_utils import openvino_version
+from ..config import BackendConfig
 
-OmegaConf.register_new_resolver(
-    "openvino_version",
-    lambda: importlib.metadata.version("openvino"),
-)
+OmegaConf.register_new_resolver("openvino_version", openvino_version)
 
 # https://github.com/huggingface/optimum-intel/blob/main/optimum/intel/openvino/configuration.py#L81
 QUANTIZATION_CONFIG = {
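
Same centralization as in the neural-compressor config: the inline importlib.metadata lambda is replaced by a shared helper. A plausible shape for such a helper in import_utils — an assumption for illustration, not the actual file contents:

import importlib.metadata


def openvino_version() -> str:
    # assumption: wraps importlib.metadata and degrades gracefully
    try:
        return importlib.metadata.version("openvino")
    except importlib.metadata.PackageNotFoundError:
        return "unknown"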
6 changes: 4 additions & 2 deletions optimum_benchmark/backends/pytorch/backned.py
@@ -4,8 +4,6 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, List
 
 import torch
-from accelerate import init_empty_weights
-from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model
 from optimum.bettertransformer import BetterTransformer
 from torch.distributed.elastic.multiprocessing.errors import record
 from torch.distributed.launcher.api import LaunchConfig, elastic_launch
@@ -143,6 +141,8 @@ def load_model_from_pretrained(self) -> None:
     def load_model_from_config(self) -> None:
         # TODO: create no_weights tests
         LOGGER.info("\t+ Initializing empty weights model on device: meta")
+        from accelerate import init_empty_weights
+
         with init_empty_weights():
             self.pretrained_model = self.automodel_class.from_config(
                 config=self.pretrained_config,
@@ -156,6 +156,8 @@ def load_model_from_config(self) -> None:
         LOGGER.info("\t+ Randomizing model weights")
         randomize_weights(self.pretrained_model)
         LOGGER.info("\t+ Processing BnB config")
+        from accelerate.utils import BnbQuantizationConfig, load_and_quantize_model
+
         bnb_quantization_config = BnbQuantizationConfig(
             **self.config.quantization_config,
             torch_dtype=self.config.torch_dtype,
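
Reviewer note: the accelerate imports move from module level into the code paths that need them, so merely importing the PyTorch backend no longer hard-requires accelerate. A generic sketch of the deferred-import pattern, using a hypothetical helper rather than this file's code:

from transformers import AutoConfig, AutoModel


def init_empty_model(model_name: str):
    # deferred: accelerate is only required when this path actually runs
    from accelerate import init_empty_weights

    config = AutoConfig.from_pretrained(model_name)
    with init_empty_weights():
        # parameters land on the meta device: shapes only, no real memory
        model = AutoModel.from_config(config)
    return model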
21 changes: 6 additions & 15 deletions optimum_benchmark/backends/pytorch/config.py
@@ -1,24 +1,15 @@
-import importlib.metadata
 import os
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
 from omegaconf import OmegaConf
 
-from ..base import BackendConfig
-
-OmegaConf.register_new_resolver(
-    "device_count",
-    lambda: len(os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",")),
-)
-OmegaConf.register_new_resolver(
-    "is_inference",
-    lambda benchmark_name: benchmark_name == "inference",
-)
-OmegaConf.register_new_resolver(
-    "pytorch_version",
-    lambda: importlib.metadata.version("torch"),
-)
+from ...import_utils import torch_version
+from ..config import BackendConfig
 
+OmegaConf.register_new_resolver("device_count", lambda: len(os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",")))
+OmegaConf.register_new_resolver("is_inference", lambda benchmark_name: benchmark_name == "inference")
+OmegaConf.register_new_resolver("pytorch_version", torch_version)
 
 DEVICE_MAPS = ["auto", "sequential"]
 AMP_DTYPES = ["bfloat16", "float16"]
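
Unlike the zero-argument version resolvers, is_inference takes its argument from the config itself via a nested interpolation. A self-contained sketch with hypothetical YAML keys:

from omegaconf import OmegaConf

OmegaConf.register_new_resolver("is_inference", lambda name: name == "inference")

cfg = OmegaConf.create(
    {
        "benchmark": {"name": "inference"},
        "backend": {"disable_grad": "${is_inference:${benchmark.name}}"},
    }
)
print(cfg.backend.disable_grad)  # True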
7 changes: 4 additions & 3 deletions optimum_benchmark/benchmarks/base.py
@@ -1,9 +1,10 @@
 from abc import ABC
 from dataclasses import dataclass
 from logging import getLogger
-from typing import ClassVar, Generic, TypeVar
+from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar
 
-from optimum_benchmark.backends.base import Backend
+if TYPE_CHECKING:
+    from ..backends.base import Backend
 
 LOGGER = getLogger("benchmark")
 
@@ -29,7 +30,7 @@ def configure(self, config: BenchmarkConfigT) -> None:
         LOGGER.info(f"Configuring {self.NAME} benchmark")
         self.config = config
 
-    def run(self, backend: Backend) -> None:
+    def run(self, backend: "Backend") -> None:
         raise NotImplementedError("Benchmark must implement run method")
 
     def save(self) -> None:
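
The TYPE_CHECKING guard plus the quoted annotation is the standard recipe for breaking an import cycle: TYPE_CHECKING is False at runtime, so the backends module is never imported when this file loads, while static type checkers still see the real type. A minimal standalone sketch with a hypothetical module name:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # seen by mypy/pyright, skipped at runtime
    from heavy_module import HeavyClass  # hypothetical


def describe(obj: "HeavyClass") -> None:
    # the string annotation is resolved statically, so heavy_module
    # never has to be importable when this code runs
    print(obj)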