3 changes: 3 additions & 0 deletions src/lighteval/logging/hierarchical_logger.py
@@ -34,8 +34,11 @@

logger = get_logger(__name__, log_level="INFO")
elif is_accelerate_available():
from accelerate import Accelerator, InitProcessGroupKwargs
from accelerate.logging import get_logger

# We must init the accelerator before using the logger
accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
logger = get_logger(__name__, log_level="INFO")
else:
logger = Logger(__name__, level="INFO")
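For reference, a minimal sketch of the initialization order this hunk introduces (assuming only that `accelerate` is installed): the `Accelerator` is created first, with a generous process-group timeout, and only then is accelerate's process-aware logger requested, matching the comment "We must init the accelerator before using the logger".

```python
# Sketch of the logger-initialization order added above (not lighteval code).
from datetime import timedelta

from accelerate import Accelerator, InitProcessGroupKwargs
from accelerate.logging import get_logger

# Create the Accelerator before requesting the logger so the process group exists.
accelerator = Accelerator(
    kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]
)
logger = get_logger(__name__, log_level="INFO")
logger.info("Accelerator ready", main_process_only=True)  # logs once, on the main process
```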
9 changes: 8 additions & 1 deletion src/lighteval/models/adapter_model.py
@@ -41,7 +41,14 @@ class AdapterModel(BaseModel):
def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer:
# By default, we look at the model config for the model stored in `base_model`
# (= the parent model, not the model of interest)
return self._create_auto_tokenizer_with_name(config.base_model, config=config, env_config=env_config)
return self._create_auto_tokenizer_with_name(
model_name=config.base_model,
revision=config.revision,
env_config=env_config,
tokenizer_name=config.tokenizer,
subfolder=config.subfolder,
trust_remote_code=config.trust_remote_code,
)

def _create_auto_model(self, config: AdapterModelConfig, env_config: EnvConfig) -> AutoModelForCausalLM:
"""Returns a PeftModel from a base model and a version fined tuned using PEFT."""
109 changes: 100 additions & 9 deletions src/lighteval/models/base_model.py
@@ -30,6 +30,7 @@
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES

from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset
from lighteval.logging.hierarchical_logger import hlog, hlog_err, hlog_warn
@@ -57,6 +58,7 @@


if is_accelerate_available():
from accelerate import Accelerator
from accelerate.utils import calculate_maximum_sizes, convert_bytes, get_max_memory

os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -67,8 +69,8 @@
class BaseModel(LightevalModel):
def __init__(
self,
config: BaseModelConfig,
env_config: EnvConfig,
config: BaseModelConfig,
):
"""Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation."""
self._config = config.init_configs(env_config)
@@ -114,6 +116,72 @@ def __init__(

self.pairwise_tokenization = config.pairwise_tokenization

@classmethod
def from_model(
cls,
model: Union[AutoModelForCausalLM, LightevalModel],
env_config: EnvConfig,
accelerator: "Accelerator" = None,
tokenizer_name: str = None, # custom tokenizer
trust_remote_code: bool = False,
use_chat_template: bool = False,
add_special_tokens: bool = True,
pairwise_tokenization: bool = False,
multichoice_continuations_start_space: bool = None,
):
# Slightly hackish way to test if the model is an AutoModelForCausalLM, since the instances don't
# derive from this class explicitly
assert isinstance(model, LightevalModel) or type(model).__name__ in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.values()

if isinstance(model, LightevalModel):
return model

# Instantiate the object without using __init__
self = cls.__new__(cls)
self._config = model.config
self._max_length = self._init_max_length(max_length=model.config.max_length)
self._tokenizer = self._create_auto_tokenizer_with_name(
model_name=model.name_or_path,
revision=model.config._commit_hash,
env_config=env_config,
trust_remote_code=trust_remote_code,
tokenizer_name=tokenizer_name,
)
self.model_name = _simplify_name(model.name_or_path)
self.model_sha = model.config._commit_hash

# If model_parallel is not set we compare the number of processes with the number of GPUs
self.model = model
self.model.eval()
torch.set_grad_enabled(False)

self.accelerator = accelerator
if accelerator is not None:
self._device = accelerator.device
self.model = self.accelerator.prepare(self.model.to(accelerator.device))
else:
self._device = "cpu"

self.use_chat_template = use_chat_template
self._add_special_tokens = add_special_tokens if add_special_tokens is not None else False
self.pairwise_tokenization = pairwise_tokenization
self.multichoice_continuations_start_space = multichoice_continuations_start_space

self.precision = _get_dtype(model.dtype, config=self._config)

if is_accelerate_available():
model_size, _ = calculate_maximum_sizes(self.model)
model_size = convert_bytes(model_size)
else:
model_size = -1
self.model_info = ModelInfo(
model_name=self.model_name,
model_sha=self.model_sha,
model_dtype=self.precision,
model_size=model_size,
)
return self

@property
def tokenizer(self):
return self._tokenizer
@@ -207,10 +275,23 @@ def _create_auto_model(self, config: BaseModelConfig, env_config: EnvConfig) ->
def _create_auto_tokenizer(
self, config: BaseModelConfig, env_config: EnvConfig
) -> transformers.PreTrainedTokenizer:
return self._create_auto_tokenizer_with_name(config.pretrained, config=config, env_config=env_config)
return self._create_auto_tokenizer_with_name(
model_name=config.pretrained,
revision=config.revision,
env_config=env_config,
tokenizer_name=config.tokenizer,
subfolder=config.subfolder,
trust_remote_code=config.trust_remote_code,
)

def _create_auto_tokenizer_with_name(
self, model_name: str, config: BaseModelConfig, env_config: EnvConfig
self,
model_name: str,
revision: str,
env_config: EnvConfig,
tokenizer_name: str = None,
subfolder: str = None,
trust_remote_code: bool = False,
) -> transformers.PreTrainedTokenizer:
"""
Create a Hugging Face AutoTokenizer for a language model.
@@ -231,25 +312,35 @@ def _create_auto_tokenizer_with_name(
"""
try:
tokenizer = AutoTokenizer.from_pretrained(
model_name if config.tokenizer is None else config.tokenizer,
revision=config.revision + (f"/{config.subfolder}" if config.subfolder is not None else ""),
model_name if tokenizer_name is None else tokenizer_name,
revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
cache_dir=env_config.cache_dir,
token=env_config.token,
trust_remote_code=config.trust_remote_code,
trust_remote_code=trust_remote_code,
padding_side="left",
truncation_side="left",
)
except RecursionError:
tokenizer = AutoTokenizer.from_pretrained(
model_name if config.tokenizer is None else config.tokenizer,
revision=config.revision + (f"/{config.subfolder}" if config.subfolder is not None else ""),
model_name if tokenizer_name is None else tokenizer_name,
revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
cache_dir=env_config.cache_dir,
token=env_config.token,
trust_remote_code=config.trust_remote_code,
trust_remote_code=trust_remote_code,
unk_token="<unk>",
padding_side="left",
truncation_side="left",
)
except FileNotFoundError:
hlog_warn("Problem when loading the tokenizer in the cache - discarding the provided cache path value.")
tokenizer = AutoTokenizer.from_pretrained(
model_name if tokenizer_name is None else tokenizer_name,
revision=revision + (f"/{subfolder}" if subfolder is not None else ""),
token=env_config.token,
trust_remote_code=trust_remote_code,
padding_side="left",
truncation_side="left",
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = self.max_length
hlog("Tokenizer truncation and padding size set to the left side.")
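Taken together, the new `from_model` classmethod lets an already-instantiated `transformers` model be wrapped for evaluation without reloading its weights. A rough usage sketch (the `EnvConfig` import path and default fields are assumptions, and "gpt2" is only a placeholder checkpoint):

```python
from transformers import AutoModelForCausalLM

from lighteval.models.base_model import BaseModel
from lighteval.models.model_config import EnvConfig  # assumed import path for EnvConfig

# Load (or fine-tune) the model yourself, then hand the live object to lighteval.
hf_model = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder checkpoint

lighteval_model = BaseModel.from_model(
    model=hf_model,
    env_config=EnvConfig(),   # assumes the cache_dir/token defaults are acceptable
    use_chat_template=False,
    add_special_tokens=True,
)
print(lighteval_model.model_info)  # populated at the end of from_model
```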
12 changes: 10 additions & 2 deletions src/lighteval/pipeline.py
@@ -34,7 +34,7 @@
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.logging.hierarchical_logger import hlog, htrack_block
from lighteval.metrics.utils.metric_utils import MetricCategory
from lighteval.models.model_loader import load_model
from lighteval.models.model_loader import BaseModel, load_model
from lighteval.models.model_output import ModelResponse
from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks
from lighteval.tasks.registry import Registry, taskinfo_selector
@@ -164,7 +164,15 @@ def _init_model(self, model_config, model):
)
else:
return load_model(config=model_config, env_config=self.pipeline_parameters.env_config)
return model
if isinstance(model, BaseModel):
return model
else:
return BaseModel.from_model(
model=model,
use_chat_template=self.pipeline_parameters.use_chat_template,
env_config=self.pipeline_parameters.env_config,
accelerator=self.accelerator,
)

def _init_tasks_and_requests(self, tasks: str):
with htrack_block("Tasks loading"):
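And the end-to-end effect on the pipeline: a plain `transformers` model passed to `Pipeline` is now wrapped via `BaseModel.from_model` instead of being returned untouched. A hedged sketch only; the `Pipeline`, `PipelineParameters`, and `EvaluationTracker` argument names follow lighteval's public examples and are not taken from this diff.

```python
from transformers import AutoModelForCausalLM

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters

hf_model = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder checkpoint

pipeline = Pipeline(
    tasks="leaderboard|arc:challenge|0|0",  # suite|task|num_fewshot|truncate_fewshot
    pipeline_parameters=PipelineParameters(launcher_type=ParallelismManager.NONE),
    evaluation_tracker=EvaluationTracker(output_dir="./results"),
    model=hf_model,  # _init_model wraps this with BaseModel.from_model
)
pipeline.evaluate()
pipeline.show_results()
```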