Skip to content

Commit 70f7fc6

Browse files
authored
Adds a dummy/random model for baseline init (#220)
1 parent 0528f29 commit 70f7fc6

File tree

5 files changed

+141
-7
lines changed

5 files changed

+141
-7
lines changed

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,17 @@ python run_evals_accelerate.py \
239239
--output_dir "./evals"
240240
```
241241

242+
### Using the dummy model
243+
To debug or obtain random baseline scores for a given set of tasks, you can use the `dummy` model:
244+
```shell
245+
python run_evals_accelerate.py \
246+
--model_args "dummy" \
247+
--tasks <task parameters> \
248+
--output_dir output_dir
249+
```
250+
This "model" randomly generates logprobs (for selection/accuracy tasks) and the string "random baseline" for generation tasks.
251+
You can also select a specific seed for the random logprob values generated by the dummy model: `--model_args "dummy,seed=123"`.
252+
242253
## Deep thanks
243254
`lighteval` was originally built on top of the great [Eleuther AI Harness](https://github.com/EleutherAI/lm-evaluation-harness) (we use the latter to power the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)). We also took a lot of inspiration from the amazing [HELM](https://crfm.stanford.edu/helm/latest/), notably for metrics.
244255

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# MIT License
2+
#
3+
# Copyright (c) 2024 The HuggingFace Team
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
# inspired by https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/models/dummy.py
24+
25+
import random
26+
from typing import Optional
27+
28+
from transformers import AutoTokenizer
29+
30+
from lighteval.models.abstract_model import LightevalModel
31+
from lighteval.models.model_config import DummyModelConfig, EnvConfig
32+
from lighteval.models.model_output import GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn
33+
from lighteval.tasks.requests import (
34+
GreedyUntilRequest,
35+
LoglikelihoodRequest,
36+
LoglikelihoodRollingRequest,
37+
LoglikelihoodSingleTokenRequest,
38+
)
39+
40+
41+
class DummyModel(LightevalModel):
    """A stand-in model that produces random baseline scores.

    Every loglikelihood-style request gets a uniformly random negative
    logprob drawn from a seeded RNG, and every generation request gets
    the fixed string "random baseline". Useful for debugging a task
    pipeline or establishing a chance-level baseline.
    """

    def __init__(
        self,
        config: DummyModelConfig,
        env_config: EnvConfig,
    ):
        self.config = config
        self.env_config = env_config
        # Dedicated seeded RNG so baseline runs are reproducible and do not
        # disturb the global `random` state.
        self._random = random.Random(self.config.seed)
        self._tokenizer = None

    @property
    def tokenizer(self):
        # Lazily instantiate a gpt2 tokenizer only if something asks for it;
        # the dummy model itself never tokenizes anything.
        if self._tokenizer is None:
            self._tokenizer = AutoTokenizer.from_pretrained("gpt2")
        return self._tokenizer

    @property
    def add_special_tokens(self):
        return False

    @property
    def max_length(self) -> int:
        return 2048

    def greedy_until(
        self, requests: list[GreedyUntilRequest], override_bs: Optional[int] = None
    ) -> list[GenerateReturn]:
        """Answer every generation request with the fixed baseline string."""
        return [GenerateReturn(result="random baseline") for _ in requests]

    def loglikelihood(
        self, requests: list[LoglikelihoodRequest], override_bs: Optional[int] = None
    ) -> list[LoglikelihoodReturn]:
        """Return one random logprob in (-1, 0] per request, paired with a False flag."""
        scores = []
        for _ in requests:
            scores.append(LoglikelihoodReturn((-self._random.random(), False)))
        return scores

    def loglikelihood_rolling(
        self, requests: list[LoglikelihoodRollingRequest], override_bs: Optional[int] = None
    ) -> list[LoglikelihoodReturn]:
        """Same random scoring as `loglikelihood`, applied to rolling requests."""
        scores = []
        for _ in requests:
            scores.append(LoglikelihoodReturn((-self._random.random(), False)))
        return scores

    def loglikelihood_single_token(
        self, requests: list[LoglikelihoodSingleTokenRequest], override_bs: Optional[int] = None
    ) -> list[LoglikelihoodSingleTokenReturn]:
        """Produce one random logprob per candidate continuation token of each request."""
        results = []
        for request in requests:
            token_scores = [-self._random.random() for _ in request.tokenized_continuation]
            results.append(LoglikelihoodSingleTokenReturn(result=token_scores))
        return results

src/lighteval/models/model_config.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,11 @@ class TGIModelConfig:
203203
model_id: str
204204

205205

206+
@dataclass
class DummyModelConfig:
    """Configuration for the random-baseline `dummy` model."""

    # Seed for the RNG that generates the dummy logprob values.
    seed: int = 42
209+
210+
206211
@dataclass
207212
class InferenceModelConfig:
208213
model: str
@@ -253,7 +258,16 @@ def nullable_keys() -> list[str]:
253258
return ["namespace", "env_vars", "image_url"]
254259

255260

256-
def create_model_config(args: Namespace, accelerator: Union["Accelerator", None]) -> BaseModelConfig: # noqa: C901
261+
def create_model_config( # noqa: C901
262+
args: Namespace, accelerator: Union["Accelerator", None]
263+
) -> Union[
264+
BaseModelConfig,
265+
AdapterModelConfig,
266+
DeltaModelConfig,
267+
TGIModelConfig,
268+
InferenceEndpointModelConfig,
269+
DummyModelConfig,
270+
]:
257271
"""
258272
Create a model configuration based on the provided arguments.
259273
@@ -262,7 +276,7 @@ def create_model_config(args: Namespace, accelerator: Union["Accelerator", None]
262276
accelerator (Union[Accelerator, None]): accelerator to use for model training.
263277
264278
Returns:
265-
BaseModelConfig: model configuration.
279+
Union[BaseModelConfig, AdapterModelConfig, DeltaModelConfig, TGIModelConfig, InferenceEndpointModelConfig, DummyModelConfig]: model configuration.
266280
267281
Raises:
268282
ValueError: If both an inference server address and model arguments are provided.
@@ -271,7 +285,11 @@ def create_model_config(args: Namespace, accelerator: Union["Accelerator", None]
271285
ValueError: If a base model is specified when not using delta weights or adapter weights.
272286
"""
273287
if args.model_args:
274-
args_dict = {k.split("=")[0]: k.split("=")[1] for k in args.model_args.split(",")}
288+
args_dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in args.model_args.split(",")}
289+
290+
if args_dict.pop("dummy", False):
291+
return DummyModelConfig(**args_dict)
292+
275293
args_dict["accelerator"] = accelerator
276294
args_dict["use_chat_template"] = args.use_chat_template
277295

src/lighteval/models/model_loader.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@
2727
from lighteval.models.adapter_model import AdapterModel
2828
from lighteval.models.base_model import BaseModel
2929
from lighteval.models.delta_model import DeltaModel
30+
from lighteval.models.dummy_model import DummyModel
3031
from lighteval.models.endpoint_model import InferenceEndpointModel
3132
from lighteval.models.model_config import (
3233
AdapterModelConfig,
3334
BaseModelConfig,
3435
DeltaModelConfig,
36+
DummyModelConfig,
3537
EnvConfig,
3638
InferenceEndpointModelConfig,
3739
InferenceModelConfig,
@@ -54,9 +56,16 @@ class ModelInfo:
5456

5557

5658
def load_model( # noqa: C901
57-
config: Union[BaseModelConfig, AdapterModelConfig, DeltaModelConfig, TGIModelConfig, InferenceEndpointModelConfig],
59+
config: Union[
60+
BaseModelConfig,
61+
AdapterModelConfig,
62+
DeltaModelConfig,
63+
TGIModelConfig,
64+
InferenceEndpointModelConfig,
65+
DummyModelConfig,
66+
],
5867
env_config: EnvConfig,
59-
) -> Tuple[Union[BaseModel, AdapterModel, DeltaModel, ModelClient], ModelInfo]:
68+
) -> Tuple[Union[BaseModel, AdapterModel, DeltaModel, ModelClient, DummyModel], ModelInfo]:
6069
"""Will load either a model from an inference server or a model from a checkpoint, depending
6170
on the config type.
6271
@@ -82,6 +91,9 @@ def load_model( # noqa: C901
8291
if isinstance(config, BaseModelConfig):
8392
return load_model_with_accelerate_or_default(config=config, env_config=env_config)
8493

94+
if isinstance(config, DummyModelConfig):
95+
return load_dummy_model(config=config, env_config=env_config)
96+
8597

8698
def load_model_with_tgi(config: TGIModelConfig):
8799
if not is_tgi_available():
@@ -143,3 +155,7 @@ def load_model_with_accelerate_or_default(
143155
hlog(f"Model info: {model_info}")
144156

145157
return model, model_info
158+
159+
160+
def load_dummy_model(config: DummyModelConfig, env_config: EnvConfig):
    """Instantiate a DummyModel and its ModelInfo.

    The config seed is recorded as the model "sha" so distinct baseline
    seeds are distinguishable in result metadata.
    """
    model = DummyModel(config=config, env_config=env_config)
    model_info = ModelInfo(model_name="dummy", model_sha=str(config.seed))
    return model, model_info

src/lighteval/models/model_output.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ class ModelReturn:
3131
result: Union[tuple, list, str]
3232
input_tokens: list[int] = field(default_factory=list) # model inputs
3333
generated_tokens: list[int] = field(default_factory=list) # model generations
34-
truncated_tokens_count: Optional[int] = None # How many tokens truncated
35-
padded_tokens_count: Optional[int] = None # How many tokens of padding
34+
truncated_tokens_count: Optional[int] = 0 # How many tokens truncated
35+
padded_tokens_count: Optional[int] = 0 # How many tokens of padding
3636

3737
def get_result_for_eval(self):
3838
raise NotImplementedError()

0 commit comments

Comments
 (0)