fast_llm/engine/evaluation/lm_eval/evaluator.py (2 changes: 1 addition & 1 deletion)
@@ -60,7 +60,7 @@ def setup(

         self._flm_wrapper = FastLLMLmEvalWrapper(
             model=self._hf_model,
-            tokenizer=self._config.tokenizer.get_tokenizer(),
+            tokenizer=self._config.tokenizer.get_tokenizer().tokenizer,
             truncation=self._config.truncation,
             logits_cache=self._config.logits_cache,
             add_bos_token=self._config.add_bos_token,
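Context for the change above: FastLLMLmEvalWrapper now receives the underlying Hugging Face tokenizer rather than Fast-LLM's tokenizer wrapper. A minimal sketch of the assumed relationship, with stand-in classes rather than the real fast_llm or lm_eval types:

# Sketch only: assumes get_tokenizer() returns a Fast-LLM wrapper that keeps the
# Hugging Face tokenizer on a `.tokenizer` attribute, as the diff implies.
class FastLLMTokenizer:  # hypothetical stand-in
    def __init__(self, hf_tokenizer):
        self.tokenizer = hf_tokenizer  # raw transformers tokenizer


def tokenizer_for_lm_eval(wrapper: FastLLMTokenizer):
    # lm_eval-style wrappers are assumed to call Hugging Face methods directly
    # (encode, decode, bos_token_id, ...), so they need the inner object.
    return wrapper.tokenizer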
fast_llm/models/gpt/conversion/qwen2.py (32 changes: 32 additions & 0 deletions)
@@ -2,6 +2,7 @@

 from fast_llm.engine.checkpoint.config import CheckpointFormat
 from fast_llm.layers.attention.config import AttentionConfig
+from fast_llm.layers.decoder.mlp.config import MLPConfig
 from fast_llm.models.gpt.conversion.config import Qwen2CheckpointFormat
 from fast_llm.models.gpt.conversion.llama import (
     LlamaAttentionConverter,
@@ -10,13 +11,30 @@
     LlamaDecoderConverter,
     LlamaHeadConverter,
     LlamaHuggingfaceCheckpointHandler,
+    LlamaMLPConverter,
 )
 from fast_llm.utils import Assert


 class Qwen2AttentionConverter(LlamaAttentionConverter):
     # TODO: Support sliding window with max_window_layers (need 2 kinds of block?)

+    @classmethod
+    def import_config(cls, config: dict) -> dict:
+        config["attention_bias"] = True
+        out = super().import_config(config)
+        out["query_layer"] = {"bias": {"enabled": True}}
+        out["key_layer"] = {"bias": {"enabled": True}}
+        out["value_layer"] = {"bias": {"enabled": True}}
+        out["dense_layer"] = {"bias": {"enabled": False}}
+        return out
+
+    @classmethod
+    def export_config(cls, config: AttentionConfig) -> dict:
+        out = super().export_config(config)
+        del out["attention_bias"]
+        return out
+
     @classmethod
     def _check_config(cls, config: AttentionConfig) -> None:
         Assert.is_(type(config), AttentionConfig)
@@ -33,8 +51,22 @@ def _check_config(cls, config: AttentionConfig) -> None:
         Assert.incl(config.dense_layer.bias.enabled, (None, False))


+class Qwen2MLPConverter(LlamaMLPConverter):
+    @classmethod
+    def import_config(cls, config: dict) -> dict:
+        config["mlp_bias"] = False
+        return super().import_config(config)
+
+    @classmethod
+    def export_config(cls, config: MLPConfig) -> dict:
+        out = super().export_config(config)
+        del out["mlp_bias"]
+        return out
+
+
 class Qwen2BlockConverter(LlamaBlockConverter):
     mixer_converter_class: typing.ClassVar[type[Qwen2AttentionConverter]] = Qwen2AttentionConverter
+    mlp_converter_class: typing.ClassVar[type[Qwen2MLPConverter]] = Qwen2MLPConverter


 class Qwen2DecoderConverter(LlamaDecoderConverter):
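Both new converters follow the same pattern: the Qwen2 subclass injects or strips format-specific keys around the generic Llama conversion, so Qwen2's quirks (biases on Q/K/V but not on the output projection, no mlp_bias key in the exported config) stay local to one class. A self-contained sketch of that pattern, using stand-in classes rather than the actual fast_llm converter API:

# Stand-in classes for illustration; BaseConverter is not fast_llm's LlamaAttentionConverter.
class BaseConverter:
    @classmethod
    def import_config(cls, config: dict) -> dict:
        # Generic mapping: one shared bias flag for the attention projections.
        return {"bias_enabled": config.get("attention_bias", False)}

    @classmethod
    def export_config(cls, config: dict) -> dict:
        return {"attention_bias": config["bias_enabled"]}


class Qwen2LikeConverter(BaseConverter):
    @classmethod
    def import_config(cls, config: dict) -> dict:
        config["attention_bias"] = True    # Qwen2-style checkpoints imply Q/K/V biases
        out = super().import_config(config)
        out["dense_bias_enabled"] = False  # ...but none on the output projection
        return out

    @classmethod
    def export_config(cls, config: dict) -> dict:
        out = super().export_config(config)
        del out["attention_bias"]          # the exported Qwen2 config does not carry this key
        return out

Here Qwen2LikeConverter.import_config({}) yields {"bias_enabled": True, "dense_bias_enabled": False}, mirroring the per-layer flags the real converter sets above.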
tests/models/test_generate.py (11 changes: 9 additions & 2 deletions)
@@ -10,6 +10,7 @@
 from fast_llm.models.gpt.config import PretrainedGPTModelConfig
 from fast_llm.models.gpt.conversion.config import LlamaCheckpointFormat
 from fast_llm.models.gpt.huggingface import HuggingfaceGPTModelForCausalLM
+from tests.utils.distributed_configs import DistributedTestingConfig
 from tests.utils.model_configs import ModelTestingGroup
 from tests.utils.utils import requires_cuda

@@ -244,13 +245,19 @@ def test_export_for_generate(run_test_script_for_all_models, model_testing_confi
     # Not really testing anything, but handles dependencies more easily than a fixture.
     if model_testing_config.checkpoint_format is None:
         pytest.skip(f"Conversion not supported for {model_testing_config.name}")
-    run_test_script_for_all_models(
-        [
+    if torch.cuda.device_count() < 1:
+        pytest.skip("Not enough GPUs to run the test")
+
+    distr_config = DistributedTestingConfig(
+        name=model_testing_config.name,
+        config_args=[
             "training.train_iters=1",
             f"training.export.format={model_testing_config.checkpoint_format.name}",
             "training.export.interval=1",
         ],
+        num_gpus=1,
     )
+    run_test_script_for_all_models(distr_config)


 @pytest.mark.slow
tests/models/test_lm_eval.py (4 changes: 2 additions & 2 deletions)
@@ -34,8 +34,8 @@ def do_get_lm_eval_config(base_path):

     task_dir = pathlib.Path(lm_eval.tasks.__file__).parent.resolve()
     return [
-        f"data.tokenizer.path={tokenizer_path}",
-        f"model.base_model.vocab_size=49157",
+        f"training.evaluators.evaluation_test.evaluator.tokenizer.path={tokenizer_path}",
+        f"model.base_model.embeddings.vocab_size=49157",
         "training.evaluators.evaluation_test.interval=2",
         "training.evaluators.evaluation_test.evaluator.type=lm_eval",
         "training.evaluators.evaluation_test.evaluator.cli_args="
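These two overrides track a config reshuffle rather than new behavior: the tokenizer path now lives under the lm_eval evaluator's own config (consistent with the evaluator.py change above, which reads the tokenizer from self._config), and vocab_size has moved under the embeddings sub-config. Assuming the usual dotted-path-to-nested-dict expansion, the new overrides correspond roughly to:

# Rough nested view of the new overrides; only the keys visible in this diff are taken
# as given, the surrounding structure is an assumption.
overrides = {
    "training": {"evaluators": {"evaluation_test": {
        "interval": 2,
        "evaluator": {
            "type": "lm_eval",
            "tokenizer": {"path": "<tokenizer_path>"},  # placeholder for the fixture's path
        },
    }}},
    "model": {"base_model": {"embeddings": {"vocab_size": 49157}}},
}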
tests/utils/model_configs.py (13 changes: 8 additions & 5 deletions)
@@ -470,17 +470,20 @@ def _update_and_add_testing_config(
     "qwen_2",
     # TODO: replace
     updates={
-        ("model", "base_model", "decoder", "block", "add_linear_biases"): "only_attn_qkv",
+        ("model", "base_model", "decoder", "block", "mixer", "query_layer", "bias", "enabled"): True,
+        ("model", "base_model", "decoder", "block", "mixer", "key_layer", "bias", "enabled"): True,
+        ("model", "base_model", "decoder", "block", "mixer", "value_layer", "bias", "enabled"): True,
+        ("model", "base_model", "decoder", "block", "mixer", "dense_layer", "bias", "enabled"): False,
     },
     # Megatron doesn't support per sub layer biases.
     megatron_args=None,
     checkpoint_format=Qwen2CheckpointFormat,
     # TODO: Add back generate as `normal` when stable.
     groups={
-        ModelTestingGroup.basic: ModelTestingGroupAction.broken,
-        ModelTestingGroup.checkpoint: ModelTestingGroupAction.broken,
-        ModelTestingGroup.convert: ModelTestingGroupAction.broken,
-        ModelTestingGroup.generate: ModelTestingGroupAction.broken,
+        ModelTestingGroup.basic: ModelTestingGroupAction.normal,
+        ModelTestingGroup.checkpoint: ModelTestingGroupAction.normal,
+        ModelTestingGroup.convert: ModelTestingGroupAction.normal,
+        ModelTestingGroup.generate: ModelTestingGroupAction.normal,
         ModelTestingGroup.megatron: ModelTestingGroupAction.not_implemented,
         ModelTestingGroup.distributed: ModelTestingGroupAction.unimportant,
     },
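The tuple-keyed updates replace the former coarse add_linear_biases="only_attn_qkv" switch with explicit per-sublayer bias flags, which is what allows the Qwen2 testing groups to move from broken back to normal. Assuming the tuples expand into nested config paths in the usual way, the mixer portion is equivalent to:

# Assumed nested-dict form of the tuple-keyed updates above (sketch only; the
# expansion itself is handled by the test harness and is not shown in this diff).
qwen2_mixer_updates = {
    "model": {"base_model": {"decoder": {"block": {"mixer": {
        "query_layer": {"bias": {"enabled": True}},
        "key_layer": {"bias": {"enabled": True}},
        "value_layer": {"bias": {"enabled": True}},
        "dense_layer": {"bias": {"enabled": False}},  # output projection stays bias-free
    }}}}}
}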