From 098788371257d5a85934a36aefb98942f96cc2c6 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Mon, 25 Sep 2023 15:36:07 +0200
Subject: [PATCH 1/7] fix

---
 src/transformers/pipelines/__init__.py | 44 +++++++++++++++++++-------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index ae6d20265a0a..2434f050a74f 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -33,7 +33,10 @@
 from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
 from ..tokenization_utils import PreTrainedTokenizer
 from ..utils import (
+    CONFIG_NAME,
     HUGGINGFACE_CO_RESOLVE_ENDPOINT,
+    cached_file,
+    extract_commit_hash,
     find_adapter_config_file,
     is_kenlm_available,
     is_offline_mode,
@@ -698,11 +701,14 @@ def pipeline(
             raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
         token = use_auth_token
 
+    code_revision = kwargs.pop("code_revision", None)
+    commit_hash = kwargs.pop("_commit_hash", None)
+
     hub_kwargs = {
         "revision": revision,
         "token": token,
         "trust_remote_code": trust_remote_code,
-        "_commit_hash": None,
+        "_commit_hash": commit_hash,
     }
 
     if task is None and model is None:
@@ -727,28 +733,42 @@ def pipeline(
     if isinstance(model, Path):
         model = str(model)
 
+    if hub_kwargs["_commit_hash"] is None:
+        pretrained_model_name_or_path = None
+        if isinstance(config, str):
+            pretrained_model_name_or_path = config
+        elif config is None and isinstance(model, str):
+            pretrained_model_name_or_path = model
+
+        if not isinstance(config, PretrainedConfig) and pretrained_model_name_or_path is not None:
+            # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible
+            resolved_config_file = cached_file(
+                pretrained_model_name_or_path,
+                CONFIG_NAME,
+                _raise_exceptions_for_missing_entries=False,
+                _raise_exceptions_for_connection_errors=False,
+                **hub_kwargs,
+            )
+            hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash)
+        else:
+            hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None)
+
     # Config is the primordial information item.
     # Instantiate config if needed
     if isinstance(config, str):
-        config = AutoConfig.from_pretrained(config, _from_pipeline=task, **hub_kwargs, **model_kwargs)
+        config = AutoConfig.from_pretrained(config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs)
         hub_kwargs["_commit_hash"] = config._commit_hash
     elif config is None and isinstance(model, str):
         # Check for an adapter file in the model path if PEFT is available
         if is_peft_available():
-            subfolder = hub_kwargs.get("subfolder", None)
-            maybe_adapter_path = find_adapter_config_file(
-                model,
-                revision=revision,
-                token=use_auth_token,
-                subfolder=subfolder,
-            )
+            maybe_adapter_path = find_adapter_config_file(model, **hub_kwargs)
 
             if maybe_adapter_path is not None:
                 with open(maybe_adapter_path, "r", encoding="utf-8") as f:
                     adapter_config = json.load(f)
                     model = adapter_config["base_model_name_or_path"]
 
-        config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
+        config = AutoConfig.from_pretrained(model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs)
         hub_kwargs["_commit_hash"] = config._commit_hash
 
     custom_tasks = {}
@@ -769,7 +789,7 @@ def pipeline(
                 "Inferring the task automatically requires to check the hub with a model_id defined as a `str`."
                 f"{model} is not a valid model_id."
             )
-        task = get_task(model, use_auth_token)
+        task = get_task(model, token)
 
     # Retrieve the task
    if task in custom_tasks:
@@ -784,7 +804,7 @@ def pipeline(
            )
            class_ref = targeted_task["impl"]
            pipeline_class = get_class_from_dynamic_module(
-                class_ref, model, revision=revision, use_auth_token=use_auth_token
+                class_ref, model, revision=revision, code_revision=code_revision, **hub_kwargs,
            )
    else:
        normalized_task, targeted_task, task_options = check_task(task)
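
Note: patch 1 above resolves a commit hash before any other Hub call: it probes the repo's config.json first (tolerating its absence) and stores the resolved hash in hub_kwargs, so every later download in the same pipeline() call targets one consistent revision. A minimal standalone sketch of that pattern, assuming a reachable Hub repo id; cached_file and extract_commit_hash are the same helpers the patch imports from transformers.utils:

    from transformers.utils import CONFIG_NAME, cached_file, extract_commit_hash

    def resolve_commit_hash(repo_id, revision=None, token=None):
        # Probe the config file first; it may be missing, so suppress the
        # usual exceptions and accept None as the resolved path.
        resolved_config_file = cached_file(
            repo_id,
            CONFIG_NAME,
            revision=revision,
            token=token,
            _raise_exceptions_for_missing_entries=False,
            _raise_exceptions_for_connection_errors=False,
        )
        # Returns the hash embedded in the cached snapshot path, or None
        # when the config could not be fetched.
        return extract_commit_hash(resolved_config_file, None)
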
f"{model} is not a valid model_id." ) - task = get_task(model, use_auth_token) + task = get_task(model, token) # Retrieve the task if task in custom_tasks: @@ -784,7 +804,7 @@ def pipeline( ) class_ref = targeted_task["impl"] pipeline_class = get_class_from_dynamic_module( - class_ref, model, revision=revision, use_auth_token=use_auth_token + class_ref, model, revision=revision, code_revision=code_revision, **hub_kwargs, ) else: normalized_task, targeted_task, task_options = check_task(task) From 03f45a7cb4badfee8b67355cd3fa16d51850958a Mon Sep 17 00:00:00 2001 From: ydshieh Date: Mon, 25 Sep 2023 15:37:57 +0200 Subject: [PATCH 2/7] [skip-ci] fix --- src/transformers/pipelines/__init__.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index 2434f050a74f..f90f0d84e348 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -756,7 +756,9 @@ def pipeline( # Config is the primordial information item. # Instantiate config if needed if isinstance(config, str): - config = AutoConfig.from_pretrained(config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs) + config = AutoConfig.from_pretrained( + config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs + ) hub_kwargs["_commit_hash"] = config._commit_hash elif config is None and isinstance(model, str): # Check for an adapter file in the model path if PEFT is available @@ -768,7 +770,9 @@ def pipeline( adapter_config = json.load(f) model = adapter_config["base_model_name_or_path"] - config = AutoConfig.from_pretrained(model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs) + config = AutoConfig.from_pretrained( + model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs + ) hub_kwargs["_commit_hash"] = config._commit_hash custom_tasks = {} @@ -804,7 +808,11 @@ def pipeline( ) class_ref = targeted_task["impl"] pipeline_class = get_class_from_dynamic_module( - class_ref, model, revision=revision, code_revision=code_revision, **hub_kwargs, + class_ref, + model, + revision=revision, + code_revision=code_revision, + **hub_kwargs, ) else: normalized_task, targeted_task, task_options = check_task(task) From 20756fefd7f528369bf00dd73ce80ae4716d1ea6 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Mon, 25 Sep 2023 15:45:17 +0200 Subject: [PATCH 3/7] [skip-ci] fix --- src/transformers/pipelines/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index f90f0d84e348..bfa5adf3334c 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -763,6 +763,8 @@ def pipeline( elif config is None and isinstance(model, str): # Check for an adapter file in the model path if PEFT is available if is_peft_available(): + # `find_adapter_config_file` doesn't accept `trust_remote_code` + _hub_kwargs = {k: v for k, v in hub_kwargs.items() if k != "trust_remote_code"} maybe_adapter_path = find_adapter_config_file(model, **hub_kwargs) if maybe_adapter_path is not None: From 9f2742415ce468cba311e9850bee72aa16d40e4d Mon Sep 17 00:00:00 2001 From: ydshieh Date: Mon, 25 Sep 2023 15:46:26 +0200 Subject: [PATCH 4/7] [skip-ci] fix --- src/transformers/pipelines/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/pipelines/__init__.py 
From a3fdc05834eb02fd37d2c7ae1653c4c3b78caa08 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Mon, 25 Sep 2023 15:51:16 +0200
Subject: [PATCH 5/7] [skip-ci] fix

---
 src/transformers/pipelines/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index 1e8e9c8ea1eb..273fc0f9e4a4 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -812,7 +812,6 @@ def pipeline(
            pipeline_class = get_class_from_dynamic_module(
                class_ref,
                model,
-                revision=revision,
                code_revision=code_revision,
                **hub_kwargs,
            )

From 8141bc3091fa710b951de57acc607294c9ceddc9 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Mon, 25 Sep 2023 15:53:44 +0200
Subject: [PATCH 6/7] fix

From d07c8ee80570ca4034f56f78096d59490ff85907 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Tue, 26 Sep 2023 11:49:21 +0200
Subject: [PATCH 7/7] fix

---
 src/transformers/pipelines/__init__.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index 273fc0f9e4a4..98d54e4a9804 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -733,7 +733,7 @@ def pipeline(
     if isinstance(model, Path):
         model = str(model)
 
-    if hub_kwargs["_commit_hash"] is None:
+    if commit_hash is None:
         pretrained_model_name_or_path = None
         if isinstance(config, str):
             pretrained_model_name_or_path = config
@@ -765,7 +765,12 @@ def pipeline(
         if is_peft_available():
             # `find_adapter_config_file` doesn't accept `trust_remote_code`
             _hub_kwargs = {k: v for k, v in hub_kwargs.items() if k != "trust_remote_code"}
-            maybe_adapter_path = find_adapter_config_file(model, **_hub_kwargs)
+            maybe_adapter_path = find_adapter_config_file(
+                model,
+                token=hub_kwargs["token"],
+                revision=hub_kwargs["revision"],
+                _commit_hash=hub_kwargs["_commit_hash"],
+            )
 
             if maybe_adapter_path is not None:
                 with open(maybe_adapter_path, "r", encoding="utf-8") as f:
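
Note: patch 7 tightens both earlier fixes. The guard now tests the user-supplied commit_hash rather than the mutable hub_kwargs entry, and the adapter lookup passes an explicit allow-list of keywords instead of a filtered **-expansion. A minimal sketch of the resulting adapter resolution, using the real find_adapter_config_file helper from transformers.utils; the repo id is a hypothetical placeholder:

    import json

    from transformers.utils import find_adapter_config_file

    # "some-user/some-peft-adapter" is a placeholder, not a real repo.
    maybe_adapter_path = find_adapter_config_file(
        "some-user/some-peft-adapter",
        token=None,          # or a Hub access token
        revision="main",
        _commit_hash=None,   # filled in once a hash has been resolved
    )
    if maybe_adapter_path is not None:
        with open(maybe_adapter_path, "r", encoding="utf-8") as f:
            adapter_config = json.load(f)
        # A PEFT adapter_config.json names the base model it was trained on.
        base_model = adapter_config["base_model_name_or_path"]
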