From 53052d8dcb71d6136c3c4f93447d8ff3c32696e8 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 6 Feb 2023 19:29:05 +0000 Subject: [PATCH 01/15] v1 fix --- src/transformers/pipelines/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 3905d28d26d2..c20bb144a5cc 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -780,7 +780,9 @@ def __init__( # Special handling if self.framework == "pt" and self.device.type != "cpu": - self.model = self.model.to(self.device) + # there is no need to call `.to` on a model that has been loaded with `accelerate` + if not hasattr(self.model, "hf_device_map"): + self.model = self.model.to(self.device) # Update config with task specific parameters task_specific_params = self.model.config.task_specific_params From bdcfb266b84866b8346df9587eb57d9f3f4be135 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 8 Feb 2023 19:52:31 +0000 Subject: [PATCH 02/15] adapt from suggestions --- src/transformers/pipelines/base.py | 33 ++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index c20bb144a5cc..bc5470bac296 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -749,7 +749,7 @@ def __init__( framework: Optional[str] = None, task: str = "", args_parser: ArgumentHandler = None, - device: Union[int, str, "torch.device"] = -1, + device: Union[int, str, "torch.device"] = None, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, binary_output: bool = False, **kwargs, @@ -769,7 +769,7 @@ def __init__( self.device = device elif isinstance(device, str): self.device = torch.device(device) - elif device < 0: + elif device is None or device < 0: self.device = torch.device("cpu") else: self.device = torch.device(f"cuda:{device}") @@ -779,10 +779,31 @@ def __init__( self.binary_output = binary_output # Special handling - if self.framework == "pt" and self.device.type != "cpu": - # there is no need to call `.to` on a model that has been loaded with `accelerate` - if not hasattr(self.model, "hf_device_map"): - self.model = self.model.to(self.device) + if self.framework == "pt" and device is not None: + self.model = self.model.to(device=device) + + hf_device_map = getattr(self.model, "hf_device_map", None) + if hf_device_map is not None: + logger.warning( + "The model has been loaded with `accelerate` using `device_map=xxx` in `from_pretrained`" + " method, you should not pass a device when initializing your pipeline." + ) + + if device is None and self.framework == "pt": + # `accelerate` device map + hf_device_map = getattr(self.model, "hf_device_map", None) + if hf_device_map is not None: + # Take the main device used by `accelerate`. + # adapted from: https://github.com/huggingface/transformers/pull/21479#issuecomment-1420833512 + if set(hf_device_map.values()) == {"cpu"} or set(hf_device_map.values()) == {"cpu", "disk"}: + accelerate_device = torch.device("cpu") + else: + main_device = [d for d in hf_device_map.values() if d not in ["cpu", "disk"]][0] + accelerate_device = torch.device(f"cuda:{main_device}") + + self.device = accelerate_device + else: + self.device = torch.device("cpu") # Update config with task specific parameters task_specific_params = self.model.config.task_specific_params From dab8d808300d34d7572e5f9d605b5fbfc5de57af Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 8 Feb 2023 19:54:21 +0000 Subject: [PATCH 03/15] make style --- src/transformers/pipelines/base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index bc5470bac296..551309fcc86a 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -800,7 +800,7 @@ def __init__( else: main_device = [d for d in hf_device_map.values() if d not in ["cpu", "disk"]][0] accelerate_device = torch.device(f"cuda:{main_device}") - + self.device = accelerate_device else: self.device = torch.device("cpu") @@ -1071,8 +1071,10 @@ def __call__(self, inputs, *args, num_workers=None, batch_size=None, **kwargs): self.call_count += 1 if self.call_count > 10 and self.framework == "pt" and self.device.type == "cuda": warnings.warn( - "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a" - " dataset", + ( + "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" + " use a dataset" + ), UserWarning, ) From 420940a5b36c2cfc81cb7e6f521e5942c06e8723 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 07:57:40 +0000 Subject: [PATCH 04/15] fix tests --- src/transformers/pipelines/base.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 551309fcc86a..85301f97e175 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -780,7 +780,7 @@ def __init__( # Special handling if self.framework == "pt" and device is not None: - self.model = self.model.to(device=device) + self.model = self.model.to(device=self.device) hf_device_map = getattr(self.model, "hf_device_map", None) if hf_device_map is not None: @@ -1071,10 +1071,8 @@ def __call__(self, inputs, *args, num_workers=None, batch_size=None, **kwargs): self.call_count += 1 if self.call_count > 10 and self.framework == "pt" and self.device.type == "cuda": warnings.warn( - ( - "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" - " use a dataset" - ), + "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" + " use a dataset", UserWarning, ) From 62cf7df817791bb67d0fd4e119384e1cd040f7e8 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 08:04:41 +0000 Subject: [PATCH 05/15] add gpu tests --- .../test_pipelines_text_generation.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 2e97810e7101..763288bfaf98 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -14,7 +14,14 @@ import unittest -from transformers import MODEL_FOR_CAUSAL_LM_MAPPING, TF_MODEL_FOR_CAUSAL_LM_MAPPING, TextGenerationPipeline, pipeline +from transformers import ( + MODEL_FOR_CAUSAL_LM_MAPPING, + TF_MODEL_FOR_CAUSAL_LM_MAPPING, + AutoModelForCausalLM, + AutoTokenizer, + TextGenerationPipeline, + pipeline, +) from transformers.testing_utils import ( require_accelerate, require_tf, @@ -312,3 +319,17 @@ def test_small_model_fp16(self): pipe = pipeline(model="hf-internal-testing/tiny-random-bloom", device=0, torch_dtype=torch.float16) pipe("This is a test") + + @require_torch + @require_accelerate + @require_torch_gpu + def test_pipeline_accelerate_top_p(self): + import torch + + model_id = "hf-internal-testing/tiny-random-bloom" + + model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16) + tokenizer = AutoTokenizer.from_pretrained(model_id) + + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + pipe("This is a test", do_sample=True, top_p=0.5) From 9bbbaea4dfa7c90071850cfcb6ee02c97e1ca0a8 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 08:13:41 +0000 Subject: [PATCH 06/15] update docs --- docs/source/en/pipeline_tutorial.mdx | 37 +++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/docs/source/en/pipeline_tutorial.mdx b/docs/source/en/pipeline_tutorial.mdx index 8560d856f39e..f7fe9e7597bb 100644 --- a/docs/source/en/pipeline_tutorial.mdx +++ b/docs/source/en/pipeline_tutorial.mdx @@ -105,6 +105,8 @@ If the model is too large for a single GPU, you can set `device_map="auto"` to a generator(model="openai/whisper-large", device_map="auto") ``` +Note that if `device_map="auto"` is passed, there is no need to add the argument `device=device` when instantiating your `pipeline` as you may encounter some unexpected behavior! + ### Batch size By default, pipelines will not batch inference for reasons explained in detail [here](https://huggingface.co/docs/transformers/main_classes/pipelines#pipeline-batching). The reason is that batching is not necessarily faster, and can actually be quite slower in some cases. @@ -257,4 +259,37 @@ sudo apt install -y tesseract-ocr pip install pytesseract ``` - \ No newline at end of file + + +## Using `pipeline` on large models with 🤗 `accelerate`: + +You can easily run `pipeline` on large models using 🤗 `accelerate`! First make sure you have installed `accelerate` with `pip install accelerate`. + +Let's assume you fullfill the hardware requirements to run a large model such as `bloom` (which has 176B parameters, so ~350GB in `bfloat16`). First load your model +using `device_map="auto"` + +```py +# pip install accelerate +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + +model = AutoModelForCausalLM.from_pretrained("bigscience/bloom", torch_dtype=torch.bfloat16, device_map="auto") +tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom") + +pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) +output = pipe("This is a cool example!", do_sample=True, top_p=0.95) +``` + +You can also pass 8-bit loaded models if you install `bitsandbytes` and add the argument `load_in_8bit=True` + +```py +# pip install accelerate bitsandbytes +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + +model = AutoModelForCausalLM.from_pretrained("bigscience/bloom", device_map="auto", load_in_8bit=True) +tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom") + +pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) +output = pipe("This is a cool example!", do_sample=True, top_p=0.95) +``` \ No newline at end of file From 8d730f8eab4344db23ae01fa752a1c29a68804cf Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 08:14:50 +0000 Subject: [PATCH 07/15] fix other tests --- src/transformers/pipelines/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 85301f97e175..586bfccb147e 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -774,7 +774,7 @@ def __init__( else: self.device = torch.device(f"cuda:{device}") else: - self.device = device + self.device = device if device is not None else -1 self.torch_dtype = torch_dtype self.binary_output = binary_output From 80e50c9251eacf166066d161a752366d55f6387a Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Thu, 9 Feb 2023 18:15:19 +0100 Subject: [PATCH 08/15] Apply suggestions from code review Co-authored-by: Nicolas Patry --- docs/source/en/pipeline_tutorial.mdx | 14 ++++---------- tests/pipelines/test_pipelines_text_generation.py | 7 +------ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/docs/source/en/pipeline_tutorial.mdx b/docs/source/en/pipeline_tutorial.mdx index f7fe9e7597bb..0171e19e91c5 100644 --- a/docs/source/en/pipeline_tutorial.mdx +++ b/docs/source/en/pipeline_tutorial.mdx @@ -271,12 +271,9 @@ using `device_map="auto"` ```py # pip install accelerate import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline +from transformers import pipeline -model = AutoModelForCausalLM.from_pretrained("bigscience/bloom", torch_dtype=torch.bfloat16, device_map="auto") -tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom") - -pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) +pipe = pipeline(model="bigscience/bloom", torch_dtype=torch.bfloat16, device_map="auto") output = pipe("This is a cool example!", do_sample=True, top_p=0.95) ``` @@ -285,11 +282,8 @@ You can also pass 8-bit loaded models if you install `bitsandbytes` and add the ```py # pip install accelerate bitsandbytes import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline - -model = AutoModelForCausalLM.from_pretrained("bigscience/bloom", device_map="auto", load_in_8bit=True) -tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom") +from transformers import pipeline -pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) +pipe = pipeline(model="bigscience/bloom", device_map="auto", model_kwargs={"load_in_8bit":True}) output = pipe("This is a cool example!", do_sample=True, top_p=0.95) ``` \ No newline at end of file diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 763288bfaf98..71451250a9ee 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -326,10 +326,5 @@ def test_small_model_fp16(self): def test_pipeline_accelerate_top_p(self): import torch - model_id = "hf-internal-testing/tiny-random-bloom" - - model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16) - tokenizer = AutoTokenizer.from_pretrained(model_id) - - pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + pipe = pipeline(model="hf-internal-testing/tiny-random-bloom", device_map="auto", torch_dtype=torch.float16) pipe("This is a test", do_sample=True, top_p=0.5) From 8714b5effb98cc35f6b7161ee10a029adb3bc537 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 17:59:42 +0000 Subject: [PATCH 09/15] better fix --- src/transformers/pipelines/__init__.py | 5 +++ src/transformers/pipelines/base.py | 48 ++++++++++---------------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index e14d74457990..3d42d483b75d 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -741,6 +741,11 @@ def pipeline( 'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those' " arguments might conflict, use only one.)" ) + if device is not None: + logger.warning( + "Both `device` and `device_map` are specified. `device` will override `device_map`. You" + " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`." + ) model_kwargs["device_map"] = device_map if torch_dtype is not None: if "torch_dtype" in model_kwargs: diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 586bfccb147e..d7037744dfdb 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -764,12 +764,25 @@ def __init__( self.image_processor = image_processor self.modelcard = modelcard self.framework = framework + + if self.framework == "pt" and device is not None: + self.model = self.model.to(device=device) + + if device is None: + # `accelerate` device map + hf_device_map = getattr(self.model, "hf_device_map", None) + if hf_device_map is not None: + # Take the first device used by `accelerate`. + device = next(iter(hf_device_map.values())) + else: + device = -1 + if is_torch_available() and self.framework == "pt": if isinstance(device, torch.device): self.device = device elif isinstance(device, str): self.device = torch.device(device) - elif device is None or device < 0: + elif device < 0: self.device = torch.device("cpu") else: self.device = torch.device(f"cuda:{device}") @@ -778,33 +791,6 @@ def __init__( self.torch_dtype = torch_dtype self.binary_output = binary_output - # Special handling - if self.framework == "pt" and device is not None: - self.model = self.model.to(device=self.device) - - hf_device_map = getattr(self.model, "hf_device_map", None) - if hf_device_map is not None: - logger.warning( - "The model has been loaded with `accelerate` using `device_map=xxx` in `from_pretrained`" - " method, you should not pass a device when initializing your pipeline." - ) - - if device is None and self.framework == "pt": - # `accelerate` device map - hf_device_map = getattr(self.model, "hf_device_map", None) - if hf_device_map is not None: - # Take the main device used by `accelerate`. - # adapted from: https://github.com/huggingface/transformers/pull/21479#issuecomment-1420833512 - if set(hf_device_map.values()) == {"cpu"} or set(hf_device_map.values()) == {"cpu", "disk"}: - accelerate_device = torch.device("cpu") - else: - main_device = [d for d in hf_device_map.values() if d not in ["cpu", "disk"]][0] - accelerate_device = torch.device(f"cuda:{main_device}") - - self.device = accelerate_device - else: - self.device = torch.device("cpu") - # Update config with task specific parameters task_specific_params = self.model.config.task_specific_params if task_specific_params is not None and task in task_specific_params: @@ -1071,8 +1057,10 @@ def __call__(self, inputs, *args, num_workers=None, batch_size=None, **kwargs): self.call_count += 1 if self.call_count > 10 and self.framework == "pt" and self.device.type == "cuda": warnings.warn( - "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" - " use a dataset", + ( + "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" + " use a dataset" + ), UserWarning, ) From e5b3dc0bf76280f642e22b63cf4df5b4721ce54f Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 18:01:19 +0000 Subject: [PATCH 10/15] make fixup --- src/transformers/pipelines/base.py | 6 ++---- tests/pipelines/test_pipelines_text_generation.py | 9 +-------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index d7037744dfdb..ae3d88229c2c 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -1057,10 +1057,8 @@ def __call__(self, inputs, *args, num_workers=None, batch_size=None, **kwargs): self.call_count += 1 if self.call_count > 10 and self.framework == "pt" and self.device.type == "cuda": warnings.warn( - ( - "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" - " use a dataset" - ), + "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" + " use a dataset", UserWarning, ) diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 71451250a9ee..1f329926813f 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -14,14 +14,7 @@ import unittest -from transformers import ( - MODEL_FOR_CAUSAL_LM_MAPPING, - TF_MODEL_FOR_CAUSAL_LM_MAPPING, - AutoModelForCausalLM, - AutoTokenizer, - TextGenerationPipeline, - pipeline, -) +from transformers import MODEL_FOR_CAUSAL_LM_MAPPING, TF_MODEL_FOR_CAUSAL_LM_MAPPING, TextGenerationPipeline, pipeline from transformers.testing_utils import ( require_accelerate, require_tf, From adf3ca4d8d147286fa8890854b320e260a6a269a Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 18:03:58 +0000 Subject: [PATCH 11/15] better example --- docs/source/en/pipeline_tutorial.mdx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/en/pipeline_tutorial.mdx b/docs/source/en/pipeline_tutorial.mdx index 0171e19e91c5..00dceeb4f243 100644 --- a/docs/source/en/pipeline_tutorial.mdx +++ b/docs/source/en/pipeline_tutorial.mdx @@ -265,15 +265,14 @@ pip install pytesseract You can easily run `pipeline` on large models using 🤗 `accelerate`! First make sure you have installed `accelerate` with `pip install accelerate`. -Let's assume you fullfill the hardware requirements to run a large model such as `bloom` (which has 176B parameters, so ~350GB in `bfloat16`). First load your model -using `device_map="auto"` +First load your model using `device_map="auto"`! We will use `facebook/opt-1.3b` for our example. ```py # pip install accelerate import torch from transformers import pipeline -pipe = pipeline(model="bigscience/bloom", torch_dtype=torch.bfloat16, device_map="auto") +pipe = pipeline(model="facebook/opt-1.3b", torch_dtype=torch.bfloat16, device_map="auto") output = pipe("This is a cool example!", do_sample=True, top_p=0.95) ``` @@ -284,6 +283,8 @@ You can also pass 8-bit loaded models if you install `bitsandbytes` and add the import torch from transformers import pipeline -pipe = pipeline(model="bigscience/bloom", device_map="auto", model_kwargs={"load_in_8bit":True}) +pipe = pipeline(model="facebook/opt-1.3b", device_map="auto", model_kwargs={"load_in_8bit": True}) output = pipe("This is a cool example!", do_sample=True, top_p=0.95) -``` \ No newline at end of file +``` + +Note that you can replace the checkpoint with any of the Hugging Face model that supports large model loading such as BLOOM! \ No newline at end of file From 6bea43240e1961e6ad1d0391af6d69b574d994e0 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 18:05:54 +0000 Subject: [PATCH 12/15] revert changes --- src/transformers/pipelines/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index ae3d88229c2c..80248173487c 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -1057,8 +1057,8 @@ def __call__(self, inputs, *args, num_workers=None, batch_size=None, **kwargs): self.call_count += 1 if self.call_count > 10 and self.framework == "pt" and self.device.type == "cuda": warnings.warn( - "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please" - " use a dataset", + "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a" + " dataset", UserWarning, ) From e57d8f81feba281b2587681c0e5dc627c0f19623 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 18:37:29 +0000 Subject: [PATCH 13/15] proposal --- src/transformers/pipelines/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 80248173487c..c8870b44f87f 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -766,6 +766,8 @@ def __init__( self.framework = framework if self.framework == "pt" and device is not None: + if isinstance(device, int) and device == -1: + device = "cpu" self.model = self.model.to(device=device) if device is None: From 23f0608c2a12a3b62010375ab801f6aede50c6c7 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 9 Feb 2023 18:56:05 +0000 Subject: [PATCH 14/15] more elegant solution --- src/transformers/pipelines/automatic_speech_recognition.py | 6 +++--- src/transformers/pipelines/base.py | 2 -- src/transformers/pipelines/question_answering.py | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py index 8c552cbdc307..ade6a52b5ff0 100644 --- a/src/transformers/pipelines/automatic_speech_recognition.py +++ b/src/transformers/pipelines/automatic_speech_recognition.py @@ -287,9 +287,9 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline): installed. If no framework is specified, will default to the one currently installed. If no framework is specified and both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is provided. - device (`int`, *optional*, defaults to -1): - Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, a positive will run the model on - the associated CUDA device id. + device (Union[`int`, `torch.device`], *optional*, defaults to `None`): + Device ordinal for CPU/GPU supports. Setting this to `None` will leverage CPU, a positive will run the + model on the associated CUDA device id. decoder (`pyctcdecode.BeamSearchDecoderCTC`, *optional*): [PyCTCDecode's BeamSearchDecoderCTC](https://github.com/kensho-technologies/pyctcdecode/blob/2fd33dc37c4111417e08d89ccd23d28e9b308d19/pyctcdecode/decoder.py#L180) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index c8870b44f87f..80248173487c 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -766,8 +766,6 @@ def __init__( self.framework = framework if self.framework == "pt" and device is not None: - if isinstance(device, int) and device == -1: - device = "cpu" self.model = self.model.to(device=device) if device is None: diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py index d4bb7f210290..fad64d71ff71 100644 --- a/src/transformers/pipelines/question_answering.py +++ b/src/transformers/pipelines/question_answering.py @@ -255,7 +255,6 @@ def __init__( tokenizer: PreTrainedTokenizer, modelcard: Optional[ModelCard] = None, framework: Optional[str] = None, - device: int = -1, task: str = "", **kwargs, ): @@ -264,7 +263,6 @@ def __init__( tokenizer=tokenizer, modelcard=modelcard, framework=framework, - device=device, task=task, **kwargs, ) From e80be11d51f5b2deac6cf6e6f2476c3a8f930d3a Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Thu, 9 Feb 2023 20:02:47 +0100 Subject: [PATCH 15/15] Update src/transformers/pipelines/automatic_speech_recognition.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/pipelines/automatic_speech_recognition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py index ade6a52b5ff0..5075fa6c56e6 100644 --- a/src/transformers/pipelines/automatic_speech_recognition.py +++ b/src/transformers/pipelines/automatic_speech_recognition.py @@ -287,7 +287,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline): installed. If no framework is specified, will default to the one currently installed. If no framework is specified and both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is provided. - device (Union[`int`, `torch.device`], *optional*, defaults to `None`): + device (Union[`int`, `torch.device`], *optional*): Device ordinal for CPU/GPU supports. Setting this to `None` will leverage CPU, a positive will run the model on the associated CUDA device id. decoder (`pyctcdecode.BeamSearchDecoderCTC`, *optional*):