From 97c5916925f30a6218ebfbce36a79813f0d85804 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Fri, 7 Jun 2024 10:36:49 -0500 Subject: [PATCH 1/9] do not override config deprefix_prompt Signed-off-by: Jeffrey Martin --- garak/generators/huggingface.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index e2b94f6f..eaea294d 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -87,7 +87,8 @@ def __init__( do_sample=self.do_sample, device=self.device, ) - self.deprefix_prompt = self.name in models_to_deprefix + if not hasattr(self, "deprefix_prompt"): + self.deprefix_prompt = self.name in models_to_deprefix if _config.loaded: if _config.run.deprefix is True: self.deprefix_prompt = True @@ -173,7 +174,8 @@ def __init__( device=self.device, use_fp8=use_fp8, ) - self.deprefix_prompt = name in models_to_deprefix + if not hasattr(self, "deprefix_prompt"): + self.deprefix_prompt = self.name in models_to_deprefix if _config.loaded: if _config.run.deprefix is True: self.deprefix_prompt = True @@ -219,7 +221,8 @@ def __init__( device=self.device, ) self.conversation = Conversation() - self.deprefix_prompt = self.name in models_to_deprefix + if not hasattr(self, "deprefix_prompt"): + self.deprefix_prompt = self.name in models_to_deprefix if _config.loaded: if _config.run.deprefix is True: self.deprefix_prompt = True @@ -494,7 +497,8 @@ def __init__( config=self.config, ).to(self.init_device) - self.deprefix_prompt = self.name in models_to_deprefix + if not hasattr(self, "deprefix_prompt"): + self.deprefix_prompt = self.name in models_to_deprefix if self.config.tokenizer_class: self.tokenizer = transformers.AutoTokenizer.from_pretrained( From 62a2269d9001799f66d3da5e6c9eccb975f0088f Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Thu, 6 Jun 2024 11:27:07 -0500 Subject: [PATCH 2/9] improve code reuse * consolidate `__init__` where possible * 
shift generator or model object creation to `_load_client()` Signed-off-by: Jeffrey Martin --- garak/generators/huggingface.py | 65 ++++++++++++++------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index eaea294d..843bf836 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -64,12 +64,17 @@ def __init__( self.generations = generations self.do_sample = do_sample self.device = device - self._load_config(config_root) super().__init__( self.name, generations=self.generations, config_root=config_root ) + self._load_client() + + def _load_client(self): + if hasattr(self, "generator") and self.generator is not None: + return + from transformers import pipeline, set_seed if _config.run.seed is not None: @@ -95,9 +100,13 @@ def __init__( self._set_hf_context_len(self.generator.model.config) + def _clear_client(): + self.generator = None + def _call_model( self, prompt: str, generations_this_call: int = 1 ) -> List[Union[str, None]]: + self._load_client() with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) try: @@ -136,18 +145,9 @@ class OptimumPipeline(Pipeline, HFCompatible): supports_multiple_generations = True doc_uri = "https://huggingface.co/blog/optimum-nvidia" - def __init__( - self, name="", do_sample=True, generations=10, device=0, config_root=_config - ): - self.name = name - - super().__init__( - self.name, - do_sample=do_sample, - generations=generations, - device=device, - config_root=config_root, - ) + def _load_client(self): + if hasattr(self, "generator") and self.generator is not None: + return from optimum.nvidia.pipelines import pipeline from transformers import set_seed @@ -183,23 +183,15 @@ def __init__( self._set_hf_context_len(self.generator.model.config) -class ConversationalPipeline(Generator, HFCompatible): +class ConversationalPipeline(Pipeline, HFCompatible): """Conversational text 
generation using HuggingFace pipelines""" generator_family_name = "Hugging Face 🤗 pipeline for conversations" supports_multiple_generations = True - def __init__( - self, name="", do_sample=True, generations=10, device=0, config_root=_config - ): - self.name = name - self.do_sample = do_sample - self.generations = generations - self.device = device - - super().__init__( - self.name, generations=self.generations, config_root=config_root - ) + def _load_client(self): + if hasattr(self, "generator") and self.generator is not None: + return from transformers import pipeline, set_seed, Conversation @@ -239,6 +231,7 @@ def _call_model( ) -> List[Union[str, None]]: """Take a conversation as a list of dictionaries and feed it to the model""" + self._load_client() # If conversation is provided as a list of dicts, create the conversation. # Otherwise, maintain state in Generator if isinstance(prompt, str): @@ -451,22 +444,15 @@ def _call_model( return [output] -class Model(Generator, HFCompatible): +class Model(Pipeline, HFCompatible): """Get text generations from a locally-run Hugging Face model""" generator_family_name = "Hugging Face 🤗 model" supports_multiple_generations = True - def __init__( - self, name="", do_sample=True, generations=10, device=0, config_root=_config - ): - self.name = name - self.device = device - self.generations = generations - - super().__init__( - self.name, generations=self.generations, config_root=config_root - ) + def _load_client(self): + if hasattr(self, "model") and self.model is not None: + return import transformers @@ -509,16 +495,21 @@ def __init__( self.name, padding_side="left" ) - self.do_sample = do_sample self.generation_config = transformers.GenerationConfig.from_pretrained( self.name ) self.generation_config.eos_token_id = self.model.config.eos_token_id self.generation_config.pad_token_id = self.model.config.eos_token_id + def _clear_client(self): + self.model = None + self.config = None + self.generation_config = None + def 
_call_model( self, prompt: str, generations_this_call: int = 1 ) -> List[Union[str, None]]: + self._load_client() self.generation_config.max_new_tokens = self.max_tokens self.generation_config.do_sample = self.do_sample self.generation_config.num_return_sequences = generations_this_call From 11ebb73dcb020ba7e42e788972bc9c798e9eae33 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Fri, 7 Jun 2024 15:45:37 -0500 Subject: [PATCH 3/9] crude implementation of limitation on parallel generator call Signed-off-by: Jeffrey Martin --- garak/generators/base.py | 1 + garak/generators/huggingface.py | 7 ++++--- garak/probes/base.py | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/garak/generators/base.py b/garak/generators/base.py index b57c52d1..4ec02a45 100644 --- a/garak/generators/base.py +++ b/garak/generators/base.py @@ -27,6 +27,7 @@ class Generator(Configurable): active = True generator_family_name = None + parallel_capable = True # support mainstream any-to-any large models # legal element for str list `modality['in']`: 'text', 'image', 'audio', 'video', '3d' diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index 843bf836..5047bbec 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -56,6 +56,7 @@ class Pipeline(Generator, HFCompatible): generator_family_name = "Hugging Face 🤗 pipeline" supports_multiple_generations = True + parallel_capable = False def __init__( self, name="", do_sample=True, generations=10, device=0, config_root=_config @@ -98,9 +99,9 @@ def _load_client(self): if _config.run.deprefix is True: self.deprefix_prompt = True - self._set_hf_context_len(self.generator.model.config) + self._set_hf_context_len(self.generator.model.config) - def _clear_client(): + def _clear_client(self): self.generator = None def _call_model( @@ -237,7 +238,7 @@ def _call_model( if isinstance(prompt, str): self.conversation.add_message({"role": "user", "content": prompt}) self.conversation
= self.generator(self.conversation) - generations = [self.conversation[-1]["content"]] + generations = [self.conversation[-1]["content"]] # what is this doing? elif isinstance(prompt, list): from transformers import Conversation diff --git a/garak/probes/base.py b/garak/probes/base.py index af0aa4c3..bc15a0bd 100644 --- a/garak/probes/base.py +++ b/garak/probes/base.py @@ -150,6 +150,7 @@ def _execute_attempt(self, this_attempt): return copy.deepcopy(this_attempt) def _execute_all(self, attempts) -> Iterable[garak.attempt.Attempt]: + """handles sending a set of attempt to the generator""" attempts_completed: Iterable[garak.attempt.Attempt] = [] if ( @@ -157,6 +158,7 @@ def _execute_all(self, attempts) -> Iterable[garak.attempt.Attempt]: and _config.system.parallel_attempts > 1 and self.parallelisable_attempts and len(attempts) > 1 + and self.generator.parallel_capable ): from multiprocessing import Pool From fd06da18e5d08bb1244224fab2f455a20292e3b0 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Mon, 10 Jun 2024 16:00:16 -0500 Subject: [PATCH 4/9] add torch `mps` support & enabled passed pipeline params * detect cuda vs mps vs cpu in a common way * guard import of OptimumPipeline Signed-off-by: Jeffrey Martin --- garak/generators/huggingface.py | 91 +++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 33 deletions(-) diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index 5047bbec..acd3be00 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -14,6 +14,7 @@ https://huggingface.co/docs/api-inference/quicktour """ +import inspect import logging import re from typing import List, Union @@ -25,7 +26,7 @@ from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration from garak import _config -from garak.exception import ModelNameMissingError +from garak.exception import ModelNameMissingError, GarakException from garak.generators.base import Generator @@ -70,6 +71,10 @@ def
__init__( self.name, generations=self.generations, config_root=config_root ) + import torch.multiprocessing as mp + + mp.set_start_method("spawn", force=True) + self._load_client() def _load_client(self): @@ -83,16 +88,17 @@ def _load_client(self): import torch.cuda - if not torch.cuda.is_available(): - logging.debug("Using CPU, torch.cuda.is_available() returned False") - self.device = -1 - - self.generator = pipeline( - "text-generation", - model=self.name, - do_sample=self.do_sample, - device=self.device, + # consider how this could be abstracted well + self.device = ( + "cuda:" + str(self.device) + if torch.cuda.is_available() + else "mps" if torch.backends.mps.is_available() else "cpu" ) + + logging.debug("Using %s, based on torch environment evaluation", self.device) + + pipline_kwargs = self._gather_pipeline_params(pipeline=pipeline) + self.generator = pipeline("text-generation", **pipline_kwargs) if not hasattr(self, "deprefix_prompt"): self.deprefix_prompt = self.name in models_to_deprefix if _config.loaded: @@ -104,6 +110,17 @@ def _load_client(self): def _clear_client(self): self.generator = None + def _gather_pipeline_params(self, pipeline): + # this may be a bit too naive as it will pass any parameter valid for the pipeline signature + args = {} + for k in inspect.signature(pipeline).parameters: + if k == "model": + # special case of known mapping as `model` may be reserved for the class + args[k] = self.name + if hasattr(self, k): + args[k] = getattr(self, k) + return args + def _call_model( self, prompt: str, generations_this_call: int = 1 ) -> List[Union[str, None]]: @@ -150,8 +167,14 @@ def _load_client(self): if hasattr(self, "generator") and self.generator is not None: return - from optimum.nvidia.pipelines import pipeline - from transformers import set_seed + try: + from optimum.nvidia.pipelines import pipeline + from transformers import set_seed + except Exception as e: + logging.exception(e) + raise GarakException( + f"Missing required 
dependencies for {self.__class__.__name__}" + ) if _config.run.seed is not None: set_seed(_config.run.seed) @@ -161,20 +184,15 @@ def _load_client(self): if not torch.cuda.is_available(): message = "OptimumPipeline needs CUDA, but torch.cuda.is_available() returned False; quitting" logging.critical(message) - raise ValueError(message) + raise GarakException(message) - use_fp8 = False + self.use_fp8 = False if _config.loaded: if "use_fp8" in _config.plugins.generators.OptimumPipeline: - use_fp8 = True - - self.generator = pipeline( - "text-generation", - model=self.name, - do_sample=self.do_sample, - device=self.device, - use_fp8=use_fp8, - ) + self.use_fp8 = True + + pipline_kwargs = self._gather_pipeline_params(pipeline=pipeline) + self.generator = pipeline("text-generation", **pipline_kwargs) if not hasattr(self, "deprefix_prompt"): self.deprefix_prompt = self.name in models_to_deprefix if _config.loaded: @@ -201,18 +219,19 @@ def _load_client(self): import torch.cuda - if not torch.cuda.is_available(): - logging.debug("Using CPU, torch.cuda.is_available() returned False") - self.device = -1 + # consider how this could be abstracted well + self.device = ( + "cuda:" + str(self.device) + if torch.cuda.is_available() + else "mps" if torch.backends.mps.is_available() else "cpu" + ) + + logging.debug("Using %s, based on torch environment evaluation", self.device) # Note that with pipeline, in order to access the tokenizer, model, or device, you must get the attribute # directly from self.generator instead of from the ConversationalPipeline object itself. 
- self.generator = pipeline( - "conversational", - model=self.name, - do_sample=self.do_sample, - device=self.device, - ) + pipline_kwargs = self._gather_pipeline_params(pipeline=pipeline) + self.generator = pipeline("conversational", **pipline_kwargs) self.conversation = Conversation() if not hasattr(self, "deprefix_prompt"): self.deprefix_prompt = self.name in models_to_deprefix @@ -460,9 +479,15 @@ def _load_client(self): if _config.run.seed is not None: transformers.set_seed(_config.run.seed) - self.init_device = "cuda:" + str(self.device) import torch.cuda + # consider how this could be abstracted well + self.init_device = ( + "cuda:" + str(self.device) + if torch.cuda.is_available() + else "mps" if torch.backends.mps.is_available() else "cpu" + ) + if not torch.cuda.is_available(): logging.debug("Using CPU, torch.cuda.is_available() returned False") self.device = -1 From 62f91a9c1211ee898a1ff77b6c90390b7bbfd1f1 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Mon, 17 Jun 2024 09:37:54 -0500 Subject: [PATCH 5/9] enable hf model or pipeline config in `hf_args` * support all generic `pipeline` args at all times * adds `do_sample` when `model` is a parameter to the `Callable` * adds `low_cpu_mem_usage` and all `pipeline` for `Callables` without `model` * consolidates optimal device selection & set when not provided by config Signed-off-by: Jeffrey Martin --- garak/generators/huggingface.py | 213 ++++++++++++++++----------- tests/generators/test_huggingface.py | 16 +- 2 files changed, 139 insertions(+), 90 deletions(-) diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index acd3be00..f0972920 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -16,8 +16,9 @@ import inspect import logging +import os import re -from typing import List, Union +from typing import Callable, List, Union import warnings import backoff @@ -33,15 +34,15 @@ models_to_deprefix = ["gpt2"] -class HFRateLimitException(Exception): 
+class HFRateLimitException(GarakException): pass -class HFLoadingException(Exception): +class HFLoadingException(GarakException): pass -class HFInternalServerError(Exception): +class HFInternalServerError(GarakException): pass @@ -51,30 +52,106 @@ def _set_hf_context_len(self, config): if isinstance(config.n_ctx, int): self.context_len = config.n_ctx + def _gather_hf_params(self, hf_constructor: Callable): + # this may be a bit too naive as it will pass any parameter valid for the pipeline signature + # this falls over when passed `from_pretrained` methods as the callable model params are not explicit + params = self.hf_args + if params["device"] is None: + params["device"] = self.device + + args = {} + + parameters = inspect.signature(hf_constructor).parameters + + if "model" in parameters: + args["model"] = self.name + # expand for + parameters = {"do_sample": True} | parameters + else: + # callable is for a Pretrained class also map standard `pipeline` params + from transformers import pipeline + + parameters = ( + {"low_cpu_mem_usage": True} + | parameters + | inspect.signature(pipeline).parameters + ) + + for k in parameters: + if k == "model": + continue # special case `model` comes from `name` in the generator + if k in params: + val = params[k] + if k == "torch_dtype" and hasattr(torch, val): + args[k] = getattr( + torch, val + ) # some model type specific classes do not yet support direct string representation + continue + if ( + k == "device" + and "device_map" in parameters + and "device_map" in params + ): + # per transformers convention hold `device_map` before `device` + continue + args[k] = params[k] + + return args + + def _select_hf_device(self): + """Determine the most efficient device for tensor load, hold any existing `device` already selected""" + import torch.cuda + + selected_device = None + if self.hf_args["device"] is not None: + if isinstance(self.hf_args["device"], int): + # this assumes that indexed only devices selections means `cuda` 
+ selected_device = torch.device("cuda:" + str(self.hf_args["device"])) + else: + selected_device = torch.device(self.hf_args["device"]) + + if selected_device is None: + selected_device = torch.device( + "cuda" + if torch.cuda.is_available() + else "mps" if torch.backends.mps.is_available() else "cpu" + ) + + if isinstance(selected_device, torch.device) and selected_device.type == "mps": + os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" + logging.debug("Enabled MPS fallback environment variable") + + logging.debug( + "Using %s, based on torch environment evaluation", selected_device + ) + return selected_device + class Pipeline(Generator, HFCompatible): """Get text generations from a locally-run Hugging Face pipeline""" + DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | { + "generations": 10, + "hf_args": { + "torch_dtype": "float16", + "do_sample": True, + "device": None, + }, + } generator_family_name = "Hugging Face 🤗 pipeline" supports_multiple_generations = True parallel_capable = False - def __init__( - self, name="", do_sample=True, generations=10, device=0, config_root=_config - ): + def __init__(self, name="", config_root=_config): self.name = name - self.generations = generations - self.do_sample = do_sample - self.device = device - super().__init__( - self.name, generations=self.generations, config_root=config_root - ) + super().__init__(self.name, config_root=config_root) import torch.multiprocessing as mp mp.set_start_method("spawn", force=True) + self.device = self._select_hf_device() self._load_client() def _load_client(self): @@ -86,18 +163,7 @@ def _load_client(self): if _config.run.seed is not None: set_seed(_config.run.seed) - import torch.cuda - - # consider how this could be abstracted well - self.device = ( - "cuda:" + str(self.device) - if torch.cuda.is_available() - else "mps" if torch.backends.mps.is_available() else "cpu" - ) - - logging.debug("Using %s, based on torch environment evaluation", self.device) - - pipline_kwargs = 
self._gather_pipeline_params(pipeline=pipeline) + pipline_kwargs = self._gather_hf_params(hf_constructor=pipeline) self.generator = pipeline("text-generation", **pipline_kwargs) if not hasattr(self, "deprefix_prompt"): self.deprefix_prompt = self.name in models_to_deprefix @@ -110,17 +176,6 @@ def _load_client(self): def _clear_client(self): self.generator = None - def _gather_pipeline_params(self, pipeline): - # this may be a bit too naive as it will pass any parameter valid for the pipeline signature - args = {} - for k in inspect.signature(pipeline).parameters: - if k == "model": - # special case of known mapping as `model` may be reserved for the class - args[k] = self.name - if hasattr(self, k): - args[k] = getattr(self, k) - return args - def _call_model( self, prompt: str, generations_this_call: int = 1 ) -> List[Union[str, None]]: @@ -191,7 +246,7 @@ def _load_client(self): if "use_fp8" in _config.plugins.generators.OptimumPipeline: self.use_fp8 = True - pipline_kwargs = self._gather_pipeline_params(pipeline=pipeline) + pipline_kwargs = self._gather_hf_params(hf_constructor=pipeline) self.generator = pipeline("text-generation", **pipline_kwargs) if not hasattr(self, "deprefix_prompt"): self.deprefix_prompt = self.name in models_to_deprefix @@ -219,18 +274,9 @@ def _load_client(self): import torch.cuda - # consider how this could be abstracted well - self.device = ( - "cuda:" + str(self.device) - if torch.cuda.is_available() - else "mps" if torch.backends.mps.is_available() else "cpu" - ) - - logging.debug("Using %s, based on torch environment evaluation", self.device) - # Note that with pipeline, in order to access the tokenizer, model, or device, you must get the attribute # directly from self.generator instead of from the ConversationalPipeline object itself. 
- pipline_kwargs = self._gather_pipeline_params(pipeline=pipeline) + pipline_kwargs = self._gather_hf_params(hf_constructor=pipeline) self.generator = pipeline("conversational", **pipline_kwargs) self.conversation = Conversation() if not hasattr(self, "deprefix_prompt"): @@ -278,7 +324,7 @@ def _call_model( return [re.sub("^" + re.escape(prompt), "", _o) for _o in outputs] -class InferenceAPI(Generator, HFCompatible): +class InferenceAPI(Generator): """Get text generations from Hugging Face Inference API""" generator_family_name = "Hugging Face 🤗 Inference API" @@ -407,7 +453,7 @@ def _pre_generate_hook(self): self.wait_for_model = False -class InferenceEndpoint(InferenceAPI, HFCompatible): +class InferenceEndpoint(InferenceAPI): """Interface for Hugging Face private endpoints Pass the model URL as the name, e.g. https://xxx.aws.endpoints.huggingface.cloud """ @@ -479,35 +525,22 @@ def _load_client(self): if _config.run.seed is not None: transformers.set_seed(_config.run.seed) - import torch.cuda - - # consider how this could be abstracted well - self.init_device = ( - "cuda:" + str(self.device) - if torch.cuda.is_available() - else "mps" if torch.backends.mps.is_available() else "cpu" - ) - - if not torch.cuda.is_available(): - logging.debug("Using CPU, torch.cuda.is_available() returned False") - self.device = -1 - self.init_device = "cpu" - trust_remote_code = self.name.startswith("mosaicml/mpt-") + model_kwargs = self._gather_hf_params( + hf_constructor=transformers.AutoConfig.from_pretrained + ) # will defer to device_map if device map was `auto` may not match self.device + self.config = transformers.AutoConfig.from_pretrained( - self.name, trust_remote_code=trust_remote_code - ) - self.config.init_device = ( - self.init_device # or "cuda:0" For fast initialization directly on GPU! 
+ self.name, trust_remote_code=trust_remote_code, **model_kwargs ) self._set_hf_context_len(self.config) + self.config.init_device = self.device # determined by Pipeline `__init__`` self.model = transformers.AutoModelForCausalLM.from_pretrained( - self.name, - config=self.config, - ).to(self.init_device) + self.name, config=self.config + ).to(self.device) if not hasattr(self, "deprefix_prompt"): self.deprefix_prompt = self.name in models_to_deprefix @@ -537,7 +570,7 @@ def _call_model( ) -> List[Union[str, None]]: self._load_client() self.generation_config.max_new_tokens = self.max_tokens - self.generation_config.do_sample = self.do_sample + self.generation_config.do_sample = self.hf_args["do_sample"] self.generation_config.num_return_sequences = generations_this_call if self.temperature is not None: self.generation_config.temperature = self.temperature @@ -550,7 +583,7 @@ def _call_model( with torch.no_grad(): inputs = self.tokenizer( prompt, truncation=True, return_tensors="pt" - ).to(self.init_device) + ).to(self.device) try: outputs = self.model.generate( @@ -574,21 +607,23 @@ def _call_model( return [re.sub("^" + re.escape(prompt), "", i) for i in text_output] -class LLaVA(Generator): +class LLaVA(Generator, HFCompatible): """Get LLaVA ([ text + image ] -> text) generations""" DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | { + "max_tokens": 4000, # "exist_tokens + max_new_tokens < 4K is the golden rule." # https://github.com/haotian-liu/LLaVA/issues/1095#:~:text=Conceptually%2C%20as%20long%20as%20the%20total%20tokens%20are%20within%204K%2C%20it%20would%20be%20fine%2C%20so%20exist_tokens%20%2B%20max_new_tokens%20%3C%204K%20is%20the%20golden%20rule. 
- "max_tokens": 4000, - # consider shifting below to kwargs or llava_kwargs that is a dict to allow more customization - "torch_dtype": torch.float16, - "low_cpu_mem_usage": True, - "device_map": "cuda:0", + "hf_args": { + "torch_dtype": "float16", + "low_cpu_mem_usage": True, + "device_map": "auto", + }, } # rewrite modality setting modality = {"in": {"text", "image"}, "out": {"text"}} + parallel_capable = False # Support Image-Text-to-Text models # https://huggingface.co/llava-hf#:~:text=Llava-,Models,-9 @@ -603,20 +638,20 @@ def __init__(self, name="", generations=10, config_root=_config): super().__init__(name, generations=generations, config_root=config_root) if self.name not in self.supported_models: raise ModelNameMissingError( - f"Invalid modal name {self.name}, current support: {self.supported_models}." + f"Invalid model name {self.name}, current support: {self.supported_models}." ) + + self.device = self._select_hf_device() + model_kwargs = self._gather_hf_params( + hf_constructor=LlavaNextForConditionalGeneration.from_pretrained + ) # will defer to device_map if device map was `auto` may not match self.device + self.processor = LlavaNextProcessor.from_pretrained(self.name) self.model = LlavaNextForConditionalGeneration.from_pretrained( - self.name, - torch_dtype=self.torch_dtype, - low_cpu_mem_usage=self.low_cpu_mem_usage, + self.name, **model_kwargs ) - if torch.cuda.is_available(): - self.model.to(self.device_map) - else: - raise RuntimeError( - "CUDA is not supported on this device. Please make sure CUDA is installed and configured properly." 
- ) + + self.model.to(self.device) def generate( self, prompt: str, generations_this_call: int = 1 @@ -630,7 +665,7 @@ def generate( raise Exception(e) inputs = self.processor(text_prompt, image_prompt, return_tensors="pt").to( - self.device_map + self.device ) exist_token_number: int = inputs.data["input_ids"].shape[1] output = self.model.generate( diff --git a/tests/generators/test_huggingface.py b/tests/generators/test_huggingface.py index f3381c94..d42c6c2a 100644 --- a/tests/generators/test_huggingface.py +++ b/tests/generators/test_huggingface.py @@ -1,11 +1,25 @@ import transformers import garak.generators.huggingface +from garak._config import GarakSubConfig DEFAULT_GENERATIONS_QTY = 10 def test_pipeline(): - g = garak.generators.huggingface.Pipeline("gpt2") + gen_config = { + "huggingface": { + "Pipeline": { + "name": "gpt2", + "hf_args": { + "device": "cpu", + }, + } + } + } + config_root = GarakSubConfig() + setattr(config_root, "generators", gen_config) + + g = garak.generators.huggingface.Pipeline("gpt2", config_root=config_root) assert g.name == "gpt2" assert g.generations == DEFAULT_GENERATIONS_QTY assert isinstance(g.generator, transformers.pipelines.text_generation.Pipeline) From 1fca1196dd69f11dd5dd909e988d2084e5f80d7e Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Mon, 17 Jun 2024 13:52:15 -0500 Subject: [PATCH 6/9] amend yaml config example --- tests/test_config.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 8d33cef1..48aac522 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -19,23 +19,27 @@ plugins: generators: huggingface: - dtype: general - gpu: 0 + hf_args: + torch_dtype: float16 Pipeline: - dtype: bfloat16 + hf_args: + device: cuda probes: test: generators: huggingface: Pipeline: - dtype: for_probe + hf_args: + torch_dtype: float16 detector: test: val: tests Blank: generators: huggingface: - gpu: 1 + hf_args: + torch_dtype: float16 
+ device: cuda:1 Pipeline: dtype: for_detector buffs: @@ -43,7 +47,8 @@ Blank: generators: huggingface: - gpu: 1 + hf_args: + device: cuda:0 Pipeline: dtype: for_detector """.encode( From f4d77b65968ad43466259a69c2b16227c236afec Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Tue, 18 Jun 2024 09:45:53 -0500 Subject: [PATCH 7/9] support merged dictionary in `Configurable` Signed-off-by: Jeffrey Martin --- garak/configurable.py | 5 +++++ tests/test_configurable.py | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/garak/configurable.py b/garak/configurable.py index 7ad768a7..efb9f566 100644 --- a/garak/configurable.py +++ b/garak/configurable.py @@ -88,6 +88,8 @@ def _apply_config(self, config): ) ): continue + if isinstance(v, dict): # if value is an existing dictionary merge + v = getattr(self, k) | v setattr(self, k, v) # This will set attribute to the full dictionary value def _apply_missing_instance_defaults(self): @@ -96,6 +98,9 @@ def _apply_missing_instance_defaults(self): for k, v in self.DEFAULT_PARAMS.items(): if not hasattr(self, k): setattr(self, k, v) + elif isinstance(v, dict): + v = v | getattr(self, k) + setattr(self, k, v) def _validate_env_var(self): if hasattr(self, "key_env_var"): diff --git a/tests/test_configurable.py b/tests/test_configurable.py index 4979beb1..7847f65b 100644 --- a/tests/test_configurable.py +++ b/tests/test_configurable.py @@ -24,7 +24,13 @@ class mockConfigurable(Configurable): # Configurable is coupled to hierarchy of plugin types __module__ = "garak.generators.mock" - DEFAULT_PARAMS = {"class_var": "from_class"} + DEFAULT_PARAMS = { + "class_var": "from_class", + "class_dict_var": { + "dict_a": "dict_val", + "dict_b": "dict_val", + }, + } def __init__( self, @@ -63,6 +69,17 @@ def test_param_provided(generator_sub_config): def test_class_vars_propagate_to_instance(generator_sub_config): m = mockConfigurable(config_root=generator_sub_config) assert m.class_var == 
m.DEFAULT_PARAMS["class_var"] + assert m.class_dict_var == m.DEFAULT_PARAMS["class_dict_var"] + + +# when a default parameter dictionary is provided merge on the resulting object +def test_class_dict_merge_to_instance(generator_sub_config): + config_dict_var = {"dict_a": "test_val", "dict_c": "test_val"} + generator_sub_config.generators["mock"]["class_dict_var"] = config_dict_var + m = mockConfigurable(config_root=generator_sub_config) + assert m.class_dict_var == m.DEFAULT_PARAMS["class_dict_var"] | config_dict_var + assert m.class_dict_var["dict_a"] == config_dict_var["dict_a"] + assert m.class_dict_var["dict_c"] == config_dict_var["dict_c"] # when a default parameter is provided and not config_root set on the resulting object From 9f19c30c75611e0587447b39596ad3cebc6f243a Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Tue, 18 Jun 2024 10:56:09 -0500 Subject: [PATCH 8/9] free tokenizer in _clear_client Signed-off-by: Jeffrey Martin --- garak/generators/huggingface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index f0972920..7fb584a1 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -563,6 +563,7 @@ def _load_client(self): def _clear_client(self): self.model = None self.config = None + self.tokenizer = None self.generation_config = None def _call_model( From 7b1382976d2f4e71c635230cb30a7db4c7cc8075 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Wed, 19 Jun 2024 09:12:43 -0500 Subject: [PATCH 9/9] explicit device support * raise error when passed negative device integer * rename parameter tracking var * remove unused import * add tests for `_select_hf_device()` Signed-off-by: Jeffrey Martin --- garak/generators/huggingface.py | 22 ++++++++++++---------- tests/generators/test_huggingface.py | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py 
index 7fb584a1..2da3f9e9 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -61,23 +61,23 @@ def _gather_hf_params(self, hf_constructor: Callable): args = {} - parameters = inspect.signature(hf_constructor).parameters + params_to_process = inspect.signature(hf_constructor).parameters - if "model" in parameters: + if "model" in params_to_process: args["model"] = self.name # expand for - parameters = {"do_sample": True} | parameters + params_to_process = {"do_sample": True} | params_to_process else: # callable is for a Pretrained class also map standard `pipeline` params from transformers import pipeline - parameters = ( + params_to_process = ( {"low_cpu_mem_usage": True} - | parameters + | params_to_process | inspect.signature(pipeline).parameters ) - for k in parameters: + for k in params_to_process: if k == "model": continue # special case `model` comes from `name` in the generator if k in params: @@ -89,7 +89,7 @@ def _gather_hf_params(self, hf_constructor: Callable): continue if ( k == "device" - and "device_map" in parameters + and "device_map" in params_to_process and "device_map" in params ): # per transformers convention hold `device_map` before `device` @@ -103,9 +103,13 @@ def _select_hf_device(self): import torch.cuda selected_device = None - if self.hf_args["device"] is not None: + if self.hf_args.get("device", None) is not None: if isinstance(self.hf_args["device"], int): # this assumes that indexed only devices selections means `cuda` + if self.hf_args["device"] < 0: + msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU." 
+ logging.critical(msg) + raise ValueError(msg) selected_device = torch.device("cuda:" + str(self.hf_args["device"])) else: selected_device = torch.device(self.hf_args["device"]) @@ -272,8 +276,6 @@ def _load_client(self): if _config.run.seed is not None: set_seed(_config.run.seed) - import torch.cuda - # Note that with pipeline, in order to access the tokenizer, model, or device, you must get the attribute # directly from self.generator instead of from the ConversationalPipeline object itself. pipline_kwargs = self._gather_hf_params(hf_constructor=pipeline) diff --git a/tests/generators/test_huggingface.py b/tests/generators/test_huggingface.py index d42c6c2a..6f6d19ec 100644 --- a/tests/generators/test_huggingface.py +++ b/tests/generators/test_huggingface.py @@ -1,3 +1,4 @@ +import pytest import transformers import garak.generators.huggingface from garak._config import GarakSubConfig @@ -68,3 +69,26 @@ def test_model(): assert len(output) == DEFAULT_GENERATIONS_QTY for item in output: assert item is None # gpt2 is known raise exception returning `None` + + +def test_select_hf_device(): + from garak.generators.huggingface import HFCompatible + import torch + + class mockHF(HFCompatible): + def __init__(self, key, value): + self.hf_args = {key: value} + pass + + m = mockHF("device", -1) + with pytest.raises(ValueError) as exc_info: + device = m._select_hf_device() + assert "CUDA device numbering starts" in str(exc_info.value) + + m = mockHF("device", "cpu") + device = m._select_hf_device() + assert device == torch.device("cpu") + + m = mockHF("device_map", "auto") + device = m._select_hf_device() + assert isinstance(device, torch.device)