From 21dd34301eaef3378911223f74356cd2ce09c477 Mon Sep 17 00:00:00 2001
From: Jeffrey Martin
Date: Thu, 27 Jun 2024 08:22:15 -0500
Subject: [PATCH] Refactor `huggingface` config support (#742)

* do not override config deprefix_prompt

Signed-off-by: Jeffrey Martin

* improve code reuse

* consolidate `__init__` where possible

* shift generator or model object creation to `_load_client()`

Signed-off-by: Jeffrey Martin

* crude implementation of limitation on parallel generator call

Signed-off-by: Jeffrey Martin

* add torch `mps` support & enable passing pipeline params

* detect cuda vs mps vs cpu in a common way

* guard import of OptimumPipeline

Signed-off-by: Jeffrey Martin

* enable hf model or pipeline config in `hf_args`

* support all generic `pipeline` args at all times

* adds `do_sample` when `model` is a parameter to the `Callable`

* adds `low_cpu_mem_usage` and all `pipeline` args for `Callables` without `model`

* consolidates optimal device selection & set when not provided by config

Signed-off-by: Jeffrey Martin

* amend yaml config example

* support merged dictionary in `Configurable`

Signed-off-by: Jeffrey Martin

* free tokenizer in _clear_client

Signed-off-by: Jeffrey Martin

* explicit device support

* raise error when passed negative device integer

* rename parameter tracking var

* remove unused import

* add tests for `_select_hf_device()`

Signed-off-by: Jeffrey Martin

---------

Signed-off-by: Jeffrey Martin
---
 garak/configurable.py                |   5 +
 garak/generators/base.py             |   1 +
 garak/generators/huggingface.py      | 319 ++++++++++++++++-----------
 garak/probes/base.py                 |   2 +
 tests/generators/test_huggingface.py |  40 +++-
 tests/test_config.py                 |  17 +-
 tests/test_configurable.py           |  19 +-
 7 files changed, 265 insertions(+), 138 deletions(-)

diff --git a/garak/configurable.py b/garak/configurable.py
index 7ad768a7..efb9f566 100644
--- a/garak/configurable.py
+++ b/garak/configurable.py
@@ -88,6 +88,8 @@ def _apply_config(self, config):
                 )
             ):
                 continue
+            if isinstance(v, dict):  # if value is an existing dictionary merge
+                v = getattr(self, k) | v
             setattr(self, k, v)  # This will set attribute to the full dictionary value
 
     def _apply_missing_instance_defaults(self):
@@ -96,6 +98,9 @@ def _apply_missing_instance_defaults(self):
         for k, v in self.DEFAULT_PARAMS.items():
             if not hasattr(self, k):
                 setattr(self, k, v)
+            elif isinstance(v, dict):
+                v = v | getattr(self, k)
+                setattr(self, k, v)
 
     def _validate_env_var(self):
         if hasattr(self, "key_env_var"):
diff --git a/garak/generators/base.py b/garak/generators/base.py
index b57c52d1..4ec02a45 100644
--- a/garak/generators/base.py
+++ b/garak/generators/base.py
@@ -27,6 +27,7 @@ class Generator(Configurable):
 
     active = True
     generator_family_name = None
+    parallel_capable = True
 
     # support mainstream any-to-any large models
     # legal element for str list `modality['in']`: 'text', 'image', 'audio', 'video', '3d'
diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index f7bffa27..15a8bfe0 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -14,9 +14,11 @@
 https://huggingface.co/docs/api-inference/quicktour
 """
 
+import inspect
 import logging
+import os
 import re
-from typing import List, Union
+from typing import Callable, List, Union
 import warnings
 
 import backoff
@@ -25,22 +27,22 @@
 from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
 
 from garak import _config
-from garak.exception import ModelNameMissingError
+from garak.exception import ModelNameMissingError, GarakException
 from garak.generators.base import Generator
 
 models_to_deprefix = ["gpt2"]
 
 
-class HFRateLimitException(Exception):
+class HFRateLimitException(GarakException):
     pass
 
 
-class HFLoadingException(Exception):
+class HFLoadingException(GarakException):
     pass
 
 
-class HFInternalServerError(Exception):
+class HFInternalServerError(GarakException):
     pass
 
 
@@ -50,53 +52,138 @@ def _set_hf_context_len(self, config):
         if isinstance(config.n_ctx, int):
             self.context_len = config.n_ctx
 
+    def _gather_hf_params(self, hf_constructor: Callable):
+        # this may be a bit too naive as it will pass any parameter valid for the pipeline signature
+        # this falls over when passed `from_pretrained` methods, as the callable model params are not explicit
+        params = self.hf_args
+        if params["device"] is None:
+            params["device"] = self.device
+
+        args = {}
+
+        params_to_process = inspect.signature(hf_constructor).parameters
+
+        if "model" in params_to_process:
+            args["model"] = self.name
+            # expand for
+            params_to_process = {"do_sample": True} | params_to_process
+        else:
+            # callable is for a Pretrained class; also map standard `pipeline` params
+            from transformers import pipeline
+
+            params_to_process = (
+                {"low_cpu_mem_usage": True}
+                | params_to_process
+                | inspect.signature(pipeline).parameters
+            )
+
+        for k in params_to_process:
+            if k == "model":
+                continue  # special case: `model` comes from `name` in the generator
+            if k in params:
+                val = params[k]
+                if k == "torch_dtype" and hasattr(torch, val):
+                    args[k] = getattr(
+                        torch, val
+                    )  # some model type specific classes do not yet support direct string representation
+                    continue
+                if (
+                    k == "device"
+                    and "device_map" in params_to_process
+                    and "device_map" in params
+                ):
+                    # per transformers convention, prefer `device_map` over `device`
+                    continue
+                args[k] = params[k]
+
+        return args
+
+    def _select_hf_device(self):
+        """Determine the most efficient device for tensor load, honoring any `device` already selected"""
+        import torch.cuda
+
+        selected_device = None
+        if self.hf_args.get("device", None) is not None:
+            if isinstance(self.hf_args["device"], int):
+                # this assumes that an integer-only device selection means `cuda`
+                if self.hf_args["device"] < 0:
+                    msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU."
+                    logging.critical(msg)
+                    raise ValueError(msg)
+                selected_device = torch.device("cuda:" + str(self.hf_args["device"]))
+            else:
+                selected_device = torch.device(self.hf_args["device"])
+
+        if selected_device is None:
+            selected_device = torch.device(
+                "cuda"
+                if torch.cuda.is_available()
+                else "mps" if torch.backends.mps.is_available() else "cpu"
+            )
+
+        if isinstance(selected_device, torch.device) and selected_device.type == "mps":
+            os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+            logging.debug("Enabled MPS fallback environment variable")
+
+        logging.debug(
+            "Using %s, based on torch environment evaluation", selected_device
+        )
+        return selected_device
+
 
 class Pipeline(Generator, HFCompatible):
     """Get text generations from a locally-run Hugging Face pipeline"""
 
+    DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | {
+        "generations": 10,
+        "hf_args": {
+            "torch_dtype": "float16",
+            "do_sample": True,
+            "device": None,
+        },
+    }
     generator_family_name = "Hugging Face 🤗 pipeline"
     supports_multiple_generations = True
+    parallel_capable = False
 
-    def __init__(
-        self, name="", do_sample=True, generations=10, device=0, config_root=_config
-    ):
+    def __init__(self, name="", config_root=_config):
         self.name = name
-        self.generations = generations
-        self.do_sample = do_sample
-        self.device = device
-        self._load_config(config_root)
-        super().__init__(
-            self.name, generations=self.generations, config_root=config_root
-        )
+        super().__init__(self.name, config_root=config_root)
+
+        import torch.multiprocessing as mp
+
+        mp.set_start_method("spawn", force=True)
+
+        self.device = self._select_hf_device()
+        self._load_client()
+
+    def _load_client(self):
+        if hasattr(self, "generator") and self.generator is not None:
+            return
 
         from transformers import pipeline, set_seed
 
         if _config.run.seed is not None:
             set_seed(_config.run.seed)
 
-        import torch.cuda
-
-        if not torch.cuda.is_available():
-            logging.debug("Using CPU, torch.cuda.is_available() returned False")
-            self.device = -1
-
-        self.generator = pipeline(
-            "text-generation",
-            model=self.name,
-            do_sample=self.do_sample,
-            device=self.device,
-        )
-        self.deprefix_prompt = self.name in models_to_deprefix
+        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
+        self.generator = pipeline("text-generation", **pipeline_kwargs)
+        if not hasattr(self, "deprefix_prompt"):
+            self.deprefix_prompt = self.name in models_to_deprefix
         if _config.loaded:
             if _config.run.deprefix is True:
                 self.deprefix_prompt = True
 
-        self._set_hf_context_len(self.generator.model.config)
+        self._set_hf_context_len(self.generator.model.config)
+
+    def _clear_client(self):
+        self.generator = None
 
     def _call_model(
         self, prompt: str, generations_this_call: int = 1
     ) -> List[Union[str, None]]:
+        self._load_client()
         with warnings.catch_warnings():
             warnings.simplefilter("ignore", category=UserWarning)
             try:
@@ -135,21 +222,18 @@ class OptimumPipeline(Pipeline, HFCompatible):
     supports_multiple_generations = True
     doc_uri = "https://huggingface.co/blog/optimum-nvidia"
 
-    def __init__(
-        self, name="", do_sample=True, generations=10, device=0, config_root=_config
-    ):
-        self.name = name
+    def _load_client(self):
+        if hasattr(self, "generator") and self.generator is not None:
+            return
 
-        super().__init__(
-            self.name,
-            do_sample=do_sample,
-            generations=generations,
-            device=device,
-            config_root=config_root,
-        )
-
-        from optimum.nvidia.pipelines import pipeline
-        from transformers import set_seed
+        try:
+            from optimum.nvidia.pipelines import pipeline
+            from transformers import set_seed
+        except Exception as e:
+            logging.exception(e)
+            raise GarakException(
+                f"Missing required dependencies for {self.__class__.__name__}"
+            )
 
         if _config.run.seed is not None:
             set_seed(_config.run.seed)
@@ -159,21 +243,17 @@ def __init__(
         if not torch.cuda.is_available():
             message = "OptimumPipeline needs CUDA, but torch.cuda.is_available() returned False; quitting"
             logging.critical(message)
-            raise ValueError(message)
+            raise GarakException(message)
 
-        use_fp8 = False
+        self.use_fp8 = False
         if _config.loaded:
             if "use_fp8" in _config.plugins.generators.OptimumPipeline:
-                use_fp8 = True
-
-        self.generator = pipeline(
-            "text-generation",
-            model=self.name,
-            do_sample=self.do_sample,
-            device=self.device,
-            use_fp8=use_fp8,
-        )
-        self.deprefix_prompt = name in models_to_deprefix
+                self.use_fp8 = True
+
+        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
+        self.generator = pipeline("text-generation", **pipeline_kwargs)
+        if not hasattr(self, "deprefix_prompt"):
+            self.deprefix_prompt = self.name in models_to_deprefix
         if _config.loaded:
             if _config.run.deprefix is True:
                 self.deprefix_prompt = True
@@ -181,45 +261,28 @@ def __init__(
         self._set_hf_context_len(self.generator.model.config)
 
 
-class ConversationalPipeline(Generator, HFCompatible):
+class ConversationalPipeline(Pipeline, HFCompatible):
     """Conversational text generation using HuggingFace pipelines"""
 
     generator_family_name = "Hugging Face 🤗 pipeline for conversations"
     supports_multiple_generations = True
 
-    def __init__(
-        self, name="", do_sample=True, generations=10, device=0, config_root=_config
-    ):
-        self.name = name
-        self.do_sample = do_sample
-        self.generations = generations
-        self.device = device
-
-        super().__init__(
-            self.name, generations=self.generations, config_root=config_root
-        )
+    def _load_client(self):
+        if hasattr(self, "generator") and self.generator is not None:
+            return
 
         from transformers import pipeline, set_seed, Conversation
 
         if _config.run.seed is not None:
             set_seed(_config.run.seed)
 
-        import torch.cuda
-
-        if not torch.cuda.is_available():
-            logging.debug("Using CPU, torch.cuda.is_available() returned False")
-            self.device = -1
-
         # Note that with pipeline, in order to access the tokenizer, model, or device, you must get the attribute
         # directly from self.generator instead of from the ConversationalPipeline object itself.
-        self.generator = pipeline(
-            "conversational",
-            model=self.name,
-            do_sample=self.do_sample,
-            device=self.device,
-        )
+        pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
+        self.generator = pipeline("conversational", **pipeline_kwargs)
         self.conversation = Conversation()
-        self.deprefix_prompt = self.name in models_to_deprefix
+        if not hasattr(self, "deprefix_prompt"):
+            self.deprefix_prompt = self.name in models_to_deprefix
         if _config.loaded:
             if _config.run.deprefix is True:
                 self.deprefix_prompt = True
@@ -236,12 +299,13 @@ def _call_model(
     ) -> List[Union[str, None]]:
         """Take a conversation as a list of dictionaries and feed it to the model"""
 
+        self._load_client()
         # If conversation is provided as a list of dicts, create the conversation.
        # Otherwise, maintain state in Generator
         if isinstance(prompt, str):
             self.conversation.add_message({"role": "user", "content": prompt})
             self.conversation = self.generator(self.conversation)
-            generations = [self.conversation[-1]["content"]]
+            generations = [self.conversation[-1]["content"]]  # what is this doing?
         elif isinstance(prompt, list):
             from transformers import Conversation
@@ -262,7 +326,7 @@ def _call_model(
         return [re.sub("^" + re.escape(prompt), "", _o) for _o in outputs]
 
 
-class InferenceAPI(Generator, HFCompatible):
+class InferenceAPI(Generator):
     """Get text generations from Hugging Face Inference API"""
 
     generator_family_name = "Hugging Face 🤗 Inference API"
@@ -391,7 +455,7 @@ def _pre_generate_hook(self):
         self.wait_for_model = False
 
 
-class InferenceEndpoint(InferenceAPI, HFCompatible):
+class InferenceEndpoint(InferenceAPI):
     """Interface for Hugging Face private endpoints
     Pass the model URL as the name, e.g. https://xxx.aws.endpoints.huggingface.cloud
     """
@@ -448,53 +512,40 @@ def _call_model(
         return [output]
 
 
-class Model(Generator, HFCompatible):
+class Model(Pipeline, HFCompatible):
     """Get text generations from a locally-run Hugging Face model"""
 
     generator_family_name = "Hugging Face 🤗 model"
     supports_multiple_generations = True
 
-    def __init__(
-        self, name="", do_sample=True, generations=10, device=0, config_root=_config
-    ):
-        self.name = name
-        self.device = device
-        self.generations = generations
-
-        super().__init__(
-            self.name, generations=self.generations, config_root=config_root
-        )
+    def _load_client(self):
+        if hasattr(self, "model") and self.model is not None:
+            return
 
         import transformers
 
         if _config.run.seed is not None:
             transformers.set_seed(_config.run.seed)
 
-        self.init_device = "cuda:" + str(self.device)
-        import torch.cuda
-
-        if not torch.cuda.is_available():
-            logging.debug("Using CPU, torch.cuda.is_available() returned False")
-            self.device = -1
-            self.init_device = "cpu"
-
         trust_remote_code = self.name.startswith("mosaicml/mpt-")
+        model_kwargs = self._gather_hf_params(
+            hf_constructor=transformers.AutoConfig.from_pretrained
+        )  # will defer to device_map if device_map was `auto`; may not match self.device
+
         self.config = transformers.AutoConfig.from_pretrained(
-            self.name, trust_remote_code=trust_remote_code
-        )
-        self.config.init_device = (
-            self.init_device  # or "cuda:0" For fast initialization directly on GPU!
+            self.name, trust_remote_code=trust_remote_code, **model_kwargs
         )
         self._set_hf_context_len(self.config)
+        self.config.init_device = self.device  # determined by Pipeline `__init__`
 
         self.model = transformers.AutoModelForCausalLM.from_pretrained(
-            self.name,
-            config=self.config,
-        ).to(self.init_device)
+            self.name, config=self.config
+        ).to(self.device)
 
-        self.deprefix_prompt = self.name in models_to_deprefix
+        if not hasattr(self, "deprefix_prompt"):
+            self.deprefix_prompt = self.name in models_to_deprefix
 
         if self.config.tokenizer_class:
             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
@@ -505,18 +556,24 @@ def __init__(
                 self.name, padding_side="left"
             )
 
-        self.do_sample = do_sample
         self.generation_config = transformers.GenerationConfig.from_pretrained(
             self.name
         )
         self.generation_config.eos_token_id = self.model.config.eos_token_id
         self.generation_config.pad_token_id = self.model.config.eos_token_id
 
+    def _clear_client(self):
+        self.model = None
+        self.config = None
+        self.tokenizer = None
+        self.generation_config = None
+
     def _call_model(
         self, prompt: str, generations_this_call: int = 1
     ) -> List[Union[str, None]]:
+        self._load_client()
         self.generation_config.max_new_tokens = self.max_tokens
-        self.generation_config.do_sample = self.do_sample
+        self.generation_config.do_sample = self.hf_args["do_sample"]
         self.generation_config.num_return_sequences = generations_this_call
         if self.temperature is not None:
             self.generation_config.temperature = self.temperature
@@ -529,7 +586,7 @@ def _call_model(
             with torch.no_grad():
                 inputs = self.tokenizer(
                     prompt, truncation=True, return_tensors="pt"
-                ).to(self.init_device)
+                ).to(self.device)
 
                 try:
                     outputs = self.model.generate(
@@ -553,21 +610,23 @@ def _call_model(
         return [re.sub("^" + re.escape(prompt), "", i) for i in text_output]
 
 
-class LLaVA(Generator):
+class LLaVA(Generator, HFCompatible):
     """Get LLaVA ([ text + image ] -> text) generations"""
 
     DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | {
+        "max_tokens": 4000,
         # "exist_tokens + max_new_tokens < 4K is the golden rule."
         # https://github.com/haotian-liu/LLaVA/issues/1095#:~:text=Conceptually%2C%20as%20long%20as%20the%20total%20tokens%20are%20within%204K%2C%20it%20would%20be%20fine%2C%20so%20exist_tokens%20%2B%20max_new_tokens%20%3C%204K%20is%20the%20golden%20rule.
-        "max_tokens": 4000,
-        # consider shifting below to kwargs or llava_kwargs that is a dict to allow more customization
-        "torch_dtype": torch.float16,
-        "low_cpu_mem_usage": True,
-        "device_map": "cuda:0",
+        "hf_args": {
+            "torch_dtype": "float16",
+            "low_cpu_mem_usage": True,
+            "device_map": "auto",
+        },
     }
 
     # rewrite modality setting
     modality = {"in": {"text", "image"}, "out": {"text"}}
+    parallel_capable = False
 
     # Support Image-Text-to-Text models
     # https://huggingface.co/llava-hf#:~:text=Llava-,Models,-9
@@ -582,20 +641,20 @@ def __init__(self, name="", generations=10, config_root=_config):
         super().__init__(name, generations=generations, config_root=config_root)
         if self.name not in self.supported_models:
             raise ModelNameMissingError(
-                f"Invalid modal name {self.name}, current support: {self.supported_models}."
+                f"Invalid model name {self.name}, current support: {self.supported_models}."
             )
+
+        self.device = self._select_hf_device()
+        model_kwargs = self._gather_hf_params(
+            hf_constructor=LlavaNextForConditionalGeneration.from_pretrained
+        )  # will defer to device_map if device_map was `auto`; may not match self.device
+
         self.processor = LlavaNextProcessor.from_pretrained(self.name)
         self.model = LlavaNextForConditionalGeneration.from_pretrained(
-            self.name,
-            torch_dtype=self.torch_dtype,
-            low_cpu_mem_usage=self.low_cpu_mem_usage,
+            self.name, **model_kwargs
         )
-        if torch.cuda.is_available():
-            self.model.to(self.device_map)
-        else:
-            raise RuntimeError(
-                "CUDA is not supported on this device. Please make sure CUDA is installed and configured properly."
-            )
+
+        self.model.to(self.device)
 
     def generate(
         self, prompt: str, generations_this_call: int = 1
@@ -609,7 +668,7 @@ def generate(
             raise Exception(e)
 
         inputs = self.processor(text_prompt, image_prompt, return_tensors="pt").to(
-            self.device_map
+            self.device
         )
         exist_token_number: int = inputs.data["input_ids"].shape[1]
         output = self.model.generate(
diff --git a/garak/probes/base.py b/garak/probes/base.py
index af0aa4c3..bc15a0bd 100644
--- a/garak/probes/base.py
+++ b/garak/probes/base.py
@@ -150,6 +150,7 @@ def _execute_attempt(self, this_attempt):
         return copy.deepcopy(this_attempt)
 
     def _execute_all(self, attempts) -> Iterable[garak.attempt.Attempt]:
+        """handles sending a set of attempts to the generator"""
         attempts_completed: Iterable[garak.attempt.Attempt] = []
 
         if (
@@ -157,6 +158,7 @@ def _execute_all(self, attempts) -> Iterable[garak.attempt.Attempt]:
             and _config.system.parallel_attempts > 1
             and self.parallelisable_attempts
             and len(attempts) > 1
+            and self.generator.parallel_capable
         ):
             from multiprocessing import Pool
 
diff --git a/tests/generators/test_huggingface.py b/tests/generators/test_huggingface.py
index f3381c94..6f6d19ec 100644
--- a/tests/generators/test_huggingface.py
+++ b/tests/generators/test_huggingface.py
@@ -1,11 +1,26 @@
+import pytest
 import transformers
 import garak.generators.huggingface
+from garak._config import GarakSubConfig
 
 DEFAULT_GENERATIONS_QTY = 10
 
 
 def test_pipeline():
-    g = garak.generators.huggingface.Pipeline("gpt2")
+    gen_config = {
+        "huggingface": {
+            "Pipeline": {
+                "name": "gpt2",
+                "hf_args": {
+                    "device": "cpu",
+                },
+            }
+        }
+    }
+    config_root = GarakSubConfig()
+    setattr(config_root, "generators", gen_config)
+
+    g = garak.generators.huggingface.Pipeline("gpt2", config_root=config_root)
     assert g.name == "gpt2"
     assert g.generations == DEFAULT_GENERATIONS_QTY
     assert isinstance(g.generator, transformers.pipelines.text_generation.Pipeline)
@@ -54,3 +69,26 @@ def test_model():
     assert len(output) == DEFAULT_GENERATIONS_QTY
     for item in output:
         assert item is None  # gpt2 is known raise exception returning `None`
+
+
+def test_select_hf_device():
+    from garak.generators.huggingface import HFCompatible
+    import torch
+
+    class mockHF(HFCompatible):
+        def __init__(self, key, value):
+            self.hf_args = {key: value}
+            pass
+
+    m = mockHF("device", -1)
+    with pytest.raises(ValueError) as exc_info:
+        device = m._select_hf_device()
+    assert "CUDA device numbering starts" in str(exc_info.value)
+
+    m = mockHF("device", "cpu")
+    device = m._select_hf_device()
+    assert device == torch.device("cpu")
+
+    m = mockHF("device_map", "auto")
+    device = m._select_hf_device()
+    assert isinstance(device, torch.device)
diff --git a/tests/test_config.py b/tests/test_config.py
index 8d33cef1..48aac522 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -19,23 +19,27 @@
 plugins:
   generators:
     huggingface:
-      dtype: general
-      gpu: 0
+      hf_args:
+        torch_dtype: float16
      Pipeline:
-        dtype: bfloat16
+        hf_args:
+          device: cuda
   probes:
     test:
       generators:
         huggingface:
           Pipeline:
-            dtype: for_probe
+            hf_args:
+              torch_dtype: float16
   detector:
     test:
       val: tests
       Blank:
        generators:
           huggingface:
-            gpu: 1
+            hf_args:
+              torch_dtype: float16
+              device: cuda:1
             Pipeline:
               dtype: for_detector
   buffs:
@@ -43,7 +47,8 @@
     Blank:
       generators:
         huggingface:
-          gpu: 1
+          hf_args:
+            device: cuda:0
           Pipeline:
             dtype: for_detector
 """.encode(
diff --git a/tests/test_configurable.py b/tests/test_configurable.py
index 4979beb1..7847f65b 100644
--- a/tests/test_configurable.py
+++ b/tests/test_configurable.py
@@ -24,7 +24,13 @@ class mockConfigurable(Configurable):
     # Configurable is coupled to hierarchy of plugin types
     __module__ = "garak.generators.mock"
 
-    DEFAULT_PARAMS = {"class_var": "from_class"}
+    DEFAULT_PARAMS = {
+        "class_var": "from_class",
+        "class_dict_var": {
+            "dict_a": "dict_val",
+            "dict_b": "dict_val",
+        },
+    }
 
     def __init__(
         self,
@@ -63,6 +69,17 @@ def test_param_provided(generator_sub_config):
 def test_class_vars_propagate_to_instance(generator_sub_config):
     m = mockConfigurable(config_root=generator_sub_config)
     assert m.class_var == m.DEFAULT_PARAMS["class_var"]
+    assert m.class_dict_var == m.DEFAULT_PARAMS["class_dict_var"]
+
+
+# when a default parameter dictionary is provided merge on the resulting object
+def test_class_dict_merge_to_instance(generator_sub_config):
+    config_dict_var = {"dict_a": "test_val", "dict_c": "test_val"}
+    generator_sub_config.generators["mock"]["class_dict_var"] = config_dict_var
+    m = mockConfigurable(config_root=generator_sub_config)
+    assert m.class_dict_var == m.DEFAULT_PARAMS["class_dict_var"] | config_dict_var
+    assert m.class_dict_var["dict_a"] == config_dict_var["dict_a"]
+    assert m.class_dict_var["dict_c"] == config_dict_var["dict_c"]
 
 
 # when a default parameter is provided and not config_root set on the resulting object
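
For reference, a minimal usage sketch of the reworked config path (illustrative only, not part of the patch): it mirrors `test_pipeline` and the `Configurable` dictionary merge above, so `hf_args` supplied in config are merged over the Pipeline `DEFAULT_PARAMS` rather than replacing them. Names and layout follow the tests in this patch and may differ in later garak versions.

# Illustrative sketch based on tests/generators/test_huggingface.py in this patch;
# assumes garak with this change applied.
from garak._config import GarakSubConfig
import garak.generators.huggingface

gen_config = {
    "huggingface": {
        "Pipeline": {
            "name": "gpt2",
            "hf_args": {
                "device": "cpu",  # omit to let _select_hf_device() pick cuda / mps / cpu
            },
        }
    }
}
config_root = GarakSubConfig()
setattr(config_root, "generators", gen_config)

g = garak.generators.huggingface.Pipeline("gpt2", config_root=config_root)
# keys not set in the config keep their DEFAULT_PARAMS values via the dict merge
assert g.hf_args["device"] == "cpu" and g.hf_args["do_sample"] is True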