diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
index a9e5dc30..ec38f1fe 100644
--- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/actuator.py
@@ -3,8 +3,8 @@
 import json
 import logging
-import os
 import uuid
+from pathlib import Path

 import ray
 import yaml
@@ -54,14 +54,27 @@ def catalog(
     ) -> ExperimentCatalog:
         """Returns the Experiments your actuator provides"""

-        # The catalog be formed in code here or read from a file containing the Experiments models
-        # This shows reading from a file
+        # Load experiment definitions from the YAML files contained in the `experiments` directory.
+        # NOTE: only plain files may be placed in the experiments directory (subdirectories are skipped),
+        # but each file can contain multiple experiment definitions
+        curr_path = Path(__file__)
+        exp_dir = curr_path.parent / Path("experiments")
+        logger.debug(f"Experiments dir {exp_dir.absolute()}")
+        experiments = []
+        for exp_file in exp_dir.iterdir():
+            if exp_file.is_dir():
+                continue
+
+            logger.debug(f"Loading experiments from {exp_file.name}")
+            try:
+                file_data = exp_file.read_text()
+                data = yaml.safe_load(file_data)
+            except yaml.YAMLError:
+                error_message = f"File {exp_file.name} contains malformed YAML"
+                logger.error(error_message)
+                raise ValueError(error_message)
-        path = os.path.abspath(__file__)
-        path = os.path.split(path)[0]
-        with open(os.path.join(path, "experiments.yaml")) as f:
-            data = yaml.safe_load(f)
-        experiments = [Experiment(**data[e]) for e in data]
+            experiments.extend([Experiment.model_validate(data[e]) for e in data])

         return ExperimentCatalog(
             catalogIdentifier=cls.identifier,
@@ -176,7 +189,11 @@ async def submit(
         if experiment.deprecated is True:
             raise DeprecatedExperimentError(f"Experiment {experiment} is deprecated")

-        if experiment.identifier == "performance-testing-full":
+        if experiment.identifier in [
+            "performance-testing-full",
+            "performance-testing-geospatial-full",
+            "performance-testing-geospatial-full-custom-dataset",
+        ]:
             if not self.env_manager:
                 raise MissingConfigurationForExperimentError(
                     f"Actuator configuration did not contain sufficient information for a kubernetes environment manager to be created. 
" @@ -197,7 +214,7 @@ async def submit( ) # Execute experiment - # Note: Here the experiment instance is just past for convenience since we retrieved it above + # Note: Here the experiment instance is just passed for convenience since we retrieved it above run_resource_and_workload_experiment.remote( request=request, experiment=experiment, diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl new file mode 100644 index 00000000..693bbc09 --- /dev/null +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/india_url_in_b64_out.jsonl @@ -0,0 +1 @@ +{"prompt":{"data": {"data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif","data_format": "url","out_data_format": "b64_json","indices": [1, 2, 3, 8, 11, 12]},"priority": 0,"softmax": false}} diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl similarity index 100% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/geospatial_valencia.jsonl rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/datasets/valencia_url_in_b64_out.jsonl diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py index 3da664a8..d588a884 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiment_executor.py @@ -7,6 +7,7 @@ import subprocess import sys import time +import traceback import ray from ado_actuators.vllm_performance.actuator_parameters import ( @@ -22,6 +23,7 @@ VLLMDtype, ) from ado_actuators.vllm_performance.vllm_performance_test.execute_benchmark import ( + execute_geospatial_benchmark, execute_random_benchmark, ) from ray.actor import ActorHandle @@ -142,6 +144,9 @@ def _create_environment( reuse_deployment=False, pvc_name=actuator.pvc_template, namespace=actuator.namespace, + skip_tokenizer_init=values.get("skip_tokenizer_init"), + enforce_eager=values.get("enforce_eager"), + io_processor_plugin=values.get("io_processor_plugin"), ) # Update manager env_manager.done_creating.remote(definition=definition) @@ -151,6 +156,7 @@ def _create_environment( logger.error( f"Attempt {attempt}. 
Failed to create test environment {e}" ) + logger.error(traceback.format_exception(e)) error = f"Failed to create test environment {e}" time.sleep(tmout) tmout *= 2 @@ -279,23 +285,42 @@ def run_resource_and_workload_experiment( start = time.time() result = None try: - result = execute_random_benchmark( - base_url=base_url, - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier in [ + "performance-testing-geospatial-full", + "performance-testing-geospatial-full-custom-dataset", + ]: + result = execute_geospatial_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + burstiness=float(values.get("burstiness")), + dataset=values.get("dataset"), + ) + else: + result = execute_random_benchmark( + base_url=base_url, + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + dataset=values.get("dataset"), + ) logger.debug(f"benchmark executed in {time.time() - start} sec") except Exception as e: - logger.error(f"Failed to execute VLLM performance test {e}") + logger.error(traceback.format_exception(e)) error = f"Failed to execute VLLM performance test {e}" finally: if pf is not None: @@ -379,20 +404,36 @@ def run_workload_experiment( error = None measured_values = [] try: - result = execute_random_benchmark( - base_url=values.get("endpoint"), - model=values.get("model"), - interpreter=actuator_parameters.interpreter, - num_prompts=int(values.get("num_prompts")), - request_rate=request_rate, - max_concurrency=max_concurrency, - hf_token=actuator_parameters.hf_token, - benchmark_retries=actuator_parameters.benchmark_retries, - retries_timeout=actuator_parameters.retries_timeout, - number_input_tokens=int(values.get("number_input_tokens")), - max_output_tokens=int(values.get("max_output_tokens")), - burstiness=float(values.get("burstiness")), - ) + if experiment.identifier == "performance-testing-geospatial-endpoint": + result = execute_geospatial_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + 
burstiness=float(values.get("burstiness")), + dataset=values.get("dataset"), + ) + else: + result = execute_random_benchmark( + base_url=values.get("endpoint"), + model=values.get("model"), + interpreter=actuator_parameters.interpreter, + num_prompts=int(values.get("num_prompts")), + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=actuator_parameters.hf_token, + benchmark_retries=actuator_parameters.benchmark_retries, + retries_timeout=actuator_parameters.retries_timeout, + number_input_tokens=int(values.get("number_input_tokens")), + max_output_tokens=int(values.get("max_output_tokens")), + burstiness=float(values.get("burstiness")), + dataset=values.get("dataset"), + ) except Exception as e: logger.error(f"Failed to execute VLLM performance test {e}") error = f"Failed to execute VLLM performance test {e}" diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml similarity index 90% rename from plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml rename to plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml index 1d03b13a..a60a17d4 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing.yaml @@ -56,6 +56,12 @@ performance_testing-full: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [ 1, 10000 ] interval: 1 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'random' ] - identifier: image metadata: description: "(deployment) Docker image to use to create vllm deployments" @@ -120,6 +126,18 @@ performance_testing-full: propertyDomain: variableType: "CATEGORICAL_VARIABLE_TYPE" values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer initialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce PyTorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] defaultParameterization: - property: identifier: 'image' @@ -149,6 +167,9 @@ performance_testing-full: - property: identifier: 'max_output_tokens' value: 128 + - property: + identifier: 'dataset' + value: 'random' - property: identifier: 'gpu_memory_utilization' value: .9 @@ -167,6 +188,12 @@ performance_testing-full: - property: identifier: 'gpu_type' value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: False + - property: + identifier: 'enforce_eager' + value: False # measurements targetProperties: - identifier: "duration" @@ -221,6 +248,7 @@ performance_testing-endpoint: description: 'The endpoint(s) to test' propertyDomain: variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["http://localhost:8000"] - identifier: 'request_rate' metadata: description: "The number of requests to send per second" @@ -264,6 +292,12 @@ performance_testing-endpoint: variableType: 'DISCRETE_VARIABLE_TYPE' domainRange: [ -1, 500 ] # -1 means no concurrency control interval: 1 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to 
be used for the experiment"
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: [ 'random' ]
   defaultParameterization:
     - value: 1000
       property:
         identifier: 'num_prompts'
@@ -280,6 +314,9 @@ performance_testing-endpoint:
     - value: 128
       property:
         identifier: 'max_output_tokens'
+    - property:
+        identifier: 'dataset'
+      value: 'random'
   # measurements
   targetProperties:
     - identifier: "duration"
@@ -318,4 +355,4 @@ performance_testing-endpoint:
     - identifier: "p75_e2el_ms"
     - identifier: "p99_e2el_ms"
   metadata:
-    description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations'
+    description: 'Test inference performance of a model served by vLLM endpoint across inference workload configurations'
\ No newline at end of file
diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
new file mode 100644
index 00000000..65ee2733
--- /dev/null
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/experiments/performance_testing_geospatial.yaml
@@ -0,0 +1,474 @@
+# Copyright (c) IBM Corporation
+# SPDX-License-Identifier: MIT
+
+# The input to an experiment is an Entity. For the Entity to be a valid input,
+# its properties must match what is defined here
+performance_testing-geospatial-endpoint:
+  identifier: performance-testing-geospatial-endpoint
+  actuatorIdentifier: "vllm_performance"
+  requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values
+    - identifier: 'model'
+      metadata:
+        description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation'
+      propertyDomain:
+        variableType: "CATEGORICAL_VARIABLE_TYPE"
+        values: ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
+    - identifier: 'endpoint'
+      metadata:
+        description: 'The endpoint(s) to test'
+      propertyDomain:
+        variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE"
+        values: ["http://localhost:8000"]
+    - identifier: 'request_rate'
+      metadata:
+        description: "The number of requests to send per second"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [-1,1000]
+        interval: 1 # -1 means send all requests at time 0
+  optionalProperties:
+    - identifier: 'num_prompts'
+      metadata:
+        description: "The number of prompts to send (total number of requests)"
+      propertyDomain:
+        variableType: 'DISCRETE_VARIABLE_TYPE'
+        domainRange: [1,10001]
+        interval: 1
+    - identifier: 'burstiness'
+      metadata:
+        description: "The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'dataset' + metadata: + description: "The dataset to be used for the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ] + defaultParameterization: + - value: 100 + property: + identifier: 'num_prompts' + - value: -1 + property: + identifier: 'max_concurrency' + - value: 1.0 + property: + identifier: 'burstiness' + - property: + identifier: 'dataset' + value: 'india_url_in_b64_out' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'Test inference performance of a geospatial model served by vLLM endpoint across inference workload configurations' +performance_testing-geospatial-full: + identifier: performance-testing-geospatial-full + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'india_url_in_b64_out', 'valencia_url_in_b64_out' ] + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer initialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce pytorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Processor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ None, "terratorch_segmentation" ] + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 
+ - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" + - property: + identifier: 'dataset' + value: 'india_url_in_b64_out' + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' +performance_testing-geospatial-full-custom-dataset: + identifier: performance-testing-geospatial-full-custom-dataset + actuatorIdentifier: "vllm_performance" + requiredProperties: # Any entity passed to this experiment must have constitutive properties with these values + - identifier: 'model' + metadata: + description: 'model to use for testing. Assumed to be served by all endpoints tested. Required to obtain correct tokenizer for benchmarking metrics calculation' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" ] + - identifier: 'request_rate' + metadata: + description: "(benchmark) The number of requests to send per second" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [-1,1000] + interval: 1 # -1 means send all requests at time 0 + - identifier: 'dataset' + metadata: + description: "(benchmark) The dataset to be used for the experiment" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: ["custom_dataset.jsonl"] + optionalProperties: + - identifier: 'num_prompts' + metadata: + description: "(benchmark) The number of prompts to send (total number of requests)" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,10001] + interval: 1 + - identifier: 'max_concurrency' + metadata: + description: "(benchmark) The maximum number of concurrent requests to send" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ -1, 500 ] # -1 means no concurrency control + interval: 1 + - identifier: 'burstiness' + metadata: + description: "(benchmark) The burstiness of the requests - 1.0 is a Poisson distribution with rate = request_rate. Others are gamma distributions with lambda = request_rate and shape = burstiness." 
+ propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 0, 10 ] + interval: 1 + - identifier: image + metadata: + description: "(deployment) Docker image to use to create vllm deployments" + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" ] + - identifier: n_cpus + metadata: + description: "(deployment) the number of CPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 1,17 ] + interval: 1 + - identifier: memory + metadata: + description: "(deployment) the amount of memory to allocate to vLLM pod" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "64Gi", "128Gi", "256Gi" ] + - identifier: dtype + metadata: + description: "(deployment) data type for model weights and activations. “auto” will use FP16 precision for FP32 and FP16 models, and BF16 precision for BF16 models." + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ "auto", "half", "float16", "bfloat16", "float", "float32" ] + - identifier: 'gpu_memory_utilization' + metadata: + description: "(deployment) The fraction of GPU memory to be used for the model executor," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ .5, .75, .9 ] + - identifier: 'cpu_offload' + metadata: + description: "(deployment) The amount of model weights in GB to offload to the CPU per GPU. 0 means all weights are on GPU," + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + values: [ 0, 8, 16, 24, 32 ] + - identifier: 'max_num_seq' + metadata: + description: "(deployment) Maximum number of sequences per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [32,2049] + interval: 32 + - identifier: 'max_batch_tokens' + metadata: + description: "(deployment) maximum number of batched tokens per iteration" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [ 8192, 32769] + interval: 1024 + - identifier: 'n_gpus' + metadata: + description: "(deployment) Number of GPUs to use" + propertyDomain: + variableType: 'DISCRETE_VARIABLE_TYPE' + domainRange: [1,9] + interval: 1 + - identifier: 'gpu_type' + metadata: + description: "(deployment) The GPU type to use" + propertyDomain: + variableType: "CATEGORICAL_VARIABLE_TYPE" + values: [ 'NVIDIA-A100-80GB-PCIe', 'NVIDIA-A100-SXM4-80GB' ] + - identifier: 'skip_tokenizer_init' + metadata: + description: "(deployment) skip tokenizer initialization" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'enforce_eager' + metadata: + description: "(deployment) enforce PyTorch eager mode" + propertyDomain: + variableType: BINARY_VARIABLE_TYPE + values: [True, False] + - identifier: 'io_processor_plugin' + metadata: + description: 'IO Processor plugin to load for the model' + propertyDomain: + variableType: "OPEN_CATEGORICAL_VARIABLE_TYPE" + values: [ "terratorch_segmentation" ] + defaultParameterization: + - property: + identifier: 'image' + value: "quay.io/dataprep1/data-prep-kit/vllm_image:0.1" + - property: + identifier: n_cpus + value: 8 + - property: + identifier: + memory + value: "128Gi" + - property: + identifier: dtype + value: "auto" + - property: + identifier: 'num_prompts' + value: 500 + - property: + identifier: 'max_concurrency' + value: -1 + - property: + identifier: 'burstiness' + value: 1.0 + - property: + identifier: 'gpu_memory_utilization' + value: .9 + - property: + identifier: 'cpu_offload' + value: 0 + - 
property: + identifier: 'max_num_seq' + value: 256 + - property: + identifier: 'max_batch_tokens' + value: 16384 + - property: + identifier: 'n_gpus' + value: 1 + - property: + identifier: 'gpu_type' + value: 'NVIDIA-A100-80GB-PCIe' + - property: + identifier: 'skip_tokenizer_init' + value: True + - property: + identifier: 'enforce_eager' + value: True + - property: + identifier: 'io_processor_plugin' + value: "terratorch_segmentation" + # measurements + targetProperties: + - identifier: "duration" + - identifier: "completed" + - identifier: "total_input_tokens" + - identifier: "total_output_tokens" + - identifier: "request_throughput" + - identifier: "mean_e2el_ms" + - identifier: "median_e2el_ms" + - identifier: "std_e2el_ms" + - identifier: "p25_e2el_ms" + - identifier: "p50_e2el_ms" + - identifier: "p75_e2el_ms" + - identifier: "p99_e2el_ms" + metadata: + description: 'VLLM performance testing across compute resource and workload configuration' \ No newline at end of file diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py index 87ee719d..c3a2a2b1 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/create_environment.py @@ -40,6 +40,9 @@ def create_test_environment( reuse_pvc: bool = True, pvc_name: str = "vllm-support", namespace: str = "vllm-testing", + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None, ) -> None: """ Create test deployment @@ -113,15 +116,13 @@ def create_test_environment( n_gpus=n_gpus, n_cpus=n_cpus, memory=memory, - max_batch_tokens=max_batch_tokens, - gpu_memory_utilization=gpu_memory_utilization, - dtype=dtype, - cpu_offload=cpu_offload, - max_num_seq=max_num_seq, template=deployment_template, claim_name=pvc_name, hf_token=hf_token, reuse=reuse_deployment, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin, ) logger.debug("deployment created") c_manager.wait_deployment_ready(k8_name=k8_name) diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py index dfef4725..9fddc978 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/manage_components.py @@ -231,6 +231,9 @@ def create_deployment( claim_name: str | None = None, hf_token: str | None = None, reuse: bool = False, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None, ) -> None: """ create deployment for model @@ -293,6 +296,9 @@ def create_deployment( template=template, claim_name=claim_name, hf_token=hf_token, + enforce_eager=enforce_eager, + skip_tokenizer_init=skip_tokenizer_init, + io_processor_plugin=io_processor_plugin, ), ) except ApiException as e: diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py index 7fb29b17..36ab4fe0 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py 
+++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/build_components.py @@ -1,6 +1,7 @@ # Copyright (c) IBM Corporation # SPDX-License-Identifier: MIT +import json import logging import os import sys @@ -77,6 +78,9 @@ def deployment_yaml( template: str = "deployment.yaml", claim_name: str | None = None, hf_token: str | None = None, + enforce_eager: bool = False, + skip_tokenizer_init: bool = False, + io_processor_plugin: str | None = None, ) -> dict[str, Any]: """ Generate deployment yaml @@ -138,6 +142,30 @@ def deployment_yaml( [{"name": PVC_NAME, "persistentVolumeClaim": {"claimName": claim_name}}] ) + vllm_serve_args = [ + model, + "--max-num-batched-tokens", + f"{max_batch_tokens}", + "--gpu-memory-utilization", + f"{gpu_memory_utilization}", + "--cpu-offload-gb", + f"{cpu_offload}", + "--max-num-seq", + f"{max_num_seq}", + "--tensor-parallel-size", + f"{n_gpus}", + "--dtype", + dtype.value, + ] + + if enforce_eager: + vllm_serve_args.append("--enforce-eager") + if skip_tokenizer_init: + vllm_serve_args.append("--skip-tokenizer-init") + if io_processor_plugin is not None: + vllm_serve_args.append("--io-processor-plugin") + vllm_serve_args.append(io_processor_plugin) + # container container = spec["containers"][0] # image @@ -151,19 +179,16 @@ def deployment_yaml( limits["cpu"] = str(n_cpus) limits["memory"] = memory limits["nvidia.com/gpu"] = str(n_gpus) - # env variables to to set parameters for docker execution - container["env"] = [ - {"name": "MODEL", "value": model}, - {"name": "GPU_MEMORY_UTILIZATION", "value": str(gpu_memory_utilization)}, - {"name": "DTYPE", "value": dtype.value}, - {"name": "CPU_OFFLOAD_GB", "value": str(cpu_offload)}, - {"name": "MAX_NUM_BATCHED_TOKENS", "value": str(max_batch_tokens)}, - {"name": "MAX_NUM_SEQ", "value": str(max_num_seq)}, - {"name": "TENSOR_PARALLEL_SIZE", "value": str(n_gpus)}, - ] + + # command + container["command"] = ["vllm", "serve"] + container["args"] = vllm_serve_args + if hf_token is not None: - container["env"].extend([{"name": "HF_TOKEN", "value": hf_token}]) + container["env"] = [{"name": "HF_TOKEN", "value": hf_token}] if claim_name is not None: + if "env" not in container: + container["env"] = [] container["env"].extend( [ { @@ -180,7 +205,7 @@ def deployment_yaml( ] ) - # return + logger.debug(json.dumps(deployment_yaml, indent=2)) return deployment_yaml @staticmethod diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml index 2b90302a..2659550d 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml +++ b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/k8/yaml_support/deployment.yaml @@ -68,6 +68,4 @@ spec: emptyDir: medium: Memory nodeSelector: - nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe - #nvidia.com/gpu.product: Tesla-V100-PCIE-16GB - #kubernetes.io/hostname: cpu15 \ No newline at end of file + nvidia.com/gpu.product: NVIDIA-A100-80GB-PCIe \ No newline at end of file diff --git a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py index 12a05754..839aa528 100644 --- a/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py +++ 
b/plugins/actuators/vllm_performance/ado_actuators/vllm_performance/vllm_performance_test/execute_benchmark.py
@@ -12,11 +12,19 @@
     get_results,
 )

+logger = logging.getLogger("vllm-bench")
+
+default_geospatial_datasets_filenames = {
+    "india_url_in_b64_out": "india_url_in_b64_out.jsonl",
+    "valencia_url_in_b64_out": "valencia_url_in_b64_out.jsonl",
+}
+

 def execute_benchmark(
     base_url: str,
     model: str,
-    data_set: str,
+    dataset: str,
+    backend: str = "openai",
     interpreter: str = "python",
     num_prompts: int = 500,
     request_rate: int | None = None,
@@ -24,7 +32,7 @@
     hf_token: str | None = None,
     benchmark_retries: int = 3,
     retries_timeout: int = 5,
-    data_set_path: str | None = None,
+    dataset_path: str | None = None,
     custom_args: dict[str, Any] | None = None,
     burstiness: float = 1,
 ) -> dict[str, Any]:
@@ -32,57 +40,51 @@
     Execute benchmark
     :param base_url: url for vllm endpoint
     :param model: model
-    :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"]
-    :param interpreter - name of Python interpreter
+    :param dataset: dataset name ["sharegpt", "sonnet", "random", "hf", "custom"]
+    :param backend: name of the vLLM benchmark backend to use ["vllm", "openai", "openai-chat", "openai-audio", "openai-embeddings"]
+    :param interpreter: name of Python interpreter
     :param num_prompts: number of prompts
     :param request_rate: request rate
-    :param max_concurrency: max concurrency
+    :param max_concurrency: maximum number of concurrent requests
     :param hf_token: huggingface token
     :param benchmark_retries: number of benchmark execution retries
     :param retries_timeout: timeout between initial retry
-    :param data_set_path: path to the dataset
+    :param dataset_path: path to the dataset
+    :param burstiness: burstiness factor of the request generation; 1.0 gives a Poisson process, other positive values a gamma distribution
     :param custom_args: custom arguments to pass to the benchmark.
     keys are vllm benchmark arguments. values are the values to pass to the arguments
+    :return: results dictionary
     """
-    logger = logging.getLogger("vllm-bench")
     logger.debug(
         f"executing benchmark, invoking service at {base_url} with the parameters: "
     )
     logger.debug(
-        f"model {model}, data set {data_set}, python {interpreter}, num prompts {num_prompts}"
+        f"model {model}, data set {dataset}, python {interpreter}, num prompts {num_prompts}"
     )
     logger.debug(
         f"request_rate {request_rate}, max_concurrency {max_concurrency}, benchmark retries {benchmark_retries}"
     )
-    # The code below is commented as we are switching from a script invocation to command line
-    # invocation. If we want to bring back script execution for any reason, this code must be
-    # uncommented
-    # parameters
-    # code = os.path.abspath(
-    #     os.path.join(os.path.dirname(__file__), "benchmark_serving.py")
-    # )
+
     request = f"export HF_TOKEN={hf_token} && " if hf_token is not None else ""
     f_name = f"{uuid.uuid4().hex}.json"
     request += (
-        # changing from script invocation to cli invocation
-        # f"{interpreter} {code} --backend openai --base-url {base_url} --dataset-name {data_set} "
-        f"vllm bench serve --backend openai --base-url {base_url} --dataset-name {data_set} "
-        f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles "
+        f"vllm bench serve --backend {backend} --base-url {base_url} --dataset-name {dataset} "
+        f"--model {model} --seed 12345 --num-prompts {num_prompts!s} --save-result --metric-percentiles "
         f'"25,75,99" --percentile-metrics "ttft,tpot,itl,e2el" --result-dir . 
--result-filename {f_name} '
         f"--burstiness {burstiness} "
     )
-    if data_set_path is not None:
-        request += f"--dataset-path {data_set_path} "
+    if dataset_path is not None:
+        request += f" --dataset-path {dataset_path} "
     if request_rate is not None:
-        request += f"--request-rate {request_rate!s} "
+        request += f" --request-rate {request_rate!s} "
     if max_concurrency is not None:
         request += f"--max-concurrency {max_concurrency!s} "
     if custom_args is not None:
         for key, value in custom_args.items():
-            request += f"{key} {value!s} "
+            request += f" {key} {value!s} "
     timeout = retries_timeout
     logger.debug(f"Command line: {request}")
@@ -106,6 +108,7 @@
 def execute_random_benchmark(
     base_url: str,
     model: str,
+    dataset: str,
     num_prompts: int = 500,
     request_rate: int | None = None,
     max_concurrency: int | None = None,
@@ -121,19 +124,25 @@
     Execute benchmark with random dataset
     :param base_url: url for vllm endpoint
     :param model: model
-    :param data_set: data set name ["sharegpt", "sonnet", "random", "hf"]
+    :param dataset: dataset name ["sharegpt", "sonnet", "random", "hf"]
+    :param num_prompts: number of prompts
+    :param request_rate: request rate
+    :param max_concurrency: maximum number of concurrent requests
     :param hf_token: huggingface token
     :param benchmark_retries: number of benchmark execution retries
    :param retries_timeout: timeout between initial retry
-    :param input_token_length: length of input tokens
-    :param output_token_length: length of output tokens
+    :param burstiness: burstiness factor of the request generation; 1.0 gives a Poisson process, other positive values a gamma distribution
+    :param number_input_tokens: maximum number of input tokens for each request
+    :param max_output_tokens: maximum number of output tokens for each request
+    :param interpreter: name of Python interpreter
+
+    :return: results dictionary
     """
     # Call execute_benchmark with the appropriate arguments
     return execute_benchmark(
         base_url=base_url,
         model=model,
-        data_set="random",
+        dataset=dataset,
         interpreter=interpreter,
         num_prompts=num_prompts,
         request_rate=request_rate,
@@ -149,14 +158,86 @@
     )

+def execute_geospatial_benchmark(
+    base_url: str,
+    model: str,
+    dataset: str,
+    num_prompts: int = 500,
+    request_rate: int | None = None,
+    max_concurrency: int | None = None,
+    hf_token: str | None = None,
+    benchmark_retries: int = 3,
+    retries_timeout: int = 5,
+    burstiness: float = 1,
+    interpreter: str = "python",
+) -> dict[str, Any]:
+    """
+    Execute benchmark with a geospatial dataset
+    :param base_url: url for vllm endpoint
+    :param model: model
+    :param dataset: name of a bundled geospatial dataset ["india_url_in_b64_out", "valencia_url_in_b64_out"] or the filename of a custom dataset placed in the working directory
+    :param num_prompts: number of prompts
+    :param request_rate: request rate
+    :param max_concurrency: maximum number of concurrent requests
+    :param hf_token: huggingface token
+    :param benchmark_retries: number of benchmark execution retries
+    :param retries_timeout: timeout between initial retry
+    :param burstiness: burstiness factor of the request generation; 1.0 gives a Poisson process, other positive values a gamma distribution
+    :param interpreter: Python interpreter to use
+
+    :return: results dictionary
+    """
+    from pathlib import Path
+
+    if dataset in default_geospatial_datasets_filenames:
+        dataset_filename = default_geospatial_datasets_filenames[dataset]
+        parent_path = Path(__file__).parents[1]
+        dataset_path = parent_path / "datasets" / dataset_filename
+    else:
+        # This can only happen with the performance-testing-geospatial-full-custom-dataset
+        # experiment; otherwise the dataset name is always one of the 
allowed ones. + # Here the assumption is that the dataset file is placed in the process working directory. + ray_working_dir = Path.cwd() + dataset_path = ray_working_dir / dataset + + if not dataset_path.is_file(): + error_string = ( + "The dataset filename provided does not exist or " + f"does not point to a valid file: {dataset_path}" + ) + logger.warning(error_string) + raise ValueError(error_string) + + logger.debug(f"Dataset path {dataset_path}") + + return execute_benchmark( + base_url=base_url, + backend="io-processor-plugin", + model=model, + dataset="custom", + interpreter=interpreter, + num_prompts=num_prompts, + request_rate=request_rate, + max_concurrency=max_concurrency, + hf_token=hf_token, + benchmark_retries=benchmark_retries, + retries_timeout=retries_timeout, + burstiness=burstiness, + custom_args={ + "--dataset-path": f"{dataset_path.resolve()}", + "--endpoint": "/pooling", + "--skip-tokenizer-init": True, + }, + ) + + if __name__ == "__main__": - results = execute_benchmark( + results = execute_geospatial_benchmark( interpreter="python3.10", - base_url="http://localhost:28015", - data_set="random", - model="openai/gpt-oss-20b", - request_rate=None, - max_concurrency=None, + base_url="http://localhost:8000", + model="ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", + request_rate=2, + max_concurrency=10, hf_token=os.getenv("HF_TOKEN"), num_prompts=100, )
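
Note (not part of the patch): the performance-testing-geospatial-full-custom-dataset experiment expects a dataset file, named by its `dataset` property (default `custom_dataset.jsonl`), to be present in the process working directory, which is where execute_geospatial_benchmark resolves unrecognised dataset names. Below is a minimal sketch of producing such a file; the record shape mirrors datasets/india_url_in_b64_out.jsonl added above, and the GeoTIFF URL and band indices are copied from that bundled dataset purely as placeholders; substitute your own scene.

import json

# One JSON object per line; each record has the same shape as the bundled
# india_url_in_b64_out.jsonl dataset (URL input, base64 JSON output, band indices).
record = {
    "prompt": {
        "data": {
            "data": "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif",  # placeholder GeoTIFF URL
            "data_format": "url",
            "out_data_format": "b64_json",
            "indices": [1, 2, 3, 8, 11, 12],  # placeholder band indices
        },
        "priority": 0,
        "softmax": False,
    }
}

# The filename must match the experiment's `dataset` value and sit in the
# process working directory, where execute_geospatial_benchmark looks for it.
with open("custom_dataset.jsonl", "w") as f:
    f.write(json.dumps(record) + "\n")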