HabanaAI · michalkuligowski · Apr 30, 2025 · Apr 28, 2025
@@ -42,7 +42,7 @@
 class HpuModelAdapterEncoderDecoder(HpuModelAdapter):
 
     def __init__(self, model, vllm_config, layer_names, is_causal):
-        super().__init__(model, vllm_config, layer_names, False)
+        super().__init__(model, vllm_config, layer_names, is_causal)
 
         # We only wrap the language model in an HPU graph because some ops in
         # the vision model fall back to CPU and cause graph building to fail.

@@ -92,7 +92,6 @@ def __init__(
             ModelRunnerClass = HPUPoolingModelRunner
         elif is_encoder_decoder_model:
             ModelRunnerClass = HPUEncoderDecoderModelRunner
-            is_causal = False
         self.model_runner: HPUModelRunnerBase = ModelRunnerClass(
             vllm_config=vllm_config,
             kv_cache_dtype=self.cache_config.cache_dtype,