From a999acf0e89865694967701e2881130d608039c2 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Fri, 30 Jan 2026 15:59:35 -0800 Subject: [PATCH 1/5] Fix Qwen2.5-VL conversion --- src/megatron/bridge/inference/vlm/base.py | 2 ++ src/megatron/bridge/models/qwen_vl/modeling_qwen25_vl.py | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/megatron/bridge/inference/vlm/base.py b/src/megatron/bridge/inference/vlm/base.py index f592c47a6f..2d99032b48 100644 --- a/src/megatron/bridge/inference/vlm/base.py +++ b/src/megatron/bridge/inference/vlm/base.py @@ -131,6 +131,8 @@ def setup_inference_wrapper( wrapper_cls = QwenVLInferenceWrapper if isinstance(config, Qwen25VLModelProvider): hidden_size = config.hidden_size + # Expose decoder for MCore Infernce Engine compatibility (used by get_mamba_inference_state_config_from_model) + mcore_model.module.decoder = mcore_model.module.language_model.decoder else: hidden_size = config.language_transformer_config.hidden_size else: diff --git a/src/megatron/bridge/models/qwen_vl/modeling_qwen25_vl.py b/src/megatron/bridge/models/qwen_vl/modeling_qwen25_vl.py index d00fd7ab9f..6250ee9ab7 100644 --- a/src/megatron/bridge/models/qwen_vl/modeling_qwen25_vl.py +++ b/src/megatron/bridge/models/qwen_vl/modeling_qwen25_vl.py @@ -111,9 +111,6 @@ def __init__( self.share_embeddings_and_output_weights = config.share_embeddings_and_output_weights self.shared_embedding_or_output_weight = self.language_model.shared_embedding_or_output_weight - # Expose decoder for MCore Infernce Engine compatibility (used by get_mamba_inference_state_config_from_model) - self.decoder = self.language_model.decoder - # Bind methods from HF's Qwen2_5_VLModel to this instance # get_placeholder_mask is only available in transformers 4.55+ if is_transformers_min_version("4.55.0"): From 6ae2d7511eec49eb1073e7e6583ad521c53cb007 Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Mon, 2 Feb 2026 10:11:32 -0800 Subject: [PATCH 2/5] Use recursive check for module --- src/megatron/bridge/inference/vlm/base.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/megatron/bridge/inference/vlm/base.py b/src/megatron/bridge/inference/vlm/base.py index 2d99032b48..14faf08ca5 100644 --- a/src/megatron/bridge/inference/vlm/base.py +++ b/src/megatron/bridge/inference/vlm/base.py @@ -112,6 +112,17 @@ def setup_model_and_tokenizer( return inference_wrapped_model, processor +def _expose_decoder_from_language_model(model): + """Recursively get language_model from model and expose decoder, handling wrapped modules.""" + current = model + while hasattr(current, "module"): + current = current.module + + if hasattr(current, "language_model"): + language_model = current.language_model + current.decoder = language_model.decoder + + def setup_inference_wrapper( model, tokenizer, @@ -132,7 +143,7 @@ def setup_inference_wrapper( if isinstance(config, Qwen25VLModelProvider): hidden_size = config.hidden_size # Expose decoder for MCore Infernce Engine compatibility (used by get_mamba_inference_state_config_from_model) - mcore_model.module.decoder = mcore_model.module.language_model.decoder + _expose_decoder_from_language_model(mcore_model) else: hidden_size = config.language_transformer_config.hidden_size else: From 9d9cc6be03427d807a2c6b8717336afbf15ed61e Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Mon, 2 Feb 2026 10:59:21 -0800 Subject: [PATCH 3/5] Fix test --- tests/unit_tests/inference/vlm/test_base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit_tests/inference/vlm/test_base.py b/tests/unit_tests/inference/vlm/test_base.py index 1ad296a82c..05abd6307e 100644 --- a/tests/unit_tests/inference/vlm/test_base.py +++ b/tests/unit_tests/inference/vlm/test_base.py @@ -240,6 +240,12 @@ def test_setup_inference_wrapper_qwen25(self, mock_wrapper_cls, mock_tokenizer): mock_model.config = MagicMock(spec=Qwen25VLModelProvider) mock_model.config.hidden_size = 1024 + # Setup mock structure for _expose_decoder_from_language_model + mock_decoder = MagicMock() + mock_model.module = MagicMock() + mock_model.module.language_model = MagicMock() + mock_model.module.language_model.decoder = mock_decoder + _wrapper = setup_inference_wrapper(mock_model, mock_tokenizer) mock_wrapper_cls.assert_called_once() From eab31f25e6f55f89f94da98ab1ae9503b04b75fd Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Mon, 2 Feb 2026 11:45:38 -0800 Subject: [PATCH 4/5] Fix test --- tests/unit_tests/inference/vlm/test_base.py | 33 +++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests/inference/vlm/test_base.py b/tests/unit_tests/inference/vlm/test_base.py index 05abd6307e..b04556fc12 100644 --- a/tests/unit_tests/inference/vlm/test_base.py +++ b/tests/unit_tests/inference/vlm/test_base.py @@ -236,15 +236,16 @@ class TestSetupInferenceWrapper: @patch("megatron.bridge.inference.vlm.base.QwenVLInferenceWrapper") def test_setup_inference_wrapper_qwen25(self, mock_wrapper_cls, mock_tokenizer): - mock_model = MagicMock() + # Create a simple object without module attribute to avoid infinite loop + class MockModel: + pass + + mock_model = MockModel() mock_model.config = MagicMock(spec=Qwen25VLModelProvider) mock_model.config.hidden_size = 1024 - - # Setup mock structure for _expose_decoder_from_language_model - mock_decoder = MagicMock() - mock_model.module = MagicMock() - mock_model.module.language_model = MagicMock() - mock_model.module.language_model.decoder = mock_decoder + mock_model.cuda = MagicMock(return_value=mock_model) + mock_model.to = MagicMock(return_value=mock_model) + mock_model.eval = MagicMock() _wrapper = setup_inference_wrapper(mock_model, mock_tokenizer) @@ -257,10 +258,17 @@ def test_setup_inference_wrapper_qwen25(self, mock_wrapper_cls, mock_tokenizer): @patch("megatron.bridge.inference.vlm.base.QwenVLInferenceWrapper") def test_setup_inference_wrapper_qwen3(self, mock_wrapper_cls, mock_tokenizer): - mock_model = MagicMock() + # Create a simple object without module attribute to avoid infinite loop + class MockModel: + pass + + mock_model = MockModel() mock_model.config = MagicMock(spec=Qwen3VLModelProvider) mock_model.config.language_transformer_config = MagicMock() mock_model.config.language_transformer_config.hidden_size = 2048 + mock_model.cuda = MagicMock(return_value=mock_model) + mock_model.to = MagicMock(return_value=mock_model) + mock_model.eval = MagicMock() _wrapper = setup_inference_wrapper(mock_model, mock_tokenizer) @@ -272,8 +280,15 @@ def test_setup_inference_wrapper_qwen3(self, mock_wrapper_cls, mock_tokenizer): assert inference_config.hidden_size == 2048 def test_setup_inference_wrapper_invalid(self, mock_tokenizer): - mock_model = MagicMock() + # Create a simple object without module attribute to avoid infinite loop + class MockModel: + pass + + mock_model = MockModel() mock_model.config = MagicMock() # Not Qwen config + mock_model.cuda = MagicMock(return_value=mock_model) + mock_model.to = MagicMock(return_value=mock_model) + mock_model.eval = MagicMock() with pytest.raises(ValueError): setup_inference_wrapper(mock_model, mock_tokenizer) From 23c68ad8a4927f57fbbc301758e488290108d19e Mon Sep 17 00:00:00 2001 From: Bobby Chen Date: Tue, 3 Feb 2026 10:26:26 -0800 Subject: [PATCH 5/5] Add test --- tests/unit_tests/inference/vlm/test_base.py | 33 +++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests/inference/vlm/test_base.py b/tests/unit_tests/inference/vlm/test_base.py index b04556fc12..b86968f383 100644 --- a/tests/unit_tests/inference/vlm/test_base.py +++ b/tests/unit_tests/inference/vlm/test_base.py @@ -236,11 +236,23 @@ class TestSetupInferenceWrapper: @patch("megatron.bridge.inference.vlm.base.QwenVLInferenceWrapper") def test_setup_inference_wrapper_qwen25(self, mock_wrapper_cls, mock_tokenizer): - # Create a simple object without module attribute to avoid infinite loop - class MockModel: + """Test Qwen25 setup with module.language_model.decoder structure.""" + + # Create mock objects with nested structure + class MockObject: pass - mock_model = MockModel() + mock_decoder = MagicMock() + + # Build the nested structure: model.module.language_model.decoder + mock_language_model = MockObject() + mock_language_model.decoder = mock_decoder + + mock_module = MockObject() + mock_module.language_model = mock_language_model + + mock_model = MockObject() + mock_model.module = mock_module mock_model.config = MagicMock(spec=Qwen25VLModelProvider) mock_model.config.hidden_size = 1024 mock_model.cuda = MagicMock(return_value=mock_model) @@ -249,20 +261,23 @@ class MockModel: _wrapper = setup_inference_wrapper(mock_model, mock_tokenizer) + # Verify decoder was exposed at module level + assert hasattr(mock_module, "decoder") + assert mock_module.decoder is mock_decoder + mock_wrapper_cls.assert_called_once() # Check InferenceWrapperConfig was created with correct hidden_size - # Args are positional: (model, InferenceWrapperConfig) call_args = mock_wrapper_cls.call_args - inference_config = call_args[0][1] # Second positional argument + inference_config = call_args[0][1] assert inference_config.hidden_size == 1024 @patch("megatron.bridge.inference.vlm.base.QwenVLInferenceWrapper") def test_setup_inference_wrapper_qwen3(self, mock_wrapper_cls, mock_tokenizer): # Create a simple object without module attribute to avoid infinite loop - class MockModel: + class MockObject: pass - mock_model = MockModel() + mock_model = MockObject() mock_model.config = MagicMock(spec=Qwen3VLModelProvider) mock_model.config.language_transformer_config = MagicMock() mock_model.config.language_transformer_config.hidden_size = 2048 @@ -281,10 +296,10 @@ class MockModel: def test_setup_inference_wrapper_invalid(self, mock_tokenizer): # Create a simple object without module attribute to avoid infinite loop - class MockModel: + class MockObject: pass - mock_model = MockModel() + mock_model = MockObject() mock_model.config = MagicMock() # Not Qwen config mock_model.cuda = MagicMock(return_value=mock_model) mock_model.to = MagicMock(return_value=mock_model)