From 11f04fb28b45a32ad2580b2d1b7186b2858ae797 Mon Sep 17 00:00:00 2001
From: Rishi-Dave <rishipdave@gmail.com>
Date: Thu, 26 Feb 2026 13:53:11 -0800
Subject: [PATCH 1/2] Fix Cast node naming collisions and opset 10 Resize in
 float16 conversion

Fix two bugs in convert_float_to_float16:

1. Cast node naming collision: When node.name is empty (common in
   PyTorch-exported models), the generated Cast nodes all get the
   same node name (e.g. "_input_cast2") and the same output tensor
   name (e.g. "_input_cast_2"), corrupting the graph. Use the name
   of the tensor being cast (input_name for input casts, output for
   output casts) as the naming base instead.

2. Opset 10 Resize scales protection: ALWAYS_FLOAT_INPUTS only
   protected index 2 (scales in opset 11+). Opset 10 Resize has
   scales at index 1, which was unprotected. Add index 1 to the
   protected list.

Also fix a misleading comment in the output Cast section, which said
"change current node's input name" where the code rewrites
node.output[i].
---
 .../python/tools/transformers/float16.py      |  22 +-
 .../test/python/transformers/test_float16.py  | 273 ++++++++++++++++++
 2 files changed, 284 insertions(+), 11 deletions(-)
 create mode 100644 onnxruntime/test/python/transformers/test_float16.py

diff --git a/onnxruntime/python/tools/transformers/float16.py b/onnxruntime/python/tools/transformers/float16.py
index 349f5bb51fe47..4c28a1200db13 100644
--- a/onnxruntime/python/tools/transformers/float16.py
+++ b/onnxruntime/python/tools/transformers/float16.py
@@ -146,7 +146,7 @@ def make_value_info_from_tensor(tensor):
 
 # Some operators has data type fixed as float for some inputs. Key is op_type, value is list of input indices
 # Note that DirectML allows float16 gamma and beta in GroupNorm. Use force_fp16_inputs parameter could overwrite this.
-ALWAYS_FLOAT_INPUTS = {"Resize": [2], "GroupNorm": [1, 2], "SkipGroupNorm": [1, 2]}
+ALWAYS_FLOAT_INPUTS = {"Resize": [1, 2], "GroupNorm": [1, 2], "SkipGroupNorm": [1, 2]}
 
 
 class InitializerTracker:
@@ -424,11 +424,11 @@ def convert_float_to_float16(
                     # create new value_info for current node's new input name
                     new_value_info = model.graph.value_info.add()
                     new_value_info.CopyFrom(value_info)
-                    output_name = node.name + "_input_cast_" + str(i)
+                    output_name = input_name + "_cast_to_fp32"
                     new_value_info.name = output_name
                     new_value_info.type.tensor_type.elem_type = TensorProto.FLOAT
                     # add Cast node (from tensor(float16) to tensor(float) before current node
-                    node_name = node.name + "_input_cast" + str(i)
+                    node_name = input_name + "_cast_to_fp32_node"
                     new_node = [helper.make_node("Cast", [input_name], [output_name], to=1, name=node_name)]
                     model.graph.node.extend(new_node)
                     # change current node's input name
@@ -448,11 +448,11 @@ def convert_float_to_float16(
                     # create new value_info for current node's new input name
                     new_value_info = model.graph.value_info.add()
                     new_value_info.CopyFrom(value_info)
-                    output_name = node.name + "_input_cast_" + str(i)
+                    output_name = input_name + "_cast_to_fp32"
                     new_value_info.name = output_name
                     new_value_info.type.tensor_type.elem_type = accuracy_type
                     # add Cast node (from tensor(float16) to tensor(float) before current node
-                    node_name = node.name + "_input_cast" + str(i)
+                    node_name = input_name + "_cast_to_fp32_node"
                     new_node = [helper.make_node("Cast", [input_name], [output_name], to=accuracy_type, name=node_name)]
                     model.graph.node.extend(new_node)
                     # change current node's input name
@@ -467,15 +467,15 @@ def convert_float_to_float16(
                     # create new value_info for current node's new output
                     new_value_info = model.graph.value_info.add()
                     new_value_info.CopyFrom(value_info)
-                    input_name = node.name + "_output_cast_" + str(i)
-                    new_value_info.name = input_name
+                    output_cast_name = output + "_cast_to_fp16"
+                    new_value_info.name = output_cast_name
                     new_value_info.type.tensor_type.elem_type = accuracy_type
                     # add Cast node (from tensor(float) to tensor(float16) after current node
-                    node_name = node.name + "_output_cast" + str(i)
-                    new_node = [helper.make_node("Cast", [input_name], [output], to=10, name=node_name)]
+                    node_name = output + "_cast_to_fp16_node"
+                    new_node = [helper.make_node("Cast", [output_cast_name], [output], to=10, name=node_name)]
                     model.graph.node.extend(new_node)
-                    # change current node's input name
-                    node.output[i] = input_name
+                    # change current node's output name
+                    node.output[i] = output_cast_name
                     break
     return model
 
diff --git a/onnxruntime/test/python/transformers/test_float16.py b/onnxruntime/test/python/transformers/test_float16.py
new file mode 100644
index 0000000000000..764c46a5a3396
--- /dev/null
+++ b/onnxruntime/test/python/transformers/test_float16.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+"""Tests for float16 conversion (convert_float_to_float16)."""
+
+import unittest
+
+import numpy as np
+import onnx
+from onnx import TensorProto, helper, numpy_helper
+from parity_utilities import find_transformers_source
+
+if find_transformers_source():
+    from float16 import convert_float_to_float16
+else:
+    from onnxruntime.transformers.float16 import convert_float_to_float16
+
+
+def _make_resize_model_opset11(num_resize_nodes=2, use_empty_names=True):
+    """Create a minimal ONNX model with multiple Resize nodes (opset 11+).
+
+    Resize opset 11+: inputs are [X, roi, scales, sizes].
+    Scales (index 2) and roi (index 1) must stay float32 per ALWAYS_FLOAT_INPUTS.
+    """
+    graph_input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 1, 4, 4])
+    graph_output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 1, 8, 8])
+
+    nodes = []
+    prev_output = "input"
+    for idx in range(num_resize_nodes):
+        roi_name = f"roi_{idx}"
+        scales_name = f"scales_{idx}"
+        output_name = f"resize_out_{idx}" if idx < num_resize_nodes - 1 else "output"
+
+        node = helper.make_node(
+            "Resize",
+            inputs=[prev_output, roi_name, scales_name],
+            outputs=[output_name],
+            name="" if use_empty_names else f"Resize_{idx}",
+            mode="nearest",
+        )
+        nodes.append(node)
+        prev_output = output_name
+
+    initializers = []
+    for idx in range(num_resize_nodes):
+        roi = numpy_helper.from_array(np.array([], dtype=np.float32), name=f"roi_{idx}")
+        scales = numpy_helper.from_array(np.array([1.0, 1.0, 2.0, 2.0], dtype=np.float32), name=f"scales_{idx}")
+        initializers.extend([roi, scales])
+
+    graph = helper.make_graph(nodes, "resize_test", [graph_input], [graph_output], initializer=initializers)
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)])
+    model = onnx.shape_inference.infer_shapes(model)
+    return model
+
+
+def _make_resize_model_opset10(num_resize_nodes=1, use_empty_names=True):
+    """Create a minimal ONNX model with Resize nodes using opset 10.
+
+    Resize opset 10: inputs are [X, scales].
+    Scales (index 1) must stay float32.
+    """
+    graph_input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 1, 4, 4])
+    graph_output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 1, 8, 8])
+
+    nodes = []
+    prev_output = "input"
+    initializers = []
+    for idx in range(num_resize_nodes):
+        scales_name = f"scales_{idx}"
+        output_name = f"resize_out_{idx}" if idx < num_resize_nodes - 1 else "output"
+
+        node = helper.make_node(
+            "Resize",
+            inputs=[prev_output, scales_name],
+            outputs=[output_name],
+            name="" if use_empty_names else f"Resize_{idx}",
+            mode="nearest",
+        )
+        nodes.append(node)
+        prev_output = output_name
+
+        scales = numpy_helper.from_array(np.array([1.0, 1.0, 2.0, 2.0], dtype=np.float32), name=scales_name)
+        initializers.append(scales)
+
+    graph = helper.make_graph(nodes, "resize_opset10_test", [graph_input], [graph_output], initializer=initializers)
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 10)])
+    model = onnx.shape_inference.infer_shapes(model)
+    return model
+
+
+def _make_blocked_node_model(num_nodes=2, use_empty_names=True):
+    """Create a model with multiple blocked op nodes (using Upsample, which is in DEFAULT_OP_BLOCK_LIST).
+
+    Tests that Cast nodes for blocked ops also get unique names.
+    """
+    graph_input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 1, 4, 4])
+    graph_output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 1, 16, 16])
+
+    nodes = []
+    prev_output = "input"
+    for idx in range(num_nodes):
+        scales_name = f"scales_{idx}"
+        output_name = f"upsample_out_{idx}" if idx < num_nodes - 1 else "output"
+
+        node = helper.make_node(
+            "Upsample",
+            inputs=[prev_output, scales_name],
+            outputs=[output_name],
+            name="" if use_empty_names else f"Upsample_{idx}",
+            mode="nearest",
+        )
+        nodes.append(node)
+        prev_output = output_name
+
+    initializers = []
+    for idx in range(num_nodes):
+        scales = numpy_helper.from_array(np.array([1.0, 1.0, 2.0, 2.0], dtype=np.float32), name=f"scales_{idx}")
+        initializers.append(scales)
+
+    graph = helper.make_graph(nodes, "blocked_node_test", [graph_input], [graph_output], initializer=initializers)
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 9)])
+    model = onnx.shape_inference.infer_shapes(model)
+    return model
+
+
+class TestFloat16Conversion(unittest.TestCase):
+    """Tests for convert_float_to_float16 correctness."""
+
+    def _get_all_node_names(self, model):
+        """Return all node names in the model graph."""
+        return [n.name for n in model.graph.node]
+
+    def _get_all_output_names(self, model):
+        """Return all output tensor names from all nodes."""
+        names = []
+        for n in model.graph.node:
+            names.extend(n.output)
+        return names
+
+    def _get_initializer(self, model, name):
+        """Find an initializer by name."""
+        for init in model.graph.initializer:
+            if init.name == name:
+                return init
+        return None
+
+    def test_resize_opset11_cast_naming_unique(self):
+        """Multiple unnamed Resize nodes should produce uniquely named Cast nodes."""
+        model = _make_resize_model_opset11(num_resize_nodes=3, use_empty_names=True)
+        converted = convert_float_to_float16(model, keep_io_types=True)
+
+        node_names = self._get_all_node_names(converted)
+        # Filter to only non-empty names (original nodes may have empty names)
+        cast_names = [n for n in node_names if n and "cast" in n.lower()]
+        self.assertEqual(len(cast_names), len(set(cast_names)), f"Duplicate Cast node names found: {cast_names}")
+
+        output_names = self._get_all_output_names(converted)
+        cast_outputs = [n for n in output_names if "cast" in n.lower()]
+        self.assertEqual(
+            len(cast_outputs), len(set(cast_outputs)), f"Duplicate Cast output names found: {cast_outputs}"
+        )
+
+    def test_resize_opset11_scales_initializer_stays_fp32(self):
+        """Resize scales initializer (input index 2) should stay float32 after conversion.
+
+        When scales is an initializer and ALWAYS_FLOAT_INPUTS protects index 2,
+        the initializer should not be converted to float16.
+        """
+        model = _make_resize_model_opset11(num_resize_nodes=1, use_empty_names=False)
+        converted = convert_float_to_float16(model, keep_io_types=True)
+
+        # The scales initializer should remain float32 (not converted to fp16)
+        scales_init = self._get_initializer(converted, "scales_0")
+        self.assertIsNotNone(scales_init, "scales_0 initializer not found")
+        self.assertEqual(
+            scales_init.data_type,
+            TensorProto.FLOAT,
+            "Resize scales initializer should stay float32",
+        )
+
+        # The roi initializer should also remain float32 (index 1 is protected)
+        roi_init = self._get_initializer(converted, "roi_0")
+        self.assertIsNotNone(roi_init, "roi_0 initializer not found")
+        self.assertEqual(
+            roi_init.data_type,
+            TensorProto.FLOAT,
+            "Resize roi initializer should stay float32",
+        )
+
+    def test_resize_opset10_scales_initializer_stays_fp32(self):
+        """Resize opset 10 scales initializer (input index 1) should stay float32.
+
+        Before the fix, ALWAYS_FLOAT_INPUTS only protected index 2, so opset 10
+        Resize (where scales is at index 1) would incorrectly convert scales to fp16.
+        """
+        model = _make_resize_model_opset10()
+        converted = convert_float_to_float16(model, keep_io_types=True)
+
+        # The scales initializer should remain float32
+        scales_init = self._get_initializer(converted, "scales_0")
+        self.assertIsNotNone(scales_init, "scales_0 initializer not found")
+        self.assertEqual(
+            scales_init.data_type,
+            TensorProto.FLOAT,
+            "Opset 10 Resize scales initializer should stay float32 (index 1 protected)",
+        )
+
+    def test_resize_opset10_multiple_unnamed_unique_names(self):
+        """Multiple unnamed opset 10 Resize nodes should produce uniquely named Cast nodes."""
+        model = _make_resize_model_opset10(num_resize_nodes=3, use_empty_names=True)
+        converted = convert_float_to_float16(model, keep_io_types=True)
+
+        node_names = self._get_all_node_names(converted)
+        cast_names = [n for n in node_names if n and "cast" in n.lower()]
+        self.assertEqual(len(cast_names), len(set(cast_names)), f"Duplicate Cast node names found: {cast_names}")
+
+    def test_blocked_node_cast_naming_unique(self):
+        """Multiple unnamed blocked-op nodes should produce uniquely named Cast nodes."""
+        model = _make_blocked_node_model(num_nodes=2, use_empty_names=True)
+        converted = convert_float_to_float16(model, keep_io_types=True)
+
+        node_names = self._get_all_node_names(converted)
+        cast_names = [n for n in node_names if n and "cast" in n.lower()]
+        self.assertEqual(len(cast_names), len(set(cast_names)), f"Duplicate Cast node names found: {cast_names}")
+
+        output_names = self._get_all_output_names(converted)
+        cast_outputs = [n for n in output_names if "cast" in n.lower()]
+        self.assertEqual(
+            len(cast_outputs), len(set(cast_outputs)), f"Duplicate Cast output names found: {cast_outputs}"
+        )
+
+    def test_resize_with_op_block_list(self):
+        """When Resize is in op_block_list, Cast nodes should have unique names."""
+        model = _make_resize_model_opset11(num_resize_nodes=2, use_empty_names=True)
+        converted = convert_float_to_float16(model, keep_io_types=True, op_block_list=["Resize"])
+
+        # All Cast node names should be unique
+        node_names = self._get_all_node_names(converted)
+        cast_names = [n for n in node_names if n and "cast" in n.lower()]
+        self.assertEqual(len(cast_names), len(set(cast_names)), f"Duplicate Cast node names found: {cast_names}")
+
+    def test_data_input_converted_to_fp16(self):
+        """Resize data input (index 0) should be converted to float16."""
+        model = _make_resize_model_opset11(num_resize_nodes=1, use_empty_names=False)
+        converted = convert_float_to_float16(model, keep_io_types=False)
+
+        # Graph input should be float16
+        graph_input = converted.graph.input[0]
+        self.assertEqual(graph_input.type.tensor_type.elem_type, TensorProto.FLOAT16)
+
+    def test_force_fp16_initializers(self):
+        """With force_fp16_initializers=True, scales should be converted to fp16."""
+        model = _make_resize_model_opset11(num_resize_nodes=1, use_empty_names=False)
+        converted = convert_float_to_float16(model, keep_io_types=True, force_fp16_initializers=True)
+
+        # With force_fp16_initializers, even protected initializers get converted
+        # but Cast nodes are inserted to feed them back as fp32
+        scales_init = self._get_initializer(converted, "scales_0")
+        self.assertIsNotNone(scales_init)
+        self.assertEqual(
+            scales_init.data_type,
+            TensorProto.FLOAT16,
+            "With force_fp16_initializers, scales should be converted to fp16",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()

From df7e9ecfa088b70153243e8b3c28192988515f48 Mon Sep 17 00:00:00 2001
From: Rishi-Dave <rishipdave@gmail.com>
Date: Thu, 26 Feb 2026 15:31:23 -0800
Subject: [PATCH 2/2] Make Resize ALWAYS_FLOAT_INPUTS protection opset-aware

Address review feedback: instead of unconditionally protecting both
indices 1 and 2, detect the ONNX opset version from the model and
adjust accordingly:
- Opset 10 (Resize inputs [X, scales]): protect index 1
- Opset 11+ (Resize inputs [X, roi, scales, sizes]): protect index 2
  only; roi at index 1 allows fp16 per the ONNX spec

Update test to reflect that opset 11+ roi is not over-protected.
---
 onnxruntime/python/tools/transformers/float16.py | 16 ++++++++++++----
 .../test/python/transformers/test_float16.py     | 12 +++++++-----
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/onnxruntime/python/tools/transformers/float16.py b/onnxruntime/python/tools/transformers/float16.py
index 4c28a1200db13..da4e292703f5d 100644
--- a/onnxruntime/python/tools/transformers/float16.py
+++ b/onnxruntime/python/tools/transformers/float16.py
@@ -146,7 +146,7 @@ def make_value_info_from_tensor(tensor):
 
 # Some operators has data type fixed as float for some inputs. Key is op_type, value is list of input indices
 # Note that DirectML allows float16 gamma and beta in GroupNorm. Use force_fp16_inputs parameter could overwrite this.
-ALWAYS_FLOAT_INPUTS = {"Resize": [1, 2], "GroupNorm": [1, 2], "SkipGroupNorm": [1, 2]}
+ALWAYS_FLOAT_INPUTS = {"Resize": [2], "GroupNorm": [1, 2], "SkipGroupNorm": [1, 2]}
 
 
 class InitializerTracker:
@@ -238,6 +238,14 @@ def convert_float_to_float16(
     op_block_list = set(op_block_list)
     node_block_list = set(node_block_list)
 
+    # Build opset-aware always_float_inputs: Resize input layout differs between opset 10 and 11+.
+    # Opset 10: [X, scales] — scales at index 1 must stay float32.
+    # Opset 11+: [X, roi, scales, sizes] — scales at index 2 must stay float32; roi (index 1) allows fp16.
+    onnx_opset = max((o.version for o in model.opset_import if o.domain in ("", "ai.onnx")), default=11)
+    always_float_inputs = dict(ALWAYS_FLOAT_INPUTS)
+    if onnx_opset <= 10:
+        always_float_inputs["Resize"] = [1]
+
     logger.debug(
         f"fp16 parameters: min_positive_val={min_positive_val} max_finite_val={max_finite_val} keep_io_types={keep_io_types} disable_shape_infer={disable_shape_infer} op_block_list={op_block_list} node_block_list={node_block_list} force_fp16_initializers={force_fp16_initializers}"
     )
@@ -334,7 +342,7 @@ def convert_float_to_float16(
                         if input_name in fp32_initializers:
                             # For Resize/GroupNorm, only the first input can be float16
                             use_fp32_weight = is_node_blocked or (
-                                i in ALWAYS_FLOAT_INPUTS.get(n.op_type, [])
+                                i in always_float_inputs.get(n.op_type, [])
                                 and i not in force_fp16_inputs_dict.get(n.op_type, [])
                             )
                             fp32_initializers[input_name].add_node(n, use_fp32_weight)
@@ -371,7 +379,7 @@ def convert_float_to_float16(
                                 n.attribute.extend([helper.make_attribute("dtype", TensorProto.FLOAT16)])
 
                         # For Resize/GroupNorm, attribute data type cannot be changed
-                        if n.op_type not in ALWAYS_FLOAT_INPUTS or n.op_type in force_fp16_inputs_dict:
+                        if n.op_type not in always_float_inputs or n.op_type in force_fp16_inputs_dict:
                             for attr in n.attribute:
                                 next_level.append(attr)  # noqa: PERF402
                         else:
@@ -417,7 +425,7 @@ def convert_float_to_float16(
     # Some operators have data type fixed as float for some input. Add a float16 to float cast for those inputs.
     for node in mixed_float_type_node_list:
         for i, input_name in enumerate(node.input):
-            if i not in ALWAYS_FLOAT_INPUTS[node.op_type] or i in force_fp16_inputs_dict.get(node.op_type, []):
+            if i not in always_float_inputs[node.op_type] or i in force_fp16_inputs_dict.get(node.op_type, []):
                 continue
             for value_info in value_info_list:
                 if input_name == value_info.name:
diff --git a/onnxruntime/test/python/transformers/test_float16.py b/onnxruntime/test/python/transformers/test_float16.py
index 764c46a5a3396..343efa4c6ac3e 100644
--- a/onnxruntime/test/python/transformers/test_float16.py
+++ b/onnxruntime/test/python/transformers/test_float16.py
@@ -24,7 +24,7 @@ def _make_resize_model_opset11(num_resize_nodes=2, use_empty_names=True):
     """Create a minimal ONNX model with multiple Resize nodes (opset 11+).
 
     Resize opset 11+: inputs are [X, roi, scales, sizes].
-    Scales (index 2) and roi (index 1) must stay float32 per ALWAYS_FLOAT_INPUTS.
+    Scales (index 2) must stay float32 per ALWAYS_FLOAT_INPUTS; roi (index 1) allows fp16.
     """
     graph_input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 1, 4, 4])
     graph_output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 1, 8, 8])
@@ -170,6 +170,7 @@ def test_resize_opset11_scales_initializer_stays_fp32(self):
 
         When scales is an initializer and ALWAYS_FLOAT_INPUTS protects index 2,
         the initializer should not be converted to float16.
+        Roi (index 1) is NOT protected for opset 11+ and may be converted to fp16.
         """
         model = _make_resize_model_opset11(num_resize_nodes=1, use_empty_names=False)
         converted = convert_float_to_float16(model, keep_io_types=True)
@@ -183,13 +184,14 @@ def test_resize_opset11_scales_initializer_stays_fp32(self):
             "Resize scales initializer should stay float32",
         )
 
-        # The roi initializer should also remain float32 (index 1 is protected)
+        # Roi (index 1) is NOT protected for opset 11+ — the ONNX spec allows fp16 roi.
+        # The initializer may be converted to fp16 (it is not in always_float_inputs).
         roi_init = self._get_initializer(converted, "roi_0")
         self.assertIsNotNone(roi_init, "roi_0 initializer not found")
-        self.assertEqual(
+        self.assertIn(
             roi_init.data_type,
-            TensorProto.FLOAT,
-            "Resize roi initializer should stay float32",
+            (TensorProto.FLOAT, TensorProto.FLOAT16),
+            "Opset 11+ Resize roi is not protected — may be fp32 or fp16",
         )
 
     def test_resize_opset10_scales_initializer_stays_fp32(self):