
Commit cf78500

fix(compression): insert DECODE per consumer for alt decompression memory

Insert a separate DECODE immediately before each consumer of a compressed
tensor, rather than sharing one DECODE output among all consumers.

The interpreter's alternate decompression memory resets its allocation offset
for each DECODE's Prepare, causing all DECODE outputs to be allocated at the
same address. If two consumers share one DECODE and another DECODE runs
between them, the intervening DECODE overwrites the shared output, corrupting
data for the second consumer.

Update test expectations to reflect the new DECODE-per-consumer behavior and
change the integration test from expected-failure to expected-pass.
1 parent ed35ee0 commit cf78500
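
To make the failure mode concrete, the following is a minimal, self-contained Python sketch (toy data only, not TFLM code) of the behavior the commit message describes: each DECODE's Prepare resets the alternate decompression memory's allocation offset, so every DECODE output lands at the same address, and a shared output survives only until the next DECODE runs.

# Toy simulation (not TFLM code) of alternate decompression memory whose
# allocation offset resets for every DECODE's Prepare.
alt_memory = [None] * 8


def run_decode(decoded_values):
  """Decode into the arena; the offset is reset to 0 for every DECODE."""
  offset = 0
  alt_memory[offset:offset + len(decoded_values)] = decoded_values
  return offset


# Shared-DECODE schedule: DECODE(w), FC1 reads w, DECODE(x), FC2 reads w.
w_at = run_decode([1, 2, 3])            # decode the shared weights w
fc1_sees = alt_memory[w_at:w_at + 3]    # FC1 reads [1, 2, 3] -- correct
run_decode([9, 9, 9])                   # an unrelated DECODE runs next
fc2_sees = alt_memory[w_at:w_at + 3]    # FC2 reads [9, 9, 9] -- corrupted
assert fc1_sees == [1, 2, 3] and fc2_sees == [9, 9, 9]

# Per-consumer schedule, as in this commit: re-decode w immediately before
# FC2, so FC2 reads correct data again.
w_at = run_decode([1, 2, 3])
assert alt_memory[w_at:w_at + 3] == [1, 2, 3]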

3 files changed: +46 -32 lines

tensorflow/lite/micro/compression/compression_integration_test.py

Lines changed: 1 addition & 5 deletions
@@ -287,7 +287,6 @@ class AltDecompressionMemoryTest(tf.test.TestCase):
   between multiple operators and alternate decompression memory is enabled.
   """
 
-  @unittest.expectedFailure
   def test_shared_compressed_tensor_with_alt_memory(self):
     """Verify correct results when a shared compressed tensor is used with alt
     decompression memory.
@@ -301,12 +300,9 @@ def test_shared_compressed_tensor_with_alt_memory(self):
     DECODE outputs are allocated at the same address, so they overwrite each
     other. A DECODE output can only be used until the next DECODE runs.
 
-    To work around this limitation, the DECODE insertion code must insert a
+    To work around this limitation, the DECODE insertion code inserts a
     separate DECODE immediately before each consumer of a compressed tensor,
     rather than sharing one DECODE output among all consumers.
-
-    This test is expected to fail because the current insertion code does not
-    yet implement this workaround.
     """
     flatbuffer = _build_shared_weights_model()
 
tensorflow/lite/micro/compression/decode_insert.py

Lines changed: 27 additions & 17 deletions
@@ -144,11 +144,19 @@ def insert_decode_operators(
   This function modifies the model in-place, inserting DECODE operators
   before any operator that uses a compressed tensor as input.
 
-  For each compressed tensor:
+  A separate DECODE is inserted before each consumer, rather than sharing one
+  DECODE output among all consumers. This is required because the interpreter's
+  alternate decompression memory resets its allocation offset for each DECODE's
+  Prepare, causing all DECODE outputs to be allocated at the same address. If
+  two consumers share one DECODE and another DECODE runs between them, the
+  intervening DECODE overwrites the shared output, corrupting data for the
+  second consumer.
+
+  For each consumer of a compressed tensor:
   1. Create an ancillary data tensor containing DCM + type-specific data
   2. Create an output tensor with the same shape/dtype as the decoded tensor
-  3. Insert a DECODE operator before the first consumer
-  4. Rewire all consumers to use the DECODE output instead of the encoded tensor
+  3. Insert a DECODE operator immediately before the consumer
+  4. Rewire the consumer to use the DECODE output
 
   Args:
     model: The model to modify in-place.
@@ -180,23 +188,27 @@ def insert_decode_operators(
   for sg_idx, tensor_infos in by_subgraph.items():
     subgraph = model.subgraphs[sg_idx]
 
-    # Sort by earliest consumer position (process in reverse order to maintain
-    # valid positions as we insert)
-    tensor_infos.sort(
-        key=lambda info: _find_earliest_consumer_position(
-            subgraph, info.consumers),
+    # Collect all (consumer, tensor_info) pairs and sort by consumer position
+    # in reverse order so insertions don't invalidate positions
+    consumer_pairs = []
+    for info in tensor_infos:
+      for consumer in info.consumers:
+        consumer_pairs.append((consumer, info))
+
+    consumer_pairs.sort(
+        key=lambda pair: subgraph.operators.index(pair[0]),
         reverse=True,
     )
 
-    for info in tensor_infos:
-      # Create ancillary data tensor
+    for consumer, info in consumer_pairs:
+      # Create ancillary data tensor (one per DECODE)
       ancillary_tensor = _create_ancillary_tensor(
           info.ancillary_data,
           info.tensor,
       )
       subgraph.tensors.append(ancillary_tensor)
 
-      # Create output tensor
+      # Create output tensor (one per DECODE)
       output_tensor = _create_output_tensor(info.tensor)
       subgraph.tensors.append(output_tensor)
 
@@ -208,11 +220,9 @@ def insert_decode_operators(
           outputs=[output_tensor],
       )
 
-      # Find insertion position (before first consumer)
-      insert_pos = _find_earliest_consumer_position(subgraph, info.consumers)
-
-      # Insert DECODE operator
+      # Insert DECODE immediately before this consumer
+      insert_pos = subgraph.operators.index(consumer)
       subgraph.operators.insert(insert_pos, decode_op)
 
-      # Rewire all consumers to use the decoded output
-      _rewire_consumers(info.consumers, info.tensor, output_tensor)
+      # Rewire only this consumer to use the decoded output
+      _rewire_consumers([consumer], info.tensor, output_tensor)
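
The reverse sort above relies on a basic list property noted in the diff's own comment: inserting into a list shifts only the elements at or after the insertion point, so handling consumers from last to first keeps the earlier positions valid. A tiny standalone illustration with toy operator names (not the real model classes):

# Inserting from the back keeps earlier insertion positions valid.
operators = ["FC_A", "ADD", "FC_B"]   # pretend FC_A and FC_B consume the tensor
consumer_positions = [0, 2]           # their indices in the operator list

for pos in sorted(consumer_positions, reverse=True):
  operators.insert(pos, "DECODE")     # insert immediately before the consumer

assert operators == ["DECODE", "FC_A", "ADD", "DECODE", "FC_B"]

The _rewire_consumers helper itself is not part of this diff; judging from its call sites here and the assertions in the tests below, it presumably swaps the encoded tensor for the DECODE output in each listed consumer's inputs. A hypothetical sketch, assuming operators expose a mutable inputs list and tensors compare by identity:

# Hypothetical sketch of _rewire_consumers (its real implementation is not
# shown in this diff): replace the old encoded tensor with the new decoded
# tensor wherever it appears in a consumer's inputs.
def _rewire_consumers(consumers, old_tensor, new_tensor):
  for op in consumers:
    op.inputs = [new_tensor if t is old_tensor else t for t in op.inputs]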

tensorflow/lite/micro/compression/decode_insert_test.py

Lines changed: 18 additions & 10 deletions
@@ -224,8 +224,8 @@ def test_consumer_rewired_to_decode_output(self):
     # Original weights tensor should NOT be in FC inputs
     self.assertNotIn(weights_tensor, fc_op.inputs)
 
-  def test_shared_tensor_single_decode(self):
-    """Tensor used by multiple ops gets single DECODE, both rewired."""
+  def test_shared_tensor_decode_per_consumer(self):
+    """Tensor used by multiple ops gets separate DECODE for each consumer."""
     model = _build_shared_weights_model()
     weights_tensor = model.subgraphs[0].tensors[0]
 
@@ -241,17 +241,25 @@ def test_shared_tensor_single_decode(self):
 
     sg = model.subgraphs[0]
 
-    # Should have 3 operators: 1 DECODE + 2 FC
-    self.assertEqual(len(sg.operators), 3)
+    # Should have 4 operators: 2 DECODEs + 2 FCs (DECODE before each FC)
+    self.assertEqual(len(sg.operators), 4)
     self.assertEqual(sg.operators[0].opcode, tflite.BuiltinOperator.CUSTOM)
+    self.assertEqual(sg.operators[1].opcode,
+                     tflite.BuiltinOperator.FULLY_CONNECTED)
+    self.assertEqual(sg.operators[2].opcode, tflite.BuiltinOperator.CUSTOM)
+    self.assertEqual(sg.operators[3].opcode,
+                     tflite.BuiltinOperator.FULLY_CONNECTED)
 
-    decode_op = sg.operators[0]
+    decode_op1 = sg.operators[0]
     fc_op1 = sg.operators[1]
-    fc_op2 = sg.operators[2]
-
-    # Both FCs should use DECODE's output
-    self.assertIs(fc_op1.inputs[1], decode_op.outputs[0])
-    self.assertIs(fc_op2.inputs[1], decode_op.outputs[0])
+    decode_op2 = sg.operators[2]
+    fc_op2 = sg.operators[3]
+
+    # Each FC should use its own DECODE's output
+    self.assertIs(fc_op1.inputs[1], decode_op1.outputs[0])
+    self.assertIs(fc_op2.inputs[1], decode_op2.outputs[0])
+    # The two DECODEs should have different outputs
+    self.assertIsNot(decode_op1.outputs[0], decode_op2.outputs[0])
 
   def test_ancillary_tensor_contains_dcm(self):
     """Ancillary tensor data contains valid DCM header."""
