[Compressed Tensors] Always clone output for compile robustness (#26849)

kylesayrs · mgoin · web-flow · commit a5464dcf92bb · 2025-10-16T19:29:59.000Z
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py b/vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
@@ -163,7 +163,7 @@ def apply(
         if self.output_transform is not None:
             for part_id, (start, length) in enumerate(self.partition_ranges):
                 x[:, start : start + length] = self.output_transform(
-                    x[:, start : start + length].contiguous(), part_id=part_id
+                    x[:, start : start + length].clone(), part_id=part_id
                 )
 
         return x

Original file line number	Diff line number	Diff line change
`@@ -163,7 +163,7 @@ def apply(`
`163`	`163`	`if self.output_transform is not None:`
`164`	`164`	`for part_id, (start, length) in enumerate(self.partition_ranges):`
`165`	`165`	`x[:, start : start + length] = self.output_transform(`
`166`		`- x[:, start : start + length].contiguous(), part_id=part_id`
	`166`	`+ x[:, start : start + length].clone(), part_id=part_id`
`167`	`167`	`)`
`168`	`168`
`169`	`169`	`return x`