From 8e40754c337dba30dc41d87e9e0807a2f121b63c Mon Sep 17 00:00:00 2001 From: b-shi Date: Wed, 6 Aug 2025 17:59:44 -0500 Subject: [PATCH] Fix cvt hazard --- projects/hipblaslt/tensilelite/Tensile/KernelWriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hipblaslt/tensilelite/Tensile/KernelWriter.py b/projects/hipblaslt/tensilelite/Tensile/KernelWriter.py index 95f3ae248b6..41a0b19a266 100644 --- a/projects/hipblaslt/tensilelite/Tensile/KernelWriter.py +++ b/projects/hipblaslt/tensilelite/Tensile/KernelWriter.py @@ -1445,7 +1445,7 @@ def calculateRangeAndUpdateCounter(itemCounter, writeCounters, length): iterCode.add(SNop(waitState=1, comment="VALU packing writes to be consumed by matrix instruction")) curPackIdx += 1 break - if not kernel["SourceSwap"] and kernel["UseF32XEmulation"]: + if kernel["UseF32XEmulation"]: # HACK add dummy waits btween swap and mfmas. TODO: improve pack scheduling to avoid this numDummy = 1 if kernel["MatrixInstM"] == 16 and kernel["MatrixInstK"] == 16 else 2 for numd in range(numDummy):