Use LSB of vector when converting from vector to mask #116991
Closed
Not quite sure this is correct; it will likely break real-world code.
The general consideration is that what we expose to the end user is a "full vector", so if they do something like `Vector.ConditionalSelect(mask, x, y)` then they are expecting this to behave exactly as `(x & mask) | (y & ~mask)`, which is that it selects `x` when the corresponding bit in `mask` is set and `y` when it is clear.

The user isn't thinking about or considering the fact that on xarch the `blendv` instruction only checks the MSB, or any other similar nuance. They are writing a general-purpose cross-platform API where the contract is "bitwise" from the original vector.
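To make that contract concrete, a minimal sketch (the constants here are illustrative, not from the thread): `ConditionalSelect` must agree with the bitwise expansion even for a mask element that is neither `Zero` nor `AllBitsSet`.

```csharp
using System;
using System.Runtime.Intrinsics;

// A mask element that is neither Zero nor AllBitsSet still selects bitwise.
Vector128<uint> mask = Vector128.Create(0x0F00FF00u);
Vector128<uint> x    = Vector128.Create(0xAAAAAAAAu);
Vector128<uint> y    = Vector128.Create(0x55555555u);

Vector128<uint> selected = Vector128.ConditionalSelect(mask, x, y);
Vector128<uint> bitwise  = (x & mask) | (y & ~mask);

Console.WriteLine(selected == bitwise); // True: the contract is purely bitwise
```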
Correspondingly, we've documented that APIs which take a `mask` input are expecting `AllBitsSet` for that element to indicate `true` and `Zero` for that element to indicate `false`. There isn't a consideration for them that the underlying hardware happens to have a conversion to/from the internal mask/predicate register representation that can optimize it a particular way.

The reason we document this is that all the hardware that doesn't have a dedicated masking register concept has its comparisons return `AllBitsSet` or `Zero` per element, because it's the most effective thing to use when dealing purely with vector data and it makes this experience consistent regardless of where you are or what you're doing.

Now, there are some dedicated instructions like `Sse41.BlendVariable` which explicitly take a vector in hardware and which explicitly document that they only consider the MSB of each element, but these are special and require direct usage. They aren't something we always transform something like `Vector128.ConditionalSelect` into, because that would result in a different semantic for unknown vectors (anything that wasn't `AllBitsSet` vs `Zero` per element).

I believe the same consideration exists here. We cannot implicitly have the internal `ConvertVectorToMask` helper behave as if it is `(x & 1) != 0`, because that would break the rest of the logic flow and normalization considerations we've taken throughout the JIT and laid down for users.

More broadly, I'm not understanding what is meant by "the predicate registers conceptually only represents a bit (LSB) of the corresponding vector lanes".
The manual simply documents that a predicate register provides one bit for each byte of the vector. It then covers how those bits are interpreted for each element size:
Which is to say that for `Vector128<byte>` you get 16x 1-bit predicate values. For `Vector128<ushort>` you get 8x 2-bit predicate values, `V128<uint>` is 4x 4-bit, and `V128<ulong>` is 2x 8-bit.

For the multi-bit predicates, you get the consideration that only the lsb of the predicate element is actually checked. That is, for `V128<ushort>` you get the following, showing that only the lsb is relevant:

| predicate element bits | interpreted as |
| --- | --- |
| `00` | `false` |
| `01` | `true` |
| `10` | `false` |
| `11` | `true` |

Which would indicate that the conversion being done here isn't really "correct" in the first place. The existing logic was already producing a correct predicate, and more than that, a more useful predicate, because it can freely be reused for any smaller `T`. That is, because we only convert to `00` or `11` predicates, the `V128<short>` is equally reusable as a predicate for `V128<byte>` since it remains a valid bytewise predicate for the same comparison. If you normalized this to only `00` vs `01`, we'd need to then normalize that for use with `V128<byte>` (and so on for 4-bit and 8-bit predicate elements).

Given that, I'd think any "incorrect" behavior would potentially be in `ConvertMaskToVector`, where something could've produced a `Mask` for `ushort` elements that was `00`/`01`/`10`/`11` and where we'd want to produce a `Vector` that was still `Zero`/`AllBitsSet`/`Zero`/`AllBitsSet` for those respective values; but I believe the conversion codegen is already handling that nuance due to how it uses the mask as part of the selection.
So I don't think we have any consideration where a managed API is taking a `Vector<T>` where we aren't expecting it to be precisely `Zero` vs `Non-Zero` for the mask elements for conversion purposes, and where we aren't requiring that a mask-producing node normalize to `Zero` vs `AllBitsSet` per element.

We only have considerations for what internal nodes might produce when directly consumed, but those are never producing vectors where only the LSB is considered, as far as I can tell. There is only the case that a mask might only set the lsb of the predicate element, and we should already be handling that nuance.
Consider an XOR where the result is then used as a mask:
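The original snippet didn't survive extraction; the following is a hedged sketch of the same shape of problem (the constants are mine):

```csharp
using System.Runtime.Intrinsics;

// Two vectors that are both "true" under a non-zero rule...
Vector128<int> a = Vector128.Create(0b01);
Vector128<int> b = Vector128.Create(0b10);

// ...whose XOR is also non-zero, hence still "true".
Vector128<int> xored = a ^ b; // 0b11 per element

// Route 1 (XOR the vectors, then convert): non-zero(0b11) = true.
// Route 2 (convert, then predicate XOR):  true ^ true     = false.
// Sampling one fixed bit (for example the LSB) does commute with XOR,
// since bit(a ^ b) == bit(a) ^ bit(b).
```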
By the rules we've set up, this should be exactly the same as first converting the inputs to a mask and then XORing using the predicate XOR instruction. But today the two give different results.
Using the LSB for masks, the results now match.
In `simd.h` the code was using the MSB when converting from Vector to Mask. As of my #115566, it's now using the LSB. Either way, neither of these options matches the idea of using all bits set for a true mask element.

Previously we got away with this because we kept masks as `TrueVector` nodes, and so none of the constant folding optimisations ever got triggered. As of #115566 those optimisations are getting triggered by Fuzzlyn when passing all-constant vectors into `ConditionalSelect` and other such oddities.
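For illustration, a small model (not the actual simd.h code) of the candidate per-element rules, showing how they disagree on the same constant and why the constant-folded and runtime paths must pick the same rule:

```csharp
using System;

// Three candidate per-element rules for vector-to-mask conversion.
static bool MsbRule(uint element)     => (element & 0x8000_0000u) != 0;
static bool LsbRule(uint element)     => (element & 1u) != 0;
static bool NonZeroRule(uint element) => element != 0;

uint e = 0x0000_0001u; // the kind of element Fuzzlyn's constants produce

Console.WriteLine(MsbRule(e));     // False
Console.WriteLine(LsbRule(e));     // True
Console.WriteLine(NonZeroRule(e)); // True
// If constant folding uses one rule and codegen another, all-constant
// inputs fold to a different answer than the runtime path computes.
```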
This shouldn't be part of the existing rules/logic. Is there a place that is doing this in the Arm64 code already? If so, I think that's a bug and is what needs to be fixed.
This is the same general issue as `bool ^ bool`, where C# defines it as `0` (literal false) vs `1` (literal true) whereas IL defines it as `0` (literal false) vs `not-0` (conceptual true), and so simply doing `(x ^ y) == true` can be distinct from `(x ^ y) != false`, because doing a literal xor of booleans can produce something that isn't the same as the literal `true`.
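For illustration, a hedged sketch of that `bool` nuance, using `Unsafe` to mimic the unnormalized values IL-level code can hand us (pure C# only produces 0 or 1):

```csharp
using System;
using System.Runtime.CompilerServices;

byte raw = 2;
bool x = Unsafe.As<byte, bool>(ref raw); // non-zero, so "true" per IL rules
bool y = false;

bool z = x ^ y; // xor of the underlying bytes: 2 ^ 0 == 2
Console.WriteLine(Unsafe.As<bool, byte>(ref z)); // 2: not the literal true (1)
// A check emitted as "z != false" (compare with 0) succeeds, while one
// emitted as "z == true" (compare with 1) can fail: they aren't equivalent.
```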
For anything like `Sve.CompareEqual` we know that it produces a predicate result. This will actually only set the lsb of the respective "predicate element". However, we guarantee to the end user that if they end up viewing it as a vector it will only be `AllBitsSet` (predicate was true for that element) or `Zero` (predicate was false for that element), and so the `CvtMaskToVector` will do the right thing for that platform (on both xarch and arm64 there is 1 bit to check per element; the difference is how those bits are packed throughout the predicate register).

The user then doesn't have an API that lets them directly do `XorPredicate`; we only light that up via internal pattern recognition, and we know the base type of a given mask, so we know whether two masks are compatible. This is correspondingly why the morph logic that produces `XorMask` requires it to explicitly be in the shape of `Xor(CvtMaskToVector(mask), CvtMaskToVector(mask))` and requires the two masks to be for compatible SIMD sizes/base types, because otherwise it could produce a difference in behavior. -- In general it isn't safe to do `predicate1Bit ^ predicate2Bit` because they aren't logically compatible. There are some special scenarios involving constants where you can treat them as compatible due to being able to observe the bit patterns, but otherwise you have 1 bit per byte vs 1 bit per 2 bytes, so you'd get a potentially incorrect result whether it was consumed as a 1-bit or a 2-bit predicate element value.
As I understood it, the intention was that we would be able to optimise Xor to XorPredicate; otherwise we'd be missing SVE instructions. See #101294 (comment) and other places. Recently #114438 added support for this, but it was then turned off with #115566. #116854 attempts to enable it again, but it's stuck behind this PR.
Right. It should be fairly simple to update #116854 to work like that.
There are also the approved SVE APIs for AndNot and OrNot. The instructions for these are predicate-only. Using all-bits-set masks, I'm not sure whether we can safely implement them or not. #116628 adds those APIs, but it is stuck behind all the above work.
How should a constant vector be turned into a mask if the user passes it into an API that uses a mask?
`EvaluateSimdCvtVectorToMask()` in simd.h used to check the MSB, and now checks the LSB. What bit or bits should be turned into true? Only `AllBitsSet`?

Given we're getting into July now, I'm quite keen to get this all fixed up ASAP.
Yes, but only where the optimization is valid. We cannot do it when it would change semantics for the managed user.
In practice this hasn't been an issue for any existing code paths. Developers are not taking a mask for `Vector<byte>` and combining it with a mask for `Vector<int>` in the wild. -- They notably wouldn't have been able to combine them properly using raw predicates either, just due to the two masks being fundamentally incompatible, representing different state.

This seems like a fairly expensive/verbose approach. The xarch logic is far simpler, https://github.com/dotnet/runtime/blob/main/src/coreclr/jit/morph.cpp#L10009-L10147, and it catches all the important scenarios for very little expense.
The existing logic just looks for `BitwiseOp(CvtMaskToVec(op1), CvtMaskToVec(op2))` and transforms it to `BitwiseOpMask(op1, op2)` if the two inputs are compatible. This takes a handful of lines of code, and Arm64 should reasonably be able to light up with the same logic, just by adding the relevant `maskIntrinsicId` selections for Arm64 SVE. -- It and other paths that are handling the x64 KMASK setup were intentionally set up so that Arm64 predicates could be plugged in trivially when ready.

If they are only for predicates, they likely shouldn't be exposed. There's not really a need to expose them either, and I think the names are potentially confusing due to them being `nand`/`nor` -- we already have `AndNot` APIs and it is expected they behave like `BIC` does, which is `x & ~y`, whereas the predicates are doing `~(x & y)` and `~(x | y)` instead.
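A quick sketch of that distinction (the values are mine): `AndNot` behaves like `BIC`, which is a different operation from the predicate `nand`/`nor` forms.

```csharp
using System;
using System.Runtime.Intrinsics;

Vector128<uint> x = Vector128.Create(0b1100u);
Vector128<uint> y = Vector128.Create(0b1010u);

Vector128<uint> andNot = Vector128.AndNot(x, y); // x & ~y == 0b0100 (BIC)
Vector128<uint> nand   = ~(x & y);               // ~(0b1000): a different value
Vector128<uint> nor    = ~(x | y);               // ~(0b1110): different again

Console.WriteLine(andNot == (x & ~y)); // True: AndNot is BIC, not nand/nor
```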
x64 has a similar predicate-only concept with `xnor`, which does `~(x ^ y)`, but it not having a managed API isn't problematic because the user is more likely to write the natural pattern (which can also be optimized for other platforms or ISAs, like AdvSimd). We simply recognize `NOT(XOR(CvtMaskToVec(x), CvtMaskToVec(y)))` and translate it to `XnorMask(x, y)`. -- The normalization to `NOT(XOR(...))` also makes it easier to light up constant folding, comparison inversions, and other basic optimizations; so we intentionally decompose things like `V128.AndNot(x, y)` into `AND(x, NOT(y))` instead of importing it directly as `AND_NOT(x, y)`. We then construct the actual `AND_NOT` much later, such as in lowering, when we know that no other optimization work is needed and we just want to optimize the instruction selection.
We basically have 4 options here:

- `non-zero` is `true`, `zero` is `false` (how `bool` works)
- `all-bits-set` is `true`, anything else is `false`
- `msb set` is `true`, `msb clear` is `false` (`vpmov*2m` works on xarch)
- `lsb set` is `true`, `lsb clear` is `false` (`pmov` to predicate works on arm64)
The intended behavior is the first: `non-zero` is `true`, `zero` is `false`. This is the intended behavior because it corresponds to how `bool` already works, is the cheapest to emulate for unknown inputs, lets us trivially optimize for both xarch and arm64 for known inputs, and allows cross-platform determinism (including simplistic mapping to hardware without specialized predicate/masking support).
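As a sketch of why the first rule is cheap to emulate (the helper name is mine, not a runtime API): for an unknown vector, "non-zero is true" is just an inequality against zero, which every platform can produce as a canonical `AllBitsSet`/`Zero` mask (or a predicate on SVE) in one comparison.

```csharp
using System.Runtime.Intrinsics;

// Canonicalize an arbitrary vector to an AllBitsSet/Zero mask under the
// "non-zero is true" rule: one element-wise compare against zero, inverted.
static Vector128<uint> ToCanonicalMask(Vector128<uint> v) =>
    ~Vector128.Equals(v, Vector128<uint>.Zero);

// 0x00000001 is non-zero, so it canonicalizes to AllBitsSet in that lane.
Vector128<uint> m = ToCanonicalMask(Vector128.Create(0x00000001u));
```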
In practice the need to use this conversion should be rare, because users should not be taking arbitrary (especially non-constant) vectors and trying to use them as inputs into scenarios that expect predicates/masks. Most typically, code is using the output of some `Compare` or similar instruction which directly produced a mask, potentially doing some minor bitwise operations on it to combine it with other masks.
Agreed, but sadly Fuzzlyn doesn't agree - currently this is where all the failures are coming from, with code I doubt we'd ever see in the wild.
I'll get this fixed up this week. I'll draft a new PR to fix the mask handling. Then, combined with #116852, that should fix all the current issues.
Right. It's important we still handle it correctly, and it's great Fuzzlyn is catching these. It's just not critical for the codegen to be "peak efficiency", since it's not likely to happen in the real world.
Rather, we want to ensure that morph/folding/lowering catch the typical real-world cases and get the good codegen there. From what I've seen of the existing x64 support (which has been in production a couple of years now), we're getting most of the right stuff already, and most of that is set up for Arm64 to hook in for the same benefits.
I've put the required changes to `EvaluateSimdCvtVectorToMask()` in #116852.

This PR should probably be closed now.