diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 80efd01391556..da344305f39d9 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1829,8 +1829,8 @@ void AArch64AsmPrinter::emitMOVK(Register Dest, uint64_t Imm, unsigned Shift) {
 
 void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
   Register DestReg = MI.getOperand(0).getReg();
-  if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround() &&
-      STI->isNeonAvailable()) {
+  if (STI->hasZeroCycleZeroingFPR64() &&
+      !STI->hasZeroCycleZeroingFPWorkaround() && STI->isNeonAvailable()) {
     // Convert H/S register to corresponding D register
     if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
       DestReg = AArch64::D0 + (DestReg - AArch64::H0);
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 55aea17d29f55..00cf039096d32 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -630,19 +630,18 @@ def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFP
 def FeatureZCRegMoveFPR32 : SubtargetFeature<"zcm-fpr32", "HasZeroCycleRegMoveFPR32", "true",
                                         "Has zero-cycle register moves for FPR32 registers">;
 
-def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
-                                        "Has zero-cycle zeroing instructions for generic registers">;
+def FeatureZCZeroingGPR64 : SubtargetFeature<"zcz-gpr64", "HasZeroCycleZeroingGPR64", "true",
+                                        "Has zero-cycle zeroing instructions for GPR64 registers">;
+
+def FeatureZCZeroingGPR32 : SubtargetFeature<"zcz-gpr32", "HasZeroCycleZeroingGPR32", "true",
+                                        "Has zero-cycle zeroing instructions for GPR32 registers">;
 
 // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0".
 // as movi is more efficient across all cores. Newer cores can eliminate
 // fmovs early and there is no difference with movi, but this not true for
 // all implementations.
-def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
-                                        "Has no zero-cycle zeroing instructions for FP registers">;
-
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
-                                        "Has zero-cycle zeroing instructions",
-                                        [FeatureZCZeroingGP]>;
+def FeatureNoZCZeroingFPR64 : SubtargetFeature<"no-zcz-fpr64", "HasZeroCycleZeroingFPR64", "false",
+                                        "Has no zero-cycle zeroing instructions for FPR64 registers">;
 
 /// ... but the floating-point version doesn't quite work in rare cases on older
 /// CPUs.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1aa180688dcdd..68f708c25a241 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5075,7 +5075,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
             .addImm(0)
             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
       }
-    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
+    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
           .addImm(0)
           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
@@ -5202,7 +5202,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
           .addReg(SrcReg, getKillRegState(KillSrc))
           .addImm(0)
           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
-    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
+    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
           .addImm(0)
           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index b7e08dbe7c792..e4a7d83e7f807 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -322,7 +322,8 @@ def TuneAppleA7  : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing,
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64,
                                     FeatureZCZeroingFPWorkaround]>;
 
 def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
@@ -336,7 +337,8 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
                                     "Apple A11", [
@@ -349,7 +351,8 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
                                     "Apple A12", [
@@ -362,7 +365,8 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
                                     "Apple A13", [
@@ -375,7 +379,8 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
                                     "Apple A14", [
@@ -393,7 +398,8 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
                                     "Apple A15", [
@@ -411,7 +417,8 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
                                     "Apple A16", [
@@ -429,7 +436,8 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
                                     "Apple A17", [
@@ -447,7 +455,8 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
                                     FeatureStorePairSuppress,
                                     FeatureZCRegMoveGPR64,
                                     FeatureZCRegMoveFPR128,
-                                    FeatureZCZeroing]>;
+                                    FeatureZCZeroingGPR32,
+                                    FeatureZCZeroingGPR64]>;
 
 def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
                                      "Apple M4", [
@@ -464,8 +473,8 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
                                      FeatureFuseLiterals,
                                      FeatureZCRegMoveGPR64,
                                      FeatureZCRegMoveFPR128,
-                                     FeatureZCZeroing
-                                     ]>;
+                                     FeatureZCZeroingGPR32,
+                                     FeatureZCZeroingGPR64]>;
 
 def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                     "Samsung Exynos-M3 processors",
@@ -497,13 +506,15 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
                                      FeatureStorePairSuppress,
                                      FeatureALULSLFast,
                                      FeaturePostRAScheduler,
-                                     FeatureZCZeroing]>;
+                                     FeatureZCZeroingGPR32,
+                                     FeatureZCZeroingGPR64]>;
 
 def TuneKryo    : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
                                    "Qualcomm Kryo processors", [
                                    FeaturePostRAScheduler,
                                    FeaturePredictableSelectIsExpensive,
-                                   FeatureZCZeroing,
+                                   FeatureZCZeroingGPR32,
+                                   FeatureZCZeroingGPR64,
                                    FeatureALULSLFast,
                                    FeatureStorePairSuppress]>;
 
@@ -511,7 +522,8 @@ def TuneFalkor  : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
                                    "Qualcomm Falkor processors", [
                                    FeaturePostRAScheduler,
                                    FeaturePredictableSelectIsExpensive,
-                                   FeatureZCZeroing,
+                                   FeatureZCZeroingGPR32,
+                                   FeatureZCZeroingGPR64,
                                    FeatureStorePairSuppress,
                                    FeatureALULSLFast,
                                    FeatureSlowSTRQro]>;
@@ -607,7 +619,8 @@ def TuneSaphira  : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
                                    "Qualcomm Saphira processors", [
                                    FeaturePostRAScheduler,
                                    FeaturePredictableSelectIsExpensive,
-                                   FeatureZCZeroing,
+                                   FeatureZCZeroingGPR32,
+                                   FeatureZCZeroingGPR64,
                                    FeatureStorePairSuppress,
                                    FeatureALULSLFast]>;
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir b/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir
index 76b5b76130657..284d624a4e68f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir
+++ b/llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir
@@ -1,15 +1,15 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,-zcm-gpr64,-zcz" %s \
+# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,-zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \
 # RUN:    | FileCheck --check-prefix=CHECK-NO-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ %s
-# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,-zcm-gpr64,-zcz" %s \
+# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,-zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \
 # RUN:    | FileCheck --check-prefix=CHECK-ZCM-GPR32-NO-ZCM-GPR64-NO-ZCZ %s
-# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,+zcm-gpr64,-zcz" %s \
+# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,+zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \
 # RUN:    | FileCheck --check-prefix=CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ %s
-# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,+zcm-gpr64,-zcz" %s \
+# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,+zcm-gpr64,-zcz-gpr32,-zcz-gpr64" %s \
 # RUN:    | FileCheck --check-prefix=CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ %s
-# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,-zcm-gpr64,+zcz" %s \
+# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="-zcm-gpr32,-zcm-gpr64,+zcz-gpr32,+zcz-gpr64" %s \
 # RUN:    | FileCheck --check-prefix=CHECK-NO-ZCM-ZCZ %s
-# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,+zcm-gpr64,+zcz" %s \
+# RUN: llc -o - -mtriple=arm64-apple-ios -run-pass=postrapseudos -simplify-mir -verify-machineinstrs -mattr="+zcm-gpr32,+zcm-gpr64,+zcz-gpr32,+zcz-gpr64" %s \
 # RUN:    | FileCheck --check-prefix=CHECK-ZCM-ZCZ %s
 
 --- |
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-fpr.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-fpr.ll
new file mode 100644
index 0000000000000..2a75976d58549
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-fpr.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+no-zcz-fpr64 | FileCheck %s -check-prefixes=ALL,NOZCZ-FPR64
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+no-zcz-fpr64,+fullfp16 | FileCheck %s -check-prefixes=ALL,NOZCZ-FPR64-FULLFP16
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s -check-prefixes=ALL,ZCZ-FPR64
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+fullfp16 | FileCheck %s -check-prefixes=ALL,ZCZ-FPR64
+; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s -check-prefixes=ALL,FP-WORKAROUND
+; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=ALL,ZCZ-FPR64
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s -check-prefixes=ALL,ZCZ-FPR64
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=kryo | FileCheck %s -check-prefixes=ALL,ZCZ-FPR64
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor | FileCheck %s -check-prefixes=ALL,ZCZ-FPR64
+
+define half @tf16() {
+entry:
+; ALL-LABEL: tf16:
+; FP-WORKAROUND: mov s0, wzr
+; NOZCZ-FPR64: mov s0, wzr
+; NOZCZ-FPR64-FULLFP16: mov h0, wzr
+; ZCZ-FPR64: movi d0, #0
+  ret half 0.0
+}
+
+define float @tf32() {
+entry:
+; ALL-LABEL: tf32:
+; FP-WORKAROUND: mov s0, wzr
+; NOZCZ-FPR64: mov s0, wzr
+; ZCZ-FPR64: movi d0, #0
+  ret float 0.0
+}
+
+define double @td64() {
+entry:
+; ALL-LABEL: td64:
+; FP-WORKAROUND: mov d0, xzr
+; NOZCZ-FPR64: mov d0, xzr
+; ZCZ-FPR64: movi d0, #0
+  ret double 0.0
+}
+
+define <8 x i8> @tv8i8() {
+entry:
+; ALL-LABEL: tv8i8:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+}
+
+define <4 x i16> @tv4i16() {
+entry:
+; ALL-LABEL: tv4i16:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 0>
+}
+
+define <2 x i32> @tv2i32() {
+entry:
+; ALL-LABEL: tv2i32:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <2 x i32> <i32 0, i32 0>
+}
+
+define <2 x float> @tv2f32() {
+entry:
+; ALL-LABEL: tv2f32:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <2 x float> <float 0.0, float 0.0>
+}
+
+define <16 x i8> @tv16i8() {
+entry:
+; ALL-LABEL: tv16i8:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+}
+
+define <8 x i16> @tv8i16() {
+entry:
+; ALL-LABEL: tv8i16:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+}
+
+define <4 x i32> @tv4i32() {
+entry:
+; ALL-LABEL: tv4i32:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+}
+
+define <2 x i64> @tv2i64() {
+entry:
+; ALL-LABEL: tv2i64:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <2 x i64> <i64 0, i64 0>
+}
+
+define <4 x float> @tv4f32() {
+entry:
+; ALL-LABEL: tv4f32:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+}
+
+define <2 x double> @tv2d64() {
+entry:
+; ALL-LABEL: tv2d64:
+; FP-WORKAROUND: movi{{(.16b)?}} v0{{(.16b)?}}, #0
+; NOZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+; ZCZ-FPR64: movi{{(.2d)?}} v0{{(.2d)?}}, #0
+  ret <2 x double> <double 0.0, double 0.0>
+}
+
+; We used to produce spills+reloads for a Q register with zero cycle zeroing
+; enabled.
+; ALL-LABEL: foo:
+; ALL-NOT: str q{{[0-9]+}}
+; ALL-NOT: ldr q{{[0-9]+}}
+define double @foo(i32 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %phi0 = phi double [ 1.0, %entry ], [ %v0, %for.body ]
+  %i.076 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %conv21 = sitofp i32 %i.076 to double
+  %call = tail call fast double @sin(double %conv21)
+  %cmp.i = fcmp fast olt double %phi0, %call
+  %v0 = select i1 %cmp.i, double %call, double %phi0
+  %inc = add nuw nsw i32 %i.076, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret double %v0
+}
+
+declare double @sin(double)
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-gpr.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-gpr.ll
new file mode 100644
index 0000000000000..dc643062d8697
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing-gpr.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gpr32 | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gpr64 | FileCheck %s -check-prefixes=ALL,ZCZ-GPR64
+; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR
+; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64
+; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s -check-prefixes=ALL,NOZCZ-GPR
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=kryo | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor | FileCheck %s -check-prefixes=ALL,ZCZ-GPR32,ZCZ-GPR64
+
+define i8 @ti8() {
+entry:
+; ALL-LABEL: ti8:
+; NOZCZ-GPR: mov w0, wzr
+; ZCZ-GPR32: mov w0, #0
+  ret i8 0
+}
+
+define i16 @ti16() {
+entry:
+; ALL-LABEL: ti16:
+; NOZCZ-GPR: mov w0, wzr
+; ZCZ-GPR32: mov w0, #0
+  ret i16 0
+}
+
+define i32 @ti32() {
+entry:
+; ALL-LABEL: ti32:
+; NOZCZ-GPR: mov w0, wzr
+; ZCZ-GPR32: mov w0, #0
+  ret i32 0
+}
+
+define i64 @ti64() {
+entry:
+; ALL-LABEL: ti64:
+; NOZCZ-GPR: mov x0, xzr
+; ZCZ-GPR64: mov x0, #0
+  ret i64 0
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
deleted file mode 100644
index 6c3cd4766d799..0000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ /dev/null
@@ -1,231 +0,0 @@
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=-zcz-gp,+no-zcz-fp      | FileCheck %s -check-prefixes=ALL,NONEGP,NONEFP
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz                    | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz -mattr=+fullfp16   | FileCheck %s -check-prefixes=ALL,ZEROGP,ZERO16
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gp,+no-zcz-fp      | FileCheck %s -check-prefixes=ALL,ZEROGP,NONEFP
-; RUN: llc < %s -mtriple=aarch64-linux-gnu                                | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP
-; RUN: llc < %s -mtriple=arm64-apple-ios   -mcpu=cyclone                  | FileCheck %s -check-prefixes=ALL,ZEROGP,NONEFP
-; RUN: llc < %s -mtriple=arm64-linux-gnu   -mcpu=apple-a10                | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
-; RUN: llc < %s -mtriple=arm64-apple-ios   -mcpu=cyclone -mattr=+fullfp16 | FileCheck %s -check-prefixes=ALL,ZEROGP,NONE16
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=exynos-m3                | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=kryo                     | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor                   | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
-
-declare void @bar(half, float, double, <2 x double>)
-declare void @bari(i32, i32)
-declare void @barl(i64, i64)
-declare void @barf(float, float)
-
-define void @t1() nounwind ssp {
-entry:
-; ALL-LABEL: t1:
-; ALL-NOT: fmov
-; NONEFP-DAG: fmov s0, wzr
-; NONEFP-DAG: fmov s1, wzr
-; NONEFP-DAG: fmov d2, xzr
-; NONEFP-DAG: movi{{(.16b)?}} v3{{(.2d)?}}, #0
-; NONE16: fmov h0, wzr
-; NONE16: fmov s1, wzr
-; NONE16: fmov d2, xzr
-; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0
-; ZEROFP-DAG: movi d0, #0
-; ZEROFP-DAG: movi d1, #0
-; ZEROFP-DAG: movi d2, #0
-; ZEROFP-DAG: movi v3.2d, #0
-; ZERO16: movi d0, #0
-; ZERO16: movi d1, #0
-; ZERO16: movi d2, #0
-; ZERO16: movi v3.2d, #0
-  tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind
-  ret void
-}
-
-define void @t2() nounwind ssp {
-entry:
-; ALL-LABEL: t2:
-; NONEGP: mov w0, wzr
-; NONEGP: mov w1, wzr
-; ZEROGP: mov w0, #0
-; ZEROGP: mov w1, #0
-  tail call void @bari(i32 0, i32 0) nounwind
-  ret void
-}
-
-define void @t3() nounwind ssp {
-entry:
-; ALL-LABEL: t3:
-; NONEGP: mov x0, xzr
-; NONEGP: mov x1, xzr
-; ZEROGP: mov x0, #0
-; ZEROGP: mov x1, #0
-  tail call void @barl(i64 0, i64 0) nounwind
-  ret void
-}
-
-define void @t4() nounwind ssp {
-; ALL-LABEL: t4:
-; NONEFP: fmov s{{[0-3]+}}, wzr
-; NONEFP: fmov s{{[0-3]+}}, wzr
-; ZEROFP: movi d0, #0
-; ZEROFP: movi d1, #0
-  tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
-  ret void
-}
-
-declare double @sin(double)
-
-; We used to produce spills+reloads for a Q register with zero cycle zeroing
-; enabled.
-; ALL-LABEL: foo:
-; ALL-NOT: str q{{[0-9]+}}
-; ALL-NOT: ldr q{{[0-9]+}}
-define double @foo(i32 %n) {
-entry:
-  br label %for.body
-
-for.body:
-  %phi0 = phi double [ 1.0, %entry ], [ %v0, %for.body ]
-  %i.076 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %conv21 = sitofp i32 %i.076 to double
-  %call = tail call fast double @sin(double %conv21)
-  %cmp.i = fcmp fast olt double %phi0, %call
-  %v0 = select i1 %cmp.i, double %call, double %phi0
-  %inc = add nuw nsw i32 %i.076, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %for.end
-
-for.end:
-  ret double %v0
-}
-
-define <2 x i64> @t6() {
-; ALL-LABEL: t6:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <2 x i64> zeroinitializer
-}
-
-define i1 @ti1() {
-entry:
-; ALL-LABEL: ti1:
-; NONEGP: mov w0, wzr
-; ZEROGP: mov w0, #0
-  ret i1 false
-}
-
-define i8 @ti8() {
-entry:
-; ALL-LABEL: ti8:
-; NONEGP: mov w0, wzr
-; ZEROGP: mov w0, #0
-  ret i8 0
-}
-
-define i16 @ti16() {
-entry:
-; ALL-LABEL: ti16:
-; NONEGP: mov w0, wzr
- ; ZEROGP: mov w0, #0
-  ret i16 0
-}
-
-define i32 @ti32() {
-entry:
-; ALL-LABEL: ti32:
-; NONEGP: mov w0, wzr
-; ZEROGP: mov w0, #0
-  ret i32 0
-}
-
-define i64 @ti64() {
-entry:
-; ALL-LABEL: ti64:
-; NONEGP: mov x0, xzr
-; ZEROGP: mov x0, #0
-  ret i64 0
-}
-
-define float @tf32() {
-entry:
-; ALL-LABEL: tf32:
-; NONEFP: mov s0, wzr
-; ZEROFP: movi d0, #0
-  ret float 0.0
-}
-
-define double @td64() {
-entry:
-; ALL-LABEL: td64:
-; NONEFP: mov d0, xzr
-; ZEROFP: movi d0, #0
-  ret double 0.0
-}
-
-define <8 x i8> @tv8i8() {
-entry:
-; ALL-LABEL: tv8i8:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
-}
-
-define <4 x i16> @tv4i16() {
-entry:
-; ALL-LABEL: tv4i16:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <4 x i16> <i16 0, i16 0, i16 0, i16 0>
-}
-
-define <2 x i32> @tv2i32() {
-entry:
-; ALL-LABEL: tv2i32:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <2 x i32> <i32 0, i32 0>
-}
-
-define <2 x float> @tv2f32() {
-entry:
-; ALL-LABEL: tv2f32:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <2 x float> <float 0.0, float 0.0>
-}
-
-define <16 x i8> @tv16i8() {
-entry:
-; ALL-LABEL: tv16i8:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
-}
-
-define <8 x i16> @tv8i16() {
-entry:
-; ALL-LABEL: tv8i16:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-}
-
-define <4 x i32> @tv4i32() {
-entry:
-; ALL-LABEL: tv4i32:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <4 x i32> <i32 0, i32 0, i32 0, i32 0>
-}
-
-define <2 x i64> @tv2i64() {
-entry:
-; ALL-LABEL: tv2i64:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <2 x i64> <i64 0, i64 0>
-}
-
-define <4 x float> @tv4f32() {
-entry:
-; ALL-LABEL: tv4f32:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
-}
-
-define <2 x double> @tv2d64() {
-entry:
-; ALL-LABEL: tv2d64:
-; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
-  ret <2 x double> <double 0.0, double 0.0>
-}
-
diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll
index 58793bf19f3a6..68873f9b7c3de 100644
--- a/llvm/test/CodeGen/AArch64/f16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-NOZCZ
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-ZCZ
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+no-zcz-fpr64 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-NOZCZ
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+zcz-gpr32,+zcz-gpr64 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-ZCZ
 ; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFP16
 
 define half @Const0() {