@@ -42,6 +42,7 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
4242STATISTIC (NumUnscaledPairCreated,
4343 " Number of load/store from unscaled generated" );
4444STATISTIC (NumNarrowLoadsPromoted, " Number of narrow loads promoted" );
45+ STATISTIC (NumZeroStoresPromoted, " Number of narrow zero stores promoted" );
4546
4647static cl::opt<unsigned > ScanLimit (" aarch64-load-store-scan-limit" ,
4748 cl::init (20 ), cl::Hidden);
@@ -152,6 +153,8 @@ static bool isUnscaledLdSt(unsigned Opc) {
152153 case AArch64::STURSi:
153154 case AArch64::STURDi:
154155 case AArch64::STURQi:
156+ case AArch64::STURBBi:
157+ case AArch64::STURHHi:
155158 case AArch64::STURWi:
156159 case AArch64::STURXi:
157160 case AArch64::LDURSi:
@@ -189,6 +192,22 @@ static unsigned getBitExtrOpcode(MachineInstr *MI) {
189192 }
190193}
191194
195+ static bool isNarrowStore (unsigned Opc) {
196+ switch (Opc) {
197+ default :
198+ return false ;
199+ case AArch64::STRBBui:
200+ case AArch64::STURBBi:
201+ case AArch64::STRHHui:
202+ case AArch64::STURHHi:
203+ return true ;
204+ }
205+ }
206+
207+ static bool isNarrowStore (MachineInstr *MI) {
208+ return isNarrowStore (MI->getOpcode ());
209+ }
210+
192211static bool isNarrowLoad (unsigned Opc) {
193212 switch (Opc) {
194213 default :
@@ -219,12 +238,14 @@ static int getMemScale(MachineInstr *MI) {
219238 case AArch64::LDRSBWui:
220239 case AArch64::LDURSBWi:
221240 case AArch64::STRBBui:
241+ case AArch64::STURBBi:
222242 return 1 ;
223243 case AArch64::LDRHHui:
224244 case AArch64::LDURHHi:
225245 case AArch64::LDRSHWui:
226246 case AArch64::LDURSHWi:
227247 case AArch64::STRHHui:
248+ case AArch64::STURHHi:
228249 return 2 ;
229250 case AArch64::LDRSui:
230251 case AArch64::LDURSi:
@@ -278,6 +299,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
278299 case AArch64::STURDi:
279300 case AArch64::STRQui:
280301 case AArch64::STURQi:
302+ case AArch64::STRBBui:
303+ case AArch64::STURBBi:
304+ case AArch64::STRHHui:
305+ case AArch64::STURHHi:
281306 case AArch64::STRWui:
282307 case AArch64::STURWi:
283308 case AArch64::STRXui:
@@ -327,6 +352,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
327352 case AArch64::STRQui:
328353 case AArch64::STURQi:
329354 return AArch64::STPQi;
355+ case AArch64::STRBBui:
356+ return AArch64::STRHHui;
357+ case AArch64::STRHHui:
358+ return AArch64::STRWui;
359+ case AArch64::STURBBi:
360+ return AArch64::STURHHi;
361+ case AArch64::STURHHi:
362+ return AArch64::STURWi;
330363 case AArch64::STRWui:
331364 case AArch64::STURWi:
332365 return AArch64::STPWi;
@@ -681,17 +714,33 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
681714 return NextI;
682715 }
683716
684- // Handle Unscaled
685- if (IsUnscaled)
686- OffsetImm /= OffsetStride;
687-
688717 // Construct the new instruction.
689- MachineInstrBuilder MIB = BuildMI (*I->getParent (), InsertionPoint,
690- I->getDebugLoc (), TII->get (NewOpc))
691- .addOperand (getLdStRegOp (RtMI))
692- .addOperand (getLdStRegOp (Rt2MI))
693- .addOperand (BaseRegOp)
694- .addImm (OffsetImm);
718+ MachineInstrBuilder MIB;
719+ if (isNarrowStore (Opc)) {
720+ // Change the scaled offset from small to large type.
721+ if (!IsUnscaled) {
722+ assert (((OffsetImm & 1 ) == 0 ) && " Unexpected offset to merge" );
723+ OffsetImm /= 2 ;
724+ }
725+ MIB = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
726+ TII->get (NewOpc))
727+ .addOperand (getLdStRegOp (I))
728+ .addOperand (BaseRegOp)
729+ .addImm (OffsetImm);
730+ // Copy MachineMemOperands from the original stores.
731+ concatenateMemOperands (MIB, I, Paired);
732+ } else {
733+ // Handle Unscaled
734+ if (IsUnscaled)
735+ OffsetImm /= OffsetStride;
736+ MIB = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
737+ TII->get (NewOpc))
738+ .addOperand (getLdStRegOp (RtMI))
739+ .addOperand (getLdStRegOp (Rt2MI))
740+ .addOperand (BaseRegOp)
741+ .addImm (OffsetImm);
742+ }
743+
695744 (void )MIB;
696745
697746 // FIXME: Do we need/want to copy the mem operands from the source
@@ -830,6 +879,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
830879 unsigned Reg = getLdStRegOp (FirstMI).getReg ();
831880 unsigned BaseReg = getLdStBaseOp (FirstMI).getReg ();
832881 int Offset = getLdStOffsetOp (FirstMI).getImm ();
882+ bool IsNarrowStore = isNarrowStore (Opc);
883+
884+ // For narrow stores, find only the case where the stored value is WZR.
885+ if (IsNarrowStore && Reg != AArch64::WZR)
886+ return E;
833887
834888 // Early exit if the first instruction modifies the base register.
835889 // e.g., ldr x0, [x0]
@@ -840,7 +894,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
840894 // range, plus allow an extra one in case we find a later insn that matches
841895 // with Offset-1)
842896 int OffsetStride = IsUnscaled ? getMemScale (FirstMI) : 1 ;
843- if (!isNarrowLoad (Opc) && !inBoundsForPair (IsUnscaled, Offset, OffsetStride))
897+ if (!(isNarrowLoad (Opc) || IsNarrowStore) &&
898+ !inBoundsForPair (IsUnscaled, Offset, OffsetStride))
844899 return E;
845900
846901 // Track which registers have been modified and used between the first insn
@@ -907,9 +962,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
907962 continue ;
908963 }
909964
910- if (IsNarrowLoad) {
911- // If the alignment requirements of the larger type scaled load
912- // instruction can't express the scaled offset of the smaller type
965+ if (IsNarrowLoad || IsNarrowStore ) {
966+ // If the alignment requirements of the scaled wide load/store
967+ // instruction can't express the offset of the scaled narrow
913968 // input, bail and keep looking.
914969 if (!IsUnscaled && alignTo (MinOffset, 2 ) != MinOffset) {
915970 trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
@@ -929,7 +984,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
929984 // If the destination register of the loads is the same register, bail
930985 // and keep looking. A load-pair instruction with both destination
931986 // registers the same is UNPREDICTABLE and will result in an exception.
932- if (MayLoad && Reg == getLdStRegOp (MI).getReg ()) {
987+ // For narrow stores, allow only when the stored value is the same
988+ // (i.e., WZR).
989+ if ((MayLoad && Reg == getLdStRegOp (MI).getReg ()) ||
990+ (IsNarrowStore && Reg != getLdStRegOp (MI).getReg ())) {
933991 trackRegDefsUses (MI, ModifiedRegs, UsedRegs, TRI);
934992 MemInsns.push_back (MI);
935993 continue ;
@@ -1228,6 +1286,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
12281286 if (Paired != E) {
12291287 if (isNarrowLoad (MI)) {
12301288 ++NumNarrowLoadsPromoted;
1289+ } else if (isNarrowStore (MI)) {
1290+ ++NumZeroStoresPromoted;
12311291 } else {
12321292 ++NumPairCreated;
12331293 if (isUnscaledLdSt (MI))
@@ -1284,11 +1344,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
12841344 case AArch64::LDRHHui:
12851345 case AArch64::LDRSBWui:
12861346 case AArch64::LDRSHWui:
1347+ case AArch64::STRBBui:
1348+ case AArch64::STRHHui:
12871349 // Unscaled instructions.
12881350 case AArch64::LDURBBi:
12891351 case AArch64::LDURHHi:
12901352 case AArch64::LDURSBWi:
1291- case AArch64::LDURSHWi: {
1353+ case AArch64::LDURSHWi:
1354+ case AArch64::STURBBi:
1355+ case AArch64::STURHHi: {
12921356 if (tryToMergeLdStInst (MBBI)) {
12931357 Modified = true ;
12941358 break ;
0 commit comments