@@ -161,6 +161,9 @@ static bool isUnscaledLdSt(unsigned Opc) {
161161 case AArch64::LDURXi:
162162 case AArch64::LDURSWi:
163163 case AArch64::LDURHHi:
164+ case AArch64::LDURBBi:
165+ case AArch64::LDURSBWi:
166+ case AArch64::LDURSHWi:
164167 return true ;
165168 }
166169}
@@ -169,16 +172,39 @@ static bool isUnscaledLdSt(MachineInstr *MI) {
169172 return isUnscaledLdSt (MI->getOpcode ());
170173}
171174
// Returns the bitfield-move opcode that matches the given narrow load.
// Elsewhere in this file the result is passed to TII->get() when building the
// bitfield-extract instructions that follow a merged wider load.
//
//   LDRBBui / LDURBBi / LDRHHui / LDURHHi     -> AArch64::UBFMWri
//   LDRSBWui / LDURSBWi / LDRSHWui / LDURSHWi -> AArch64::SBFMWri
//
// MI must carry one of the eight opcodes above; any other opcode reaches
// llvm_unreachable.
175+ static unsigned getBitExtrOpcode (MachineInstr *MI) {
176+ switch (MI->getOpcode ()) {
177+ default :
// Only the byte/halfword loads listed below are expected here.
178+ llvm_unreachable (" Unexpected opcode." );
// Loads that getMatchingNonSExtOpcode returns unchanged (no sign extension):
// use the unsigned bitfield move.
179+ case AArch64::LDRBBui:
180+ case AArch64::LDURBBi:
181+ case AArch64::LDRHHui:
182+ case AArch64::LDURHHi:
183+ return AArch64::UBFMWri;
// Sign-extending loads: use the signed bitfield move so the extracted value
// keeps its sign.
184+ case AArch64::LDRSBWui:
185+ case AArch64::LDURSBWi:
186+ case AArch64::LDRSHWui:
187+ case AArch64::LDURSHWi:
188+ return AArch64::SBFMWri;
189+ }
190+ }
191+
// Returns true when Opc is one of the narrow load opcodes that the merge code
// combines into a single wider load: the byte and halfword loads, in both
// their scaled (LDR*ui) and unscaled (LDUR*i) forms, with or without sign
// extension. Every other opcode yields false.
172192static bool isSmallTypeLdMerge (unsigned Opc) {
173193 switch (Opc) {
174194 default :
175195 return false ;
// Halfword loads without sign extension.
176196 case AArch64::LDRHHui:
177197 case AArch64::LDURHHi:
// Byte loads without sign extension.
198+ case AArch64::LDRBBui:
199+ case AArch64::LDURBBi:
// Sign-extending halfword loads.
200+ case AArch64::LDRSHWui:
201+ case AArch64::LDURSHWi:
// Sign-extending byte loads.
202+ case AArch64::LDRSBWui:
203+ case AArch64::LDURSBWi:
178204 return true ;
179- // FIXME: Add other instructions (e.g, LDRBBui, LDURSHWi, LDRSHWui, etc.).
180205 }
181206}
207+
// Convenience overload: answers the same question for a MachineInstr by
// forwarding its opcode to the opcode-based overload.
182208static bool isSmallTypeLdMerge (MachineInstr *MI) {
183209 return isSmallTypeLdMerge (MI->getOpcode ());
184210}
@@ -189,10 +215,15 @@ static int getMemScale(MachineInstr *MI) {
189215 default :
190216 llvm_unreachable (" Opcode has unknown scale!" );
191217 case AArch64::LDRBBui:
218+ case AArch64::LDURBBi:
219+ case AArch64::LDRSBWui:
220+ case AArch64::LDURSBWi:
192221 case AArch64::STRBBui:
193222 return 1 ;
194223 case AArch64::LDRHHui:
195224 case AArch64::LDURHHi:
225+ case AArch64::LDRSHWui:
226+ case AArch64::LDURSHWi:
196227 case AArch64::STRHHui:
197228 return 2 ;
198229 case AArch64::LDRSui:
@@ -265,11 +296,21 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
265296 case AArch64::LDURSi:
266297 case AArch64::LDRHHui:
267298 case AArch64::LDURHHi:
299+ case AArch64::LDRBBui:
300+ case AArch64::LDURBBi:
268301 return Opc;
269302 case AArch64::LDRSWui:
270303 return AArch64::LDRWui;
271304 case AArch64::LDURSWi:
272305 return AArch64::LDURWi;
306+ case AArch64::LDRSBWui:
307+ return AArch64::LDRBBui;
308+ case AArch64::LDRSHWui:
309+ return AArch64::LDRHHui;
310+ case AArch64::LDURSBWi:
311+ return AArch64::LDURBBi;
312+ case AArch64::LDURSHWi:
313+ return AArch64::LDURHHi;
273314 }
274315}
275316
@@ -311,9 +352,17 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
311352 case AArch64::LDURSWi:
312353 return AArch64::LDPSWi;
313354 case AArch64::LDRHHui:
355+ case AArch64::LDRSHWui:
314356 return AArch64::LDRWui;
315357 case AArch64::LDURHHi:
358+ case AArch64::LDURSHWi:
316359 return AArch64::LDURWi;
360+ case AArch64::LDRBBui:
361+ case AArch64::LDRSBWui:
362+ return AArch64::LDRHHui;
363+ case AArch64::LDURBBi:
364+ case AArch64::LDURSBWi:
365+ return AArch64::LDURHHi;
317366 }
318367}
319368
@@ -535,16 +584,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
535584
536585 if (isSmallTypeLdMerge (Opc)) {
537586 // Change the scaled offset from small to large type.
538- if (!IsUnscaled)
587+ if (!IsUnscaled) {
588+ assert (((OffsetImm & 1 ) == 0 ) && " Unexpected offset to merge" );
539589 OffsetImm /= 2 ;
590+ }
540591 MachineInstr *RtNewDest = MergeForward ? I : Paired;
541592 // When merging small (< 32 bit) loads for big-endian targets, the order of
542593 // the component parts gets swapped.
543594 if (!Subtarget->isLittleEndian ())
544595 std::swap (RtMI, Rt2MI);
545596 // Construct the new load instruction.
546- // FIXME: currently we support only halfword unsigned load. We need to
547- // handle byte type, signed, and store instructions as well.
548597 MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
549598 NewMemMI = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
550599 TII->get (NewOpc))
@@ -564,35 +613,61 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
564613 DEBUG (dbgs () << " with instructions:\n " );
565614 DEBUG ((NewMemMI)->print (dbgs ()));
566615
616+ int Width = getMemScale (I) == 1 ? 8 : 16 ;
617+ int LSBLow = 0 ;
618+ int LSBHigh = Width;
619+ int ImmsLow = LSBLow + Width - 1 ;
620+ int ImmsHigh = LSBHigh + Width - 1 ;
567621 MachineInstr *ExtDestMI = MergeForward ? Paired : I;
568622 if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian ()) {
569- // Create the bitfield extract for high half .
623+ // Create the bitfield extract for high bits .
570624 BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
571- TII->get (AArch64::UBFMWri ))
625+ TII->get (getBitExtrOpcode (Rt2MI) ))
572626 .addOperand (getLdStRegOp (Rt2MI))
573627 .addReg (getLdStRegOp (RtNewDest).getReg ())
574- .addImm (16 )
575- .addImm (31 );
576- // Create the bitfield extract for low half.
577- BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
578- TII->get (AArch64::ANDWri))
579- .addOperand (getLdStRegOp (RtMI))
580- .addReg (getLdStRegOp (RtNewDest).getReg ())
581- .addImm (15 );
628+ .addImm (LSBHigh)
629+ .addImm (ImmsHigh);
630+ // Create the bitfield extract for low bits.
631+ if (RtMI->getOpcode () == getMatchingNonSExtOpcode (RtMI->getOpcode ())) {
632+ // For unsigned, prefer to use AND for low bits.
633+ BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
634+ TII->get (AArch64::ANDWri))
635+ .addOperand (getLdStRegOp (RtMI))
636+ .addReg (getLdStRegOp (RtNewDest).getReg ())
637+ .addImm (ImmsLow);
638+ } else {
639+ BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
640+ TII->get (getBitExtrOpcode (RtMI)))
641+ .addOperand (getLdStRegOp (RtMI))
642+ .addReg (getLdStRegOp (RtNewDest).getReg ())
643+ .addImm (LSBLow)
644+ .addImm (ImmsLow);
645+ }
582646 } else {
583- // Create the bitfield extract for low half.
584- BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
585- TII->get (AArch64::ANDWri))
586- .addOperand (getLdStRegOp (RtMI))
587- .addReg (getLdStRegOp (RtNewDest).getReg ())
588- .addImm (15 );
589- // Create the bitfield extract for high half.
647+ // Create the bitfield extract for low bits.
648+ if (RtMI->getOpcode () == getMatchingNonSExtOpcode (RtMI->getOpcode ())) {
649+ // For unsigned, prefer to use AND for low bits.
650+ BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
651+ TII->get (AArch64::ANDWri))
652+ .addOperand (getLdStRegOp (RtMI))
653+ .addReg (getLdStRegOp (RtNewDest).getReg ())
654+ .addImm (ImmsLow);
655+ } else {
656+ BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
657+ TII->get (getBitExtrOpcode (RtMI)))
658+ .addOperand (getLdStRegOp (RtMI))
659+ .addReg (getLdStRegOp (RtNewDest).getReg ())
660+ .addImm (LSBLow)
661+ .addImm (ImmsLow);
662+ }
663+
664+ // Create the bitfield extract for high bits.
590665 BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
591- TII->get (AArch64::UBFMWri ))
666+ TII->get (getBitExtrOpcode (Rt2MI) ))
592667 .addOperand (getLdStRegOp (Rt2MI))
593668 .addReg (getLdStRegOp (RtNewDest).getReg ())
594- .addImm (16 )
595- .addImm (31 );
669+ .addImm (LSBHigh )
670+ .addImm (ImmsHigh );
596671 }
597672 DEBUG (dbgs () << " " );
598673 DEBUG ((BitExtMI1)->print (dbgs ()));
@@ -1173,7 +1248,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
11731248 bool enableNarrowLdOpt) {
11741249 bool Modified = false ;
11731248 // Three transformations to do here:
1176- // 1) Find halfword loads that can be merged into a single 32-bit word load
1251+ // 1) Find narrow loads that can be converted into a single wider load
11771252 // with bitfield extract instructions.
11781253 // e.g.,
11791254 // ldrh w0, [x2]
@@ -1206,9 +1281,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
12061281 ++MBBI;
12071282 break ;
12081283 // Scaled instructions.
1284+ case AArch64::LDRBBui:
12091285 case AArch64::LDRHHui:
1286+ case AArch64::LDRSBWui:
1287+ case AArch64::LDRSHWui:
12101288 // Unscaled instructions.
1211- case AArch64::LDURHHi: {
1289+ case AArch64::LDURBBi:
1290+ case AArch64::LDURHHi:
1291+ case AArch64::LDURSBWi:
1292+ case AArch64::LDURSHWi: {
12121293 if (tryToMergeLdStInst (MBBI)) {
12131294 Modified = true ;
12141295 break ;
0 commit comments