@@ -7373,9 +7373,8 @@ static bool getMiscPatterns(MachineInstr &Root,
73737373}
73747374
73757375static bool getGatherPattern (MachineInstr &Root,
7376- SmallVectorImpl<unsigned > &Patterns,
7377- unsigned LoadLaneOpCode,
7378- unsigned NumLanes) {
7376+ SmallVectorImpl<unsigned > &Patterns,
7377+ unsigned LoadLaneOpCode, unsigned NumLanes) {
73797378 const MachineRegisterInfo &MRI = Root.getMF ()->getRegInfo ();
73807379 const TargetRegisterInfo *TRI =
73817380 Root.getMF ()->getSubtarget ().getRegisterInfo ();
@@ -7417,17 +7416,17 @@ static bool getGatherPattern(MachineInstr &Root,
74177416 return false ;
74187417
74197418 switch (NumLanes) {
7420- case 4 :
7421- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i32);
7422- break ;
7423- case 8 :
7424- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i16);
7425- break ;
7426- case 16 :
7427- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i8);
7428- break ;
7429- default :
7430- llvm_unreachable (" Got bad number of lanes for gather pattern." );
7419+ case 4 :
7420+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i32);
7421+ break ;
7422+ case 8 :
7423+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i16);
7424+ break ;
7425+ case 16 :
7426+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i8);
7427+ break ;
7428+ default :
7429+ llvm_unreachable (" Got bad number of lanes for gather pattern." );
74317430 }
74327431
74337432 return true ;
@@ -7441,23 +7440,24 @@ static bool getLoadPatterns(MachineInstr &Root,
74417440
74427441 // The pattern searches for loads into single lanes.
74437442 switch (Root.getOpcode ()) {
7444- case AArch64::LD1i32:
7445- return getGatherPattern (Root, Patterns, Root.getOpcode (), 4 );
7446- case AArch64::LD1i16:
7447- return getGatherPattern (Root, Patterns, Root.getOpcode (), 8 );
7448- case AArch64::LD1i8:
7449- return getGatherPattern (Root, Patterns, Root.getOpcode (), 16 );
7450- default :
7451- return false ;
7443+ case AArch64::LD1i32:
7444+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 4 );
7445+ case AArch64::LD1i16:
7446+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 8 );
7447+ case AArch64::LD1i8:
7448+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 16 );
7449+ default :
7450+ return false ;
74527451 }
74537452}
74547453
7455- static void generateGatherPattern (
7456- MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7457- SmallVectorImpl<MachineInstr *> &DelInstrs,
7458- DenseMap<Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern,
7459- unsigned NumLanes) {
7460-
7454+ static void
7455+ generateGatherPattern (MachineInstr &Root,
7456+ SmallVectorImpl<MachineInstr *> &InsInstrs,
7457+ SmallVectorImpl<MachineInstr *> &DelInstrs,
7458+ DenseMap<Register, unsigned > &InstrIdxForVirtReg,
7459+ unsigned Pattern, unsigned NumLanes) {
7460+
74617461 MachineFunction &MF = *Root.getParent ()->getParent ();
74627462 MachineRegisterInfo &MRI = MF.getRegInfo ();
74637463 const TargetInstrInfo *TII = MF.getSubtarget ().getInstrInfo ();
@@ -7469,7 +7469,7 @@ static void generateGatherPattern(
74697469 LoadToLaneInstrs.push_back (CurrInstr);
74707470 CurrInstr = MRI.getUniqueVRegDef (CurrInstr->getOperand (1 ).getReg ());
74717471 }
7472-
7472+
74737473 MachineInstr *SubregToReg = CurrInstr;
74747474 LoadToLaneInstrs.push_back (
74757475 MRI.getUniqueVRegDef (SubregToReg->getOperand (2 ).getReg ()));
@@ -7494,26 +7494,27 @@ static void generateGatherPattern(
74947494 };
74957495
74967496 // Helper to create load instruction based on opcode
7497- auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7498- Register OffsetReg) -> MachineInstrBuilder {
7499- unsigned Opcode;
7500- switch (NumLanes) {
7501- case 4 :
7502- Opcode = AArch64::LDRSui;
7503- break ;
7504- case 8 :
7505- Opcode = AArch64::LDRHui;
7506- break ;
7507- case 16 :
7508- Opcode = AArch64::LDRBui;
7509- break ;
7510- default :
7511- llvm_unreachable (" Got unsupported number of lanes in machine-combiner gather pattern" );
7512- }
7513- // Immediate offset load
7514- return BuildMI (MF, MIMetadata (Root), TII->get (Opcode), DestReg)
7515- .addReg (OffsetReg)
7516- .addImm (0 ); // immediate offset
7497+ auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7498+ Register OffsetReg) -> MachineInstrBuilder {
7499+ unsigned Opcode;
7500+ switch (NumLanes) {
7501+ case 4 :
7502+ Opcode = AArch64::LDRSui;
7503+ break ;
7504+ case 8 :
7505+ Opcode = AArch64::LDRHui;
7506+ break ;
7507+ case 16 :
7508+ Opcode = AArch64::LDRBui;
7509+ break ;
7510+ default :
7511+ llvm_unreachable (
7512+ " Got unsupported number of lanes in machine-combiner gather pattern" );
7513+ }
7514+ // Immediate offset load
7515+ return BuildMI (MF, MIMetadata (Root), TII->get (Opcode), DestReg)
7516+ .addReg (OffsetReg)
7517+ .addImm (0 ); // immediate offset
75177518 };
75187519
75197520 // Load the remaining lanes into register 0.
@@ -7522,7 +7523,8 @@ static void generateGatherPattern(
75227523 LoadToLaneInstrsAscending.begin () + NumLanes / 2 );
75237524 auto PrevReg = SubregToReg->getOperand (0 ).getReg ();
75247525 for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg0)) {
7525- PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 , LoadInstr->getOperand (3 ).getReg ());
7526+ PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7527+ LoadInstr->getOperand (3 ).getReg ());
75267528 DelInstrs.push_back (LoadInstr);
75277529 }
75287530 auto LastLoadReg0 = PrevReg;
@@ -7532,30 +7534,32 @@ static void generateGatherPattern(
75327534 auto OriginalSplitLoad = *std::next (LoadToLaneInstrsAscending.begin (), NumLanes / 2 );
75337535 auto DestRegForMiddleIndex = MRI.createVirtualRegister (
75347536 MRI.getRegClass (Lane0Load->getOperand (0 ).getReg ()));
7535-
7536- MachineInstrBuilder MiddleIndexLoadInstr = CreateLoadInstruction (
7537- NumLanes, DestRegForMiddleIndex,
7538- OriginalSplitLoad->getOperand (3 ).getReg ());
7539-
7540- InstrIdxForVirtReg.insert (std::make_pair (DestRegForMiddleIndex, InsInstrs.size ()));
7537+
7538+ MachineInstrBuilder MiddleIndexLoadInstr =
7539+ CreateLoadInstruction (NumLanes, DestRegForMiddleIndex,
7540+ OriginalSplitLoad->getOperand (3 ).getReg ());
7541+
7542+ InstrIdxForVirtReg.insert (
7543+ std::make_pair (DestRegForMiddleIndex, InsInstrs.size ()));
75417544 InsInstrs.push_back (MiddleIndexLoadInstr);
75427545 DelInstrs.push_back (OriginalSplitLoad);
75437546
75447547 // Subreg To Reg instruction for register 1.
75457548 auto DestRegForSubregToReg = MRI.createVirtualRegister (FPR128RegClass);
75467549 unsigned SubregType;
75477550 switch (NumLanes) {
7548- case 4 :
7549- SubregType = AArch64::ssub;
7550- break ;
7551- case 8 :
7552- SubregType = AArch64::hsub;
7553- break ;
7554- case 16 :
7555- SubregType = AArch64::bsub;
7556- break ;
7557- default :
7558- llvm_unreachable (" Got invalid NumLanes for machine-combiner gather pattern" );
7551+ case 4 :
7552+ SubregType = AArch64::ssub;
7553+ break ;
7554+ case 8 :
7555+ SubregType = AArch64::hsub;
7556+ break ;
7557+ case 16 :
7558+ SubregType = AArch64::bsub;
7559+ break ;
7560+ default :
7561+ llvm_unreachable (
7562+ " Got invalid NumLanes for machine-combiner gather pattern" );
75597563 }
75607564
75617565 auto SubRegToRegInstr =
@@ -7573,7 +7577,8 @@ static void generateGatherPattern(
75737577 LoadToLaneInstrsAscending.begin () + NumLanes / 2 + 1 , LoadToLaneInstrsAscending.end ());
75747578 PrevReg = SubRegToRegInstr->getOperand (0 ).getReg ();
75757579 for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg1)) {
7576- PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 , LoadInstr->getOperand (3 ).getReg ());
7580+ PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7581+ LoadInstr->getOperand (3 ).getReg ());
75777582 if (Index == NumLanes / 2 - 2 ) {
75787583 break ;
75797584 }
@@ -8967,11 +8972,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
89678972 break ;
89688973 }
89698974 case AArch64MachineCombinerPattern::GATHER_i16: {
8970- generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, Pattern, 8 );
8975+ generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8976+ Pattern, 8 );
89718977 break ;
89728978 }
89738979 case AArch64MachineCombinerPattern::GATHER_i8: {
8974- generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, Pattern, 16 );
8980+ generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8981+ Pattern, 16 );
89758982 break ;
89768983 }
89778984
0 commit comments