@@ -7373,9 +7373,8 @@ static bool getMiscPatterns(MachineInstr &Root,
73737373}
73747374
73757375static bool getGatherPattern (MachineInstr &Root,
7376- SmallVectorImpl<unsigned > &Patterns,
7377- unsigned LoadLaneOpCode,
7378- unsigned NumLanes) {
7376+ SmallVectorImpl<unsigned > &Patterns,
7377+ unsigned LoadLaneOpCode, unsigned NumLanes) {
73797378 const MachineRegisterInfo &MRI = Root.getMF ()->getRegInfo ();
73807379 const TargetRegisterInfo *TRI =
73817380 Root.getMF ()->getSubtarget ().getRegisterInfo ();
@@ -7386,7 +7385,8 @@ static bool getGatherPattern(MachineInstr &Root,
73867385
73877386 // Check that we have load into all lanes except lane 0.
73887387 // For each load we also want to check that:
7389- // 1. It has a single debug use (since we will be replacing the virtual register)
7388+ // 1. It has a single debug use (since we will be replacing the virtual
7389+ // register)
73907390 // 2. That the addressing mode only uses a single offset register.
73917391 auto *CurrInstr = MRI.getUniqueVRegDef (Root.getOperand (1 ).getReg ());
73927392 auto Range = llvm::seq<unsigned >(1 , NumLanes - 1 );
@@ -7417,17 +7417,17 @@ static bool getGatherPattern(MachineInstr &Root,
74177417 return false ;
74187418
74197419 switch (NumLanes) {
7420- case 4 :
7421- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i32);
7422- break ;
7423- case 8 :
7424- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i16);
7425- break ;
7426- case 16 :
7427- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i8);
7428- break ;
7429- default :
7430- llvm_unreachable (" Got bad number of lanes for gather pattern." );
7420+ case 4 :
7421+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i32);
7422+ break ;
7423+ case 8 :
7424+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i16);
7425+ break ;
7426+ case 16 :
7427+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i8);
7428+ break ;
7429+ default :
7430+ llvm_unreachable (" Got bad number of lanes for gather pattern." );
74317431 }
74327432
74337433 return true ;
@@ -7441,23 +7441,24 @@ static bool getLoadPatterns(MachineInstr &Root,
74417441
74427442 // The pattern searches for loads into single lanes.
74437443 switch (Root.getOpcode ()) {
7444- case AArch64::LD1i32:
7445- return getGatherPattern (Root, Patterns, Root.getOpcode (), 4 );
7446- case AArch64::LD1i16:
7447- return getGatherPattern (Root, Patterns, Root.getOpcode (), 8 );
7448- case AArch64::LD1i8:
7449- return getGatherPattern (Root, Patterns, Root.getOpcode (), 16 );
7450- default :
7451- return false ;
7444+ case AArch64::LD1i32:
7445+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 4 );
7446+ case AArch64::LD1i16:
7447+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 8 );
7448+ case AArch64::LD1i8:
7449+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 16 );
7450+ default :
7451+ return false ;
74527452 }
74537453}
74547454
7455- static void generateGatherPattern (
7456- MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7457- SmallVectorImpl<MachineInstr *> &DelInstrs,
7458- DenseMap<Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern,
7459- unsigned NumLanes) {
7460-
7455+ static void
7456+ generateGatherPattern (MachineInstr &Root,
7457+ SmallVectorImpl<MachineInstr *> &InsInstrs,
7458+ SmallVectorImpl<MachineInstr *> &DelInstrs,
7459+ DenseMap<Register, unsigned > &InstrIdxForVirtReg,
7460+ unsigned Pattern, unsigned NumLanes) {
7461+
74617462 MachineFunction &MF = *Root.getParent ()->getParent ();
74627463 MachineRegisterInfo &MRI = MF.getRegInfo ();
74637464 const TargetInstrInfo *TII = MF.getSubtarget ().getInstrInfo ();
@@ -7469,7 +7470,7 @@ static void generateGatherPattern(
74697470 LoadToLaneInstrs.push_back (CurrInstr);
74707471 CurrInstr = MRI.getUniqueVRegDef (CurrInstr->getOperand (1 ).getReg ());
74717472 }
7472-
7473+
74737474 MachineInstr *SubregToReg = CurrInstr;
74747475 LoadToLaneInstrs.push_back (
74757476 MRI.getUniqueVRegDef (SubregToReg->getOperand (2 ).getReg ()));
@@ -7494,26 +7495,27 @@ static void generateGatherPattern(
74947495 };
74957496
74967497 // Helper to create load instruction based on opcode
7497- auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7498- Register OffsetReg) -> MachineInstrBuilder {
7499- unsigned Opcode;
7500- switch (NumLanes) {
7501- case 4 :
7502- Opcode = AArch64::LDRSui;
7503- break ;
7504- case 8 :
7505- Opcode = AArch64::LDRHui;
7506- break ;
7507- case 16 :
7508- Opcode = AArch64::LDRBui;
7509- break ;
7510- default :
7511- llvm_unreachable (" Got unsupported number of lanes in machine-combiner gather pattern" );
7512- }
7513- // Immediate offset load
7514- return BuildMI (MF, MIMetadata (Root), TII->get (Opcode), DestReg)
7515- .addReg (OffsetReg)
7516- .addImm (0 ); // immediate offset
7498+ auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7499+ Register OffsetReg) -> MachineInstrBuilder {
7500+ unsigned Opcode;
7501+ switch (NumLanes) {
7502+ case 4 :
7503+ Opcode = AArch64::LDRSui;
7504+ break ;
7505+ case 8 :
7506+ Opcode = AArch64::LDRHui;
7507+ break ;
7508+ case 16 :
7509+ Opcode = AArch64::LDRBui;
7510+ break ;
7511+ default :
7512+ llvm_unreachable (
7513+ " Got unsupported number of lanes in machine-combiner gather pattern" );
7514+ }
7515+ // Immediate offset load
7516+ return BuildMI (MF, MIMetadata (Root), TII->get (Opcode), DestReg)
7517+ .addReg (OffsetReg)
7518+ .addImm (0 ); // immediate offset
75177519 };
75187520
75197521 // Load the remaining lanes into register 0.
@@ -7522,40 +7524,45 @@ static void generateGatherPattern(
75227524 LoadToLaneInstrsAscending.begin () + NumLanes / 2 );
75237525 auto PrevReg = SubregToReg->getOperand (0 ).getReg ();
75247526 for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg0)) {
7525- PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 , LoadInstr->getOperand (3 ).getReg ());
7527+ PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7528+ LoadInstr->getOperand (3 ).getReg ());
75267529 DelInstrs.push_back (LoadInstr);
75277530 }
75287531 auto LastLoadReg0 = PrevReg;
75297532
7530- // First load into register 1. Perform a LDRSui to zero out the upper lanes in a single instruction.
7533+ // First load into register 1. Perform a LDRSui to zero out the upper lanes in
7534+ // a single instruction.
75317535 auto Lane0Load = *LoadToLaneInstrsAscending.begin ();
7532- auto OriginalSplitLoad = *std::next (LoadToLaneInstrsAscending.begin (), NumLanes / 2 );
7536+ auto OriginalSplitLoad =
7537+ *std::next (LoadToLaneInstrsAscending.begin (), NumLanes / 2 );
75337538 auto DestRegForMiddleIndex = MRI.createVirtualRegister (
75347539 MRI.getRegClass (Lane0Load->getOperand (0 ).getReg ()));
7535-
7536- MachineInstrBuilder MiddleIndexLoadInstr = CreateLoadInstruction (
7537- NumLanes, DestRegForMiddleIndex,
7538- OriginalSplitLoad->getOperand (3 ).getReg ());
7539-
7540- InstrIdxForVirtReg.insert (std::make_pair (DestRegForMiddleIndex, InsInstrs.size ()));
7540+
7541+ MachineInstrBuilder MiddleIndexLoadInstr =
7542+ CreateLoadInstruction (NumLanes, DestRegForMiddleIndex,
7543+ OriginalSplitLoad->getOperand (3 ).getReg ());
7544+
7545+ InstrIdxForVirtReg.insert (
7546+ std::make_pair (DestRegForMiddleIndex, InsInstrs.size ()));
75417547 InsInstrs.push_back (MiddleIndexLoadInstr);
75427548 DelInstrs.push_back (OriginalSplitLoad);
75437549
75447550 // Subreg To Reg instruction for register 1.
75457551 auto DestRegForSubregToReg = MRI.createVirtualRegister (FPR128RegClass);
75467552 unsigned SubregType;
75477553 switch (NumLanes) {
7548- case 4 :
7549- SubregType = AArch64::ssub;
7550- break ;
7551- case 8 :
7552- SubregType = AArch64::hsub;
7553- break ;
7554- case 16 :
7555- SubregType = AArch64::bsub;
7556- break ;
7557- default :
7558- llvm_unreachable (" Got invalid NumLanes for machine-combiner gather pattern" );
7554+ case 4 :
7555+ SubregType = AArch64::ssub;
7556+ break ;
7557+ case 8 :
7558+ SubregType = AArch64::hsub;
7559+ break ;
7560+ case 16 :
7561+ SubregType = AArch64::bsub;
7562+ break ;
7563+ default :
7564+ llvm_unreachable (
7565+ " Got invalid NumLanes for machine-combiner gather pattern" );
75597566 }
75607567
75617568 auto SubRegToRegInstr =
@@ -7569,11 +7576,13 @@ static void generateGatherPattern(
75697576 InsInstrs.push_back (SubRegToRegInstr);
75707577
75717578 // Load remaining lanes into register 1.
7572- auto LanesToLoadToReg1 = llvm::make_range (
7573- LoadToLaneInstrsAscending.begin () + NumLanes / 2 + 1 , LoadToLaneInstrsAscending.end ());
7579+ auto LanesToLoadToReg1 =
7580+ llvm::make_range (LoadToLaneInstrsAscending.begin () + NumLanes / 2 + 1 ,
7581+ LoadToLaneInstrsAscending.end ());
75747582 PrevReg = SubRegToRegInstr->getOperand (0 ).getReg ();
75757583 for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg1)) {
7576- PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 , LoadInstr->getOperand (3 ).getReg ());
7584+ PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7585+ LoadInstr->getOperand (3 ).getReg ());
75777586 if (Index == NumLanes / 2 - 2 ) {
75787587 break ;
75797588 }
@@ -8967,11 +8976,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
89678976 break ;
89688977 }
89698978 case AArch64MachineCombinerPattern::GATHER_i16: {
8970- generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, Pattern, 8 );
8979+ generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8980+ Pattern, 8 );
89718981 break ;
89728982 }
89738983 case AArch64MachineCombinerPattern::GATHER_i8: {
8974- generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, Pattern, 16 );
8984+ generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8985+ Pattern, 16 );
89758986 break ;
89768987 }
89778988
0 commit comments