|
20 | 20 | #include "Utils/AArch64BaseInfo.h" |
21 | 21 | #include "llvm/ADT/ArrayRef.h" |
22 | 22 | #include "llvm/ADT/STLExtras.h" |
23 | | -#include "llvm/ADT/SmallSet.h" |
24 | 23 | #include "llvm/ADT/SmallVector.h" |
25 | 24 | #include "llvm/CodeGen/CFIInstBuilder.h" |
26 | 25 | #include "llvm/CodeGen/LivePhysRegs.h" |
|
36 | 35 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | 36 | #include "llvm/CodeGen/RegisterScavenging.h" |
38 | 37 | #include "llvm/CodeGen/StackMaps.h" |
39 | | -#include "llvm/CodeGen/TargetOpcodes.h" |
40 | 38 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
41 | 39 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
42 | 40 | #include "llvm/IR/DebugInfoMetadata.h" |
@@ -7354,9 +7352,6 @@ bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const { |
7354 | 7352 | case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2: |
7355 | 7353 | case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1: |
7356 | 7354 | case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2: |
7357 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i32: |
7358 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i16: |
7359 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i8: |
7360 | 7355 | return true; |
7361 | 7356 | } // end switch (Pattern) |
7362 | 7357 | return false; |
@@ -7397,252 +7392,11 @@ static bool getMiscPatterns(MachineInstr &Root, |
7397 | 7392 | return false; |
7398 | 7393 | } |
7399 | 7394 |
|
7400 | | -static bool getGatherPattern(MachineInstr &Root, |
7401 | | - SmallVectorImpl<unsigned> &Patterns, |
7402 | | - unsigned LoadLaneOpCode, unsigned NumLanes) { |
7403 | | - const MachineFunction *MF = Root.getMF(); |
7404 | | - |
7405 | | - // Early exit if optimizing for size. |
7406 | | - if (MF->getFunction().hasMinSize()) |
7407 | | - return false; |
7408 | | - |
7409 | | - const MachineRegisterInfo &MRI = MF->getRegInfo(); |
7410 | | - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); |
7411 | | - |
7412 | | - // The root of the pattern must load into the last lane of the vector. |
7413 | | - if (Root.getOperand(2).getImm() != NumLanes - 1) |
7414 | | - return false; |
7415 | | - |
7416 | | -  // Check that we have a load into all lanes except lane 0. |
7417 | | - // For each load we also want to check that: |
7418 | | - // 1. It has a single non-debug use (since we will be replacing the virtual |
7419 | | - // register) |
7420 | | - // 2. That the addressing mode only uses a single offset register. |
7421 | | - auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); |
7422 | | - auto Range = llvm::seq<unsigned>(1, NumLanes - 1); |
7423 | | - SmallSet<unsigned, 4> RemainingLanes(Range.begin(), Range.end()); |
7424 | | - while (!RemainingLanes.empty() && CurrInstr && |
7425 | | - CurrInstr->getOpcode() == LoadLaneOpCode && |
7426 | | - MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) && |
7427 | | - CurrInstr->getNumOperands() == 4) { |
7428 | | - RemainingLanes.erase(CurrInstr->getOperand(2).getImm()); |
7429 | | - CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg()); |
7430 | | - } |
7431 | | - |
7432 | | - if (!RemainingLanes.empty()) |
7433 | | - return false; |
7434 | | - |
7435 | | - // Match the SUBREG_TO_REG sequence. |
7436 | | - if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG) |
7437 | | - return false; |
7438 | | - |
7439 | | -  // Verify that the SUBREG_TO_REG loads an integer into the first lane. |
7440 | | - auto Lane0LoadReg = CurrInstr->getOperand(2).getReg(); |
7441 | | - unsigned SingleLaneSizeInBits = 128 / NumLanes; |
7442 | | - if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits) |
7443 | | - return false; |
7444 | | - |
7445 | | -  // Verify that it also has a single non-debug use. |
7446 | | - if (!MRI.hasOneNonDBGUse(Lane0LoadReg)) |
7447 | | - return false; |
7448 | | - |
7449 | | - switch (NumLanes) { |
7450 | | - case 4: |
7451 | | - Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32); |
7452 | | - break; |
7453 | | - case 8: |
7454 | | - Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16); |
7455 | | - break; |
7456 | | - case 16: |
7457 | | - Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8); |
7458 | | - break; |
7459 | | - default: |
7460 | | - llvm_unreachable("Got bad number of lanes for gather pattern."); |
7461 | | - } |
7462 | | - |
7463 | | - return true; |
7464 | | -} |
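
The lane bookkeeping in the walk above is easy to misread because of the half-open `llvm::seq` range. A minimal standalone sketch of the same check, using `std::set` in place of `llvm::SmallSet` (the lane values are illustrative, not from the patch):

```cpp
#include <cstdio>
#include <set>
#include <vector>

int main() {
  const unsigned NumLanes = 4;
  std::set<unsigned> RemainingLanes;
  // The root covers lane NumLanes - 1 and SUBREG_TO_REG covers lane 0,
  // so the def chain must supply every lane in [1, NumLanes - 1).
  for (unsigned Lane = 1; Lane < NumLanes - 1; ++Lane)
    RemainingLanes.insert(Lane);

  // Lane indices observed while walking the LD1 def chain (illustrative).
  const std::vector<unsigned> ChainLanes = {2, 1};
  for (unsigned Lane : ChainLanes)
    RemainingLanes.erase(Lane);

  std::printf("gather pattern %s\n",
              RemainingLanes.empty() ? "matches" : "rejected");
}
```
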
7465 | | - |
7466 | | -/// Search for patterns where we use LD1 instructions to load into |
7467 | | -/// separate lanes of a 128-bit Neon register. We can increase Memory Level |
7468 | | -/// Parallelism by loading into 2 Neon registers instead. |
7469 | | -static bool getLoadPatterns(MachineInstr &Root, |
7470 | | - SmallVectorImpl<unsigned> &Patterns) { |
7471 | | - |
7472 | | - // The pattern searches for loads into single lanes. |
7473 | | - switch (Root.getOpcode()) { |
7474 | | - case AArch64::LD1i32: |
7475 | | - return getGatherPattern(Root, Patterns, Root.getOpcode(), 4); |
7476 | | - case AArch64::LD1i16: |
7477 | | - return getGatherPattern(Root, Patterns, Root.getOpcode(), 8); |
7478 | | - case AArch64::LD1i8: |
7479 | | - return getGatherPattern(Root, Patterns, Root.getOpcode(), 16); |
7480 | | - default: |
7481 | | - return false; |
7482 | | - } |
7483 | | -} |
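
For intuition, here is roughly the source-level rewrite the reverted combine performed, sketched with ACLE NEON intrinsics for the i32 case. The function names and pointer arguments are illustrative; only the instruction shapes (a zeroing scalar load, LD1 lane loads, and the final ZIP1) correspond to the pattern:

```cpp
#include <arm_neon.h>

// Serial form the combiner matched: lane 0 comes from a scalar load
// (SUBREG_TO_REG of an LDR), and each LD1 lane load depends on the
// previously built vector, forming one long dependency chain.
uint32x4_t gather_serial(const uint32_t *a, const uint32_t *b,
                         const uint32_t *c, const uint32_t *d) {
  uint32x4_t v = vmovq_n_u32(0);
  v = vld1q_lane_u32(a, v, 0);
  v = vld1q_lane_u32(b, v, 1);
  v = vld1q_lane_u32(c, v, 2);
  v = vld1q_lane_u32(d, v, 3);
  return v;
}

// Split form the combiner emitted: two independent half-length chains
// that can issue in parallel, merged at the end by zipping the low
// 64-bit lanes of the two registers (ZIP1v2i64).
uint32x4_t gather_split(const uint32_t *a, const uint32_t *b,
                        const uint32_t *c, const uint32_t *d) {
  uint32x4_t lo = vmovq_n_u32(0);
  uint32x4_t hi = vmovq_n_u32(0);
  lo = vld1q_lane_u32(a, lo, 0);
  lo = vld1q_lane_u32(b, lo, 1);
  hi = vld1q_lane_u32(c, hi, 0);
  hi = vld1q_lane_u32(d, hi, 1);
  return vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(lo),
                                          vreinterpretq_u64_u32(hi)));
}
```

Halving the length of the dependency chain is what buys the memory-level parallelism the doc comment above refers to.
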
7484 | | - |
7485 | | -static void |
7486 | | -generateGatherPattern(MachineInstr &Root, |
7487 | | - SmallVectorImpl<MachineInstr *> &InsInstrs, |
7488 | | - SmallVectorImpl<MachineInstr *> &DelInstrs, |
7489 | | - DenseMap<Register, unsigned> &InstrIdxForVirtReg, |
7490 | | - unsigned Pattern, unsigned NumLanes) { |
7491 | | - |
7492 | | - MachineFunction &MF = *Root.getParent()->getParent(); |
7493 | | - MachineRegisterInfo &MRI = MF.getRegInfo(); |
7494 | | - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
7495 | | - |
7496 | | -  // Gather the initial load instructions to build the pattern. |
7497 | | - SmallVector<MachineInstr *, 16> LoadToLaneInstrs; |
7498 | | - MachineInstr *CurrInstr = &Root; |
7499 | | - for (unsigned i = 0; i < NumLanes - 1; ++i) { |
7500 | | - LoadToLaneInstrs.push_back(CurrInstr); |
7501 | | - CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg()); |
7502 | | - } |
7503 | | - |
7504 | | - // Sort the load instructions according to the lane. |
7505 | | - llvm::sort(LoadToLaneInstrs, |
7506 | | - [](const MachineInstr *A, const MachineInstr *B) { |
7507 | | - return A->getOperand(2).getImm() > B->getOperand(2).getImm(); |
7508 | | - }); |
7509 | | - |
7510 | | - MachineInstr *SubregToReg = CurrInstr; |
7511 | | - LoadToLaneInstrs.push_back( |
7512 | | - MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg())); |
7513 | | - auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs); |
7514 | | - |
7515 | | - const TargetRegisterClass *FPR128RegClass = |
7516 | | - MRI.getRegClass(Root.getOperand(0).getReg()); |
7517 | | - |
7518 | | - auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr, |
7519 | | - Register SrcRegister, unsigned Lane, |
7520 | | - Register OffsetRegister) { |
7521 | | - auto NewRegister = MRI.createVirtualRegister(FPR128RegClass); |
7522 | | - MachineInstrBuilder LoadIndexIntoRegister = |
7523 | | - BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()), |
7524 | | - NewRegister) |
7525 | | - .addReg(SrcRegister) |
7526 | | - .addImm(Lane) |
7527 | | - .addReg(OffsetRegister, getKillRegState(true)); |
7528 | | - InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size())); |
7529 | | - InsInstrs.push_back(LoadIndexIntoRegister); |
7530 | | - return NewRegister; |
7531 | | - }; |
7532 | | - |
7533 | | -  // Helper to create the scalar load instruction based on the lane count. |
7534 | | - auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg, |
7535 | | - Register OffsetReg) -> MachineInstrBuilder { |
7536 | | - unsigned Opcode; |
7537 | | - switch (NumLanes) { |
7538 | | - case 4: |
7539 | | - Opcode = AArch64::LDRSui; |
7540 | | - break; |
7541 | | - case 8: |
7542 | | - Opcode = AArch64::LDRHui; |
7543 | | - break; |
7544 | | - case 16: |
7545 | | - Opcode = AArch64::LDRBui; |
7546 | | - break; |
7547 | | - default: |
7548 | | - llvm_unreachable( |
7549 | | - "Got unsupported number of lanes in machine-combiner gather pattern"); |
7550 | | - } |
7551 | | - // Immediate offset load |
7552 | | - return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg) |
7553 | | - .addReg(OffsetReg) |
7554 | | - .addImm(0); // immediate offset |
7555 | | - }; |
7556 | | - |
7557 | | -  // Load the remaining lanes of the first half into register 0. |
7558 | | - auto LanesToLoadToReg0 = |
7559 | | - llvm::make_range(LoadToLaneInstrsAscending.begin() + 1, |
7560 | | - LoadToLaneInstrsAscending.begin() + NumLanes / 2); |
7561 | | - auto PrevReg = SubregToReg->getOperand(0).getReg(); |
7562 | | - for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) { |
7563 | | - PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1, |
7564 | | - LoadInstr->getOperand(3).getReg()); |
7565 | | - DelInstrs.push_back(LoadInstr); |
7566 | | - } |
7567 | | - auto LastLoadReg0 = PrevReg; |
7568 | | - |
7569 | | -  // First load into register 1. Perform a scalar LDR (LDRSui/LDRHui/LDRBui) to |
7570 | | -  // zero out the upper lanes in a single instruction. |
7571 | | - auto Lane0Load = *LoadToLaneInstrsAscending.begin(); |
7572 | | - auto OriginalSplitLoad = |
7573 | | - *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2); |
7574 | | - auto DestRegForMiddleIndex = MRI.createVirtualRegister( |
7575 | | - MRI.getRegClass(Lane0Load->getOperand(0).getReg())); |
7576 | | - |
7577 | | - MachineInstrBuilder MiddleIndexLoadInstr = |
7578 | | - CreateLoadInstruction(NumLanes, DestRegForMiddleIndex, |
7579 | | - OriginalSplitLoad->getOperand(3).getReg()); |
7580 | | - |
7581 | | - InstrIdxForVirtReg.insert( |
7582 | | - std::make_pair(DestRegForMiddleIndex, InsInstrs.size())); |
7583 | | - InsInstrs.push_back(MiddleIndexLoadInstr); |
7584 | | - DelInstrs.push_back(OriginalSplitLoad); |
7585 | | - |
7586 | | -  // SUBREG_TO_REG instruction for register 1. |
7587 | | - auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass); |
7588 | | - unsigned SubregType; |
7589 | | - switch (NumLanes) { |
7590 | | - case 4: |
7591 | | - SubregType = AArch64::ssub; |
7592 | | - break; |
7593 | | - case 8: |
7594 | | - SubregType = AArch64::hsub; |
7595 | | - break; |
7596 | | - case 16: |
7597 | | - SubregType = AArch64::bsub; |
7598 | | - break; |
7599 | | - default: |
7600 | | - llvm_unreachable( |
7601 | | - "Got invalid NumLanes for machine-combiner gather pattern"); |
7602 | | - } |
7603 | | - |
7604 | | - auto SubRegToRegInstr = |
7605 | | - BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()), |
7606 | | - DestRegForSubregToReg) |
7607 | | - .addImm(0) |
7608 | | - .addReg(DestRegForMiddleIndex, getKillRegState(true)) |
7609 | | - .addImm(SubregType); |
7610 | | - InstrIdxForVirtReg.insert( |
7611 | | - std::make_pair(DestRegForSubregToReg, InsInstrs.size())); |
7612 | | - InsInstrs.push_back(SubRegToRegInstr); |
7613 | | - |
7614 | | - // Load remaining lanes into register 1. |
7615 | | - auto LanesToLoadToReg1 = |
7616 | | - llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1, |
7617 | | - LoadToLaneInstrsAscending.end()); |
7618 | | - PrevReg = SubRegToRegInstr->getOperand(0).getReg(); |
7619 | | - for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) { |
7620 | | - PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1, |
7621 | | - LoadInstr->getOperand(3).getReg()); |
7622 | | - if (Index == NumLanes / 2 - 2) { |
7623 | | - break; |
7624 | | - } |
7625 | | - DelInstrs.push_back(LoadInstr); |
7626 | | - } |
7627 | | - auto LastLoadReg1 = PrevReg; |
7628 | | - |
7629 | | - // Create the final zip instruction to combine the results. |
7630 | | - MachineInstrBuilder ZipInstr = |
7631 | | - BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64), |
7632 | | - Root.getOperand(0).getReg()) |
7633 | | - .addReg(LastLoadReg0) |
7634 | | - .addReg(LastLoadReg1); |
7635 | | - InsInstrs.push_back(ZipInstr); |
7636 | | -} |
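
The index arithmetic in generateGatherPattern is dense; this standalone sketch prints the lane redistribution it implements for the i16 case (NumLanes = 8), under the assumption, as in the code above, that lane 0 of each destination register is filled by a zeroing scalar load:

```cpp
#include <cstdio>

int main() {
  const unsigned NumLanes = 8; // the GATHER_LANE_i16 case
  for (unsigned Lane = 0; Lane < NumLanes; ++Lane) {
    // Lane 0 of each destination register comes from a zeroing scalar
    // load (LDR + SUBREG_TO_REG); the rest are LD1 lane inserts.
    if (Lane < NumLanes / 2)
      std::printf("source lane %u -> reg0 lane %u\n", Lane, Lane);
    else
      std::printf("source lane %u -> reg1 lane %u\n", Lane,
                  Lane - NumLanes / 2);
  }
}
```
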
7637 | | - |
7638 | 7395 | CombinerObjective |
7639 | 7396 | AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const { |
7640 | 7397 | switch (Pattern) { |
7641 | 7398 | case AArch64MachineCombinerPattern::SUBADD_OP1: |
7642 | 7399 | case AArch64MachineCombinerPattern::SUBADD_OP2: |
7643 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i32: |
7644 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i16: |
7645 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i8: |
7646 | 7400 | return CombinerObjective::MustReduceDepth; |
7647 | 7401 | default: |
7648 | 7402 | return TargetInstrInfo::getCombinerObjective(Pattern); |
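
Classifying the gather patterns as MustReduceDepth told the MachineCombiner to accept the alternative sequence only when it shortens the critical path. A hedged sketch of that decision, with made-up illustrative latencies rather than real scheduling-model numbers:

```cpp
#include <cstdio>

int main() {
  // Illustrative latencies only: a chain of 4 dependent lane loads vs.
  // two parallel chains of 2 loads joined by one zip.
  const unsigned LoadLatency = 4, ZipLatency = 2;
  const unsigned SerialDepth = 4 * LoadLatency;
  const unsigned SplitDepth = 2 * LoadLatency + ZipLatency;
  std::printf("alternative %s\n",
              SplitDepth < SerialDepth ? "applied" : "rejected");
}
```
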
@@ -7672,10 +7426,6 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( |
7672 | 7426 | if (getMiscPatterns(Root, Patterns)) |
7673 | 7427 | return true; |
7674 | 7428 |
|
7675 | | - // Load patterns |
7676 | | - if (getLoadPatterns(Root, Patterns)) |
7677 | | - return true; |
7678 | | - |
7679 | 7429 | return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, |
7680 | 7430 | DoRegPressureReduce); |
7681 | 7431 | } |
@@ -8931,21 +8681,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( |
8931 | 8681 | MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs); |
8932 | 8682 | break; |
8933 | 8683 | } |
8934 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i32: { |
8935 | | - generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, |
8936 | | - Pattern, 4); |
8937 | | - break; |
8938 | | - } |
8939 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i16: { |
8940 | | - generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, |
8941 | | - Pattern, 8); |
8942 | | - break; |
8943 | | - } |
8944 | | - case AArch64MachineCombinerPattern::GATHER_LANE_i8: { |
8945 | | - generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, |
8946 | | - Pattern, 16); |
8947 | | - break; |
8948 | | - } |
8949 | 8684 |
|
8950 | 8685 | } // end switch (Pattern) |
8951 | 8686 | // Record MUL and ADD/SUB for deletion |