diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index f610ea2f21682..5c945907785ac 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -170,6 +170,20 @@ option specifies "``-``", then the output will also be sent to standard output. Enable extra scheduler statistics. This view collects and analyzes instruction issue events. This view is disabled by default. +.. option:: -scheduling-info + + Enable the scheduling info view. This view reports the scheduling information + defined in the LLVM target description in the form: + uOps | Latency | Bypass Latency | Throughput | LLVM OpcodeName | Resources + units | assembly instruction and its comment (// or /* */) if defined. + It makes it possible to compare the scheduling info with the architecture + documents and to correct the target description by fixing the InstRW of the + reported LLVM opcode. The reference scheduling information can be written in + the same order in each instruction comment, so that reported and reference + values are easy to compare. Suggested information in the comment: + ``// <architecture instruction form> \\ <scheduling documentation title> \\ + <uOps>, <Latency>, <Bypass Latency>, <Throughput>, <Resources units>`` + .. option:: -retire-stats Enable extra retire control unit statistics. This view is disabled by default. diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h index fe731d086f70a..57c8ebeee02a7 100644 --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -402,6 +402,10 @@ struct MCSchedModel { static unsigned getForwardingDelayCycles(ArrayRef Entries, unsigned WriteResourceIdx = 0); + /// Returns the bypass delay cycle for the maximum latency write cycle + static unsigned getBypassDelayCycles(const MCSubtargetInfo &STI, + const MCSchedClassDesc &SCDesc); + /// Returns the default initialized model. 
static const MCSchedModel Default; }; diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp index ed243cecabb76..08e735cc4cfb6 100644 --- a/llvm/lib/MC/MCSchedule.cpp +++ b/llvm/lib/MC/MCSchedule.cpp @@ -174,3 +174,38 @@ MCSchedModel::getForwardingDelayCycles(ArrayRef Entries, return std::abs(DelayCycles); } + +unsigned MCSchedModel::getBypassDelayCycles(const MCSubtargetInfo &STI, + const MCSchedClassDesc &SCDesc) { + + ArrayRef Entries = STI.getReadAdvanceEntries(SCDesc); + if (Entries.empty()) + return 0; + + unsigned Latency = 0; + unsigned MaxLatency = 0; + unsigned WriteResourceID = 0; + unsigned DefEnd = SCDesc.NumWriteLatencyEntries; + + for (unsigned DefIdx = 0; DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI.getWriteLatencyEntry(&SCDesc, DefIdx); + unsigned Cycles = (unsigned)WLEntry->Cycles; + // Invalid latency. Consider 0 cycle latency + if (WLEntry->Cycles < 0) + Cycles = 0; + if (Cycles > Latency) { + MaxLatency = Cycles; + WriteResourceID = WLEntry->WriteResourceID; + } + Latency = MaxLatency; + } + + for (const MCReadAdvanceEntry &E : Entries) { + if (E.WriteResourceID == WriteResourceID) + return E.Cycles; + } + + llvm_unreachable("WriteResourceID not found in MCReadAdvanceEntry entries"); +} diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s new file mode 100644 index 0000000000000..c421166f22ea4 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-scheduling-info.s @@ -0,0 +1,7588 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 -scheduling-info < %s | FileCheck %s + + .text + .file "V1-scheduling-info.s" + .globl test + .p2align 4 + .type test,@function +test: + .cfi_startproc + abs D15, D11 /* ABS , \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV */ + abs V25.2S, 
V25.2S // ABS ., . \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV + abs Z26.B, P6/M, Z27.B // ABS ., /M, . \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01 + adc W13, W6, W4 // ADC , , \\ ALU, basic \\ 1 1 1 4.0 V1UnitI + adc X8, X12, X10 // ADC , , \\ ALU, basic \\ 1 1 1 4.0 V1UnitI + adcs W29, W7, W30 // ADCS , , \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adcs X11, X3, X5 // ADCS , , \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + add WSP, WSP, W10 // ADD , , \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI + add WSP, WSP, W2, UXTB // ADD , , , \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI + add WSP, WSP, W13, UXTH #4 // ADD , , , # \\ ALU, basic, unconditional, no flagset \\ 1 2 2 2.00 V1UnitI + add WSP, WSP, W13, LSL #4 // ADD , , , LSL # \\ Arithmetic, LSL shift, shift <= 4 \\ 1 2 2 2.00 V1UnitI + add X22, X2, X27 // ADD , , X \\ ALU, basic \\ 1 1 1 4.0 V1UnitI + add X25, X9, W25, UXTB // ADD , , , \\ ALU, basic \\ 1 2 2 2.00 V1UnitI + add X4, X28, W3, UXTB #3 // ADD , , , # \\ ALU, extend and shift \\ 1 2 2 2.0 V1UnitM + add X0, X28, X26, LSL #3 // ADD , , X, LSL # \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI + add WSP, WSP, #3765 // ADD , , # \\ ALU, basic \\ 1 1 1 4.0 V1UnitI + add WSP, WSP, #3547, LSL #12 // ADD , , #, \\ ALU, basic \\ 1 1 1 4.0 V1UnitI + add X7, X30, #803 // ADD , , # \\ ALU, basic \\ 1 1 1 4.0 V1UnitI + add X7, X2, #319, LSL #12 // ADD , , #, \\ ALU, basic \\ 1 1 1 4.0 V1UnitI + add Z13.D, Z13.D, #245 // ADD ., ., # \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01 + add Z16.D, Z16.D, #233, LSL #8 // ADD ., ., #, \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01 + add W3, W2, W21, LSL #3 // ADD , , , LSL # \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, no flagset \\ 1 1 1 4.0 V1UnitI + add W6, W21, W17, LSL #15 // ADD , , , LSL # \\ Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM + add W28, W30, W19, ASR #30 // ADD , , , # \\ 
Arithmetic, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.0 V1UnitM + add X8, X3, X28, LSL #3 // ADD , , , LSL # \\ Arithmetic, LSL shift, shift <= 4 \\ 1 1 1 4.0 V1UnitI + add X12, X13, X0, LSL #44 // ADD , , , LSL # \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM + add X5, X20, X28, LSR #16 // ADD , , , # \\ Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.0 V1UnitM + add D0, D23, D21 // ADD , , \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV + add V19.4S, V24.4S, V15.4S // ADD ., ., . \\ ASIMD arith, basic \\ 1 2 2 4.0 V1UnitV + add Z29.D, P5/M, Z29.D, Z29.D // ADD ., /M, ., . \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01 + add Z10.H, Z22.H, Z13.H // ADD ., ., . \\ Arithmetic, basic \\ 1 2 2 2.0 V1UnitV01 + addhn V26.4H, V5.4S, V9.4S // ADDHN ., ., . \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV + addhn2 V1.16B, V19.8H, V6.8H // ADDHN2 ., ., . \\ ASIMD arith, complex \\ 1 2 2 4.0 V1UnitV + addp D1, V14.2D // ADDP , . \\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV + addp V7.2S, V1.2S, V2.2S // ADDP ., ., . 
\\ ASIMD arith, pair-wise \\ 1 2 2 4.0 V1UnitV + addpl X27, X6, #-6 // ADDPL , , # \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0 + adds W17, WSP, W25 // ADDS , , \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg + adds W6, WSP, W15, UXTH // ADDS , , , \\ ALU, basic, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg + adds W22, WSP, W30, UXTB #2 // ADDS , , , # \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds W12, WSP, W29, LSL #4 // ADDS , , , LSL # \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 2 2 2.00 V1UnitI,V1UnitFlg + adds X14, X0, X10 // ADDS , , X \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds X13, X23, W8, UXTB // ADDS , , , \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds X4, X26, W28, UXTB #1 // ADDS , , , # \\ ALU, flagset, extend and shift \\ 1 1 1 3.00 V1UnitFlg, V1UnitI + adds X10, X3, X29, LSL #2 // ADDS , , X, LSL # \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds W23, WSP, #502 // ADDS , , # \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds W2, WSP, #2980, LSL #12 // ADDS , , #, \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 1 1 3.00 V1UnitFlg, V1UnitI + adds X12, X4, #1345 // ADDS , , # \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds X25, X18, #3037, LSL #12 // ADDS , , #, \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 1 1 3.00 V1UnitFlg, V1UnitI + adds W12, W13, W26 // ADDS , , \\ ALU, basic, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds W0, W23, W20, LSL #0 // ADDS , , , LSL # \\ Arithmetic, LSL shift by immed, shift <= 4, unconditional, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds W13, W16, W12, LSL #28 // ADDS , , , LSL # \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg + adds W20, W19, W16, ASR #0 
// ADDS , , , # \\ Arithmetic, flagset, LSR/ASR/ROR shift by immed or LSL shift by immed > 4, unconditional \\ 1 2 2 2.00 V1UnitM,V1UnitFlg + adds X23, X12, X4 // ADDS , , \\ ALU, basic, flagset \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds X0, X13, X4, LSL #2 // ADDS , , , LSL # \\ Arithmetic, flagset, LSL shift, shift <= 4 \\ 1 1 1 3.00 V1UnitI,V1UnitFlg + adds X4, X7, X6, LSL #31 // ADDS , , , LSL # \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg + adds X9, X8, X9, ASR #41 // ADDS , , , # \\ Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 \\ 1 2 2 2.00 V1UnitM,V1UnitFlg + addv B0, V28.8B // ADDV B, .8B \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13 + addv B1, V26.16B // ADDV B, .16B \\ ASIMD arith, reduce, 16B \\ 2 4 4 1.00 V1UnitV13[2] + addv H18, V13.4H // ADDV H, .4H \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13 + addv H29, V17.8H // ADDV H, .8H \\ ASIMD arith, reduce, 8B/8H \\ 2 4 4 2.00 V1UnitV13 + addv S22, V18.4S // ADDV S, .4S \\ ASIMD arith, reduce, 4H/4S \\ 1 2 2 2.0 V1UnitV13 + addvl X1, X27, #-8 // ADDVL , , # \\ Predicate counting scalar \\ 1 2 2 1.0 V1UnitM0 + adr X3, test // ADR ,