Skip to content

Commit 20c5968

Browse files
committed
[X86] Increase latency of port5 masked compares and kshift/kadd/kunpck instructions in SKX scheduler model
According to uops.info, these instructions have a 4-cycle latency.
1 parent: c636f69 · commit: 20c5968

File tree

7 files changed

+555
-533
lines changed

7 files changed

+555
-533
lines changed

llvm/lib/Target/X86/X86SchedSkylakeServer.td

Lines changed: 68 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -817,27 +817,33 @@ def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
817817
}
818818
def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
819819
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
820-
"KADD(B|D|Q|W)rr",
820+
"VALIGND(Z|Z128|Z256)rri",
821+
"VALIGNQ(Z|Z128|Z256)rri",
822+
"VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
823+
"VPBROADCAST(B|W)rr",
824+
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
825+
826+
def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
827+
let Latency = 4;
828+
let NumMicroOps = 1;
829+
let ResourceCycles = [1];
830+
}
831+
def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
821832
"KSHIFTL(B|D|Q|W)ri",
822833
"KSHIFTR(B|D|Q|W)ri",
823834
"KUNPCK(BW|DQ|WD)rr",
824-
"VALIGND(Z|Z128|Z256)rri",
825-
"VALIGNQ(Z|Z128|Z256)rri",
826835
"VCMPPD(Z|Z128|Z256)rri",
827836
"VCMPPS(Z|Z128|Z256)rri",
828837
"VCMP(SD|SS)Zrr",
829-
"VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
830838
"VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
831839
"VFPCLASS(SD|SS)Zrr",
832-
"VPBROADCAST(B|W)rr",
833840
"VPCMPB(Z|Z128|Z256)rri",
834841
"VPCMPD(Z|Z128|Z256)rri",
835842
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
836843
"VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
837844
"VPCMPQ(Z|Z128|Z256)rri",
838845
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
839846
"VPCMPW(Z|Z128|Z256)rri",
840-
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr",
841847
"VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
842848

843849
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
@@ -1519,7 +1525,6 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
15191525
let ResourceCycles = [1,1];
15201526
}
15211527
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
1522-
"VFPCLASSSDZrm(b?)",
15231528
"VPBROADCASTB(Z|Z256)rm(b?)",
15241529
"VPBROADCASTW(Z|Z256)rm(b?)")>;
15251530
def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
@@ -1670,17 +1675,9 @@ def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
16701675
VPMOVSXWDYrm,
16711676
VPMOVZXWDYrm)>;
16721677
def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
1673-
"VCMP(PD|PS)Z128rm(b?)i",
1674-
"VCMP(SD|SS)Zrm",
1678+
"VFPCLASSSDZrm(b?)",
16751679
"VFPCLASSSSZrm(b?)",
1676-
"VPCMPBZ128rmi(b?)",
1677-
"VPCMPDZ128rmi(b?)",
1678-
"VPCMPEQ(B|D|Q|W)Z128rm(b?)",
1679-
"VPCMPGT(B|D|Q|W)Z128rm(b?)",
16801680
"(V?)PCMPGTQrm",
1681-
"VPCMPQZ128rmi(b?)",
1682-
"VPCMPU(B|D|Q|W)Z128rmi(b?)",
1683-
"VPCMPWZ128rmi(b?)",
16841681
"VPERMI2D128rm(b?)",
16851682
"VPERMI2PD128rm(b?)",
16861683
"VPERMI2PS128rm(b?)",
@@ -1704,15 +1701,32 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
17041701
"VPMOVZXBWZ128rm(b?)",
17051702
"VPMOVZXDQZ128rm(b?)",
17061703
"VPMOVZXWDZ128rm(b?)",
1707-
"VPMOVZXWQZ128rm(b?)",
1708-
"VPTESTMBZ128rm(b?)",
1709-
"VPTESTMDZ128rm(b?)",
1710-
"VPTESTMQZ128rm(b?)",
1711-
"VPTESTMWZ128rm(b?)",
1712-
"VPTESTNMBZ128rm(b?)",
1713-
"VPTESTNMDZ128rm(b?)",
1714-
"VPTESTNMQZ128rm(b?)",
1715-
"VPTESTNMWZ128rm(b?)")>;
1704+
"VPMOVZXWQZ128rm(b?)")>;
1705+
1706+
def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
1707+
let Latency = 10;
1708+
let NumMicroOps = 2;
1709+
let ResourceCycles = [1,1];
1710+
}
1711+
def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
1712+
"VCMP(SD|SS)Zrm",
1713+
"VFPCLASSPDZ128rm(b?)",
1714+
"VFPCLASSPSZ128rm(b?)",
1715+
"VPCMPBZ128rmi(b?)",
1716+
"VPCMPDZ128rmi(b?)",
1717+
"VPCMPEQ(B|D|Q|W)Z128rm(b?)",
1718+
"VPCMPGT(B|D|Q|W)Z128rm(b?)",
1719+
"VPCMPQZ128rmi(b?)",
1720+
"VPCMPU(B|D|Q|W)Z128rmi(b?)",
1721+
"VPCMPWZ128rmi(b?)",
1722+
"VPTESTMBZ128rm(b?)",
1723+
"VPTESTMDZ128rm(b?)",
1724+
"VPTESTMQZ128rm(b?)",
1725+
"VPTESTMWZ128rm(b?)",
1726+
"VPTESTNMBZ128rm(b?)",
1727+
"VPTESTNMDZ128rm(b?)",
1728+
"VPTESTNMQZ128rm(b?)",
1729+
"VPTESTNMWZ128rm(b?)")>;
17161730

17171731
def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
17181732
let Latency = 9;
@@ -1748,30 +1762,38 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
17481762
"ILD_F(16|32|64)m",
17491763
"VALIGND(Z|Z256)rm(b?)i",
17501764
"VALIGNQ(Z|Z256)rm(b?)i",
1751-
"VCMPPD(Z|Z256)rm(b?)i",
1752-
"VCMPPS(Z|Z256)rm(b?)i",
1753-
"VPCMPB(Z|Z256)rmi(b?)",
1754-
"VPCMPD(Z|Z256)rmi(b?)",
1755-
"VPCMPEQB(Z|Z256)rm(b?)",
1756-
"VPCMPEQD(Z|Z256)rm(b?)",
1757-
"VPCMPEQQ(Z|Z256)rm(b?)",
1758-
"VPCMPEQW(Z|Z256)rm(b?)",
1759-
"VPCMPGTB(Z|Z256)rm(b?)",
1760-
"VPCMPGTD(Z|Z256)rm(b?)",
1761-
"VPCMPGTQ(Z|Z256)rm(b?)",
1762-
"VPCMPGTW(Z|Z256)rm(b?)",
1763-
"VPCMPQ(Z|Z256)rmi(b?)",
1764-
"VPCMPU(B|D|Q|W)Z256rmi(b?)",
1765-
"VPCMPU(B|D|Q|W)Zrmi(b?)",
1766-
"VPCMPW(Z|Z256)rmi(b?)",
17671765
"VPMAXSQ(Z|Z256)rm(b?)",
17681766
"VPMAXUQ(Z|Z256)rm(b?)",
17691767
"VPMINSQ(Z|Z256)rm(b?)",
1770-
"VPMINUQ(Z|Z256)rm(b?)",
1771-
"VPTESTM(B|D|Q|W)Z256rm(b?)",
1772-
"VPTESTM(B|D|Q|W)Zrm(b?)",
1773-
"VPTESTNM(B|D|Q|W)Z256rm(b?)",
1774-
"VPTESTNM(B|D|Q|W)Zrm(b?)")>;
1768+
"VPMINUQ(Z|Z256)rm(b?)")>;
1769+
1770+
def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
1771+
let Latency = 11;
1772+
let NumMicroOps = 2;
1773+
let ResourceCycles = [1,1];
1774+
}
1775+
def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
1776+
"VCMPPS(Z|Z256)rm(b?)i",
1777+
"VFPCLASSPD(Z|Z256)rm(b?)",
1778+
"VFPCLASSPS(Z|Z256)rm(b?)",
1779+
"VPCMPB(Z|Z256)rmi(b?)",
1780+
"VPCMPD(Z|Z256)rmi(b?)",
1781+
"VPCMPEQB(Z|Z256)rm(b?)",
1782+
"VPCMPEQD(Z|Z256)rm(b?)",
1783+
"VPCMPEQQ(Z|Z256)rm(b?)",
1784+
"VPCMPEQW(Z|Z256)rm(b?)",
1785+
"VPCMPGTB(Z|Z256)rm(b?)",
1786+
"VPCMPGTD(Z|Z256)rm(b?)",
1787+
"VPCMPGTQ(Z|Z256)rm(b?)",
1788+
"VPCMPGTW(Z|Z256)rm(b?)",
1789+
"VPCMPQ(Z|Z256)rmi(b?)",
1790+
"VPCMPU(B|D|Q|W)Z256rmi(b?)",
1791+
"VPCMPU(B|D|Q|W)Zrmi(b?)",
1792+
"VPCMPW(Z|Z256)rmi(b?)",
1793+
"VPTESTM(B|D|Q|W)Z256rm(b?)",
1794+
"VPTESTM(B|D|Q|W)Zrm(b?)",
1795+
"VPTESTNM(B|D|Q|W)Z256rm(b?)",
1796+
"VPTESTNM(B|D|Q|W)Zrm(b?)")>;
17751797

17761798
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
17771799
let Latency = 10;

0 commit comments

Comments
 (0)