@@ -817,27 +817,33 @@ def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
817817}
818818def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
819819def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
820- "KADD(B|D|Q|W)rr",
820+ "VALIGND(Z|Z128|Z256)rri",
821+ "VALIGNQ(Z|Z128|Z256)rri",
822+ "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
823+ "VPBROADCAST(B|W)rr",
824+ "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
825+
826+ def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> { // Write resource that uses a single processor resource, SKXPort5.
827+ let Latency = 4; // Latency modelled for instructions bound to this group.
828+ let NumMicroOps = 1; // Modelled as one micro-op.
829+ let ResourceCycles = [1]; // One entry per resource in the list above: 1 cycle on SKXPort5.
830+ }
831+ def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
821832 "KSHIFTL(B|D|Q|W)ri",
822833 "KSHIFTR(B|D|Q|W)ri",
823834 "KUNPCK(BW|DQ|WD)rr",
824- "VALIGND(Z|Z128|Z256)rri",
825- "VALIGNQ(Z|Z128|Z256)rri",
826835 "VCMPPD(Z|Z128|Z256)rri",
827836 "VCMPPS(Z|Z128|Z256)rri",
828837 "VCMP(SD|SS)Zrr",
829- "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
830838 "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
831839 "VFPCLASS(SD|SS)Zrr",
832- "VPBROADCAST(B|W)rr",
833840 "VPCMPB(Z|Z128|Z256)rri",
834841 "VPCMPD(Z|Z128|Z256)rri",
835842 "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
836843 "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
837844 "VPCMPQ(Z|Z128|Z256)rri",
838845 "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
839846 "VPCMPW(Z|Z128|Z256)rri",
840- "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr",
841847 "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
842848
843849def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
@@ -1519,7 +1525,6 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
15191525 let ResourceCycles = [1,1];
15201526}
15211527def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
1522- "VFPCLASSSDZrm(b?)",
15231528 "VPBROADCASTB(Z|Z256)rm(b?)",
15241529 "VPBROADCASTW(Z|Z256)rm(b?)")>;
15251530def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
@@ -1670,17 +1675,9 @@ def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
16701675 VPMOVSXWDYrm,
16711676 VPMOVZXWDYrm)>;
16721677def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
1673- "VCMP(PD|PS)Z128rm(b?)i",
1674- "VCMP(SD|SS)Zrm",
1678+ "VFPCLASSSDZrm(b?)",
16751679 "VFPCLASSSSZrm(b?)",
1676- "VPCMPBZ128rmi(b?)",
1677- "VPCMPDZ128rmi(b?)",
1678- "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
1679- "VPCMPGT(B|D|Q|W)Z128rm(b?)",
16801680 "(V?)PCMPGTQrm",
1681- "VPCMPQZ128rmi(b?)",
1682- "VPCMPU(B|D|Q|W)Z128rmi(b?)",
1683- "VPCMPWZ128rmi(b?)",
16841681 "VPERMI2D128rm(b?)",
16851682 "VPERMI2PD128rm(b?)",
16861683 "VPERMI2PS128rm(b?)",
@@ -1704,15 +1701,32 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
17041701 "VPMOVZXBWZ128rm(b?)",
17051702 "VPMOVZXDQZ128rm(b?)",
17061703 "VPMOVZXWDZ128rm(b?)",
1707- "VPMOVZXWQZ128rm(b?)",
1708- "VPTESTMBZ128rm(b?)",
1709- "VPTESTMDZ128rm(b?)",
1710- "VPTESTMQZ128rm(b?)",
1711- "VPTESTMWZ128rm(b?)",
1712- "VPTESTNMBZ128rm(b?)",
1713- "VPTESTNMDZ128rm(b?)",
1714- "VPTESTNMQZ128rm(b?)",
1715- "VPTESTNMWZ128rm(b?)")>;
1704+ "VPMOVZXWQZ128rm(b?)")>;
1705+
1706+ def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> { // Write resource that uses two processor resources: SKXPort5 and SKXPort23.
1707+ let Latency = 10; // Latency modelled for instructions bound to this group.
1708+ let NumMicroOps = 2; // Modelled as two micro-ops.
1709+ let ResourceCycles = [1,1]; // Positional, one entry per resource above: 1 cycle on SKXPort5, 1 cycle on SKXPort23.
1710+ }
1711+ def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i", // Binds the group to every instruction name matching a regex in this list (Z128 `rm` forms plus scalar VCMP(SD|SS)Zrm).
1712+ "VCMP(SD|SS)Zrm",
1713+ "VFPCLASSPDZ128rm(b?)",
1714+ "VFPCLASSPSZ128rm(b?)",
1715+ "VPCMPBZ128rmi(b?)",
1716+ "VPCMPDZ128rmi(b?)",
1717+ "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
1718+ "VPCMPGT(B|D|Q|W)Z128rm(b?)",
1719+ "VPCMPQZ128rmi(b?)",
1720+ "VPCMPU(B|D|Q|W)Z128rmi(b?)",
1721+ "VPCMPWZ128rmi(b?)",
1722+ "VPTESTMBZ128rm(b?)",
1723+ "VPTESTMDZ128rm(b?)",
1724+ "VPTESTMQZ128rm(b?)",
1725+ "VPTESTMWZ128rm(b?)",
1726+ "VPTESTNMBZ128rm(b?)",
1727+ "VPTESTNMDZ128rm(b?)",
1728+ "VPTESTNMQZ128rm(b?)",
1729+ "VPTESTNMWZ128rm(b?)")>;
17161730
17171731def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
17181732 let Latency = 9;
@@ -1748,30 +1762,38 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
17481762 "ILD_F(16|32|64)m",
17491763 "VALIGND(Z|Z256)rm(b?)i",
17501764 "VALIGNQ(Z|Z256)rm(b?)i",
1751- "VCMPPD(Z|Z256)rm(b?)i",
1752- "VCMPPS(Z|Z256)rm(b?)i",
1753- "VPCMPB(Z|Z256)rmi(b?)",
1754- "VPCMPD(Z|Z256)rmi(b?)",
1755- "VPCMPEQB(Z|Z256)rm(b?)",
1756- "VPCMPEQD(Z|Z256)rm(b?)",
1757- "VPCMPEQQ(Z|Z256)rm(b?)",
1758- "VPCMPEQW(Z|Z256)rm(b?)",
1759- "VPCMPGTB(Z|Z256)rm(b?)",
1760- "VPCMPGTD(Z|Z256)rm(b?)",
1761- "VPCMPGTQ(Z|Z256)rm(b?)",
1762- "VPCMPGTW(Z|Z256)rm(b?)",
1763- "VPCMPQ(Z|Z256)rmi(b?)",
1764- "VPCMPU(B|D|Q|W)Z256rmi(b?)",
1765- "VPCMPU(B|D|Q|W)Zrmi(b?)",
1766- "VPCMPW(Z|Z256)rmi(b?)",
17671765 "VPMAXSQ(Z|Z256)rm(b?)",
17681766 "VPMAXUQ(Z|Z256)rm(b?)",
17691767 "VPMINSQ(Z|Z256)rm(b?)",
1770- "VPMINUQ(Z|Z256)rm(b?)",
1771- "VPTESTM(B|D|Q|W)Z256rm(b?)",
1772- "VPTESTM(B|D|Q|W)Zrm(b?)",
1773- "VPTESTNM(B|D|Q|W)Z256rm(b?)",
1774- "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
1768+ "VPMINUQ(Z|Z256)rm(b?)")>;
1769+
1770+ def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> { // Write resource that uses two processor resources: SKXPort5 and SKXPort23.
1771+ let Latency = 11; // Latency modelled for instructions bound to this group.
1772+ let NumMicroOps = 2; // Modelled as two micro-ops.
1773+ let ResourceCycles = [1,1]; // Positional, one entry per resource above: 1 cycle on SKXPort5, 1 cycle on SKXPort23.
1774+ }
1775+ def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i", // Binds the group to every instruction name matching a regex in this list (the Z and Z256 `rm` forms).
1776+ "VCMPPS(Z|Z256)rm(b?)i",
1777+ "VFPCLASSPD(Z|Z256)rm(b?)",
1778+ "VFPCLASSPS(Z|Z256)rm(b?)",
1779+ "VPCMPB(Z|Z256)rmi(b?)",
1780+ "VPCMPD(Z|Z256)rmi(b?)",
1781+ "VPCMPEQB(Z|Z256)rm(b?)",
1782+ "VPCMPEQD(Z|Z256)rm(b?)",
1783+ "VPCMPEQQ(Z|Z256)rm(b?)",
1784+ "VPCMPEQW(Z|Z256)rm(b?)",
1785+ "VPCMPGTB(Z|Z256)rm(b?)",
1786+ "VPCMPGTD(Z|Z256)rm(b?)",
1787+ "VPCMPGTQ(Z|Z256)rm(b?)",
1788+ "VPCMPGTW(Z|Z256)rm(b?)",
1789+ "VPCMPQ(Z|Z256)rmi(b?)",
1790+ "VPCMPU(B|D|Q|W)Z256rmi(b?)",
1791+ "VPCMPU(B|D|Q|W)Zrmi(b?)",
1792+ "VPCMPW(Z|Z256)rmi(b?)",
1793+ "VPTESTM(B|D|Q|W)Z256rm(b?)",
1794+ "VPTESTM(B|D|Q|W)Zrm(b?)",
1795+ "VPTESTNM(B|D|Q|W)Z256rm(b?)",
1796+ "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
17751797
17761798def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
17771799 let Latency = 10;
0 commit comments