Skip to content

Commit

Permalink
add vmpsadbw for avx10.2
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 13, 2024
1 parent 2d3a9ce commit 8552268
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 13 deletions.
3 changes: 1 addition & 2 deletions gen/gen_avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,15 +447,14 @@ void putX_X_XM_IMM()
{ 0x1B, "vcvtne2ph2hf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },

{ 0x52, "vdpphps", T_MUST_EVEX | T_0F38 | T_EW0 | T_YMM | T_B32, false },
// { 0x42, "vmpsadbw", T_MUST_EVEX | T_F3 | T_0F3A | T_EW0 | T_YMM | T_B32, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string s = type2String(p->type);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
// puts("void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_EW0 | T_B32); }");
puts("void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); }");
}

void putShift()
Expand Down
2 changes: 1 addition & 1 deletion gen/gen_code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void putX_X_XM(bool omitOnly)
{ 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 },
{ 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 1 },
{ 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
Expand Down
9 changes: 8 additions & 1 deletion test/avx10/misc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,11 @@ vdpphps(zm1, zm2, zm3);
vdpphps(zm1, zm2, ptr[rax+128]);
vdpphps(zm1, zm2, ptr_b[rax+128]);

// skip vmpsadbw
vmpsadbw(xm1, xm3, xm15, 3);
vmpsadbw(xm1|T_z, xm4, ptr[rax+128], 5);

vmpsadbw(ym1|k4, ym3, ym15, 3);
vmpsadbw(ym1, ym4, ptr[rax+128], 5);

vmpsadbw(zm1|k4, zm3, zm15, 3);
vmpsadbw(zm1, zm4, ptr[rax+128], 5);
24 changes: 24 additions & 0 deletions test/avx10_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,27 @@ CYBOZU_TEST_AUTO(ymm_with_sae)
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Verifies the bytes produced by vmpsadbw under both default-encoding
// settings: VEX with the library defaults, EVEX once the mpsadbw slot of
// setDefaultEncoding() is switched to EvexEncoding.
CYBOZU_TEST_AUTO(vmpsadbw)
{
	// Expected machine code, one row per vmpsadbw call, in emission order.
	const uint8_t tbl[] = {
		0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03,
		0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03,
		0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03,
		0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);

	struct VmpsadbwCode : Xbyak::CodeGenerator {
		VmpsadbwCode()
		{
			// library defaults
			setDefaultEncoding();
			vmpsadbw(xm1, xm3, xm15, 3); // vex(avx)
			vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2)

			// second argument selects the encoding used for vmpsadbw
			setDefaultEncoding(VexEncoding, EvexEncoding);
			vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2)
			vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2)
		}
	};
	VmpsadbwCode c;

	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
3 changes: 3 additions & 0 deletions test/test_by_xed.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#include <stdio.h>
#include <xbyak/xbyak.h>

using namespace Xbyak;

// Code generator whose constructor body is the content of tmp.cpp, pulled in
// textually by the #include below.
// NOTE(review): tmp.cpp is presumably produced by the accompanying
// test_by_xed.py -- confirm against the test driver.
struct Code : Xbyak::CodeGenerator {
Code()
// 4096*8 is handed to the Xbyak::CodeGenerator constructor
// (presumably the code buffer size in bytes -- TODO confirm).
: Xbyak::CodeGenerator(4096*8)
{
// Arguments are (vnniEnc, mpsadbwEnc): VEX for the first slot, EVEX for the
// mpsadbw slot, i.e. the opposite of the library defaults
// (EvexEncoding, VexEncoding).
setDefaultEncoding(VexEncoding, EvexEncoding);
// Every statement in tmp.cpp is compiled as part of this constructor, so
// it sees the CodeGenerator members unqualified.
#include "tmp.cpp"
}
};
Expand Down
5 changes: 5 additions & 0 deletions test/test_by_xed.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ def parseNmemonic(s):
args = []
attrs = []

# remove Xbyak::{Evex,Vex}Encoding
r = re.search(r'(,[^,]*Encoding)', s)
if r:
s = s.replace(r.group(1), '')

(s, broadcast) = parseBroadcast(s)

# replace xm0 with xmm0
Expand Down
16 changes: 8 additions & 8 deletions xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -2661,21 +2661,21 @@ class CodeGenerator : public CodeArray {
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
// Encodes a three-operand AVX instruction through opAVX_X_X_XM, OR-ing the
// encoding-dependent flags returned by orEvexIf into `type`.
// NOTE(review): this diff hunk lists the pre-change and post-change lines back
// to back (shorter line first, replacement second); the notes below describe
// the replacement signature.
//   type     : flags shared by both encodings
//   code     : instruction code forwarded to opAVX_X_X_XM
//   encoding : requested encoding; DefaultEncoding defers to defaultEncoding_[sel]
//   imm      : forwarded to opAVX_X_X_XM; defaults to NONE (presumably "no immediate" -- confirm)
//   typeVex  : extra flags used when the EVEX path is not taken
//   typeEvex : extra flags used, together with T_MUST_EVEX, when EvexEncoding is taken
//   sel      : index into defaultEncoding_
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int sel = 0)
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
{
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, sel), code);
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm);
}
int orEvexIf(PreferredEncoding encoding, int sel = 0) {
int orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) {
if (encoding == DefaultEncoding) {
encoding = defaultEncoding_[sel];
}
if (encoding == EvexEncoding) {
#ifdef XBYAK_DISABLE_AVX512
XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif
return T_MUST_EVEX;
return T_MUST_EVEX | typeEvex;
}
return 0;
return typeVex;
}
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
Expand Down Expand Up @@ -3132,8 +3132,8 @@ class CodeGenerator : public CodeArray {
#endif
, isDefaultJmpNEAR_(false)
{
defaultEncoding_[0] = EvexEncoding; // use avx512-vnni not avx-vnni
defaultEncoding_[1] = VexEncoding; // use vmpsadbw(avx) not avx10.2
// select avx512-vnni, vmpsadbw(avx)
setDefaultEncoding();
labelMgr_.set(this);
}
void reset()
Expand Down Expand Up @@ -3171,7 +3171,7 @@ class CodeGenerator : public CodeArray {
#endif

// set default encoding to select Vex or Evex
//   vnniEnc    : stored in defaultEncoding_[0]
//   mpsadbwEnc : stored in defaultEncoding_[1]
// These slots are what DefaultEncoding resolves to for an instruction using
// the matching `sel` index.
// NOTE(review): this diff hunk lists the pre-change and post-change signatures
// back to back; the second one adds the `= EvexEncoding` default so the
// function can be called with no arguments.
void setDefaultEncoding(PreferredEncoding vnniEnc, PreferredEncoding mpsadbwEnc = VexEncoding)
void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding mpsadbwEnc = VexEncoding)
{ defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = mpsadbwEnc; }

void sha1msg12(const Xmm& x, const Operand& op)
Expand Down
2 changes: 1 addition & 1 deletion xbyak/xbyak_mnemonic.h
Original file line number Diff line number Diff line change
Expand Up @@ -1369,7 +1369,6 @@ void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_
// vmovupd, (Xmm, Operand) overload: code 0x10 via opAVX_X_XM_IMM.
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x10); }
// vmovups, (Address, Xmm) overload: code 0x11; the register is passed first and the address second, and T_M_K is added to the flags.
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x11); }
// vmovups, (Xmm, Operand) overload: code 0x10.
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x10); }
// NOTE(review): this looks like the old vmpsadbw that the commit deletes from this position (fixed flags T_66|T_0F3A|T_W0|T_YMM, no PreferredEncoding parameter); the later hunk adds the overload that goes through opEncoding.
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x42, imm); }
// vmulpd / vmulps / vmulsd: all use code 0x59 through opAVX_X_X_XM and differ only in their type flags; op2 defaults to an empty Operand().
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x59); }
Expand Down Expand Up @@ -2408,6 +2407,7 @@ void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2,
// vmovw, (Address, Xmm) overload: code 0x7E, T_MUST_EVEX; the register is passed first and the address second.
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
// vmovw, (Reg32e, Xmm) overload: code 0x7E; xm0 is passed as the middle operand of opAVX_X_X_XM.
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
// vmovw, (Xmm, Operand) overload: code 0x6E; raises ERR_BAD_COMBINATION unless op is a 32/64-bit register or a memory operand.
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
// vmpsadbw with selectable encoding: base flags T_0F3A | T_YMM and code 0x42 for both forms; T_66 | T_W0 | T_YMM is added for the VEX form and T_F3 | T_0F3A | T_EW0 | T_B32 for the EVEX form. The trailing 1 is `sel`, i.e. DefaultEncoding resolves to defaultEncoding_[1] (the mpsadbw slot of setDefaultEncoding).
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); }
// vmulnepbf16 / vmulph / vmulsh: all use code 0x59 with T_MAP5 and T_MUST_EVEX through opAVX_X_X_XM; vmulph and vmulsh default op2 to an empty Operand().
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }
Expand Down

0 comments on commit 8552268

Please sign in to comment.