Skip to content

Commit e3f26e2

Browse files
author
H. Peter Anvin (Intel)
committed
Reshuffle shift and rotate patterns for APX
The shift and rotate patterns are "interesting" in the following ways: (1) Even though only 4/5/6 bits of the input are ever used, the regular instructions take the shift count in the CL register, whereas the -X instructions take it in a register matching the operand size. This makes the optimization patterns "interesting." (2) The sequencing of the legacy, VEX -X, APX EVEX, and APX -X versions matters. For #1, allow a register of any size to contain the shift count. For #2, split up the macro generation of the patterns, and add a new "$xshift" macro to deal with the combinatorics of generating all the -X patterns. The new macro is written directly in Perl, since that seemed easier than trying to make anything more general for what is very much a special case... Reported-by: Maciej Wieczor-Retman <[email protected]> Signed-off-by: H. Peter Anvin (Intel) <[email protected]>
1 parent e39b856 commit e3f26e2

File tree

2 files changed

+54
-14
lines changed

2 files changed

+54
-14
lines changed

x86/insns.dat

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -80,19 +80,12 @@ $bwdq TEST rm#,imm# [mi: o# f6# /0 i# ] 8086,SM
8080
;# The basic shift and rotate operations
8181
$shift ROL ROR RCL RCR SHL,SAL SHR - SAR
8282

83-
$dq RORX reg#,rm#*,imm8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib] BMI2,SM0-1
84-
$dq ROLX reg#,rm#*,imm_known8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)] BMI2,SM0-1
85-
$dq SHLX reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,SM
86-
$dq SALX reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,SM,ND
87-
$dq SARX reg#,rm#*,reg# [rmv: vex+.lz.f3.0f38.w# f7 /r] BMI2,SM
88-
$dq SHRX reg#,rm#*,reg# [rmv: vex+.lz.f2.0f38.w# f7 /r] BMI2,SM
89-
90-
$dq ROR reg#,rm#,imm8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib] BMI2,SM0-1,ND,NF!,OPT
91-
$dq ROL reg#,rm#*,imm_known8 [rmi: vex+.lz.f2.0f3a.w# f0 /r ib^(d:1f/3f)] BMI2,SM0-1,ND,NF!,OPT
92-
$dq SHL reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,ND,NF!,OPT
93-
$dq SAL reg#,rm#*,reg# [rmv: vex+.lz.66.0f38.w# f7 /r] BMI2,ND,NF!,OPT
94-
$dq SAR reg#,rm#*,reg# [rmv: vex+.lz.f3.0f38.w# f7 /r] BMI2,ND,NF!,OPT
95-
$dq SHR reg#,rm#*,reg# [rmv: vex+.lz.f2.0f38.w# f7 /r] BMI2,ND,NF!,OPT
83+
; RORX, SHLX, SARX
84+
$xshift evex=0
85+
86+
;# APX EVEX versions
87+
$eshift ROL ROR RCL RCR SHL,SAL SHR - SAR
88+
$xshift evex=1
9689

9790
;# Other basic integer arithmetic
9891
$wd INC reg# [r: o# 40+r] 8086,NOLONG

x86/preinsns.pl

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,64 @@
3535
};
3636

3737
# Common pattern for the basic shift and rotate instructions
38+
# Separate legacy and EVEX versions because additional patterns are
39+
# needed to handle the -X VEX versions
3840
# Legacy (non-EVEX) template. 'def' is set to the def_eightfold glob
# (defined outside this excerpt) and 'txt' is a non-interpolated
# here-document of pattern lines, one per operand form: shift by one
# (unity), count in CL, and an imm8 count.
# The reg_cx/reg_ecx/reg_rcx lines repeat the reg_cl encoding
# (d2# /$n) and add the ND flag, so the count may be written using any
# width of the C register.
# NOTE(review): every 'txt' line in this template has a second "]"
# after the encoding field, unlike the eshift template -- confirm the
# pattern parser tolerates it.
$macros{'shift'} = {
3941
'def' => *def_eightfold,
4042
'txt' => <<'EOL'
4143
$$bwdq $op rm#,unity [m-: o# d0# /$n] ] 8086,FL
4244
$$bwdq $op rm#,reg_cl [m-: o# d2# /$n] ] 8086,FL
45+
$$bwdq $op rm#,reg_cx [m-: o# d2# /$n] ] 8086,FL,ND
46+
$$bwdq $op rm#,reg_ecx [m-: o# d2# /$n] ] 8086,FL,ND
47+
$$bwdq $op rm#,reg_rcx [m-: o# d2# /$n] ] 8086,FL,ND
4348
$$bwdq $op rm#,imm8 [mi: o# c0# /$n ib,u] ] 186,FL
49+
EOL
50+
};
51+
52+
# APX EVEX versions
53+
# EVEX template with the same layout as the legacy one: 'def' is the
# def_eightfold glob and 'txt' a non-interpolated here-document.
# Each pattern takes an optional leading register operand (reg#?)
# ahead of rm# and the count, uses an evex.ndx.nf.l0.m4 encoding, and
# is flagged $apx,FL,SM0-1. As in the legacy template, the
# reg_cx/reg_ecx/reg_rcx lines repeat the reg_cl encoding with ND added.
# NOTE(review): "$apx" is literal text inside the single-quoted
# here-document; presumably it is expanded later by the macro
# processor -- confirm.
$macros{'eshift'} = {
54+
'def' => *def_eightfold,
55+
'txt' => <<'EOL'
4456
$$bwdq $op reg#?,rm#,unity [vm-: evex.ndx.nf.l0.m4.o# d0# /$n ] $apx,FL,SM0-1
4557
$$bwdq $op reg#?,rm#,reg_cl [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1
58+
$$bwdq $op reg#?,rm#,reg_cx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
59+
$$bwdq $op reg#?,rm#,reg_ecx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
60+
$$bwdq $op reg#?,rm#,reg_rcx [vm-: evex.ndx.nf.l0.m4.o# d2# /$n ] $apx,FL,SM0-1,ND
4661
$$bwdq $op reg#?,rm#,imm8 [vmi: evex.ndx.nf.l0.m4.o# c0# /$n ib,u ] $apx,FL,SM0-1
4762
EOL
4863
};
4964

65+
# -X shifts and rotates
#
# Generates the flag-less (!FL) BMI2 shift/rotate patterns: the explicit
# -X mnemonics (RORX, ROLX, SHLX, SALX, SARX, SHRX) and the same
# encodings under the plain mnemonics, which carry ND,NF!,OPT.
#
# Arguments:
#   $mac     - macro name (unused)
#   $args    - parsed arguments (unused)
#   $rawargs - array ref of raw argument strings; an "evex=1" entry
#              selects the EVEX encodings, which also get the APX flag.
#              Anything else (including no arguments) selects VEX.
#
# Returns the list of generated pattern lines.
$macros{'xshift'} = {
    'func' => sub {
        my($mac, $args, $rawargs) = @_;
        my @ol;

        my $evex = grep { /^evex=1$/ } @{ $rawargs || [] };
        my $vex  = $evex ? 'evex' : 'vex';
        my $vfl  = $evex ? 'APX'  : '';

        # Build a flag list from comma-separated pieces, dropping empty
        # fields and repeats so that no ",," or trailing "," is emitted.
        my $join_flags = sub {
            my %seen;
            my @out;
            foreach my $f (map { (split(/,/, $_)) } @_) {
                next if $f eq '' || $seen{$f}++;
                push(@out, $f);
            }
            return join(',', @out);
        };

        foreach my $xf (['X', $vfl], ['', "$vfl,ND,NF!,OPT"]) {
            my($x, $fl) = @$xf;
            foreach my $os (32, 64) {
                my $w    = ($os == 32) ? 'w0' : 'w1';
                # ROL is encoded as ROR with the count XORed with size-1
                my $ixor = sprintf('%02x', $os - 1);
                my $rfl  = $join_flags->('BMI2,SM0-1,!FL', $fl);

                push(@ol, "ROR$x reg$os,rm$os,imm8 [rmi: $vex.lz.f2.0f3a.$w f0 /r ib] $rfl");
                push(@ol, "ROL$x reg$os,rm$os,imm_known8 [rmi: $vex.lz.f2.0f3a.$w f0 /r ib^$ixor] $rfl");

                # The count register may be of any size
                foreach my $ss (8, 16, 32, 64) {
                    foreach my $opp (['SHL','66'], ['SAL','66'], ['SAR','f3'], ['SHR','f2']) {
                        my($op, $pp) = @$opp;
                        # Only the form whose count register matches the
                        # operand size is left without ND. SAL duplicates
                        # the SHL encoding (same 66 prefix), so it is
                        # always ND.
                        my $nd  = ($ss == $os && $op ne 'SAL') ? '' : 'ND';
                        my $sfl = $join_flags->('BMI2,SM0-1,!FL', $fl, $nd);
                        push(@ol, "$op$x reg$os,rm${os}*,reg$ss [rmv: $vex.lz.$pp.0f38.$w f7 /r] $sfl");
                    }
                }
            }
        }
        return @ol;
    }
};
95+
5096
#
5197
# Common pattern for multiple 32/64, 16/32/64, or 8/16/32/64 instructions.
5298
# 'z' is used for a null-prefixed default-sized instruction (osm/osd)
@@ -433,7 +479,8 @@ (@)
433479
## XXX: check: CMPSS, CMPSD
434480
## XXX: check VEX encoded instructions that do not write
435481

436-
# Instructions which (possibly) change the flags
482+
# Instructions which (possibly) change the flags without annotations
483+
# The FL or !FL flags will override this
437484
my $flaggy = '^(aa[adms]|ad[dc]|ad[co]x|aes\w*kl|and|andn|arpl|bextr|bl[sc]ic?|bl[sc]msk|bl[sc]r|\
438485
bs[rf]|bt|bt[crs]|bzhi|clac|clc|cld|cli|clrssbsy|cmc|cmp|cmpxchg.*|da[as]|dec|div|\
439486
encodekey.*|enqcmd.*|fu?comip?|idiv|imul|inc|iret.*|kortest.*|ktest.*|lar|loadiwkey|\

0 commit comments

Comments
 (0)