Skip to content

Commit 7e69118

Browse files
committed
i386: Fix up copysign/xorsign expansion [PR104612]
We ICE on the following testcase for -m32 since r12-3435, because operands[2] is (subreg:SF (reg:DI ...) 0) and lowpart_subreg (V4SFmode, operands[2], SFmode) returns NULL, and that is what we then use in the AND etc. insns we emit.

My earlier version of the patch fixed that by calling force_reg for the input operands, to make sure they are really REGs and so lowpart_subreg will succeed on them. Using REGs seems desirable even for theoretical MEMs, since we don't want to read the following memory slots for the paradoxical subreg. For the outputs, I thought we'd get better code by always computing the result into a new pseudo and then moving the lowpart of that pseudo into dest.

Unfortunately that regressed

    FAIL: gcc.target/i386/pr89984-2.c scan-assembler-not vmovaps

on which the patch changes:

        vandps  .LC0(%rip), %xmm1, %xmm1
    -   vxorps  %xmm0, %xmm1, %xmm0
    +   vxorps  %xmm0, %xmm1, %xmm1
    +   vmovaps %xmm1, %xmm0
        ret

The RA sees:

    (insn 8 4 9 2 (set (reg:V4SF 85)
            (and:V4SF (subreg:V4SF (reg:SF 90) 0)
                (mem/u/c:V4SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S16 A128]))) "pr89984-2.c":7:12 2838 {*andv4sf3}
         (expr_list:REG_DEAD (reg:SF 90)
            (nil)))
    (insn 9 8 10 2 (set (reg:V4SF 87)
            (xor:V4SF (reg:V4SF 85)
                (subreg:V4SF (reg:SF 89) 0))) "pr89984-2.c":7:12 2842 {*xorv4sf3}
         (expr_list:REG_DEAD (reg:SF 89)
            (expr_list:REG_DEAD (reg:V4SF 85)
                (nil))))
    (insn 10 9 14 2 (set (reg:SF 82 [ <retval> ])
            (subreg:SF (reg:V4SF 87) 0)) "pr89984-2.c":7:12 142 {*movsf_internal}
         (expr_list:REG_DEAD (reg:V4SF 87)
            (nil)))
    (insn 14 10 15 2 (set (reg/i:SF 20 xmm0)
            (reg:SF 82 [ <retval> ])) "pr89984-2.c":8:1 142 {*movsf_internal}
         (expr_list:REG_DEAD (reg:SF 82 [ <retval> ])
            (nil)))
    (insn 15 14 0 2 (use (reg/i:SF 20 xmm0)) "pr89984-2.c":8:1 -1
         (nil))

and doesn't know that if it used xmm0 not just for pseudo 82 but also for pseudo 87, it could turn insn 10 into a noop move and so avoid an extra register copy. Nothing later on is able to figure that out either. I don't know how the RA should know that though.
So that we don't regress, this version of the patch uses the new approach (i.e. use a fresh vector pseudo as destination and then move the lowpart of that to dest) instead of the previous one (i.e. use a paradoxical subreg of the dest) only if lowpart_subreg returns NULL.

2022-02-22  Jakub Jelinek  <[email protected]>

	PR target/104612
	* config/i386/i386-expand.cc (ix86_expand_copysign): Call force_reg
	on input operands before calling lowpart_subreg on it.  For output
	operand, use a vmode pseudo as destination and then move its lowpart
	subreg into operands[0] if lowpart_subreg fails on dest.
	(ix86_expand_xorsign): Likewise.

	* gcc.dg/pr104612.c: New test.
1 parent 6263b65 commit 7e69118

File tree

2 files changed

+54
-11
lines changed

2 files changed

+54
-11
lines changed

gcc/config/i386/i386-expand.cc

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2153,7 +2153,7 @@ void
21532153
ix86_expand_copysign (rtx operands[])
21542154
{
21552155
machine_mode mode, vmode;
2156-
rtx dest, op0, op1, mask, op2, op3;
2156+
rtx dest, vdest, op0, op1, mask, op2, op3;
21572157

21582158
mode = GET_MODE (operands[0]);
21592159

@@ -2174,8 +2174,13 @@ ix86_expand_copysign (rtx operands[])
21742174
return;
21752175
}
21762176

2177-
dest = lowpart_subreg (vmode, operands[0], mode);
2178-
op1 = lowpart_subreg (vmode, operands[2], mode);
2177+
dest = operands[0];
2178+
vdest = lowpart_subreg (vmode, dest, mode);
2179+
if (vdest == NULL_RTX)
2180+
vdest = gen_reg_rtx (vmode);
2181+
else
2182+
dest = NULL_RTX;
2183+
op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
21792184
mask = ix86_build_signbit_mask (vmode, 0, 0);
21802185

21812186
if (CONST_DOUBLE_P (operands[1]))
@@ -2184,7 +2189,9 @@ ix86_expand_copysign (rtx operands[])
21842189
/* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. */
21852190
if (op0 == CONST0_RTX (mode))
21862191
{
2187-
emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));
2192+
emit_move_insn (vdest, gen_rtx_AND (vmode, mask, op1));
2193+
if (dest)
2194+
emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
21882195
return;
21892196
}
21902197

@@ -2193,15 +2200,17 @@ ix86_expand_copysign (rtx operands[])
21932200
op0 = force_reg (vmode, op0);
21942201
}
21952202
else
2196-
op0 = lowpart_subreg (vmode, operands[1], mode);
2203+
op0 = lowpart_subreg (vmode, force_reg (mode, operands[1]), mode);
21972204

21982205
op2 = gen_reg_rtx (vmode);
21992206
op3 = gen_reg_rtx (vmode);
22002207
emit_move_insn (op2, gen_rtx_AND (vmode,
22012208
gen_rtx_NOT (vmode, mask),
22022209
op0));
22032210
emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
2204-
emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));
2211+
emit_move_insn (vdest, gen_rtx_IOR (vmode, op2, op3));
2212+
if (dest)
2213+
emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
22052214
}
22062215

22072216
/* Expand an xorsign operation. */
@@ -2210,7 +2219,7 @@ void
22102219
ix86_expand_xorsign (rtx operands[])
22112220
{
22122221
machine_mode mode, vmode;
2213-
rtx dest, op0, op1, mask, x, temp;
2222+
rtx dest, vdest, op0, op1, mask, x, temp;
22142223

22152224
dest = operands[0];
22162225
op0 = operands[1];
@@ -2230,15 +2239,22 @@ ix86_expand_xorsign (rtx operands[])
22302239
temp = gen_reg_rtx (vmode);
22312240
mask = ix86_build_signbit_mask (vmode, 0, 0);
22322241

2233-
op1 = lowpart_subreg (vmode, op1, mode);
2242+
op1 = lowpart_subreg (vmode, force_reg (mode, op1), mode);
22342243
x = gen_rtx_AND (vmode, op1, mask);
22352244
emit_insn (gen_rtx_SET (temp, x));
22362245

2237-
op0 = lowpart_subreg (vmode, op0, mode);
2246+
op0 = lowpart_subreg (vmode, force_reg (mode, op0), mode);
22382247
x = gen_rtx_XOR (vmode, temp, op0);
22392248

2240-
dest = lowpart_subreg (vmode, dest, mode);
2241-
emit_insn (gen_rtx_SET (dest, x));
2249+
vdest = lowpart_subreg (vmode, dest, mode);
2250+
if (vdest == NULL_RTX)
2251+
vdest = gen_reg_rtx (vmode);
2252+
else
2253+
dest = NULL_RTX;
2254+
emit_insn (gen_rtx_SET (vdest, x));
2255+
2256+
if (dest)
2257+
emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
22422258
}
22432259

22442260
static rtx ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1);

gcc/testsuite/gcc.dg/pr104612.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* PR target/104612 */
2+
/* { dg-do compile } */
3+
/* { dg-options "-O2" } */
4+
/* { dg-additional-options "-msse2 -mfpmath=sse" { target i?86-*-* x86_64-*-* } } */
5+
6+
struct V { float x, y; };
7+
8+
struct V
9+
foo (struct V v)
10+
{
11+
struct V ret;
12+
ret.x = __builtin_copysignf (1.0e+0, v.x);
13+
ret.y = __builtin_copysignf (1.0e+0, v.y);
14+
return ret;
15+
}
16+
17+
float
18+
bar (struct V v)
19+
{
20+
return __builtin_copysignf (v.x, v.y);
21+
}
22+
23+
float
24+
baz (struct V v)
25+
{
26+
return v.x * __builtin_copysignf (1.0f, v.y);
27+
}

0 commit comments

Comments
 (0)