PR rtl-optimization/7061: Complex number arguments on x86_64-like ABIs.
This patch addresses the issue in comment #6 of PR rtl-optimization/7061 (a four-digit PR number) from 2006, where on x86_64 complex number arguments are unconditionally spilled to the stack. For the test cases below:

float re(float _Complex a) { return __real__ a; }
float im(float _Complex a) { return __imag__ a; }

GCC with -O2 currently generates:

re:     movq    %xmm0, -8(%rsp)
        movss   -8(%rsp), %xmm0
        ret

im:     movq    %xmm0, -8(%rsp)
        movss   -4(%rsp), %xmm0
        ret

With this patch we now generate:

re:     ret

im:     movq    %xmm0, %rax
        shrq    $32, %rax
        movd    %eax, %xmm0
        ret

[Technically, this shift can be performed on %xmm0 in a single instruction, but the backend needs to be taught to do that; the important bit is that the SCmode argument isn't written to the stack].

The patch itself is to emit_group_store where, just before RTL expansion commits to writing to the stack, we check if the store group consists of a single scalar integer register that holds a complex mode value; on x86_64 SCmode arguments are passed in DImode registers. If this is the case, we can use a SUBREG to "view_convert" the integer to the equivalent complex mode.

An interesting corner case that showed up during testing is that x86_64 also passes HCmode arguments in DImode registers (!), i.e. using modes of different sizes. This is easily handled/supported by first converting to an integer mode of the correct size, and then generating a complex mode SUBREG of this. This is similar in concept to the patch I proposed here: https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590139.html

2022-06-10  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	PR rtl-optimization/7061
	* expr.cc (emit_group_store): For groups that consist of a single
	scalar integer register that holds a complex mode value, use
	gen_lowpart to generate a SUBREG to "view_convert" to the complex
	mode.  For modes of different sizes, first convert to an integer
	mode of the appropriate size.
gcc/testsuite/ChangeLog
	PR rtl-optimization/7061
	* gcc.target/i386/pr7061-1.c: New test case.
	* gcc.target/i386/pr7061-2.c: New test case.
This commit is contained in:
parent
b370ed0bf9
commit
1753a71201
18
gcc/expr.cc
18
gcc/expr.cc
|
@ -2801,10 +2801,26 @@ emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED,
|
||||||
{
|
{
|
||||||
machine_mode dest_mode = GET_MODE (dest);
|
machine_mode dest_mode = GET_MODE (dest);
|
||||||
machine_mode tmp_mode = GET_MODE (tmps[i]);
|
machine_mode tmp_mode = GET_MODE (tmps[i]);
|
||||||
|
scalar_int_mode imode;
|
||||||
|
|
||||||
gcc_assert (known_eq (bytepos, 0) && XVECLEN (src, 0));
|
gcc_assert (known_eq (bytepos, 0) && XVECLEN (src, 0));
|
||||||
|
|
||||||
if (GET_MODE_ALIGNMENT (dest_mode)
|
if (finish == 1
|
||||||
|
&& REG_P (tmps[i])
|
||||||
|
&& COMPLEX_MODE_P (dest_mode)
|
||||||
|
&& SCALAR_INT_MODE_P (tmp_mode)
|
||||||
|
&& int_mode_for_mode (dest_mode).exists (&imode))
|
||||||
|
{
|
||||||
|
if (tmp_mode != imode)
|
||||||
|
{
|
||||||
|
rtx tmp = gen_reg_rtx (imode);
|
||||||
|
emit_move_insn (tmp, gen_lowpart (imode, tmps[i]));
|
||||||
|
dst = gen_lowpart (dest_mode, tmp);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
dst = gen_lowpart (dest_mode, tmps[i]);
|
||||||
|
}
|
||||||
|
else if (GET_MODE_ALIGNMENT (dest_mode)
|
||||||
>= GET_MODE_ALIGNMENT (tmp_mode))
|
>= GET_MODE_ALIGNMENT (tmp_mode))
|
||||||
{
|
{
|
||||||
dest = assign_stack_temp (dest_mode,
|
dest = assign_stack_temp (dest_mode,
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
/* { dg-do compile { target { ! ia32 } } } */
|
||||||
|
/* { dg-options "-O2" } */
|
||||||
|
/* Return the real component of the single-precision complex
   argument A, extracted with the GNU __real__ operator.  */
float
re (float _Complex a)
{
  float real_part = __real__ a;

  return real_part;
}
|
||||||
|
/* { dg-final { scan-assembler-not "mov" } } */
|
|
@ -0,0 +1,5 @@
|
||||||
|
/* { dg-do compile { target { ! ia32 } } } */
|
||||||
|
/* { dg-options "-O2" } */
|
||||||
|
/* Return the imaginary component of the single-precision complex
   argument A, extracted with the GNU __imag__ operator.  */
float
im (float _Complex a)
{
  float imag_part = __imag__ a;

  return imag_part;
}
|
||||||
|
/* { dg-final { scan-assembler-not "movss" } } */
|
||||||
|
/* { dg-final { scan-assembler-not "rsp" } } */
|
Loading…
Reference in New Issue