x86: Allow V1TI vector register pushes

Add V1TI vector register push and split it after reload to a sequence
of:

(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
(set (match_dup 0) (match_dup 1))

so that the STV pass can convert a TImode integer push to a V1TI vector
register push.  Rename has_non_address_hard_reg to pseudo_reg_set and fold
the separate single_set and has_non_address_hard_reg calls into it, so that
a push of a pseudo register is no longer rejected.

Remove c-c++-common/dfp/func-vararg-mixed-2.c since it is compiled with
-mpreferred-stack-boundary=2 and leads to segfault:

Dump of assembler code for function __bid_nesd2:
   0x08049210 <+0>:	endbr32
   0x08049214 <+4>:	push   %esi
   0x08049215 <+5>:	push   %ebx
   0x08049216 <+6>:	call   0x8049130 <__x86.get_pc_thunk.bx>
   0x0804921b <+11>:	add    $0x8de5,%ebx
   0x08049221 <+17>:	sub    $0x20,%esp
   0x08049224 <+20>:	mov    0x30(%esp),%esi
   0x08049228 <+24>:	pushl  0x2c(%esp)
   0x0804922c <+28>:	call   0x804e600 <__bid32_to_bid64>
   0x08049231 <+33>:	mov    %esi,(%esp)
   0x08049234 <+36>:	movd   %edx,%xmm1
   0x08049238 <+40>:	movd   %eax,%xmm0
   0x0804923c <+44>:	punpckldq %xmm1,%xmm0
=> 0x08049240 <+48>:	movaps %xmm0,0x10(%esp)
   0x08049245 <+53>:	call   0x804e600 <__bid32_to_bid64>
   0x0804924a <+58>:	push   %edx
   0x0804924b <+59>:	push   %eax
   0x0804924c <+60>:	pushl  0x1c(%esp)
   0x08049250 <+64>:	pushl  0x1c(%esp)
   0x08049254 <+68>:	call   0x804b260 <__bid64_quiet_not_equal>
   0x08049259 <+73>:	add    $0x34,%esp
   0x0804925c <+76>:	pop    %ebx
   0x0804925d <+77>:	pop    %esi
   0x0804925e <+78>:	ret

when libgcc is compiled with -msse2.  According to the GCC manual:

'-mpreferred-stack-boundary=NUM'
     Attempt to keep the stack boundary aligned to a 2 raised to NUM
     byte boundary.  If '-mpreferred-stack-boundary' is not specified,
     the default is 4 (16 bytes or 128-bits).

     *Warning:* If you use this switch, then you must build all modules
     with the same value, including any libraries.  This includes the
     system libraries and startup modules.

c-c++-common/dfp/func-vararg-mixed-2.c, which was added by

commit 3b2488ca6e
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Wed Jul 30 19:24:02 2008 +0000

    func-vararg-alternate-d128-2.c: New.

    2008-07-30  H.J. Lu  <hongjiu.lu@intel.com>
                Joey Ye  <joey.ye@intel.com>

            * gcc.dg/dfp/func-vararg-alternate-d128-2.c: New.
            * gcc.dg/dfp/func-vararg-mixed-2.c: Likewise.

isn't expected to work with libgcc.

gcc/

	PR target/95021
	* config/i386/i386-features.c (has_non_address_hard_reg):
	Renamed to ...
	(pseudo_reg_set): This.  Return the SET expression.  Ignore
	pseudo register push.
	(general_scalar_to_vector_candidate_p): Combine single_set and
	has_non_address_hard_reg calls to pseudo_reg_set.
	(timode_scalar_to_vector_candidate_p): Likewise.
	* config/i386/i386.md (*pushv1ti2): New pattern.

gcc/testsuite/

	PR target/95021
	* c-c++-common/dfp/func-vararg-mixed-2.c: Removed.
	* gcc.target/i386/pr95021-1.c: New test.
	* gcc.target/i386/pr95021-2.c: Likewise.
	* gcc.target/i386/pr95021-3.c: Likewise.
	* gcc.target/i386/pr95021-4.c: Likewise.
	* gcc.target/i386/pr95021-5.c: Likewise.
This commit is contained in:
H.J. Lu 2020-05-17 10:10:34 -07:00
parent e977a5df5b
commit 266f44a91c
10 changed files with 203 additions and 121 deletions

View File

@ -1,3 +1,15 @@
2020-05-17 H.J. Lu <hongjiu.lu@intel.com>
PR target/95021
* config/i386/i386-features.c (has_non_address_hard_reg):
Renamed to ...
(pseudo_reg_set): This. Return the SET expression. Ignore
pseudo register push.
(general_scalar_to_vector_candidate_p): Combine single_set and
has_non_address_hard_reg calls to pseudo_reg_set.
(timode_scalar_to_vector_candidate_p): Likewise.
* config/i386/i386.md (*pushv1ti2): New pattern.
2020-05-17 Aldy Hernandez <aldyh@redhat.com>
Revert:

View File

@ -1253,25 +1253,36 @@ scalar_chain::convert ()
return converted_insns;
}
/* Return 1 if INSN uses or defines a hard register.
Hard register uses in a memory address are ignored.
Clobbers and flags definitions are ignored. */
/* Return the SET expression if INSN doesn't reference hard register.
Return NULL if INSN uses or defines a hard register, excluding
pseudo register pushes, hard register uses in a memory address,
clobbers and flags definitions. */
static bool
has_non_address_hard_reg (rtx_insn *insn)
static rtx
pseudo_reg_set (rtx_insn *insn)
{
rtx set = single_set (insn);
if (!set)
return NULL;
/* Check pseudo register push first. */
if (REG_P (SET_SRC (set))
&& !HARD_REGISTER_P (SET_SRC (set))
&& push_operand (SET_DEST (set), GET_MODE (SET_DEST (set))))
return set;
df_ref ref;
FOR_EACH_INSN_DEF (ref, insn)
if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
&& DF_REF_REGNO (ref) != FLAGS_REG)
return true;
return NULL;
FOR_EACH_INSN_USE (ref, insn)
if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
return true;
return NULL;
return false;
return set;
}
/* Check if comparison INSN may be transformed
@ -1345,14 +1356,11 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
rtx def_set = single_set (insn);
rtx def_set = pseudo_reg_set (insn);
if (!def_set)
return false;
if (has_non_address_hard_reg (insn))
return false;
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
@ -1442,14 +1450,11 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
rtx def_set = single_set (insn);
rtx def_set = pseudo_reg_set (insn);
if (!def_set)
return false;
if (has_non_address_hard_reg (insn))
return false;
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);

View File

@ -1674,6 +1674,22 @@
;; Push/pop instructions.
;; Push of a V1TI value held in a register matching the "v" constraint.
;; The insn has no assembler output of its own (template "#"), so it must
;; be split: once reload has completed it is replaced by a stack pointer
;; adjustment followed by a plain V1TI store through the stack pointer.
;; Only enabled when TARGET_64BIT && TARGET_STV.
(define_insn_and_split "*pushv1ti2"
[(set (match_operand:V1TI 0 "push_operand" "=<")
(match_operand:V1TI 1 "register_operand" "v"))]
"TARGET_64BIT && TARGET_STV"
"#"
"&& reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
(set (match_dup 0) (match_dup 1))]
{
/* operands[2] is the stack adjustment: minus the push-rounded size of
a V1TI value.  */
operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
/* Preserve memory attributes. */
/* Re-address the destination MEM at the (already adjusted) stack
pointer so the second set of the split is an ordinary store.  */
operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
}
[(set_attr "type" "multi")
(set_attr "mode" "TI")])
(define_insn "*push<mode>2"
[(set (match_operand:DWI 0 "push_operand" "=<,<")
(match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))]

View File

@ -1,3 +1,13 @@
2020-05-17 H.J. Lu <hongjiu.lu@intel.com>
PR target/95021
* c-c++-common/dfp/func-vararg-mixed-2.c: Removed.
* gcc.target/i386/pr95021-1.c: New test.
* gcc.target/i386/pr95021-2.c: Likewise.
* gcc.target/i386/pr95021-3.c: Likewise.
* gcc.target/i386/pr95021-4.c: Likewise.
* gcc.target/i386/pr95021-5.c: Likewise.
2020-05-17 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/strncmp-1.c: New test.

View File

@ -1,105 +0,0 @@
/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
/* { dg-options "-mpreferred-stack-boundary=2" } */
/* C99 6.5.2.2 Function calls.
Test passing varargs of the combination of decimal float types and
other types. */
#include <stdarg.h>
#include "dfp-dbg.h"
/* Supposing the list of varying number of arguments is:
unsigned int, _Decimal128, double, _Decimal32, _Decimal64. */
/* Walk the variadic list (unsigned int, _Decimal128, double, _Decimal32, ...)
   and return its _Decimal32 element.  ARG is only used as the va_start
   anchor.  */
static _Decimal32
vararg_d32 (unsigned arg, ...)
{
  va_list args;

  va_start (args, arg);
  /* Discard the three arguments that precede the _Decimal32.  */
  (void) va_arg (args, unsigned int);
  (void) va_arg (args, _Decimal128);
  (void) va_arg (args, double);
  const _Decimal32 picked = va_arg (args, _Decimal32);
  va_end (args);

  return picked;
}
/* Walk the variadic list (unsigned int, _Decimal128, double, _Decimal32,
   _Decimal64) and return its _Decimal64 element.  The return type is
   _Decimal64 so the extracted value is handed back unchanged instead of
   being narrowed to _Decimal32.  ARG is only used as the va_start
   anchor.  */
static _Decimal64
vararg_d64 (unsigned arg, ...)
{
  va_list ap;
  _Decimal64 result;

  va_start (ap, arg);
  /* Discard the four arguments that precede the _Decimal64.  */
  (void) va_arg (ap, unsigned int);
  (void) va_arg (ap, _Decimal128);
  (void) va_arg (ap, double);
  (void) va_arg (ap, _Decimal32);
  result = va_arg (ap, _Decimal64);
  va_end (ap);
  return result;
}
/* Walk the variadic list (unsigned int, _Decimal128, ...) and return its
   _Decimal128 element.  ARG is only used as the va_start anchor.  */
static _Decimal128
vararg_d128 (unsigned arg, ...)
{
  va_list args;

  va_start (args, arg);
  /* Discard the unsigned int that precedes the _Decimal128.  */
  (void) va_arg (args, unsigned int);
  const _Decimal128 picked = va_arg (args, _Decimal128);
  va_end (args);

  return picked;
}
/* Return the first variadic argument, read as an unsigned int.  ARG is
   only used as the va_start anchor.  */
static unsigned int
vararg_int (unsigned arg, ...)
{
  va_list args;

  va_start (args, arg);
  const unsigned int first = va_arg (args, unsigned int);
  va_end (args);

  return first;
}
/* Walk the variadic list (unsigned int, _Decimal128, double, ...) and
   return its double element.  The value is held in a double (not a
   float) so it is returned without losing precision.  ARG is only used
   as the va_start anchor.  */
static double
vararg_double (unsigned arg, ...)
{
  va_list ap;
  double result;

  va_start (ap, arg);
  /* Discard the two arguments that precede the double.  */
  (void) va_arg (ap, unsigned int);
  (void) va_arg (ap, _Decimal128);
  result = va_arg (ap, double);
  va_end (ap);
  return result;
}
/* Call every vararg_* helper with the same variadic list
   (0, 1.0dl, 2.0, 3.0df, 4.0dd) and compare each result with the list
   element that helper extracts.  FAILURE and FINISH are not defined in
   this file; presumably they come from "dfp-dbg.h" -- confirm there.  */
int
main ()
{
if (vararg_d32 (3, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 3.0df) FAILURE
if (vararg_d64 (4, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 4.0dd) FAILURE
if (vararg_d128 (1, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 1.0dl) FAILURE
if (vararg_int (0, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 0) FAILURE
if (vararg_double (2, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 2.0) FAILURE
FINISH
}

View File

@ -0,0 +1,27 @@
/* { dg-do compile { target ia32 } } */
/* { dg-options "-O2 -msse2 -mstv -W" } */
/* { dg-final { scan-assembler "movq\[ \t\]%xmm\[0-9\]+, \\(%esp\\)" } } */
/* { dg-final { scan-assembler-not "psrlq" } } */
#include <setjmp.h>
extern jmp_buf buf;
extern long long *target_p;
extern long long *c;
extern void foo (long long);
/* Function whose generated code this test inspects.  When setjmp returns
   zero it calls foo (0).  When setjmp returns non-zero (i.e. after a
   longjmp to BUF) it loads the long long at *TARGET_P, stores it through C
   and passes the same value to foo.  noclone/noinline keep it a
   standalone, out-of-line function.  */
__attribute__ ((noclone, noinline))
void
bar (void)
{
if (setjmp (buf))
{
long long target = *target_p;
*c = target;
foo (target);
}
else
foo (0);
}

View File

@ -0,0 +1,39 @@
/* { dg-do run { target ia32 } } */
/* { dg-require-effective-target sse2_runtime } */
/* { dg-options "-O2 -msse2 -mstv -W" } */
#include <stdlib.h>
#include "pr95021-1.c"
jmp_buf buf;
long long *target_p;
long long *c;
int count;
/* Callee for bar.  Aborts unless X equals the value currently stored in
   *C.  On the first call only (COUNT still zero) it increments COUNT and
   longjmps to BUF with value 1; later calls simply return.  */
__attribute__ ((noclone, noinline))
void
foo (long long x)
{
if (x != *c)
abort ();
if (!count)
{
count++;
longjmp (buf, 1);
}
}
/* Driver: point TARGET_P at VAL (30) and C at LOCAL (0), run bar, and
   abort unless LOCAL ended up equal to VAL.  */
int
main ()
{
long long val = 30;
long long local = 0;
target_p = &val;
c = &local;
bar ();
if (val != local)
abort ();
return 0;
}

View File

@ -0,0 +1,5 @@
/* { dg-do compile { target ia32 } } */
/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */
/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */
#include "pr95021-1.c"

View File

@ -0,0 +1,28 @@
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -msse2 -mstv -W" } */
/* { dg-final { scan-assembler "(movaps|vmovdqa)\[ \t\]%xmm\[0-9\]+, \\(%rsp\\)" } } */
#include <setjmp.h>
extern jmp_buf buf;
extern __int128 *target_p;
__int128 *c;
extern int count;
extern void foo (__int128, __int128, __int128, __int128);
/* Function whose generated code this test inspects.  Both paths call foo
   with the same three constant __int128 arguments; the fourth argument
   is 0 when setjmp returns zero.  When setjmp returns non-zero (i.e.
   after a longjmp to BUF) the __int128 at *TARGET_P is loaded, stored
   through C and passed as the fourth argument.  noclone/noinline keep it
   a standalone, out-of-line function.  */
__attribute__ ((noclone, noinline))
void
bar (void)
{
if (setjmp (buf))
{
__int128 target = *target_p;
*c = target;
foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, target);
}
else
foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, 0);
}

View File

@ -0,0 +1,45 @@
/* { dg-do run { target int128 } } */
/* { dg-require-effective-target sse2_runtime } */
/* { dg-options "-O2 -msse2 -mstv -W" } */
#include <stdlib.h>
#include "pr95021-4.c"
jmp_buf buf;
__int128 *target_p;
__int128 *c;
int count;
/* Callee for bar.  Aborts unless I1, I2 and I3 carry the three expected
   constants and X equals the value currently stored in *C.  On the first
   call only (COUNT still zero) it increments COUNT and longjmps to BUF
   with value 1; later calls simply return.  */
__attribute__ ((noclone, noinline))
void
foo (__int128 i1, __int128 i2, __int128 i3, __int128 x)
{
if (i1 != 0xbadbeef1)
abort ();
if (i2 != 0x2badbeef)
abort ();
if (i3 != 0xbad3beef)
abort ();
if (x != *c)
abort ();
if (!count)
{
count++;
longjmp (buf, 1);
}
}
/* Driver: point TARGET_P at VAL (30) and C at LOCAL (0), run bar, and
   abort unless LOCAL ended up equal to VAL.  */
int
main ()
{
__int128 val = 30;
__int128 local = 0;
target_p = &val;
c = &local;
bar ();
if (val != local)
abort ();
return 0;
}