re PR rtl-optimization/77541 (wrong code with 512bit vectors of int128 @ -O1)

2016-11-24  Vladimir Makarov  <vmakarov@redhat.com>

	PR rtl-optimization/77541
	* lra-constraints.c (struct input_reload): Add field match_p.
	(get_reload_reg): Check modes of input reloads to generate unique
	value reload pseudo.
	(match_reload): Add input reload pseudo for the current insn.

2016-11-24  Vladimir Makarov  <vmakarov@redhat.com>

	PR rtl-optimization/77541
	* gcc.target/i386/pr77541.c: New.

From-SVN: r242848
This commit is contained in:
Vladimir Makarov 2016-11-24 19:54:27 +00:00 committed by Vladimir Makarov
parent fba5a793b3
commit 3f156a6ce0
4 changed files with 92 additions and 31 deletions

View File

@ -1,3 +1,11 @@
2016-11-24 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/77541
* lra-constraints.c (struct input_reload): Add field match_p.
(get_reload_reg): Check modes of input reloads to generate unique
value reload pseudo.
(match_reload): Add input reload pseudo for the current insn.
2016-11-24 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Update

View File

@ -529,6 +529,8 @@ init_curr_operand_mode (void)
/* Structure describes input reload of the current insns. */
struct input_reload
{
/* True for input reload of matched operands. */
bool match_p;
/* Reloaded value. */
rtx input;
/* Reload pseudo used. */
@ -563,6 +565,7 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
{
int i, regno;
enum reg_class new_class;
bool unique_p = false;
if (type == OP_OUT)
{
@ -574,39 +577,53 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
e.g. volatile memory. */
if (! side_effects_p (original))
for (i = 0; i < curr_insn_input_reloads_num; i++)
if (rtx_equal_p (curr_insn_input_reloads[i].input, original)
&& in_class_p (curr_insn_input_reloads[i].reg, rclass, &new_class))
{
rtx reg = curr_insn_input_reloads[i].reg;
regno = REGNO (reg);
/* If input is equal to original and both are VOIDmode,
GET_MODE (reg) might be still different from mode.
Ensure we don't return *result_reg with wrong mode. */
if (GET_MODE (reg) != mode)
{
if (in_subreg_p)
continue;
if (GET_MODE_SIZE (GET_MODE (reg)) < GET_MODE_SIZE (mode))
continue;
reg = lowpart_subreg (mode, reg, GET_MODE (reg));
if (reg == NULL_RTX || GET_CODE (reg) != SUBREG)
continue;
}
*result_reg = reg;
if (lra_dump_file != NULL)
{
fprintf (lra_dump_file, " Reuse r%d for reload ", regno);
dump_value_slim (lra_dump_file, original, 1);
}
if (new_class != lra_get_allocno_class (regno))
lra_change_class (regno, new_class, ", change to", false);
if (lra_dump_file != NULL)
fprintf (lra_dump_file, "\n");
return false;
}
*result_reg = lra_create_new_reg (mode, original, rclass, title);
{
if (! curr_insn_input_reloads[i].match_p
&& rtx_equal_p (curr_insn_input_reloads[i].input, original)
&& in_class_p (curr_insn_input_reloads[i].reg, rclass, &new_class))
{
rtx reg = curr_insn_input_reloads[i].reg;
regno = REGNO (reg);
/* If input is equal to original and both are VOIDmode,
GET_MODE (reg) might be still different from mode.
Ensure we don't return *result_reg with wrong mode. */
if (GET_MODE (reg) != mode)
{
if (in_subreg_p)
continue;
if (GET_MODE_SIZE (GET_MODE (reg)) < GET_MODE_SIZE (mode))
continue;
reg = lowpart_subreg (mode, reg, GET_MODE (reg));
if (reg == NULL_RTX || GET_CODE (reg) != SUBREG)
continue;
}
*result_reg = reg;
if (lra_dump_file != NULL)
{
fprintf (lra_dump_file, " Reuse r%d for reload ", regno);
dump_value_slim (lra_dump_file, original, 1);
}
if (new_class != lra_get_allocno_class (regno))
lra_change_class (regno, new_class, ", change to", false);
if (lra_dump_file != NULL)
fprintf (lra_dump_file, "\n");
return false;
}
/* If we have an input reload with a different mode, make sure it
will get a different hard reg. */
else if (REG_P (original)
&& REG_P (curr_insn_input_reloads[i].input)
&& REGNO (original) == REGNO (curr_insn_input_reloads[i].input)
&& (GET_MODE (original)
!= GET_MODE (curr_insn_input_reloads[i].input)))
unique_p = true;
}
*result_reg = (unique_p
? lra_create_new_reg_with_unique_value
: lra_create_new_reg) (mode, original, rclass, title);
lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS);
curr_insn_input_reloads[curr_insn_input_reloads_num].input = original;
curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = false;
curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = *result_reg;
return true;
}
@ -1002,6 +1019,12 @@ match_reload (signed char out, signed char *ins, signed char *outs,
lra_emit_move (copy_rtx (new_in_reg), in_rtx);
*before = get_insns ();
end_sequence ();
/* Add the new pseudo to consider values of subsequent input reload
pseudos. */
lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS);
curr_insn_input_reloads[curr_insn_input_reloads_num].input = in_rtx;
curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = true;
curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = new_in_reg;
for (i = 0; (in = ins[i]) >= 0; i++)
{
lra_assert

View File

@ -1,3 +1,8 @@
2016-11-24 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/77541
* gcc.target/i386/pr77541.c: New.
2016-11-24 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/78500

View File

@ -0,0 +1,25 @@
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -Wno-psabi" } */
#define MAGIC 0x0706050403020100
typedef unsigned long long u64;
typedef unsigned __int128 v64u128 __attribute__ ((vector_size (64)));
v64u128 __attribute__ ((noinline, noclone))
foo (u64 x1, u64 x2, u64 x3, u64 x4, v64u128 x5)
{
(void)x1, (void)x2;
x4 >>= x4 & 63;
return x3 + x4 + x5;
}
int
main ()
{
v64u128 x = foo (0, 0, 0, MAGIC, (v64u128) {});
if (x[0] != MAGIC || x[1] != MAGIC || x[2] != MAGIC || x[3] != MAGIC)
__builtin_abort();
return 0;
}