re PR rtl-optimization/64286 (Redundant extend removal ignores vector element type)
PR rtl-optimization/64286 * ree.c (combine_reaching_defs): Move part of comment earlier, remove !SCALAR_INT_MODE_P check. (add_removable_extension): Don't add vector mode extensions if all uses of the source register aren't the same vector extensions. * gcc.target/i386/avx2-pr64286.c: New test. From-SVN: r219550
This commit is contained in:
parent
6e5a37dae7
commit
860dadcbea
|
@ -1,3 +1,12 @@
|
|||
2015-01-13 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR rtl-optimization/64286
|
||||
* ree.c (combine_reaching_defs): Move part of comment earlier,
|
||||
remove !SCALAR_INT_MODE_P check.
|
||||
(add_removable_extension): Don't add vector mode
|
||||
extensions if all uses of the source register aren't the same
|
||||
vector extensions.
|
||||
|
||||
2015-01-13 Renlin Li <renlin.li@arm.com>
|
||||
|
||||
* config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO): Return 2.
|
||||
|
|
75
gcc/ree.c
75
gcc/ree.c
|
@ -783,6 +783,17 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
|
|||
!= REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
|
||||
if (copy_needed)
|
||||
{
|
||||
/* Considering transformation of
|
||||
(set (reg1) (expression))
|
||||
...
|
||||
(set (reg2) (any_extend (reg1)))
|
||||
|
||||
into
|
||||
|
||||
(set (reg2) (any_extend (expression)))
|
||||
(set (reg1) (reg2))
|
||||
... */
|
||||
|
||||
/* In theory we could handle more than one reaching def, it
|
||||
just makes the code to update the insn stream more complex. */
|
||||
if (state->defs_list.length () != 1)
|
||||
|
@ -798,18 +809,6 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
|
|||
if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
|
||||
return false;
|
||||
|
||||
/* Transformation of
|
||||
(set (reg1) (expression))
|
||||
(set (reg2) (any_extend (reg1)))
|
||||
into
|
||||
(set (reg2) (any_extend (expression)))
|
||||
(set (reg1) (reg2))
|
||||
is only valid for scalar integral modes, as it relies on the low
|
||||
subreg of reg1 to have the value of (expression), which is not true
|
||||
e.g. for vector modes. */
|
||||
if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn)))))
|
||||
return false;
|
||||
|
||||
machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
|
||||
rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
|
||||
|
||||
|
@ -1027,6 +1026,7 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
|
|||
different extension. FIXME: this obviously can be improved. */
|
||||
for (def = defs; def; def = def->next)
|
||||
if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
|
||||
&& idx != -1U
|
||||
&& (cand = &(*insn_list)[idx - 1])
|
||||
&& cand->code != code)
|
||||
{
|
||||
|
@ -1038,6 +1038,57 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
|
|||
}
|
||||
return;
|
||||
}
|
||||
/* For vector mode extensions, ensure that all uses of the
|
||||
XEXP (src, 0) register are the same extension (both code
|
||||
and to which mode), as unlike integral extensions lowpart
|
||||
subreg of the sign/zero extended register is not equal
|
||||
to the original register, so we have to change all uses or
|
||||
none. */
|
||||
else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
|
||||
{
|
||||
if (idx == 0)
|
||||
{
|
||||
struct df_link *ref_chain, *ref_link;
|
||||
|
||||
ref_chain = DF_REF_CHAIN (def->ref);
|
||||
for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
|
||||
{
|
||||
if (ref_link->ref == NULL
|
||||
|| DF_REF_INSN_INFO (ref_link->ref) == NULL)
|
||||
{
|
||||
idx = -1U;
|
||||
break;
|
||||
}
|
||||
rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
|
||||
const_rtx use_set;
|
||||
if (use_insn == insn || DEBUG_INSN_P (use_insn))
|
||||
continue;
|
||||
if (!(use_set = single_set (use_insn))
|
||||
|| !REG_P (SET_DEST (use_set))
|
||||
|| GET_MODE (SET_DEST (use_set)) != GET_MODE (dest)
|
||||
|| GET_CODE (SET_SRC (use_set)) != code
|
||||
|| !rtx_equal_p (XEXP (SET_SRC (use_set), 0),
|
||||
XEXP (src, 0)))
|
||||
{
|
||||
idx = -1U;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (idx == -1U)
|
||||
def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
|
||||
}
|
||||
if (idx == -1U)
|
||||
{
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Cannot eliminate extension:\n");
|
||||
print_rtl_single (dump_file, insn);
|
||||
fprintf (dump_file,
|
||||
" because some vector uses aren't extension\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Then add the candidate to the list and insert the reaching definitions
|
||||
into the definition map. */
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2015-01-13 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR rtl-optimization/64286
|
||||
* gcc.target/i386/avx2-pr64286.c: New test.
|
||||
|
||||
2015-01-13 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
PR c++/57626
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
/* PR rtl-optimization/64286 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx2" } */
|
||||
/* { dg-require-effective-target avx2 } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <x86intrin.h>
|
||||
#include "avx2-check.h"
|
||||
|
||||
__m128i v;
|
||||
__m256i w;
|
||||
|
||||
/* Reproducer kernel for PR rtl-optimization/64286.
   Loads one 128-bit vector from P and uses it in two different ways:
   as an operand of a byte-wise XOR with the global V, and as the
   source of a zero extension of its bytes to 16-bit elements.  The
   extension result is stored in the global W and the XOR result in *Q.
   Because the loaded value has a use that is not the extension, the
   extension must not be folded into the load.  The noinline and
   noclone attributes are presumably there so the caller cannot
   absorb or specialize this body.  */
__attribute__((noinline, noclone)) void
|
||||
foo (__m128i *p, __m128i *q)
|
||||
{
|
||||
/* Unaligned load; A is consumed by both statements that follow.  */
__m128i a = _mm_loadu_si128 (p);
|
||||
/* Non-extension use of A.  */
__m128i b = _mm_xor_si128 (a, v);
|
||||
/* Extension use of A (unsigned 8-bit to 16-bit elements).  */
w = _mm256_cvtepu8_epi16 (a);
|
||||
*q = b;
|
||||
}
|
||||
|
||||
/* Test driver: runs foo on a known input and aborts on a mismatch.
   NOTE(review): presumably invoked by the harness in avx2-check.h
   after it has verified AVX2 support at run time -- confirm there.  */
static void
|
||||
avx2_test (void)
|
||||
{
|
||||
/* Every byte of the XOR mask is 0x40.  */
v = _mm_set1_epi8 (0x40);
|
||||
/* Input vector: the byte values 1 through 16.  */
__m128i c = _mm_set_epi8 (16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
|
||||
__m128i d;
|
||||
foo (&c, &d);
|
||||
/* Expected XOR result: each byte of C with bit 0x40 set.  */
__m128i e = _mm_set_epi8 (0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49,
|
||||
0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41);
|
||||
/* Expected extension result: each byte of C widened to 16 bits.  */
__m256i f = _mm256_set_epi16 (16, 15, 14, 13, 12, 11, 10, 9,
|
||||
8, 7, 6, 5, 4, 3, 2, 1);
|
||||
/* Both the global W written by foo and the output D must match.  */
if (memcmp (&w, &f, sizeof (w)) != 0
|
||||
|| memcmp (&d, &e, sizeof (d)) != 0)
|
||||
abort ();
|
||||
}
|
Loading…
Reference in New Issue