re PR target/32661 (__builtin_ia32_vec_ext suboptimal for pointer/ref args)
PR target/32661 * simplify-rtx.c (simplify_binary_operation_1) [VEC_SELECT]: Simplify nested VEC_SELECT (with optional VEC_CONCAT operator as operand) when top VEC_SELECT extracts scalar element. * config/i386/sse.md (*vec_ext_v4si_mem): New. (*vec_extract_v4sf_mem): Ditto. testsuite/ChangeLog: PR target/32661 * gcc.target/i386/pr32661.c: New test. From-SVN: r127857
This commit is contained in:
parent
0b738568da
commit
7f97f938a7
|
@ -1,3 +1,12 @@
|
|||
2007-08-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/32661
|
||||
* simplify-rtx.c (simplify_binary_operation_1) [VEC_SELECT]:
|
||||
Simplify nested VEC_SELECT (with optional VEC_CONCAT operator as
|
||||
operand) when top VEC_SELECT extracts scalar element.
|
||||
* config/i386/sse.md (*vec_ext_v4si_mem): New pattern.
|
||||
(*vec_extract_v4sf_mem): Ditto.
|
||||
|
||||
2007-08-28 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR middle-end/32370
|
||||
|
@ -604,8 +613,8 @@
|
|||
|
||||
2007-08-23 Brian Sidebotham <brian.sidebotham@gmail.com>
|
||||
|
||||
* configure.ac (leb128): Modify sed statement to work with any binutils
|
||||
version string.
|
||||
* configure.ac (leb128): Modify sed statement to work with any
|
||||
binutils version string.
|
||||
* configure: Regenerate
|
||||
|
||||
2007-08-23 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
|
||||
|
|
|
@ -1542,6 +1542,22 @@
|
|||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
;; Extract one SFmode element from a V4SF operand that lives in memory.
;; The output template is "#", so the insn is never emitted as-is; once
;; reload_completed holds it is split into a plain SFmode move from the
;; vector's address adjusted by the byte offset of the selected element.
(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x*rf")
	(vec_select:SF
	  (match_operand:V4SF 1 "memory_operand" "o")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  /* Operand 2 is the element index (0..3 per its predicate); scale it
     by the element size to get the byte offset inside the vector.  */
  int byte_offset = INTVAL (operands[2]) * GET_MODE_SIZE (SFmode);
  rtx element_mem = adjust_address (operands[1], SFmode, byte_offset);

  emit_move_insn (operands[0], element_mem);
  DONE;
})
|
||||
|
||||
(define_expand "vec_extractv4sf"
|
||||
[(match_operand:SF 0 "register_operand" "")
|
||||
(match_operand:V4SF 1 "register_operand" "")
|
||||
|
@ -4634,6 +4650,22 @@
|
|||
operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
|
||||
})
|
||||
|
||||
;; Extract one SImode element from a V4SI operand that lives in memory.
;; The output template is "#", so the insn is never emitted as-is; once
;; reload_completed holds it is split into a plain SImode move from the
;; vector's address adjusted by the byte offset of the selected element.
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(vec_select:SI
	  (match_operand:V4SI 1 "memory_operand" "o")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  /* Operand 2 is the element index (0..3 per its predicate); scale it
     by the element size to get the byte offset inside the vector.  */
  int byte_offset = INTVAL (operands[2]) * GET_MODE_SIZE (SImode);
  rtx element_mem = adjust_address (operands[1], SImode, byte_offset);

  emit_move_insn (operands[0], element_mem);
  DONE;
})
|
||||
|
||||
(define_expand "sse_storeq"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "")
|
||||
(vec_select:DI
|
||||
|
|
|
@ -2659,6 +2659,85 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
|
|||
if (GET_CODE (trueop0) == CONST_VECTOR)
|
||||
return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP
|
||||
(trueop1, 0, 0)));
|
||||
|
||||
/* Extract a scalar element from a nested VEC_SELECT expression
|
||||
(with optional nested VEC_CONCAT expression). Some targets
|
||||
(i386) extract scalar element from a vector using chain of
|
||||
nested VEC_SELECT expressions. When input operand is a memory
|
||||
operand, this operation can be simplified to a simple scalar
|
||||
load from an offseted memory address. */
|
||||
if (GET_CODE (trueop0) == VEC_SELECT)
|
||||
{
|
||||
rtx op0 = XEXP (trueop0, 0);
|
||||
rtx op1 = XEXP (trueop0, 1);
|
||||
|
||||
enum machine_mode opmode = GET_MODE (op0);
|
||||
int elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode));
|
||||
int n_elts = GET_MODE_SIZE (opmode) / elt_size;
|
||||
|
||||
int i = INTVAL (XVECEXP (trueop1, 0, 0));
|
||||
int elem;
|
||||
|
||||
rtvec vec;
|
||||
rtx tmp_op, tmp;
|
||||
|
||||
gcc_assert (GET_CODE (op1) == PARALLEL);
|
||||
gcc_assert (i < n_elts);
|
||||
|
||||
/* Select element, pointed by nested selector. */
|
||||
elem = INTVAL (CONST_VECTOR_ELT (op1, i));
|
||||
|
||||
/* Handle the case when nested VEC_SELECT wraps VEC_CONCAT. */
|
||||
if (GET_CODE (op0) == VEC_CONCAT)
|
||||
{
|
||||
rtx op00 = XEXP (op0, 0);
|
||||
rtx op01 = XEXP (op0, 1);
|
||||
|
||||
enum machine_mode mode00, mode01;
|
||||
int n_elts00, n_elts01;
|
||||
|
||||
mode00 = GET_MODE (op00);
|
||||
mode01 = GET_MODE (op01);
|
||||
|
||||
/* Find out number of elements of each operand. */
|
||||
if (VECTOR_MODE_P (mode00))
|
||||
{
|
||||
elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode00));
|
||||
n_elts00 = GET_MODE_SIZE (mode00) / elt_size;
|
||||
}
|
||||
else
|
||||
n_elts00 = 1;
|
||||
|
||||
if (VECTOR_MODE_P (mode01))
|
||||
{
|
||||
elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode01));
|
||||
n_elts01 = GET_MODE_SIZE (mode01) / elt_size;
|
||||
}
|
||||
else
|
||||
n_elts01 = 1;
|
||||
|
||||
gcc_assert (n_elts == n_elts00 + n_elts01);
|
||||
|
||||
/* Select correct operand of VEC_CONCAT
|
||||
and adjust selector. */
|
||||
if (elem < n_elts01)
|
||||
tmp_op = op00;
|
||||
else
|
||||
{
|
||||
tmp_op = op01;
|
||||
elem -= n_elts00;
|
||||
}
|
||||
}
|
||||
else
|
||||
tmp_op = op0;
|
||||
|
||||
vec = rtvec_alloc (1);
|
||||
RTVEC_ELT (vec, 0) = GEN_INT (elem);
|
||||
|
||||
tmp = gen_rtx_fmt_ee (code, mode,
|
||||
tmp_op, gen_rtx_PARALLEL (VOIDmode, vec));
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2007-08-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/32661
|
||||
* gcc.target/i386/pr32661.c: New test.
|
||||
|
||||
2007-08-28 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR middle-end/32370
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
typedef int __v4si __attribute__ ((__vector_size__ (16)));
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
/* Return element 1 of the V4SI vector that VAL points to, obtained by
   applying __builtin_ia32_vec_ext_v4si to the dereferenced pointer.  */
int fooSI_1(__v4si *val)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v4si(*val, 1);
|
||||
}
|
||||
/* { dg-final { scan-assembler-not "pshufd" } } */
|
||||
|
||||
/* Return element 2 of the V4SI vector that VAL points to, obtained by
   applying __builtin_ia32_vec_ext_v4si to the dereferenced pointer.  */
int fooSI_2(__v4si *val)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v4si(*val, 2);
|
||||
}
|
||||
/* { dg-final { scan-assembler-not "punpckhdq" } } */
|
||||
|
||||
/* Return element 2 of the V4SF vector that VAL points to, obtained by
   applying __builtin_ia32_vec_ext_v4sf to the dereferenced pointer.  */
float fooSF_2(__v4sf *val)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v4sf(*val, 2);
|
||||
}
|
||||
/* { dg-final { scan-assembler-not "unpckhps" } } */
|
||||
|
||||
/* Return element 3 of the V4SF vector that VAL points to, obtained by
   applying __builtin_ia32_vec_ext_v4sf to the dereferenced pointer.  */
float fooSF_3(__v4sf *val)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v4sf(*val, 3);
|
||||
}
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
Loading…
Reference in New Issue