re PR target/32661 (__builtin_ia32_vec_ext suboptimal for pointer/ref args)

PR target/32661
	* simplify-rtx.c (simplify_binary_operation_1) [VEC_SELECT]:
	Simplify nested VEC_SELECT (with optional VEC_CONCAT operator as
	operand) when top VEC_SELECT extracts scalar element.
	* config/i386/sse.md (*vec_ext_v4si_mem): New.
	(*vec_extract_v4sf_mem): Ditto.

testsuite/ChangeLog:

	PR target/32661
	* gcc.target/i386/pr32661.c: New test.

From-SVN: r127857
This commit is contained in:
Uros Bizjak 2007-08-28 11:52:06 +02:00
parent 0b738568da
commit 7f97f938a7
5 changed files with 156 additions and 2 deletions

View File

@ -1,3 +1,12 @@
2007-08-28 Uros Bizjak <ubizjak@gmail.com>
PR target/32661
* simplify-rtx.c (simplify_binary_operation_1) [VEC_SELECT]:
Simplify nested VEC_SELECT (with optional VEC_CONCAT operator as
operand) when top VEC_SELECT extracts scalar element.
* config/i386/sse.md (*vec_ext_v4si_mem): New pattern.
(*vec_extract_v4sf_mem): Ditto.
2007-08-28 Jakub Jelinek <jakub@redhat.com>
PR middle-end/32370
@ -604,8 +613,8 @@
2007-08-23 Brian Sidebotham <brian.sidebotham@gmail.com>
* configure.ac (leb128): Modify sed statement to work with any binutils
version string.
* configure.ac (leb128): Modify sed statement to work with any
binutils version string.
* configure: Regenerate
2007-08-23 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>

View File

@ -1542,6 +1542,22 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V4SF")])
;; Extract a single SFmode element from a V4SFmode memory operand.
;; Operand 0 is the destination register, operand 1 the vector in memory and
;; operand 2 the element index (restricted by the const_0_to_3_operand
;; predicate).  The "#" output template means the unsplit form has no
;; assembly of its own; once reload_completed is true the insn is replaced by
;; a plain SFmode move from operand 1 with its address advanced by
;; index * 4 bytes.
(define_insn_and_split "*vec_extract_v4sf_mem"
[(set (match_operand:SF 0 "register_operand" "=x*rf")
(vec_select:SF
(match_operand:V4SF 1 "memory_operand" "o")
(parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
""
"#"
"reload_completed"
[(const_int 0)]
{
int i = INTVAL (operands[2]);
emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
DONE;
})
(define_expand "vec_extractv4sf"
[(match_operand:SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")
@ -4634,6 +4650,22 @@
operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
})
;; Extract a single SImode element from a V4SImode memory operand.
;; Operand 0 is the destination register, operand 1 the vector in memory and
;; operand 2 the element index (restricted by the const_0_to_3_operand
;; predicate).  The "#" output template means the unsplit form has no
;; assembly of its own; once reload_completed is true the insn is replaced by
;; a plain SImode move from operand 1 with its address advanced by
;; index * 4 bytes.
;; NOTE(review): operand 2 carries an empty constraint here while the V4SF
;; counterpart uses "n" -- confirm whether the difference is intentional.
(define_insn_and_split "*vec_ext_v4si_mem"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
(match_operand:V4SI 1 "memory_operand" "o")
(parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
""
"#"
"reload_completed"
[(const_int 0)]
{
int i = INTVAL (operands[2]);
emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
DONE;
})
(define_expand "sse_storeq"
[(set (match_operand:DI 0 "nonimmediate_operand" "")
(vec_select:DI

View File

@ -2659,6 +2659,85 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
if (GET_CODE (trueop0) == CONST_VECTOR)
return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP
(trueop1, 0, 0)));
/* Extract a scalar element from a nested VEC_SELECT expression
   (with optional nested VEC_CONCAT expression).  Some targets
   (i386) extract scalar element from a vector using chain of
   nested VEC_SELECT expressions.  When input operand is a memory
   operand, this operation can be simplified to a simple scalar
   load from an offset memory address.

   The outer selector picks entry I of the inner selector OP1; that
   entry (ELEM) is the index into the inner source OP0.  If OP0 is a
   VEC_CONCAT, ELEM is further mapped onto whichever half holds it.
   The result is a new single-element VEC_SELECT on the chosen
   operand.  */
if (GET_CODE (trueop0) == VEC_SELECT)
  {
    rtx op0 = XEXP (trueop0, 0);
    rtx op1 = XEXP (trueop0, 1);

    enum machine_mode opmode = GET_MODE (op0);
    int elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode));
    int n_elts = GET_MODE_SIZE (opmode) / elt_size;

    int i = INTVAL (XVECEXP (trueop1, 0, 0));
    int elem;

    rtvec vec;
    rtx tmp_op, tmp;

    gcc_assert (GET_CODE (op1) == PARALLEL);
    /* I indexes the inner selector, so it must be bounded by the
       length of that selector rather than by the element count of
       the inner source vector.  */
    gcc_assert (i < XVECLEN (op1, 0));

    /* Select element, pointed by nested selector.  OP1 is a PARALLEL
       (asserted above), so use the generic vector accessor instead
       of the CONST_VECTOR-specific one.  */
    elem = INTVAL (XVECEXP (op1, 0, i));

    /* Handle the case when nested VEC_SELECT wraps VEC_CONCAT.  */
    if (GET_CODE (op0) == VEC_CONCAT)
      {
	rtx op00 = XEXP (op0, 0);
	rtx op01 = XEXP (op0, 1);

	enum machine_mode mode00, mode01;
	int n_elts00, n_elts01;

	mode00 = GET_MODE (op00);
	mode01 = GET_MODE (op01);

	/* Find out number of elements of each operand.  A non-vector
	   operand contributes exactly one element.  */
	if (VECTOR_MODE_P (mode00))
	  {
	    elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode00));
	    n_elts00 = GET_MODE_SIZE (mode00) / elt_size;
	  }
	else
	  n_elts00 = 1;

	if (VECTOR_MODE_P (mode01))
	  {
	    elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode01));
	    n_elts01 = GET_MODE_SIZE (mode01) / elt_size;
	  }
	else
	  n_elts01 = 1;

	gcc_assert (n_elts == n_elts00 + n_elts01);

	/* Select correct operand of VEC_CONCAT and adjust selector.
	   Elements [0, n_elts00) live in the first operand; the rest
	   live in the second one, rebased to start at zero.  */
	if (elem < n_elts00)
	  tmp_op = op00;
	else
	  {
	    tmp_op = op01;
	    elem -= n_elts00;
	  }
      }
    else
      tmp_op = op0;

    vec = rtvec_alloc (1);
    RTVEC_ELT (vec, 0) = GEN_INT (elem);

    tmp = gen_rtx_fmt_ee (code, mode,
			  tmp_op, gen_rtx_PARALLEL (VOIDmode, vec));
    return tmp;
  }
}
else
{

View File

@ -1,3 +1,8 @@
2007-08-28 Uros Bizjak <ubizjak@gmail.com>
PR target/32661
* gcc.target/i386/pr32661.c: New test.
2007-08-28 Jakub Jelinek <jakub@redhat.com>
PR middle-end/32370

View File

@ -0,0 +1,29 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return element 1 of the V4SI vector loaded through VAL.  The vector is
   reached via a pointer, which is the situation PR target/32661 is about.  */
int fooSI_1(__v4si *val)
{
return __builtin_ia32_vec_ext_v4si(*val, 1);
}
/* { dg-final { scan-assembler-not "pshufd" } } */
/* Return element 2 of the V4SI vector loaded through VAL.  */
int fooSI_2(__v4si *val)
{
return __builtin_ia32_vec_ext_v4si(*val, 2);
}
/* { dg-final { scan-assembler-not "punpckhdq" } } */
/* Return element 2 of the V4SF vector loaded through VAL.  */
float fooSF_2(__v4sf *val)
{
return __builtin_ia32_vec_ext_v4sf(*val, 2);
}
/* { dg-final { scan-assembler-not "unpckhps" } } */
/* Return element 3 of the V4SF vector loaded through VAL.  */
float fooSF_3(__v4sf *val)
{
return __builtin_ia32_vec_ext_v4sf(*val, 3);
}
/* { dg-final { scan-assembler-not "shufps" } } */