diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bc2e9bef119..3d9cdbb78ca 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2007-08-28  Uros Bizjak
+
+	PR target/32661
+	* simplify-rtx.c (simplify_binary_operation_1) [VEC_SELECT]:
+	Simplify nested VEC_SELECT (with optional VEC_CONCAT operator as
+	operand) when top VEC_SELECT extracts scalar element.
+	* config/i386/sse.md (*vec_ext_v4si_mem): New pattern.
+	(*vec_extract_v4sf_mem): Ditto.
+
 2007-08-28  Jakub Jelinek
 
 	PR middle-end/32370
@@ -604,8 +613,8 @@
 
 2007-08-23  Brian Sidebotham
 
-	* configure.ac (leb128): Modify sed statement to work with any binutils
-	version string.
+	* configure.ac (leb128): Modify sed statement to work with any
+	binutils version string.
 	* configure: Regenerate
 
 2007-08-23  Kaveh R. Ghazi
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 07969375f1d..6779e9a805a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1542,6 +1542,22 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V4SF")])
 
+(define_insn_and_split "*vec_extract_v4sf_mem"
+  [(set (match_operand:SF 0 "register_operand" "=x*rf")
+       (vec_select:SF
+	 (match_operand:V4SF 1 "memory_operand" "o")
+	 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  int i = INTVAL (operands[2]);
+
+  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
+  DONE;
+})
+
 (define_expand "vec_extractv4sf"
   [(match_operand:SF 0 "register_operand" "")
    (match_operand:V4SF 1 "register_operand" "")
@@ -4634,6 +4650,22 @@
   operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
 })
 
+(define_insn_and_split "*vec_ext_v4si_mem"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(vec_select:SI
+	  (match_operand:V4SI 1 "memory_operand" "o")
+	  (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  int i = INTVAL (operands[2]);
+
+  emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
+  DONE;
+})
+
 (define_expand "sse_storeq"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
 	(vec_select:DI
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 97c4d931805..3271a864851 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -2659,6 +2659,85 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 	  if (GET_CODE (trueop0) == CONST_VECTOR)
 	    return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP
 						      (trueop1, 0, 0)));
+
+	  /* Extract a scalar element from a nested VEC_SELECT expression
+	     (with optional nested VEC_CONCAT expression).  Some targets
+	     (i386) extract scalar element from a vector using chain of
+	     nested VEC_SELECT expressions.  When input operand is a memory
+	     operand, this operation can be simplified to a simple scalar
+	     load from an offset memory address.  */
+	  if (GET_CODE (trueop0) == VEC_SELECT)
+	    {
+	      rtx op0 = XEXP (trueop0, 0);
+	      rtx op1 = XEXP (trueop0, 1);
+
+	      enum machine_mode opmode = GET_MODE (op0);
+	      int elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode));
+	      int n_elts = GET_MODE_SIZE (opmode) / elt_size;
+
+	      int i = INTVAL (XVECEXP (trueop1, 0, 0));
+	      int elem;
+
+	      rtvec vec;
+	      rtx tmp_op, tmp;
+
+	      gcc_assert (GET_CODE (op1) == PARALLEL);
+	      gcc_assert (i < XVECLEN (op1, 0));
+
+	      /* Select the element the nested selector (a PARALLEL) maps I to.  */
+	      elem = INTVAL (XVECEXP (op1, 0, i));
+
+	      /* Handle the case when nested VEC_SELECT wraps VEC_CONCAT.  */
+	      if (GET_CODE (op0) == VEC_CONCAT)
+		{
+		  rtx op00 = XEXP (op0, 0);
+		  rtx op01 = XEXP (op0, 1);
+
+		  enum machine_mode mode00, mode01;
+		  int n_elts00, n_elts01;
+
+		  mode00 = GET_MODE (op00);
+		  mode01 = GET_MODE (op01);
+
+		  /* Find out number of elements of each operand.  */
+		  if (VECTOR_MODE_P (mode00))
+		    {
+		      elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode00));
+		      n_elts00 = GET_MODE_SIZE (mode00) / elt_size;
+		    }
+		  else
+		    n_elts00 = 1;
+
+		  if (VECTOR_MODE_P (mode01))
+		    {
+		      elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode01));
+		      n_elts01 = GET_MODE_SIZE (mode01) / elt_size;
+		    }
+		  else
+		    n_elts01 = 1;
+
+		  gcc_assert (n_elts == n_elts00 + n_elts01);
+
+		  /* Elements 0 .. n_elts00-1 come from the first operand of
+		     VEC_CONCAT; the rest come from the second, rebased to 0.  */
+		  if (elem < n_elts00)
+		    tmp_op = op00;
+		  else
+		    {
+		      tmp_op = op01;
+		      elem -= n_elts00;
+		    }
+		}
+	      else
+		tmp_op = op0;
+
+	      vec = rtvec_alloc (1);
+	      RTVEC_ELT (vec, 0) = GEN_INT (elem);
+
+	      tmp = gen_rtx_fmt_ee (code, mode,
+				    tmp_op, gen_rtx_PARALLEL (VOIDmode, vec));
+	      return tmp;
+	    }
 	}
       else
 	{
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 47ee5d715d5..f3a78804c85 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2007-08-28  Uros Bizjak
+
+	PR target/32661
+	* gcc.target/i386/pr32661.c: New test.
+
 2007-08-28  Jakub Jelinek
 
 	PR middle-end/32370
diff --git a/gcc/testsuite/gcc.target/i386/pr32661.c b/gcc/testsuite/gcc.target/i386/pr32661.c
new file mode 100644
index 00000000000..247ae131923
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr32661.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+int fooSI_1(__v4si *val)
+{
+  return __builtin_ia32_vec_ext_v4si(*val, 1);
+}
+/* { dg-final { scan-assembler-not "pshufd" } } */
+
+int fooSI_2(__v4si *val)
+{
+  return __builtin_ia32_vec_ext_v4si(*val, 2);
+}
+/* { dg-final { scan-assembler-not "punpckhdq" } } */
+
+float fooSF_2(__v4sf *val)
+{
+  return __builtin_ia32_vec_ext_v4sf(*val, 2);
+}
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+
+float fooSF_3(__v4sf *val)
+{
+  return __builtin_ia32_vec_ext_v4sf(*val, 3);
+}
+/* { dg-final { scan-assembler-not "shufps" } } */