sse.md (vec_extract_hi_<mode>, [...]): Use vextracti128 instead of vextractf128 for -mavx2 and integer vectors.
* config/i386/sse.md (vec_extract_hi_<mode>, vec_extract_hi_v16hi, vec_extract_hi_v32qi): Use vextracti128 instead of vextractf128 for -mavx2 and integer vectors. For V4DFmode fix up mode attribute. (VEC_EXTRACT_MODE): For TARGET_AVX add 32-byte vectors. (vec_set_lo_<mode>, vec_set_hi_<mode>): For VI8F_256 modes use V4DF instead of V8SF mode attribute. (avx2_extracti128): Change into define_expand. * config/i386/i386.c (ix86_expand_vector_extract): Handle 32-byte vector modes if TARGET_AVX. * gcc.target/i386/sse2-extract-1.c: New test. * gcc.target/i386/avx-extract-1.c: New test. From-SVN: r178915
This commit is contained in:
parent
6deb519753
commit
6e2cb39139
|
@ -1,3 +1,16 @@
|
|||
2011-09-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/sse.md (vec_extract_hi_<mode>,
|
||||
vec_extract_hi_v16hi, vec_extract_hi_v32qi): Use
|
||||
vextracti128 instead of vextractf128 for -mavx2 and
|
||||
integer vectors. For V4DFmode fix up mode attribute.
|
||||
(VEC_EXTRACT_MODE): For TARGET_AVX add 32-byte vectors.
|
||||
(vec_set_lo_<mode>, vec_set_hi_<mode>): For VI8F_256 modes use V4DF
|
||||
instead of V8SF mode attribute.
|
||||
(avx2_extracti128): Change into define_expand.
|
||||
* config/i386/i386.c (ix86_expand_vector_extract): Handle
|
||||
32-byte vector modes if TARGET_AVX.
|
||||
|
||||
2011-09-16 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
* config/avr/avr.md: (umulqihi3, mulqihi3): Write as one pattern.
|
||||
|
|
|
@ -32587,6 +32587,84 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
|||
use_vec_extr = TARGET_SSE4_1;
|
||||
break;
|
||||
|
||||
case V8SFmode:
|
||||
if (TARGET_AVX)
|
||||
{
|
||||
tmp = gen_reg_rtx (V4SFmode);
|
||||
if (elt < 4)
|
||||
emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
|
||||
else
|
||||
emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
|
||||
ix86_expand_vector_extract (false, target, tmp, elt & 3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case V4DFmode:
|
||||
if (TARGET_AVX)
|
||||
{
|
||||
tmp = gen_reg_rtx (V2DFmode);
|
||||
if (elt < 2)
|
||||
emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
|
||||
else
|
||||
emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
|
||||
ix86_expand_vector_extract (false, target, tmp, elt & 1);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case V32QImode:
|
||||
if (TARGET_AVX)
|
||||
{
|
||||
tmp = gen_reg_rtx (V16QImode);
|
||||
if (elt < 16)
|
||||
emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
|
||||
else
|
||||
emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
|
||||
ix86_expand_vector_extract (false, target, tmp, elt & 15);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case V16HImode:
|
||||
if (TARGET_AVX)
|
||||
{
|
||||
tmp = gen_reg_rtx (V8HImode);
|
||||
if (elt < 8)
|
||||
emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
|
||||
else
|
||||
emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
|
||||
ix86_expand_vector_extract (false, target, tmp, elt & 7);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case V8SImode:
|
||||
if (TARGET_AVX)
|
||||
{
|
||||
tmp = gen_reg_rtx (V4SImode);
|
||||
if (elt < 4)
|
||||
emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
|
||||
else
|
||||
emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
|
||||
ix86_expand_vector_extract (false, target, tmp, elt & 3);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case V4DImode:
|
||||
if (TARGET_AVX)
|
||||
{
|
||||
tmp = gen_reg_rtx (V2DImode);
|
||||
if (elt < 2)
|
||||
emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
|
||||
else
|
||||
emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
|
||||
ix86_expand_vector_extract (false, target, tmp, elt & 1);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case V8QImode:
|
||||
/* ??? Could extract the appropriate HImode element and shift. */
|
||||
default:
|
||||
|
|
|
@ -3827,13 +3827,23 @@
|
|||
(match_operand:VI8F_256 1 "register_operand" "x,x")
|
||||
(parallel [(const_int 2) (const_int 3)])))]
|
||||
"TARGET_AVX"
|
||||
-"vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
|
||||
+{
|
||||
+if (get_attr_mode (insn) == MODE_OI)
|
||||
+return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+else
|
||||
+return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "memory" "none,store")
|
||||
(set_attr "prefix" "vex")
|
||||
-(set_attr "mode" "V8SF")])
|
||||
+(set (attr "mode")
|
||||
+(if_then_else
|
||||
+(and (match_test "TARGET_AVX2")
|
||||
+(eq (const_string "<MODE>mode") (const_string "V4DImode")))
|
||||
+(const_string "OI")
|
||||
+(const_string "V4DF")))])
|
||||
|
||||
(define_insn_and_split "vec_extract_lo_<mode>"
|
||||
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
|
||||
|
@ -3862,13 +3872,23 @@
|
|||
(parallel [(const_int 4) (const_int 5)
|
||||
(const_int 6) (const_int 7)])))]
|
||||
"TARGET_AVX"
|
||||
-"vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
|
||||
+{
|
||||
+if (get_attr_mode (insn) == MODE_OI)
|
||||
+return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+else
|
||||
+return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "memory" "none,store")
|
||||
(set_attr "prefix" "vex")
|
||||
-(set_attr "mode" "V8SF")])
|
||||
+(set (attr "mode")
|
||||
+(if_then_else
|
||||
+(and (match_test "TARGET_AVX2")
|
||||
+(eq (const_string "<MODE>mode") (const_string "V8SImode")))
|
||||
+(const_string "OI")
|
||||
+(const_string "V8SF")))])
|
||||
|
||||
(define_insn_and_split "vec_extract_lo_v16hi"
|
||||
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
|
||||
|
@ -3901,13 +3921,21 @@
|
|||
(const_int 12) (const_int 13)
|
||||
(const_int 14) (const_int 15)])))]
|
||||
"TARGET_AVX"
|
||||
-"vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
|
||||
+{
|
||||
+if (get_attr_mode (insn) == MODE_OI)
|
||||
+return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+else
|
||||
+return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "memory" "none,store")
|
||||
(set_attr "prefix" "vex")
|
||||
-(set_attr "mode" "V8SF")])
|
||||
+(set (attr "mode")
|
||||
+(if_then_else (match_test "TARGET_AVX2")
|
||||
+(const_string "OI")
|
||||
+(const_string "V8SF")))])
|
||||
|
||||
(define_insn_and_split "vec_extract_lo_v32qi"
|
||||
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
|
||||
|
@ -3948,13 +3976,21 @@
|
|||
(const_int 28) (const_int 29)
|
||||
(const_int 30) (const_int 31)])))]
|
||||
"TARGET_AVX"
|
||||
-"vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
|
||||
+{
|
||||
+if (get_attr_mode (insn) == MODE_OI)
|
||||
+return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+else
|
||||
+return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
|
||||
+}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "memory" "none,store")
|
||||
(set_attr "prefix" "vex")
|
||||
-(set_attr "mode" "V8SF")])
|
||||
+(set (attr "mode")
|
||||
+(if_then_else (match_test "TARGET_AVX2")
|
||||
+(const_string "OI")
|
||||
+(const_string "V8SF")))])
|
||||
|
||||
(define_insn "*sse4_1_extractps"
|
||||
[(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
|
||||
|
@ -3988,7 +4024,10 @@
|
|||
|
||||
;; Modes handled by vec_extract patterns.
|
||||
(define_mode_iterator VEC_EXTRACT_MODE
|
||||
-[V16QI V8HI V4SI V2DI
|
||||
+[(V32QI "TARGET_AVX") V16QI
|
||||
+(V16HI "TARGET_AVX") V8HI
|
||||
+(V8SI "TARGET_AVX") V4SI
|
||||
+(V4DI "TARGET_AVX") V2DI
|
||||
(V8SF "TARGET_AVX") V4SF
|
||||
(V4DF "TARGET_AVX") V2DF])
|
||||
|
||||
|
@ -11916,7 +11955,7 @@
|
|||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "vex")
|
||||
-(set_attr "mode" "V8SF")])
|
||||
+(set_attr "mode" "V4DF")])
|
||||
|
||||
(define_insn "vec_set_hi_<mode>"
|
||||
[(set (match_operand:VI8F_256 0 "register_operand" "=x")
|
||||
|
@ -11931,7 +11970,7 @@
|
|||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "vex")
|
||||
-(set_attr "mode" "V8SF")])
|
||||
+(set_attr "mode" "V4DF")])
|
||||
|
||||
(define_insn "vec_set_lo_<mode>"
|
||||
[(set (match_operand:VI4F_256 0 "register_operand" "=x")
|
||||
|
@ -12122,17 +12161,29 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
-(define_insn "avx2_extracti128"
|
||||
-[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
-(vec_select:V2DI
|
||||
-(match_operand:V4DI 1 "nonimmediate_operand" "xm")
|
||||
-(parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
|
||||
+(define_expand "avx2_extracti128"
|
||||
+[(match_operand:V2DI 0 "nonimmediate_operand" "")
|
||||
+(match_operand:V4DI 1 "register_operand" "")
|
||||
+(match_operand:SI 2 "const_0_to_1_operand" "")]
|
||||
"TARGET_AVX2"
|
||||
-"vextracti128\t{%2, %1, %0|%0, %1, %2}"
|
||||
-[(set_attr "type" "ssemov")
|
||||
-(set_attr "prefix_extra" "1")
|
||||
-(set_attr "prefix" "vex")
|
||||
-(set_attr "mode" "OI")])
|
||||
+{
|
||||
+rtx (*insn)(rtx, rtx);
|
||||
|
||||
+switch (INTVAL (operands[2]))
|
||||
+{
|
||||
+case 0:
|
||||
+insn = gen_vec_extract_lo_v4di;
|
||||
+break;
|
||||
+case 1:
|
||||
+insn = gen_vec_extract_hi_v4di;
|
||||
+break;
|
||||
+default:
|
||||
+gcc_unreachable ();
|
||||
+}
|
||||
|
||||
+emit_insn (insn (operands[0], operands[1]));
|
||||
+DONE;
|
||||
+})
|
||||
|
||||
(define_expand "avx2_inserti128"
|
||||
[(match_operand:V4DI 0 "register_operand" "")
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2011-09-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.target/i386/sse2-extract-1.c: New test.
|
||||
* gcc.target/i386/avx-extract-1.c: New test.
|
||||
|
||||
2011-09-16 Terry Guo <terry.guo@arm.com>
|
||||
|
||||
* gcc.dg/tree-ssa/foldconst-3.c: Don't use short enums.
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
/* { dg-require-effective-target avx_runtime } */
|
||||
|
||||
#include "sse2-extract-1.c"
|
|
@ -0,0 +1,102 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
/* { dg-require-effective-target sse2_runtime } */
|
||||
|
||||
extern void abort (void);
|
||||
typedef unsigned long long uint64_t;
|
||||
|
||||
/* Spell a GCC vector type holding ELCOUNT elements of TYPE.  */
#define vector(elcount, type) \
|
||||
__attribute__((vector_size((elcount)*sizeof(type)))) type
|
||||
|
||||
/* Define f<type><elcount>_<idx>: a noinline, noclone function that returns
   element IDX of its vector argument plus one.  */
#define FN(elcount, type, idx) \
|
||||
__attribute__((noinline, noclone)) \
|
||||
type f##type##elcount##_##idx (vector (elcount, type) x) { return x[idx] + 1; }
|
||||
#define T2(elcount, type) \
|
||||
H (elcount, type) \
|
||||
F (elcount, type, 0) \
|
||||
F (elcount, type, 1)
|
||||
#define T4(elcount, type) \
|
||||
T2 (elcount, type) \
|
||||
F (elcount, type, 2) \
|
||||
F (elcount, type, 3)
|
||||
#define T8(elcount, type) \
|
||||
T4 (elcount, type) \
|
||||
F (elcount, type, 4) \
|
||||
F (elcount, type, 5) \
|
||||
F (elcount, type, 6) \
|
||||
F (elcount, type, 7)
|
||||
#define T16(elcount, type) \
|
||||
T8 (elcount, type) \
|
||||
F (elcount, type, 8) \
|
||||
F (elcount, type, 9) \
|
||||
F (elcount, type, 10) \
|
||||
F (elcount, type, 11) \
|
||||
F (elcount, type, 12) \
|
||||
F (elcount, type, 13) \
|
||||
F (elcount, type, 14) \
|
||||
F (elcount, type, 15)
|
||||
#define T32(elcount, type) \
|
||||
T16 (elcount, type) \
|
||||
F (elcount, type, 16) \
|
||||
F (elcount, type, 17) \
|
||||
F (elcount, type, 18) \
|
||||
F (elcount, type, 19) \
|
||||
F (elcount, type, 20) \
|
||||
F (elcount, type, 21) \
|
||||
F (elcount, type, 22) \
|
||||
F (elcount, type, 23) \
|
||||
F (elcount, type, 24) \
|
||||
F (elcount, type, 25) \
|
||||
F (elcount, type, 26) \
|
||||
F (elcount, type, 27) \
|
||||
F (elcount, type, 28) \
|
||||
F (elcount, type, 29) \
|
||||
F (elcount, type, 30) \
|
||||
F (elcount, type, 31)
|
||||
#define TESTS_SSE2 \
|
||||
T2 (2, double) E \
|
||||
T2 (2, uint64_t) E \
|
||||
T4 (4, float) E \
|
||||
T4 (4, int) E \
|
||||
T8 (8, short) E \
|
||||
T16 (16, char) E
|
||||
#define TESTS_AVX \
|
||||
T4 (4, double) E \
|
||||
T4 (4, uint64_t) E \
|
||||
T8 (8, float) E \
|
||||
T8 (8, int) E \
|
||||
T16 (16, short) E \
|
||||
T32 (32, char) E
|
||||
#ifdef __AVX__
|
||||
#define TESTS TESTS_SSE2 TESTS_AVX
|
||||
#else
|
||||
#define TESTS TESTS_SSE2
|
||||
#endif
|
||||
|
||||
#define F FN
|
||||
#define H(elcount, type)
|
||||
#define E
|
||||
TESTS
|
||||
|
||||
/* Test driver.  TESTS is expanded twice inside the function body, each time
   with different definitions of the H, F and E hook macros: first to declare
   and initialize one vector per tested type, then to call every extraction
   function and check its result.  Returns 0 when no check aborted.  */
int
|
||||
main ()
|
||||
{
|
||||
#undef F
|
||||
#undef H
|
||||
#undef E
|
||||
/* Pass 1: H opens the initializer of the vector v<type><elcount>, each F
   contributes the element value idx + 1, and E closes it with "};".  */
#define H(elcount, type) \
|
||||
vector (elcount, type) v##type##elcount = {
|
||||
#define E };
|
||||
#define F(elcount, type, idx) idx + 1,
|
||||
TESTS
|
||||
#undef F
|
||||
#undef H
|
||||
#undef E
|
||||
/* Pass 2: H and E expand to nothing; each F calls f<type><elcount>_<idx> on
   the matching vector and aborts unless the result equals idx + 2.  */
#define H(elcount, type)
|
||||
#define E
|
||||
#define F(elcount, type, idx) \
|
||||
if (f##type##elcount##_##idx (v##type##elcount) != idx + 2) \
|
||||
abort ();
|
||||
TESTS
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue