spu.md (vec_unpacku_hi_v8hi, [...]): Implement.

* config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi,
	vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi, vec_unpacku_hi_v16qi,
	vec_unpacku_lo_v16qi, vec_unpacks_hi_v16qi, vec_unpacks_lo_v16qi):
	Implement.

From-SVN: r132472
This commit is contained in:
Ira Rosen 2008-02-20 07:36:47 +00:00 committed by Ira Rosen
parent a8971bdb01
commit 76a893cfc4
5 changed files with 195 additions and 3 deletions

View File

@ -1,3 +1,9 @@
2008-02-20 Ira Rosen <irar@il.ibm.com>
* config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi,
vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi, vec_unpacku_hi_v16qi,
vec_unpacku_lo_v16qi, vec_unpacks_hi_v16qi, vec_unpacks_lo_v16qi):
Implement.
2008-02-19 Jan Hubicka <jh@suse.cz>
* predict.c (tree_bb_level_predictions): Remove variable next

View File

@ -4455,3 +4455,179 @@ selb\t%0,%4,%0,%3"
DONE;
}")
;; Unsigned unpack, "hi" half: widen halfword elements 0-3 of the V8HI
;; input into the four zero-extended words of the V4SI result.
(define_expand "vec_unpacku_hi_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
        (zero_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "spu_reg_operand" "r")
            (parallel [(const_int 0)(const_int 1)
                       (const_int 2)(const_int 3)]))))]
  ""
{
  /* The whole expansion is a single shufb.  Each row below describes
     one result word: the two low bytes name the source bytes of one
     halfword, the two high bytes are the 0x80 selector, which is
     expected to make shufb emit a zero byte (see the SPU ISA).  */
  rtx pattern = gen_reg_rtx (TImode);
  unsigned char sel[16] = {
    0x80, 0x80, 0x00, 0x01,
    0x80, 0x80, 0x02, 0x03,
    0x80, 0x80, 0x04, 0x05,
    0x80, 0x80, 0x06, 0x07
  };
  rtx sel_const = array_to_constant (TImode, sel);

  /* Load the selector into a register, then shuffle operand 1 with
     itself straight into the destination.  */
  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], pattern));
  DONE;
})
;; Unsigned unpack, "lo" half: widen halfword elements 4-7 of the V8HI
;; input into the four zero-extended words of the V4SI result.
(define_expand "vec_unpacku_lo_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
        (zero_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "spu_reg_operand" "r")
            (parallel [(const_int 4)(const_int 5)
                       (const_int 6)(const_int 7)]))))]
  ""
{
  /* Single shufb expansion.  One row per result word: the low two
     bytes pick source bytes 8..15 pairwise, the high two bytes are
     the 0x80 selector, which is expected to make shufb emit a zero
     byte (see the SPU ISA).  */
  rtx pattern = gen_reg_rtx (TImode);
  unsigned char sel[16] = {
    0x80, 0x80, 0x08, 0x09,
    0x80, 0x80, 0x0A, 0x0B,
    0x80, 0x80, 0x0C, 0x0D,
    0x80, 0x80, 0x0E, 0x0F
  };
  rtx sel_const = array_to_constant (TImode, sel);

  /* Load the selector into a register, then shuffle operand 1 with
     itself straight into the destination.  */
  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], pattern));
  DONE;
})
;; Signed unpack, "hi" half: widen halfword elements 0-3 of the V8HI
;; input into the four sign-extended words of the V4SI result.
(define_expand "vec_unpacks_hi_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
        (sign_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "spu_reg_operand" "r")
            (parallel [(const_int 0)(const_int 1)
                       (const_int 2)(const_int 3)]))))]
  ""
{
  /* Two steps: a shufb moves each selected halfword into the low two
     bytes of a word slot, then spu_xshw is applied to that shuffled
     value to obtain the sign-extended words the template asks for.  */
  rtx shuffled = gen_reg_rtx (V8HImode);
  rtx widened = gen_reg_rtx (V4SImode);
  rtx pattern = gen_reg_rtx (TImode);
  /* One row per word slot; the 0x80 bytes are placeholders for the
     high half that the following spu_xshw is relied on to fill.  */
  unsigned char sel[16] = {
    0x80, 0x80, 0x00, 0x01,
    0x80, 0x80, 0x02, 0x03,
    0x80, 0x80, 0x04, 0x05,
    0x80, 0x80, 0x06, 0x07
  };
  rtx sel_const = array_to_constant (TImode, sel);

  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (shuffled, operands[1], operands[1], pattern));
  emit_insn (gen_spu_xshw (widened, shuffled));
  /* The extension is done in a temporary and then copied out.  */
  emit_move_insn (operands[0], widened);
  DONE;
})
;; Signed unpack, "lo" half: widen halfword elements 4-7 of the V8HI
;; input into the four sign-extended words of the V4SI result.
(define_expand "vec_unpacks_lo_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
        (sign_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "spu_reg_operand" "r")
            (parallel [(const_int 4)(const_int 5)
                       (const_int 6)(const_int 7)]))))]
  ""
{
  /* Two steps: a shufb moves each selected halfword into the low two
     bytes of a word slot, then spu_xshw is applied to that shuffled
     value to obtain the sign-extended words the template asks for.  */
  rtx shuffled = gen_reg_rtx (V8HImode);
  rtx widened = gen_reg_rtx (V4SImode);
  rtx pattern = gen_reg_rtx (TImode);
  /* One row per word slot, drawing on source bytes 8..15; the 0x80
     bytes are placeholders for the high half that the following
     spu_xshw is relied on to fill.  */
  unsigned char sel[16] = {
    0x80, 0x80, 0x08, 0x09,
    0x80, 0x80, 0x0A, 0x0B,
    0x80, 0x80, 0x0C, 0x0D,
    0x80, 0x80, 0x0E, 0x0F
  };
  rtx sel_const = array_to_constant (TImode, sel);

  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (shuffled, operands[1], operands[1], pattern));
  emit_insn (gen_spu_xshw (widened, shuffled));
  /* The extension is done in a temporary and then copied out.  */
  emit_move_insn (operands[0], widened);
  DONE;
})
;; Unsigned unpack, "hi" half: widen byte elements 0-7 of the V16QI
;; input into the eight zero-extended halfwords of the V8HI result.
(define_expand "vec_unpacku_hi_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
        (zero_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "spu_reg_operand" "r")
            (parallel [(const_int 0)(const_int 1)
                       (const_int 2)(const_int 3)
                       (const_int 4)(const_int 5)
                       (const_int 6)(const_int 7)]))))]
  ""
{
  /* The whole expansion is a single shufb.  Each pair below describes
     one result halfword: the low byte names the source byte, the high
     byte is the 0x80 selector, which is expected to make shufb emit a
     zero byte (see the SPU ISA).  */
  rtx pattern = gen_reg_rtx (TImode);
  unsigned char sel[16] = {
    0x80, 0x00,  0x80, 0x01,
    0x80, 0x02,  0x80, 0x03,
    0x80, 0x04,  0x80, 0x05,
    0x80, 0x06,  0x80, 0x07
  };
  rtx sel_const = array_to_constant (TImode, sel);

  /* Load the selector into a register, then shuffle operand 1 with
     itself straight into the destination.  */
  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], pattern));
  DONE;
})
;; Unsigned unpack, "lo" half: widen byte elements 8-15 of the V16QI
;; input into the eight zero-extended halfwords of the V8HI result.
(define_expand "vec_unpacku_lo_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
        (zero_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "spu_reg_operand" "r")
            (parallel [(const_int 8)(const_int 9)
                       (const_int 10)(const_int 11)
                       (const_int 12)(const_int 13)
                       (const_int 14)(const_int 15)]))))]
  ""
{
  /* Single shufb expansion.  One pair per result halfword: the low
     byte picks one of source bytes 8..15, the high byte is the 0x80
     selector, which is expected to make shufb emit a zero byte (see
     the SPU ISA).  */
  rtx pattern = gen_reg_rtx (TImode);
  unsigned char sel[16] = {
    0x80, 0x08,  0x80, 0x09,
    0x80, 0x0A,  0x80, 0x0B,
    0x80, 0x0C,  0x80, 0x0D,
    0x80, 0x0E,  0x80, 0x0F
  };
  rtx sel_const = array_to_constant (TImode, sel);

  /* Load the selector into a register, then shuffle operand 1 with
     itself straight into the destination.  */
  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], pattern));
  DONE;
})
;; Signed unpack, "hi" half: widen byte elements 0-7 of the V16QI
;; input into the eight sign-extended halfwords of the V8HI result.
(define_expand "vec_unpacks_hi_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
        (sign_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "spu_reg_operand" "r")
            (parallel [(const_int 0)(const_int 1)
                       (const_int 2)(const_int 3)
                       (const_int 4)(const_int 5)
                       (const_int 6)(const_int 7)]))))]
  ""
{
  /* Two steps: a shufb moves each selected byte into the low byte of
     a halfword slot, then spu_xsbh is applied to that shuffled value
     to obtain the sign-extended halfwords the template asks for.  */
  rtx shuffled = gen_reg_rtx (V16QImode);
  rtx widened = gen_reg_rtx (V8HImode);
  rtx pattern = gen_reg_rtx (TImode);
  /* One pair per halfword slot; the 0x80 bytes are placeholders for
     the high byte that the following spu_xsbh is relied on to fill.  */
  unsigned char sel[16] = {
    0x80, 0x00,  0x80, 0x01,
    0x80, 0x02,  0x80, 0x03,
    0x80, 0x04,  0x80, 0x05,
    0x80, 0x06,  0x80, 0x07
  };
  rtx sel_const = array_to_constant (TImode, sel);

  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (shuffled, operands[1], operands[1], pattern));
  emit_insn (gen_spu_xsbh (widened, shuffled));
  /* The extension is done in a temporary and then copied out.  */
  emit_move_insn (operands[0], widened);
  DONE;
})
;; Signed unpack, "lo" half: widen byte elements 8-15 of the V16QI
;; input into the eight sign-extended halfwords of the V8HI result.
(define_expand "vec_unpacks_lo_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
        (sign_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "spu_reg_operand" "r")
            (parallel [(const_int 8)(const_int 9)
                       (const_int 10)(const_int 11)
                       (const_int 12)(const_int 13)
                       (const_int 14)(const_int 15)]))))]
  ""
{
  /* Two steps: a shufb moves each selected byte into the low byte of
     a halfword slot, then spu_xsbh is applied to that shuffled value
     to obtain the sign-extended halfwords the template asks for.  */
  rtx shuffled = gen_reg_rtx (V16QImode);
  rtx widened = gen_reg_rtx (V8HImode);
  rtx pattern = gen_reg_rtx (TImode);
  /* One pair per halfword slot, drawing on source bytes 8..15; the
     0x80 bytes are placeholders for the high byte that the following
     spu_xsbh is relied on to fill.  */
  unsigned char sel[16] = {
    0x80, 0x08,  0x80, 0x09,
    0x80, 0x0A,  0x80, 0x0B,
    0x80, 0x0C,  0x80, 0x0D,
    0x80, 0x0E,  0x80, 0x0F
  };
  rtx sel_const = array_to_constant (TImode, sel);

  emit_move_insn (pattern, sel_const);
  emit_insn (gen_shufb (shuffled, operands[1], operands[1], pattern));
  emit_insn (gen_spu_xsbh (widened, shuffled));
  /* The extension is done in a temporary and then copied out.  */
  emit_move_insn (operands[0], widened);
  DONE;
})

View File

@ -1,3 +1,11 @@
2008-02-20 Ira Rosen <irar@il.ibm.com>
* lib/target-supports.exp (check_effective_target_vect_unpack):
Return true for SPU.
(check_effective_target_vect_short_mult): Likewise.
* gcc.dg/vect/vect-reduc-dot-s16b.c: Expect vectorization of
the loop on targets that support vect_unpack.
2008-02-20 Uros Bizjak <ubizjak@gmail.com>
* g++.dg/compat/struct-layout-1_generate.c (DG_OPTIONS): New define.

View File

@ -48,9 +48,9 @@ main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si || vect_unpack } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_short_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_hi_to_si } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { { ! vect_widen_sum_hi_to_si } && { ! vect_unpack } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -1667,7 +1667,8 @@ proc check_effective_target_vect_unpack { } {
set et_vect_unpack_saved 0
if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
|| [istarget x86_64-*-*]
|| [istarget spu-*-*] } {
set et_vect_unpack_saved 1
}
}
@ -1876,6 +1877,7 @@ proc check_effective_target_vect_short_mult { } {
} else {
set et_vect_short_mult_saved 0
if { [istarget ia64-*-*]
|| [istarget spu-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_short_mult_saved 1