[AARCH64] Add zip{1, 2}, uzp{1, 2}, trn{1, 2} support

for vector permute.

gcc/

	* config/aarch64/aarch64-simd-builtins.def: Add new builtins.
	* config/aarch64/aarch64-simd.md (simd_type): Add uzp.
	(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New.
	* config/aarch64/aarch64.c (aarch64_evpc_trn): New.
	(aarch64_evpc_uzp): Likewise.
	(aarch64_evpc_zip): Likewise.
	(aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns.
	* config/aarch64/iterators.md (unspec): Add necessary unspecs.
	(PERMUTE): New.
	(perm_insn): Likewise.
	(perm_hilo): Likewise.

From-SVN: r194219
This commit is contained in:
James Greenhalgh 2012-12-05 11:42:37 +00:00 committed by James Greenhalgh
parent 88b080739a
commit cc4d934fa0
5 changed files with 317 additions and 2 deletions

View File

@ -1,3 +1,17 @@
2012-12-05 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd-builtins.def: Add new builtins.
* config/aarch64/aarch64-simd.md (simd_type): Add uzp.
(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New.
* config/aarch64/aarch64.c (aarch64_evpc_trn): New.
(aarch64_evpc_uzp): Likewise.
(aarch64_evpc_zip): Likewise.
(aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns.
* config/aarch64/iterators.md (unspec): Add necessary unspecs.
(PERMUTE): New.
(perm_insn): Likewise.
(perm_hilo): Likewise.
2012-12-05 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-protos.h

View File

@ -224,3 +224,12 @@
BUILTIN_VDQF (UNOP, fcvtpu)
BUILTIN_VDQF (UNOP, fcvtms)
BUILTIN_VDQF (UNOP, fcvtmu)
/* Implemented by
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
BUILTIN_VALL (BINOP, zip1)
BUILTIN_VALL (BINOP, zip2)
BUILTIN_VALL (BINOP, uzp1)
BUILTIN_VALL (BINOP, uzp2)
BUILTIN_VALL (BINOP, trn1)
BUILTIN_VALL (BINOP, trn2)

View File

@ -128,7 +128,8 @@
; simd_store4s store single structure from one lane for four registers (ST4 [index]).
; simd_tbl table lookup.
; simd_trn transpose.
; simd_zip zip/unzip.
; simd_uzp unzip.
; simd_zip zip.
(define_attr "simd_type"
"simd_abd,\
@ -230,6 +231,7 @@
simd_store4s,\
simd_tbl,\
simd_trn,\
simd_uzp,\
simd_zip,\
none"
(const_string "none"))
@ -3406,6 +3408,17 @@
DONE;
})
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
[(set (match_operand:VALL 0 "register_operand" "=w")
(unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
(match_operand:VALL 2 "register_operand" "w")]
PERMUTE))]
"TARGET_SIMD"
"<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "simd_type" "simd_<PERMUTE:perm_insn>")
(set_attr "simd_mode" "<MODE>")]
)
(define_insn "aarch64_st2<mode>_dreg"
[(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:TI [(match_operand:OI 1 "register_operand" "w")

View File

@ -6919,6 +6919,261 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Recognize patterns suitable for the TRN instructions. */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
unsigned int i, odd, mask, nelt = d->nelt;
rtx out, in0, in1, x;
rtx (*gen) (rtx, rtx, rtx);
enum machine_mode vmode = d->vmode;
if (GET_MODE_UNIT_SIZE (vmode) > 8)
return false;
/* Note that these are little-endian tests.
We correct for big-endian later. */
if (d->perm[0] == 0)
odd = 0;
else if (d->perm[0] == 1)
odd = 1;
else
return false;
mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
for (i = 0; i < nelt; i += 2)
{
if (d->perm[i] != i + odd)
return false;
if (d->perm[i + 1] != ((i + nelt + odd) & mask))
return false;
}
/* Success! */
if (d->testing_p)
return true;
in0 = d->op0;
in1 = d->op1;
if (BYTES_BIG_ENDIAN)
{
x = in0, in0 = in1, in1 = x;
odd = !odd;
}
out = d->target;
if (odd)
{
switch (vmode)
{
case V16QImode: gen = gen_aarch64_trn2v16qi; break;
case V8QImode: gen = gen_aarch64_trn2v8qi; break;
case V8HImode: gen = gen_aarch64_trn2v8hi; break;
case V4HImode: gen = gen_aarch64_trn2v4hi; break;
case V4SImode: gen = gen_aarch64_trn2v4si; break;
case V2SImode: gen = gen_aarch64_trn2v2si; break;
case V2DImode: gen = gen_aarch64_trn2v2di; break;
case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
case V2DFmode: gen = gen_aarch64_trn2v2df; break;
default:
return false;
}
}
else
{
switch (vmode)
{
case V16QImode: gen = gen_aarch64_trn1v16qi; break;
case V8QImode: gen = gen_aarch64_trn1v8qi; break;
case V8HImode: gen = gen_aarch64_trn1v8hi; break;
case V4HImode: gen = gen_aarch64_trn1v4hi; break;
case V4SImode: gen = gen_aarch64_trn1v4si; break;
case V2SImode: gen = gen_aarch64_trn1v2si; break;
case V2DImode: gen = gen_aarch64_trn1v2di; break;
case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
case V2DFmode: gen = gen_aarch64_trn1v2df; break;
default:
return false;
}
}
emit_insn (gen (out, in0, in1));
return true;
}
/* Recognize patterns suitable for the UZP instructions. */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
unsigned int i, odd, mask, nelt = d->nelt;
rtx out, in0, in1, x;
rtx (*gen) (rtx, rtx, rtx);
enum machine_mode vmode = d->vmode;
if (GET_MODE_UNIT_SIZE (vmode) > 8)
return false;
/* Note that these are little-endian tests.
We correct for big-endian later. */
if (d->perm[0] == 0)
odd = 0;
else if (d->perm[0] == 1)
odd = 1;
else
return false;
mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
for (i = 0; i < nelt; i++)
{
unsigned elt = (i * 2 + odd) & mask;
if (d->perm[i] != elt)
return false;
}
/* Success! */
if (d->testing_p)
return true;
in0 = d->op0;
in1 = d->op1;
if (BYTES_BIG_ENDIAN)
{
x = in0, in0 = in1, in1 = x;
odd = !odd;
}
out = d->target;
if (odd)
{
switch (vmode)
{
case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
case V4SImode: gen = gen_aarch64_uzp2v4si; break;
case V2SImode: gen = gen_aarch64_uzp2v2si; break;
case V2DImode: gen = gen_aarch64_uzp2v2di; break;
case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
default:
return false;
}
}
else
{
switch (vmode)
{
case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
case V4SImode: gen = gen_aarch64_uzp1v4si; break;
case V2SImode: gen = gen_aarch64_uzp1v2si; break;
case V2DImode: gen = gen_aarch64_uzp1v2di; break;
case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
default:
return false;
}
}
emit_insn (gen (out, in0, in1));
return true;
}
/* Recognize patterns suitable for the ZIP instructions. */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
unsigned int i, high, mask, nelt = d->nelt;
rtx out, in0, in1, x;
rtx (*gen) (rtx, rtx, rtx);
enum machine_mode vmode = d->vmode;
if (GET_MODE_UNIT_SIZE (vmode) > 8)
return false;
/* Note that these are little-endian tests.
We correct for big-endian later. */
high = nelt / 2;
if (d->perm[0] == high)
/* Do Nothing. */
;
else if (d->perm[0] == 0)
high = 0;
else
return false;
mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
for (i = 0; i < nelt / 2; i++)
{
unsigned elt = (i + high) & mask;
if (d->perm[i * 2] != elt)
return false;
elt = (elt + nelt) & mask;
if (d->perm[i * 2 + 1] != elt)
return false;
}
/* Success! */
if (d->testing_p)
return true;
in0 = d->op0;
in1 = d->op1;
if (BYTES_BIG_ENDIAN)
{
x = in0, in0 = in1, in1 = x;
high = !high;
}
out = d->target;
if (high)
{
switch (vmode)
{
case V16QImode: gen = gen_aarch64_zip2v16qi; break;
case V8QImode: gen = gen_aarch64_zip2v8qi; break;
case V8HImode: gen = gen_aarch64_zip2v8hi; break;
case V4HImode: gen = gen_aarch64_zip2v4hi; break;
case V4SImode: gen = gen_aarch64_zip2v4si; break;
case V2SImode: gen = gen_aarch64_zip2v2si; break;
case V2DImode: gen = gen_aarch64_zip2v2di; break;
case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
case V2DFmode: gen = gen_aarch64_zip2v2df; break;
default:
return false;
}
}
else
{
switch (vmode)
{
case V16QImode: gen = gen_aarch64_zip1v16qi; break;
case V8QImode: gen = gen_aarch64_zip1v8qi; break;
case V8HImode: gen = gen_aarch64_zip1v8hi; break;
case V4HImode: gen = gen_aarch64_zip1v4hi; break;
case V4SImode: gen = gen_aarch64_zip1v4si; break;
case V2SImode: gen = gen_aarch64_zip1v2si; break;
case V2DImode: gen = gen_aarch64_zip1v2di; break;
case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
case V2DFmode: gen = gen_aarch64_zip1v2df; break;
default:
return false;
}
}
emit_insn (gen (out, in0, in1));
return true;
}
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
@ -6969,7 +7224,15 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
}
if (TARGET_SIMD)
return aarch64_evpc_tbl (d);
{
if (aarch64_evpc_zip (d))
return true;
else if (aarch64_evpc_uzp (d))
return true;
else if (aarch64_evpc_trn (d))
return true;
return aarch64_evpc_tbl (d);
}
return false;
}

View File

@ -230,6 +230,12 @@
UNSPEC_BSL ; Used in aarch64-simd.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_CONCAT ; Used in vector permute patterns.
UNSPEC_ZIP1 ; Used in vector permute patterns.
UNSPEC_ZIP2 ; Used in vector permute patterns.
UNSPEC_UZP1 ; Used in vector permute patterns.
UNSPEC_UZP2 ; Used in vector permute patterns.
UNSPEC_TRN1 ; Used in vector permute patterns.
UNSPEC_TRN2 ; Used in vector permute patterns.
])
;; -------------------------------------------------------------------
@ -652,6 +658,9 @@
(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
;; Unspecs for the two-input permutes: the "1" and "2" forms of ZIP, TRN
;; and UZP.
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
UNSPEC_TRN1 UNSPEC_TRN2
UNSPEC_UZP1 UNSPEC_UZP2])
(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
@ -757,3 +766,10 @@
(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
(UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
;; Base mnemonic ("zip", "trn" or "uzp") for each permute unspec.
(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
(UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
(UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
;; Mnemonic suffix ("1" or "2") for each permute unspec.
(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
(UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])