From cc4d934fa0d16330f29953d7ad14ff71e15f0d1b Mon Sep 17 00:00:00 2001
From: James Greenhalgh <james.greenhalgh@arm.com>
Date: Wed, 5 Dec 2012 11:42:37 +0000
Subject: [PATCH] [AARCH64] Add zip{1, 2}, uzp{1, 2}, trn{1, 2} support for
 vector permute.

gcc/
	* config/aarch64/aarch64-simd-builtins.def: Add new builtins.
	* config/aarch64/aarch64-simd.md (simd_type): Add uzp.
	(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New.
	* config/aarch64/aarch64.c (aarch64_evpc_trn): New.
	(aarch64_evpc_uzp): Likewise.
	(aarch64_evpc_zip): Likewise.
	(aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns.
	* config/aarch64/iterators.md (unspec): Add necessary unspecs.
	(PERMUTE): New.
	(perm_insn): Likewise.
	(perm_hilo): Likewise.

From-SVN: r194219
---
 gcc/ChangeLog                                |  14 +
 gcc/config/aarch64/aarch64-simd-builtins.def |   9 +
 gcc/config/aarch64/aarch64-simd.md           |  15 +-
 gcc/config/aarch64/aarch64.c                 | 265 ++++++++++++++++++-
 gcc/config/aarch64/iterators.md              |  16 ++
 5 files changed, 317 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 370149cf86d..0ba2ae1822d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2012-12-05  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def: Add new builtins.
+	* config/aarch64/aarch64-simd.md (simd_type): Add uzp.
+	(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New.
+	* config/aarch64/aarch64.c (aarch64_evpc_trn): New.
+	(aarch64_evpc_uzp): Likewise.
+	(aarch64_evpc_zip): Likewise.
+	(aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns.
+	* config/aarch64/iterators.md (unspec): Add necessary unspecs.
+	(PERMUTE): New.
+	(perm_insn): Likewise.
+	(perm_hilo): Likewise.
+
 2012-12-05  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	* config/aarch64/aarch64-protos.h
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index b344120ae90..d441417b00f 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -224,3 +224,12 @@
   BUILTIN_VDQF (UNOP, fcvtpu)
   BUILTIN_VDQF (UNOP, fcvtms)
   BUILTIN_VDQF (UNOP, fcvtmu)
+
+  /* Implemented by
+     aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>.  */
+  BUILTIN_VALL (BINOP, zip1)
+  BUILTIN_VALL (BINOP, zip2)
+  BUILTIN_VALL (BINOP, uzp1)
+  BUILTIN_VALL (BINOP, uzp2)
+  BUILTIN_VALL (BINOP, trn1)
+  BUILTIN_VALL (BINOP, trn2)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index baee0cc8c59..febf71d37c4 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -128,7 +128,8 @@
 ; simd_store4s    store single structure from one lane for four registers (ST4 [index]).
 ; simd_tbl        table lookup.
 ; simd_trn        transpose.
-; simd_zip        zip/unzip.
+; simd_uzp        unzip.
+; simd_zip        zip.
 
 (define_attr "simd_type"
    "simd_abd,\
@@ -230,6 +231,7 @@
    simd_store4s,\
    simd_tbl,\
    simd_trn,\
+   simd_uzp,\
    simd_zip,\
    none"
   (const_string "none"))
@@ -3406,6 +3408,17 @@
   DONE;
 })
 
+(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+        (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
+                      (match_operand:VALL 2 "register_operand" "w")]
+                      PERMUTE))]
+  "TARGET_SIMD"
+  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_<PERMUTE:perm_insn>")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "aarch64_st2<mode>_dreg"
   [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:TI [(match_operand:OI 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ae1a037cf78..7bc2f6b896a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6919,6 +6919,261 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
 }
 
+/* Recognize patterns suitable for the TRN instructions.  */
+static bool
+aarch64_evpc_trn (struct expand_vec_perm_d *d)
+{
+  unsigned int i, odd, mask, nelt = d->nelt;
+  rtx out, in0, in1, x;
+  rtx (*gen) (rtx, rtx, rtx);
+  enum machine_mode vmode = d->vmode;
+
+  if (GET_MODE_UNIT_SIZE (vmode) > 8)
+    return false;
+
+  /* Note that these are little-endian tests.
+     We correct for big-endian later.  */
+  if (d->perm[0] == 0)
+    odd = 0;
+  else if (d->perm[0] == 1)
+    odd = 1;
+  else
+    return false;
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+  for (i = 0; i < nelt; i += 2)
+    {
+      if (d->perm[i] != i + odd)
+        return false;
+      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
+        return false;
+    }
+
+  /* Success!  */
+  if (d->testing_p)
+    return true;
+
+  in0 = d->op0;
+  in1 = d->op1;
+  if (BYTES_BIG_ENDIAN)
+    {
+      x = in0, in0 = in1, in1 = x;
+      odd = !odd;
+    }
+  out = d->target;
+
+  if (odd)
+    {
+      switch (vmode)
+        {
+        case V16QImode: gen = gen_aarch64_trn2v16qi; break;
+        case V8QImode: gen = gen_aarch64_trn2v8qi; break;
+        case V8HImode: gen = gen_aarch64_trn2v8hi; break;
+        case V4HImode: gen = gen_aarch64_trn2v4hi; break;
+        case V4SImode: gen = gen_aarch64_trn2v4si; break;
+        case V2SImode: gen = gen_aarch64_trn2v2si; break;
+        case V2DImode: gen = gen_aarch64_trn2v2di; break;
+        case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
+        case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
+        case V2DFmode: gen = gen_aarch64_trn2v2df; break;
+        default:
+          return false;
+        }
+    }
+  else
+    {
+      switch (vmode)
+        {
+        case V16QImode: gen = gen_aarch64_trn1v16qi; break;
+        case V8QImode: gen = gen_aarch64_trn1v8qi; break;
+        case V8HImode: gen = gen_aarch64_trn1v8hi; break;
+        case V4HImode: gen = gen_aarch64_trn1v4hi; break;
+        case V4SImode: gen = gen_aarch64_trn1v4si; break;
+        case V2SImode: gen = gen_aarch64_trn1v2si; break;
+        case V2DImode: gen = gen_aarch64_trn1v2di; break;
+        case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
+        case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
+        case V2DFmode: gen = gen_aarch64_trn1v2df; break;
+        default:
+          return false;
+        }
+    }
+
+  emit_insn (gen (out, in0, in1));
+  return true;
+}
+
+/* Recognize patterns suitable for the UZP instructions.  */
+static bool
+aarch64_evpc_uzp (struct expand_vec_perm_d *d)
+{
+  unsigned int i, odd, mask, nelt = d->nelt;
+  rtx out, in0, in1, x;
+  rtx (*gen) (rtx, rtx, rtx);
+  enum machine_mode vmode = d->vmode;
+
+  if (GET_MODE_UNIT_SIZE (vmode) > 8)
+    return false;
+
+  /* Note that these are little-endian tests.
+     We correct for big-endian later.  */
+  if (d->perm[0] == 0)
+    odd = 0;
+  else if (d->perm[0] == 1)
+    odd = 1;
+  else
+    return false;
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+  for (i = 0; i < nelt; i++)
+    {
+      unsigned elt = (i * 2 + odd) & mask;
+      if (d->perm[i] != elt)
+        return false;
+    }
+
+  /* Success!  */
+  if (d->testing_p)
+    return true;
+
+  in0 = d->op0;
+  in1 = d->op1;
+  if (BYTES_BIG_ENDIAN)
+    {
+      x = in0, in0 = in1, in1 = x;
+      odd = !odd;
+    }
+  out = d->target;
+
+  if (odd)
+    {
+      switch (vmode)
+        {
+        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
+        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
+        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
+        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
+        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
+        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
+        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
+        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
+        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
+        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
+        default:
+          return false;
+        }
+    }
+  else
+    {
+      switch (vmode)
+        {
+        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
+        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
+        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
+        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
+        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
+        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
+        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
+        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
+        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
+        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
+        default:
+          return false;
+        }
+    }
+
+  emit_insn (gen (out, in0, in1));
+  return true;
+}
+
+/* Recognize patterns suitable for the ZIP instructions.  */
+static bool
+aarch64_evpc_zip (struct expand_vec_perm_d *d)
+{
+  unsigned int i, high, mask, nelt = d->nelt;
+  rtx out, in0, in1, x;
+  rtx (*gen) (rtx, rtx, rtx);
+  enum machine_mode vmode = d->vmode;
+
+  if (GET_MODE_UNIT_SIZE (vmode) > 8)
+    return false;
+
+  /* Note that these are little-endian tests.
+     We correct for big-endian later.  */
+  high = nelt / 2;
+  if (d->perm[0] == high)
+    /* Do Nothing.  */
+    ;
+  else if (d->perm[0] == 0)
+    high = 0;
+  else
+    return false;
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+  for (i = 0; i < nelt / 2; i++)
+    {
+      unsigned elt = (i + high) & mask;
+      if (d->perm[i * 2] != elt)
+        return false;
+      elt = (elt + nelt) & mask;
+      if (d->perm[i * 2 + 1] != elt)
+        return false;
+    }
+
+  /* Success!  */
+  if (d->testing_p)
+    return true;
+
+  in0 = d->op0;
+  in1 = d->op1;
+  if (BYTES_BIG_ENDIAN)
+    {
+      x = in0, in0 = in1, in1 = x;
+      high = !high;
+    }
+  out = d->target;
+
+  if (high)
+    {
+      switch (vmode)
+        {
+        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
+        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
+        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
+        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
+        case V4SImode: gen = gen_aarch64_zip2v4si; break;
+        case V2SImode: gen = gen_aarch64_zip2v2si; break;
+        case V2DImode: gen = gen_aarch64_zip2v2di; break;
+        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
+        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
+        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
+        default:
+          return false;
+        }
+    }
+  else
+    {
+      switch (vmode)
+        {
+        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
+        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
+        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
+        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
+        case V4SImode: gen = gen_aarch64_zip1v4si; break;
+        case V2SImode: gen = gen_aarch64_zip1v2si; break;
+        case V2DImode: gen = gen_aarch64_zip1v2di; break;
+        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
+        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
+        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
+        default:
+          return false;
+        }
+    }
+
+  emit_insn (gen (out, in0, in1));
+  return true;
+}
+
 static bool
 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
 {
@@ -6969,7 +7224,15 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
     }
 
   if (TARGET_SIMD)
-    return aarch64_evpc_tbl (d);
+    {
+      if (aarch64_evpc_zip (d))
+        return true;
+      else if (aarch64_evpc_uzp (d))
+        return true;
+      else if (aarch64_evpc_trn (d))
+        return true;
+      return aarch64_evpc_tbl (d);
+    }
 
   return false;
 }
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7cd4cef0eef..0eb30f06c04 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -230,6 +230,12 @@
     UNSPEC_BSL          ; Used in aarch64-simd.md.
     UNSPEC_TBL          ; Used in vector permute patterns.
     UNSPEC_CONCAT       ; Used in vector permute patterns.
+    UNSPEC_ZIP1         ; Used in vector permute patterns.
+    UNSPEC_ZIP2         ; Used in vector permute patterns.
+    UNSPEC_UZP1         ; Used in vector permute patterns.
+    UNSPEC_UZP2         ; Used in vector permute patterns.
+    UNSPEC_TRN1         ; Used in vector permute patterns.
+    UNSPEC_TRN2         ; Used in vector permute patterns.
 ])
 
 ;; -------------------------------------------------------------------
@@ -652,6 +658,9 @@
 (define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
 
+(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
+                              UNSPEC_TRN1 UNSPEC_TRN2
+                              UNSPEC_UZP1 UNSPEC_UZP2])
 (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
                             UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
@@ -757,3 +766,10 @@
 (define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
                                (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
+
+(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
+                            (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
+                            (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
+
+(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
+                            (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
+                            (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
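
A minimal usage sketch of what this patch enables (not part of the patch;
the function name and selector are illustrative only): a constant two-operand
permute written with GCC's generic __builtin_shuffle whose selector matches
one of the recognized patterns should now be expanded by aarch64_evpc_zip
(or _uzp/_trn) into a single permute instruction, instead of falling through
to the tbl-based path.

    #include <arm_neon.h>

    /* Interleave the low halves of A and B: { a0, b0, a1, b1 }.
       Selector { 0, 4, 1, 5 } is exactly the index pattern that
       aarch64_evpc_zip accepts for V4SImode (perm[0] == 0, pairs of
       elements drawn alternately from each input), so with this patch
       the shuffle compiles to one zip1 instruction.  */
    int32x4_t
    interleave_low (int32x4_t a, int32x4_t b)
    {
      const uint32x4_t sel = { 0, 4, 1, 5 };
      return __builtin_shuffle (a, b, sel);
    }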