From 2e2accf8ae33c76c34ba3a44463590cbacbb96d8 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 14 Oct 2011 18:55:25 +0200 Subject: [PATCH] sse.md (vec_interleave_high<mode>, [...]): Add AVX2 expanders for VI_256 modes. * config/i386/sse.md (vec_interleave_high<mode>, vec_interleave_low<mode>): Add AVX2 expanders for VI_256 modes. * config/i386/i386.c (expand_vec_perm_interleave3): New function. (ix86_expand_vec_perm_builtin_1): Call it. From-SVN: r179995 --- gcc/ChangeLog | 8 +++++ gcc/config/i386/i386.c | 79 ++++++++++++++++++++++++++++++++++++++++++ gcc/config/i386/sse.md | 32 +++++++++++++++++ 3 files changed, 119 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 48b703c09e7..d43a5cf3e5a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2011-10-14 Jakub Jelinek + + * config/i386/sse.md (vec_interleave_high<mode>, + vec_interleave_low<mode>): Add AVX2 expanders for VI_256 + modes. + * config/i386/i386.c (expand_vec_perm_interleave3): New function. + (ix86_expand_vec_perm_builtin_1): Call it. + 2011-10-14 Georg-Johann Lay Fix thinko from r179765 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index df6267b664c..f09a37277fa 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -35474,6 +35474,82 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) return true; } +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify + a two vector permutation using 2 intra-lane interleave insns + and cross-lane shuffle for 32-byte vectors. 
*/
+
+static bool
+expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
+{
+  unsigned i, nelt;
+  rtx (*gen) (rtx, rtx, rtx);  /* Interleave generator chosen by the switch below.  */
+
+  if (d->op0 == d->op1)  /* Bail out when both operands are the same rtx.  */
+    return false;
+  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
+    ;  /* Accepted: TARGET_AVX2 and any 32-byte mode.  */
+  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
+    ;  /* Accepted: TARGET_AVX with V8SFmode or V4DFmode only.  */
+  else
+    return false;
+
+  nelt = d->nelt;
+  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)  /* Must start at index 0 or nelt / 2.  */
+    return false;
+  for (i = 0; i < nelt; i += 2)  /* Require pairs (k, k + nelt), k counting up from perm[0].  */
+    if (d->perm[i] != d->perm[0] + i / 2
+        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
+      return false;
+
+  if (d->testing_p)  /* Selector matched; report success without emitting insns.  */
+    return true;
+
+  switch (d->vmode)  /* In every case nonzero perm[0] picks "high", zero picks "low".  */
+    {
+    case V32QImode:
+      if (d->perm[0])
+        gen = gen_vec_interleave_highv32qi;
+      else
+        gen = gen_vec_interleave_lowv32qi;
+      break;
+    case V16HImode:
+      if (d->perm[0])
+        gen = gen_vec_interleave_highv16hi;
+      else
+        gen = gen_vec_interleave_lowv16hi;
+      break;
+    case V8SImode:
+      if (d->perm[0])
+        gen = gen_vec_interleave_highv8si;
+      else
+        gen = gen_vec_interleave_lowv8si;
+      break;
+    case V4DImode:
+      if (d->perm[0])
+        gen = gen_vec_interleave_highv4di;
+      else
+        gen = gen_vec_interleave_lowv4di;
+      break;
+    case V8SFmode:
+      if (d->perm[0])
+        gen = gen_vec_interleave_highv8sf;
+      else
+        gen = gen_vec_interleave_lowv8sf;
+      break;
+    case V4DFmode:
+      if (d->perm[0])
+        gen = gen_vec_interleave_highv4df;
+      else
+        gen = gen_vec_interleave_lowv4df;
+      break;
+    default:
+      gcc_unreachable ();  /* No other mode has a case in this switch.  */
+    }
+
+  emit_insn (gen (d->target, d->op0, d->op1));  /* Emit the chosen interleave into d->target.  */
+  return true;
+}
+
 /* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
    permutation with two pshufb insns and an ior.  We should have already
    failed all two instruction sequences.  */
@@ -35972,6 +36048,9 @@ ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_pshufb2 (d))
     return true;
 
+  if (expand_vec_perm_interleave3 (d))
+    return true;
+
   /* Try sequences of four instructions.  
*/ + if (expand_vec_perm_vpshufb2_vpermq (d))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b07f9a877a..016eae2d371 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6848,6 +6848,38 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
+(define_expand "vec_interleave_high<mode>"  ;; <mode> gives one expander per VI_256 mode.
+  [(match_operand:VI_256 0 "register_operand" "=x")
+   (match_operand:VI_256 1 "register_operand" "x")
+   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ "TARGET_AVX2"
+{
+  rtx t1 = gen_reg_rtx (<MODE>mode);  /* Receives the avx2_interleave_low result.  */
+  rtx t2 = gen_reg_rtx (<MODE>mode);  /* Receives the avx2_interleave_high result.  */
+  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
+  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
+  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
+                                gen_lowpart (V4DImode, t1),
+                                gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));  /* Selector 0x31: presumably upper 128 bits of t1, then of t2 -- confirm against VPERM2I128.  */
+  DONE;
+})
+
+(define_expand "vec_interleave_low<mode>"  ;; <mode> gives one expander per VI_256 mode.
+  [(match_operand:VI_256 0 "register_operand" "=x")
+   (match_operand:VI_256 1 "register_operand" "x")
+   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ "TARGET_AVX2"
+{
+  rtx t1 = gen_reg_rtx (<MODE>mode);  /* Receives the avx2_interleave_low result.  */
+  rtx t2 = gen_reg_rtx (<MODE>mode);  /* Receives the avx2_interleave_high result.  */
+  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
+  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
+  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
+                                gen_lowpart (V4DImode, t1),
+                                gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));  /* Selector 0x20: presumably lower 128 bits of t1, then of t2 -- confirm against VPERM2I128.  */
+  DONE;
+})
+
 ;; Modes handled by pinsr patterns.
 (define_mode_iterator PINSR_MODE
   [(V16QI "TARGET_SSE4_1") V8HI