From e9e67af14893295ed164e269baa26c2325ae7cf1 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 2 Sep 2014 16:00:01 +0000 Subject: [PATCH] [2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions. PR target/62275 * config/arm/neon.md (neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode> <v_cmp_result>): New pattern. * config/arm/iterators.md (NEON_VCVT): New int iterator. * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf, vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf, vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions. * config/arm/arm.c (arm_builtin_vectorized_function): Handle BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF. PR target/62275 * gcc.target/arm/vect-lceilf_1.c: New test. * gcc.target/arm/vect-lfloorf_1.c: Likewise. * gcc.target/arm/vect-lroundf_1.c: Likewise. From-SVN: r214826 --- gcc/ChangeLog | 13 +++++++ gcc/config/arm/arm.c | 34 +++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 12 +++++++ gcc/config/arm/iterators.md | 2 ++ gcc/config/arm/neon.md | 11 ++++++ gcc/testsuite/ChangeLog | 7 ++++ gcc/testsuite/gcc.target/arm/vect-lceilf_1.c | 18 ++++++++++ gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c | 18 ++++++++++ gcc/testsuite/gcc.target/arm/vect-lroundf_1.c | 18 ++++++++++ 9 files changed, 133 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/vect-lceilf_1.c create mode 100644 gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c create mode 100644 gcc/testsuite/gcc.target/arm/vect-lroundf_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b46d009219d..7bfbd5a3108 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2014-09-02 Kyrylo Tkachov + + PR target/62275 + * config/arm/neon.md + (neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode> + <v_cmp_result>): New pattern. + * config/arm/iterators.md (NEON_VCVT): New int iterator. + * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf, + vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf, + vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions. 
+ * config/arm/arm.c (arm_builtin_vectorized_function): Handle + BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF. + 2014-09-02 Kyrylo Tkachov PR target/62275 diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d8bfda3aa98..ba677abd5be 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -29946,6 +29946,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) { enum machine_mode in_mode, out_mode; int in_n, out_n; + bool out_unsigned_p = TYPE_UNSIGNED (type_out); if (TREE_CODE (type_out) != VECTOR_TYPE || TREE_CODE (type_in) != VECTOR_TYPE) @@ -29991,6 +29992,36 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) return ARM_FIND_VRINT_VARIANT (vrintz); case BUILT_IN_ROUNDF: return ARM_FIND_VRINT_VARIANT (vrinta); +#undef ARM_CHECK_BUILTIN_MODE_1 +#define ARM_CHECK_BUILTIN_MODE_1(C) \ + (out_mode == SImode && out_n == C \ + && in_mode == SFmode && in_n == C) + +#define ARM_FIND_VCVT_VARIANT(N) \ + (ARM_CHECK_BUILTIN_MODE (2) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \ + : (ARM_CHECK_BUILTIN_MODE (4) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \ + : NULL_TREE)) + +#define ARM_FIND_VCVTU_VARIANT(N) \ + (ARM_CHECK_BUILTIN_MODE (2) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \ + : (ARM_CHECK_BUILTIN_MODE (4) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \ + : NULL_TREE)) + case BUILT_IN_LROUNDF: + return out_unsigned_p + ? ARM_FIND_VCVTU_VARIANT (vcvta) + : ARM_FIND_VCVT_VARIANT (vcvta); + case BUILT_IN_LCEILF: + return out_unsigned_p + ? ARM_FIND_VCVTU_VARIANT (vcvtp) + : ARM_FIND_VCVT_VARIANT (vcvtp); + case BUILT_IN_LFLOORF: + return out_unsigned_p + ? 
ARM_FIND_VCVTU_VARIANT (vcvtm) + : ARM_FIND_VCVT_VARIANT (vcvtm); #undef ARM_CHECK_BUILTIN_MODE #define ARM_CHECK_BUILTIN_MODE(C, N) \ (out_mode == N##Imode && out_n == C \ @@ -30021,9 +30052,12 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) } return NULL_TREE; } +#undef ARM_FIND_VCVT_VARIANT +#undef ARM_FIND_VCVTU_VARIANT #undef ARM_CHECK_BUILTIN_MODE #undef ARM_FIND_VRINT_VARIANT + /* The AAPCS sets the maximum alignment of a vector to 64 bits. */ static HOST_WIDE_INT arm_vector_alignment (const_tree type) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index f4531f36e7a..efe5bda965a 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -141,6 +141,18 @@ VAR2 (RINT, vrintp, v2sf, v4sf), VAR2 (RINT, vrintm, v2sf, v4sf), VAR2 (RINT, vrintz, v2sf, v4sf), VAR2 (RINT, vrintx, v2sf, v4sf), +VAR1 (RINT, vcvtav2sf, v2si), +VAR1 (RINT, vcvtav4sf, v4si), +VAR1 (RINT, vcvtauv2sf, v2si), +VAR1 (RINT, vcvtauv4sf, v4si), +VAR1 (RINT, vcvtpv2sf, v2si), +VAR1 (RINT, vcvtpv4sf, v4si), +VAR1 (RINT, vcvtpuv2sf, v2si), +VAR1 (RINT, vcvtpuv4sf, v4si), +VAR1 (RINT, vcvtmv2sf, v2si), +VAR1 (RINT, vcvtmv4sf, v4si), +VAR1 (RINT, vcvtmuv2sf, v2si), +VAR1 (RINT, vcvtmuv4sf, v4si), VAR1 (VTBL, vtbl1, v8qi), VAR1 (VTBL, vtbl2, v8qi), VAR1 (VTBL, vtbl3, v8qi), diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index f7e0e1483c9..021372a107a 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -223,6 +223,8 @@ (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN]) +(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA]) + (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index dc364eeb64e..354a105ee95 100644 --- 
a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -629,6 +629,17 @@ [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] ) +(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" + [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") + (FIXUORS:<V_cmp_result> (unspec:VCVTF + [(match_operand:VCVTF 1 "register_operand" "w")] + NEON_VCVT)))] + "TARGET_NEON && TARGET_FPU_ARMV8" + "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" + [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") + (set_attr "predicable" "no")] +) + (define_insn "ior<mode>3" [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cc340df2d6d..97e73f83672 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2014-09-02 Kyrylo Tkachov + + PR target/62275 + * gcc.target/arm/vect-lceilf_1.c: New test. + * gcc.target/arm/vect-lfloorf_1.c: Likewise. + * gcc.target/arm/vect-lroundf_1.c: Likewise. + 2014-09-02 Kyrylo Tkachov PR target/62275 diff --git a/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c b/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c new file mode 100644 index 00000000000..75705aef5d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ +/* { dg-add-options arm_v8_neon } */ + +#define N 32 + +void +foo (int *output, float *input) +{ + int i = 0; + /* Vectorizable. 
*/ + for (i = 0; i < N; i++) + output[i] = __builtin_lceilf (input[i]); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c b/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c new file mode 100644 index 00000000000..298d54ed8e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ +/* { dg-add-options arm_v8_neon } */ + +#define N 32 + +void +foo (int *output, float *input) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = __builtin_lfloorf (input[i]); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c b/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c new file mode 100644 index 00000000000..64438214c05 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ +/* { dg-add-options arm_v8_neon } */ + +#define N 32 + +void +foo (int *output, float *input) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = __builtin_lroundf (input[i]); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */