[2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions.
PR target/62275
* config/arm/neon.md
(neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>):
New pattern.
* config/arm/iterators.md (NEON_VCVT): New int iterator.
* config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
* config/arm/arm.c (arm_builtin_vectorized_function): Handle
BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.

PR target/62275
* gcc.target/arm/vect-lceilf_1.c: New test.
* gcc.target/arm/vect-lfloorf_1.c: Likewise.
* gcc.target/arm/vect-lroundf_1.c: Likewise.

From-SVN: r214826
This commit is contained in:
parent
ababd93626
commit
e9e67af148
@ -1,3 +1,16 @@
|
||||
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
PR target/62275
|
||||
* config/arm/neon.md
|
||||
(neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode>
|
||||
<v_cmp_result>): New pattern.
|
||||
* config/arm/iterators.md (NEON_VCVT): New int iterator.
|
||||
* config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
|
||||
vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
|
||||
vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
|
||||
* config/arm/arm.c (arm_builtin_vectorized_function): Handle
|
||||
BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.
|
||||
|
||||
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
PR target/62275
|
||||
|
@ -29946,6 +29946,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
|
||||
{
|
||||
enum machine_mode in_mode, out_mode;
|
||||
int in_n, out_n;
|
||||
bool out_unsigned_p = TYPE_UNSIGNED (type_out);
|
||||
|
||||
if (TREE_CODE (type_out) != VECTOR_TYPE
|
||||
|| TREE_CODE (type_in) != VECTOR_TYPE)
|
||||
@ -29991,6 +29992,36 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
|
||||
return ARM_FIND_VRINT_VARIANT (vrintz);
|
||||
case BUILT_IN_ROUNDF:
|
||||
return ARM_FIND_VRINT_VARIANT (vrinta);
|
||||
#undef ARM_CHECK_BUILTIN_MODE_1
|
||||
#define ARM_CHECK_BUILTIN_MODE_1(C) \
|
||||
(out_mode == SImode && out_n == C \
|
||||
&& in_mode == SFmode && in_n == C)
|
||||
|
||||
#define ARM_FIND_VCVT_VARIANT(N) \
|
||||
(ARM_CHECK_BUILTIN_MODE (2) \
|
||||
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
|
||||
: (ARM_CHECK_BUILTIN_MODE (4) \
|
||||
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
|
||||
: NULL_TREE))
|
||||
|
||||
#define ARM_FIND_VCVTU_VARIANT(N) \
|
||||
(ARM_CHECK_BUILTIN_MODE (2) \
|
||||
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
|
||||
: (ARM_CHECK_BUILTIN_MODE (4) \
|
||||
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
|
||||
: NULL_TREE))
|
||||
case BUILT_IN_LROUNDF:
|
||||
return out_unsigned_p
|
||||
? ARM_FIND_VCVTU_VARIANT (vcvta)
|
||||
: ARM_FIND_VCVT_VARIANT (vcvta);
|
||||
case BUILT_IN_LCEILF:
|
||||
return out_unsigned_p
|
||||
? ARM_FIND_VCVTU_VARIANT (vcvtp)
|
||||
: ARM_FIND_VCVT_VARIANT (vcvtp);
|
||||
case BUILT_IN_LFLOORF:
|
||||
return out_unsigned_p
|
||||
? ARM_FIND_VCVTU_VARIANT (vcvtm)
|
||||
: ARM_FIND_VCVT_VARIANT (vcvtm);
|
||||
#undef ARM_CHECK_BUILTIN_MODE
|
||||
#define ARM_CHECK_BUILTIN_MODE(C, N) \
|
||||
(out_mode == N##Imode && out_n == C \
|
||||
@ -30021,9 +30052,12 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
|
||||
}
|
||||
return NULL_TREE;
|
||||
}
|
||||
#undef ARM_FIND_VCVT_VARIANT
|
||||
#undef ARM_FIND_VCVTU_VARIANT
|
||||
#undef ARM_CHECK_BUILTIN_MODE
|
||||
#undef ARM_FIND_VRINT_VARIANT
|
||||
|
||||
|
||||
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
|
||||
static HOST_WIDE_INT
|
||||
arm_vector_alignment (const_tree type)
|
||||
|
@ -141,6 +141,18 @@ VAR2 (RINT, vrintp, v2sf, v4sf),
|
||||
VAR2 (RINT, vrintm, v2sf, v4sf),
|
||||
VAR2 (RINT, vrintz, v2sf, v4sf),
|
||||
VAR2 (RINT, vrintx, v2sf, v4sf),
|
||||
VAR1 (RINT, vcvtav2sf, v2si),
|
||||
VAR1 (RINT, vcvtav4sf, v4si),
|
||||
VAR1 (RINT, vcvtauv2sf, v2si),
|
||||
VAR1 (RINT, vcvtauv4sf, v4si),
|
||||
VAR1 (RINT, vcvtpv2sf, v2si),
|
||||
VAR1 (RINT, vcvtpv4sf, v4si),
|
||||
VAR1 (RINT, vcvtpuv2sf, v2si),
|
||||
VAR1 (RINT, vcvtpuv4sf, v4si),
|
||||
VAR1 (RINT, vcvtmv2sf, v2si),
|
||||
VAR1 (RINT, vcvtmv4sf, v4si),
|
||||
VAR1 (RINT, vcvtmuv2sf, v2si),
|
||||
VAR1 (RINT, vcvtmuv4sf, v4si),
|
||||
VAR1 (VTBL, vtbl1, v8qi),
|
||||
VAR1 (VTBL, vtbl2, v8qi),
|
||||
VAR1 (VTBL, vtbl3, v8qi),
|
||||
|
@ -223,6 +223,8 @@
|
||||
(define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
|
||||
UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN])
|
||||
|
||||
(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA])
|
||||
|
||||
(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
|
||||
UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW])
|
||||
|
||||
|
@ -629,6 +629,17 @@
|
||||
[(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
|
||||
)
|
||||
|
||||
;; Vector float-to-integer conversion.  The source operand is wrapped in an
;; unspec selected by the NEON_VCVT int iterator (UNSPEC_NVRINTP,
;; UNSPEC_NVRINTM or UNSPEC_NVRINTA), and the result is produced in the
;; <V_cmp_result> mode.  The pattern is only enabled when both TARGET_NEON
;; and TARGET_FPU_ARMV8 hold, and it emits vcvt<nvrint_variant>.<su>32.f32.
;; NOTE(review): the signed/unsigned choice presumably comes from the
;; FIXUORS code iterator, which is defined outside this hunk -- confirm.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
|
||||
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
|
||||
(FIXUORS:<V_cmp_result> (unspec:VCVTF
|
||||
[(match_operand:VCVTF 1 "register_operand" "w")]
|
||||
NEON_VCVT)))]
|
||||
"TARGET_NEON && TARGET_FPU_ARMV8"
|
||||
"vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
|
||||
[(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
|
||||
(set_attr "predicable" "no")]
|
||||
)
|
||||
|
||||
(define_insn "ior<mode>3"
|
||||
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
|
||||
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
|
||||
|
@ -1,3 +1,10 @@
|
||||
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
PR target/62275
|
||||
* gcc.target/arm/vect-lceilf_1.c: New test.
|
||||
* gcc.target/arm/vect-lfloorf_1.c: Likewise.
|
||||
* gcc.target/arm/vect-lroundf_1.c: Likewise.
|
||||
|
||||
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
|
||||
PR target/62275
|
||||
|
18
gcc/testsuite/gcc.target/arm/vect-lceilf_1.c
Normal file
18
gcc/testsuite/gcc.target/arm/vect-lceilf_1.c
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_neon_ok } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
|
||||
/* { dg-add-options arm_v8_neon } */
|
||||
|
||||
#define N 32
|
||||
|
||||
/* Store __builtin_lceilf (input[i]) into output[i] for each of the N
   elements.  The dg-final scan in this test expects the vectorizer dump
   to report exactly one vectorized loop, so the loop is kept in its
   simplest form.  */
void
|
||||
foo (int *output, float *input)
|
||||
{
|
||||
int i = 0;
|
||||
/* Vectorizable: the body is a single __builtin_lceilf conversion per
   element.  */
|
||||
for (i = 0; i < N; i++)
|
||||
output[i] = __builtin_lceilf (input[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
18
gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c
Normal file
18
gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_neon_ok } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
|
||||
/* { dg-add-options arm_v8_neon } */
|
||||
|
||||
#define N 32
|
||||
|
||||
/* Store __builtin_lfloorf (input[i]) into output[i] for each of the N
   elements.  The dg-final scan in this test expects the vectorizer dump
   to report exactly one vectorized loop, so the loop is kept in its
   simplest form.  */
void
|
||||
foo (int *output, float *input)
|
||||
{
|
||||
int i = 0;
|
||||
/* Vectorizable: the body is a single __builtin_lfloorf conversion per
   element.  */
|
||||
for (i = 0; i < N; i++)
|
||||
output[i] = __builtin_lfloorf (input[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
18
gcc/testsuite/gcc.target/arm/vect-lroundf_1.c
Normal file
18
gcc/testsuite/gcc.target/arm/vect-lroundf_1.c
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_v8_neon_ok } */
|
||||
/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
|
||||
/* { dg-add-options arm_v8_neon } */
|
||||
|
||||
#define N 32
|
||||
|
||||
/* Store __builtin_lroundf (input[i]) into output[i] for each of the N
   elements.  The dg-final scan in this test expects the vectorizer dump
   to report exactly one vectorized loop, so the loop is kept in its
   simplest form.  */
void
|
||||
foo (int *output, float *input)
|
||||
{
|
||||
int i = 0;
|
||||
/* Vectorizable: the body is a single __builtin_lroundf conversion per
   element.  */
|
||||
for (i = 0; i < N; i++)
|
||||
output[i] = __builtin_lroundf (input[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
Loading…
Reference in New Issue
Block a user