[ARM] Vectorise bswap* in aarch32.

	* config/arm/neon.md (neon_bswap<mode>): New pattern.
	* config/arm/arm.c (neon_itype): Add NEON_BSWAP.
	(arm_init_neon_builtins): Handle NEON_BSWAP.
	Define required type nodes.
	(arm_expand_neon_builtin): Handle NEON_BSWAP.
	(arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
	* config/arm/arm_neon_builtins.def (bswap): Define builtins.
	* config/arm/iterators.md (VDQHSD): New mode iterator.

	* lib/target-supports.exp (check_effective_target_vect_bswap):
	Specify arm*-*-* support.

From-SVN: r210967
This commit is contained in:
Kyrylo Tkachov 2014-05-27 10:56:25 +00:00 committed by Kyrylo Tkachov
parent 597f59975c
commit 7a10ea9f10
7 changed files with 112 additions and 13 deletions

View File

@ -1,3 +1,14 @@
2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/neon.md (neon_bswap<mode>): New pattern.
* config/arm/arm.c (neon_itype): Add NEON_BSWAP.
(arm_init_neon_builtins): Handle NEON_BSWAP.
Define required type nodes.
(arm_expand_neon_builtin): Handle NEON_BSWAP.
(arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
* config/arm/arm_neon_builtins.def (bswap): Define builtins.
* config/arm/iterators.md (VDQHSD): New mode iterator.
2014-05-27 Richard Biener <rguenther@suse.de>
* tree-vrp.c (vrp_evaluate_conditional_warnv_with_ops_using_ranges):

View File

@ -23065,6 +23065,7 @@ typedef enum {
NEON_BINOP,
NEON_TERNOP,
NEON_UNOP,
NEON_BSWAP,
NEON_GETLANE,
NEON_SETLANE,
NEON_CREATE,
@ -23531,14 +23532,19 @@ arm_init_neon_builtins (void)
tree V8QI_type_node;
tree V4HI_type_node;
tree V4UHI_type_node;
tree V4HF_type_node;
tree V2SI_type_node;
tree V2USI_type_node;
tree V2SF_type_node;
tree V16QI_type_node;
tree V8HI_type_node;
tree V8UHI_type_node;
tree V4SI_type_node;
tree V4USI_type_node;
tree V4SF_type_node;
tree V2DI_type_node;
tree V2UDI_type_node;
tree intUQI_type_node;
tree intUHI_type_node;
@ -23613,16 +23619,26 @@ arm_init_neon_builtins (void)
const_intDI_pointer_node = build_pointer_type (const_intDI_node);
const_float_pointer_node = build_pointer_type (const_float_node);
/* Unsigned integer types for various mode sizes. */
intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
/* Now create vector types based on our NEON element types. */
/* 64-bit vectors. */
V8QI_type_node =
build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
V4HI_type_node =
build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
V4UHI_type_node =
build_vector_type_for_mode (intUHI_type_node, V4HImode);
V4HF_type_node =
build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
V2SI_type_node =
build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
V2USI_type_node =
build_vector_type_for_mode (intUSI_type_node, V2SImode);
V2SF_type_node =
build_vector_type_for_mode (neon_float_type_node, V2SFmode);
/* 128-bit vectors. */
@ -23630,19 +23646,18 @@ arm_init_neon_builtins (void)
build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
V8HI_type_node =
build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
V8UHI_type_node =
build_vector_type_for_mode (intUHI_type_node, V8HImode);
V4SI_type_node =
build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
V4USI_type_node =
build_vector_type_for_mode (intUSI_type_node, V4SImode);
V4SF_type_node =
build_vector_type_for_mode (neon_float_type_node, V4SFmode);
V2DI_type_node =
build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
/* Unsigned integer types for various mode sizes. */
intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
V2UDI_type_node =
build_vector_type_for_mode (intUDI_type_node, V2DImode);
(*lang_hooks.types.register_builtin_type) (intUQI_type_node,
@ -23677,8 +23692,6 @@ arm_init_neon_builtins (void)
if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
{
tree V4USI_type_node =
build_vector_type_for_mode (intUSI_type_node, V4SImode);
tree V16UQI_type_node =
build_vector_type_for_mode (intUQI_type_node, V16QImode);
@ -24023,6 +24036,31 @@ arm_init_neon_builtins (void)
ftype = build_function_type_list (return_type, eltype, NULL);
break;
}
/* Byte-swap builtins: build a one-argument function type whose
   argument and return type are the same vector type.  */
case NEON_BSWAP:
{
/* Despite its name, ELTYPE holds the whole vector type node, not the
   type of a single element.  */
tree eltype = NULL_TREE;
/* Select the unsigned vector type node that corresponds to the mode of
   the insn's operand 1 (the input operand).  */
switch (insn_data[d->code].operand[1].mode)
{
case V4HImode:
eltype = V4UHI_type_node;
break;
case V8HImode:
eltype = V8UHI_type_node;
break;
case V2SImode:
eltype = V2USI_type_node;
break;
case V4SImode:
eltype = V4USI_type_node;
break;
case V2DImode:
eltype = V2UDI_type_node;
break;
/* Any other mode is treated as an internal error.  */
default: gcc_unreachable ();
}
/* Same type for the result and the single argument.  */
ftype = build_function_type_list (eltype, eltype, NULL);
break;
}
default:
gcc_unreachable ();
}
@ -25197,6 +25235,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
case NEON_SPLIT:
case NEON_FLOAT_WIDEN:
case NEON_FLOAT_NARROW:
case NEON_BSWAP:
case NEON_REINTERP:
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
@ -29677,8 +29716,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
int in_n, out_n;
if (TREE_CODE (type_out) != VECTOR_TYPE
|| TREE_CODE (type_in) != VECTOR_TYPE
|| !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
|| TREE_CODE (type_in) != VECTOR_TYPE)
return NULL_TREE;
out_mode = TYPE_MODE (TREE_TYPE (type_out));
@ -29690,7 +29728,13 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
decl of the vectorized builtin for the appropriate vector mode.
NULL_TREE is returned if no such builtin is available. */
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C) \
#define ARM_CHECK_BUILTIN_MODE(C) \
(TARGET_NEON && TARGET_FPU_ARMV8 \
&& flag_unsafe_math_optimizations \
&& ARM_CHECK_BUILTIN_MODE_1 (C))
#undef ARM_CHECK_BUILTIN_MODE_1
#define ARM_CHECK_BUILTIN_MODE_1(C) \
(out_mode == SFmode && out_n == C \
&& in_mode == SFmode && in_n == C)
@ -29715,6 +29759,30 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return ARM_FIND_VRINT_VARIANT (vrintz);
case BUILT_IN_ROUNDF:
return ARM_FIND_VRINT_VARIANT (vrinta);
/* Redefine ARM_CHECK_BUILTIN_MODE for the integer byte-swap builtins.
   C is the value that both out_n and in_n must equal, and N is the
   letter (H, S or D) pasted in front of "Imode" to name the mode that
   both out_mode and in_mode must equal.
   NOTE(review): unlike the floating-point form of this macro, this
   definition does not itself test TARGET_NEON; confirm that the NEON
   bswap builtin decls cannot be returned when NEON is unavailable.  */
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##Imode && out_n == C \
&& in_mode == N##Imode && in_n == C)
/* 16-bit byte swap: HImode elements, 4 or 8 of them.  */
case BUILT_IN_BSWAP16:
if (ARM_CHECK_BUILTIN_MODE (4, H))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
else if (ARM_CHECK_BUILTIN_MODE (8, H))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
else
return NULL_TREE;
/* 32-bit byte swap: SImode elements, 2 or 4 of them.  */
case BUILT_IN_BSWAP32:
if (ARM_CHECK_BUILTIN_MODE (2, S))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
else if (ARM_CHECK_BUILTIN_MODE (4, S))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
else
return NULL_TREE;
/* 64-bit byte swap: only the two-element DImode form is handled.  */
case BUILT_IN_BSWAP64:
if (ARM_CHECK_BUILTIN_MODE (2, D))
return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
else
return NULL_TREE;
default:
return NULL_TREE;
}

View File

@ -88,6 +88,7 @@ VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di),
VAR2 (UNOP, vcnt, v8qi, v16qi),
VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),

View File

@ -116,6 +116,9 @@
;; Vector modes including 64-bit integer elements, but no floats.
(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
;; Vector modes for H, S and D types.
(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
;; Vector modes for float->int conversions.
(define_mode_iterator VCVTF [V2SF V4SF])

View File

@ -2547,6 +2547,14 @@
[(set_attr "type" "neon_qabs<q>")]
)
;; Byte swap of a vector register: operand 0 receives the bswap of
;; operand 1, for each mode in the VDQHSD iterator.  The pattern is
;; enabled only when TARGET_NEON holds and emits a single vrev
;; instruction with a ".8" suffix.
;; NOTE(review): <V_sz_elem> presumably expands to the element width in
;; bits (giving vrev16/vrev32/vrev64) and <V_reg> to the register-name
;; modifier for the mode; confirm against iterators.md.
(define_insn "neon_bswap<mode>"
[(set (match_operand:VDQHSD 0 "register_operand" "=w")
(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
"TARGET_NEON"
"vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
[(set_attr "type" "neon_rev<q>")]
)
(define_expand "neon_vneg<mode>"
[(match_operand:VDQW 0 "s_register_operand" "")
(match_operand:VDQW 1 "s_register_operand" "")

View File

@ -1,3 +1,8 @@
2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* lib/target-supports.exp (check_effective_target_vect_bswap):
Specify arm*-*-* support.
2014-05-27 Dominique d'Humieres <dominiq@lps.ens.fr>
PR testsuite/61319

View File

@ -3306,7 +3306,10 @@ proc check_effective_target_vect_bswap { } {
verbose "check_effective_target_vect_bswap: using cached result" 2
} else {
set et_vect_bswap_saved 0
if { [istarget aarch64*-*-*] } {
if { [istarget aarch64*-*-*]
|| ([istarget arm*-*-*]
&& [check_effective_target_arm_neon])
} {
set et_vect_bswap_saved 1
}
}