arm.c (neon_vdup_constant, [...]): New.

gcc/
	* config/arm/arm.c (neon_vdup_constant, neon_make_constant): New.
	(neon_expand_vector_init): Use them.  Also handle non-constant
	vectors with identical elements and vectors with only one
	non-constant element.
	(arm_print_operand): Handle 'y' modifier.
	* config/arm/arm-protos.h (neon_make_constant): Declare.
	* config/arm/neon.md (neon_vdup_n<mode>): Split into two
	patterns.  Use VX instead of VDQW for the first one.  Allow
	a VFP alternative and V32 modes for the second one.
	* config/arm/neon.ml (shape_elt): Add Alternatives.
	(ops): Use Alternatives for vdup lane instructions.
	* config/arm/neon-testgen.ml (analyze_shape): Handle Alternatives.
	* config/arm/vec-common.md (mov<mode>): Use neon_make_constant.

	gcc/testsuite/
	* gcc.target/arm/neon: Regenerate generated tests.

From-SVN: r154094
commit 814a4c3b35 (parent 40f73786e9)
Daniel Jacobowitz <dan@codesourcery.com>, 2009-11-11 14:23:03 +00:00 (committed by Daniel Jacobowitz)
44 changed files with 276 additions and 53 deletions

gcc/ChangeLog

@@ -1,3 +1,19 @@
+2009-11-11  Daniel Jacobowitz  <dan@codesourcery.com>
+
+	* config/arm/arm.c (neon_vdup_constant, neon_make_constant): New.
+	(neon_expand_vector_init): Use them.  Also handle non-constant
+	vectors with identical elements and vectors with only one
+	non-constant element.
+	(arm_print_operand): Handle 'y' modifier.
+	* config/arm/arm-protos.h (neon_make_constant): Declare.
+	* config/arm/neon.md (neon_vdup_n<mode>): Split into two
+	patterns.  Use VX instead of VDQW for the first one.  Allow
+	a VFP alternative and V32 modes for the second one.
+	* config/arm/neon.ml (shape_elt): Add Alternatives.
+	(ops): Use Alternatives for vdup lane instructions.
+	* config/arm/neon-testgen.ml (analyze_shape): Handle Alternatives.
+	* config/arm/vec-common.md (mov<mode>): Use neon_make_constant.
+
 2009-11-11  Daniel Jacobowitz  <dan@codesourcery.com>
 
 	* config/arm/neon.md (*neon_mov<mode>): Reject two non-register

gcc/config/arm/arm-protos.h

@@ -68,6 +68,7 @@ extern char *neon_output_logic_immediate (const char *, rtx *,
                                            enum machine_mode, int, int);
 extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
                                   rtx (*) (rtx, rtx, rtx));
+extern rtx neon_make_constant (rtx);
 extern void neon_expand_vector_init (rtx, rtx);
 extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);

gcc/config/arm/arm.c

@@ -8085,25 +8085,171 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
     }
 }
 
-/* Initialize a vector with non-constant elements.  FIXME: We can do better
-   than the current implementation (building a vector on the stack and then
-   loading it) in many cases.  See rs6000.c.  */
+/* If VALS is a vector constant that can be loaded into a register
+   using VDUP, generate instructions to do so and return an RTX to
+   assign to the register.  Otherwise return NULL_RTX.  */
+
+static rtx
+neon_vdup_constant (rtx vals)
+{
+  enum machine_mode mode = GET_MODE (vals);
+  enum machine_mode inner_mode = GET_MODE_INNER (mode);
+  int n_elts = GET_MODE_NUNITS (mode);
+  bool all_same = true;
+  rtx x;
+  int i;
+
+  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
+    return NULL_RTX;
+
+  for (i = 0; i < n_elts; ++i)
+    {
+      x = XVECEXP (vals, 0, i);
+      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+        all_same = false;
+    }
+
+  if (!all_same)
+    /* The elements are not all the same.  We could handle repeating
+       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
+       {0, C, 0, C, 0, C, 0, C} which can be loaded using
+       vdup.i16).  */
+    return NULL_RTX;
+
+  /* We can load this constant by using VDUP and a constant in a
+     single ARM register.  This will be cheaper than a vector
+     load.  */
+
+  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+  return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+                         UNSPEC_VDUP_N);
+}
+
+/* Generate code to load VALS, which is a PARALLEL containing only
+   constants (for vec_init) or CONST_VECTOR, efficiently into a
+   register.  Returns an RTX to copy into the register, or NULL_RTX
+   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
+
+rtx
+neon_make_constant (rtx vals)
+{
+  enum machine_mode mode = GET_MODE (vals);
+  rtx target;
+  rtx const_vec = NULL_RTX;
+  int n_elts = GET_MODE_NUNITS (mode);
+  int n_const = 0;
+  int i;
+
+  if (GET_CODE (vals) == CONST_VECTOR)
+    const_vec = vals;
+  else if (GET_CODE (vals) == PARALLEL)
+    {
+      /* A CONST_VECTOR must contain only CONST_INTs and
+         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
+         Only store valid constants in a CONST_VECTOR.  */
+      for (i = 0; i < n_elts; ++i)
+        {
+          rtx x = XVECEXP (vals, 0, i);
+          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+            n_const++;
+        }
+      if (n_const == n_elts)
+        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+    }
+  else
+    gcc_unreachable ();
+
+  if (const_vec != NULL
+      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
+    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
+    return const_vec;
+  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
+       pipeline cycle; creating the constant takes one or two ARM
+       pipeline cycles.  */
+    return target;
+  else if (const_vec != NULL_RTX)
+    /* Load from constant pool.  On Cortex-A8 this takes two cycles
+       (for either double or quad vectors).  We can not take advantage
+       of single-cycle VLD1 because we need a PC-relative addressing
+       mode.  */
+    return const_vec;
+  else
+    /* A PARALLEL containing something not valid inside CONST_VECTOR.
+       We can not construct an initializer.  */
+    return NULL_RTX;
+}
+
+/* Initialize vector TARGET to VALS.  */
+
 void
 neon_expand_vector_init (rtx target, rtx vals)
 {
   enum machine_mode mode = GET_MODE (target);
-  enum machine_mode inner = GET_MODE_INNER (mode);
-  unsigned int i, n_elts = GET_MODE_NUNITS (mode);
-  rtx mem;
+  enum machine_mode inner_mode = GET_MODE_INNER (mode);
+  int n_elts = GET_MODE_NUNITS (mode);
+  int n_var = 0, one_var = -1;
+  bool all_same = true;
+  rtx x, mem;
+  int i;
 
   gcc_assert (VECTOR_MODE_P (mode));
 
+  for (i = 0; i < n_elts; ++i)
+    {
+      x = XVECEXP (vals, 0, i);
+      if (!CONSTANT_P (x))
+        ++n_var, one_var = i;
+
+      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+        all_same = false;
+    }
+
+  if (n_var == 0)
+    {
+      rtx constant = neon_make_constant (vals);
+      if (constant != NULL_RTX)
+        {
+          emit_move_insn (target, constant);
+          return;
+        }
+    }
+
+  /* Splat a single non-constant element if we can.  */
+  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
+    {
+      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+      emit_insn (gen_rtx_SET (VOIDmode, target,
+                              gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+                                              UNSPEC_VDUP_N)));
+      return;
+    }
+
+  /* One field is non-constant.  Load constant then overwrite varying
+     field.  This is more efficient than using the stack.  */
+  if (n_var == 1)
+    {
+      rtx copy = copy_rtx (vals);
+      rtvec ops;
+
+      /* Load constant part of vector, substitute neighboring value for
+         varying element.  */
+      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
+      neon_expand_vector_init (target, copy);
+
+      /* Insert variable.  */
+      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
+      ops = gen_rtvec (3, x, target, GEN_INT (one_var));
+      emit_insn (gen_rtx_SET (VOIDmode, target,
+                              gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
+      return;
+    }
+
+  /* Construct the vector in memory one field at a time
+     and load the whole vector.  */
   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
   for (i = 0; i < n_elts; i++)
-    emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
-                    XVECEXP (vals, 0, i));
+    emit_move_insn (adjust_address_nv (mem, inner_mode,
+                                       i * GET_MODE_SIZE (inner_mode)),
+                    XVECEXP (vals, 0, i));
   emit_move_insn (target, mem);
 }
@@ -15253,6 +15399,30 @@ arm_print_operand (FILE *stream, rtx x, int code)
         }
       return;
 
+    /* Translate an S register number into a D register number and element index.  */
+    case 'y':
+      {
+        int mode = GET_MODE (x);
+        int regno;
+
+        if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
+          {
+            output_operand_lossage ("invalid operand for code '%c'", code);
+            return;
+          }
+
+        regno = REGNO (x);
+        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+          {
+            output_operand_lossage ("invalid operand for code '%c'", code);
+            return;
+          }
+
+        regno = regno - FIRST_VFP_REGNUM;
+        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
+      }
+      return;
+
     /* Register specifier for vld1.16/vst1.16.  Translate the S register
        number into a D register number and element index.  */
     case 'z':
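As context only (not part of the commit), a minimal C sketch of the source-level initializers the rewritten neon_expand_vector_init is aimed at, assuming <arm_neon.h> vector types and a NEON-enabled compiler (e.g. -mfpu=neon); function names are illustrative:

#include <arm_neon.h>

/* All lanes identical but non-constant: with this patch the vector can
   be built with a single vdup.32 from a core register instead of going
   through a stack temporary.  */
int32x4_t
splat_int (int32_t x)
{
  int32x4_t v = { x, x, x, x };
  return v;
}

/* Exactly one non-constant lane: the constant part is loaded first and
   the varying lane is then inserted via the UNSPEC_VSET_LANE path.  */
int32x4_t
one_varying_lane (int32_t x)
{
  int32x4_t v = { 1, 2, x, 4 };
  return v;
}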

gcc/config/arm/neon-testgen.ml

@@ -175,6 +175,7 @@ let rec analyze_shape shape =
     | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
     | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
     | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]"
+    | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")"
   in
   match shape with
     All (n, elt) -> commas analyze_shape_elt (n_things n elt) ""

gcc/config/arm/neon.md

@@ -2687,9 +2687,9 @@
 })
 
 (define_insn "neon_vdup_n<mode>"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:<V_elem> 1 "s_register_operand" "r")]
-                     UNSPEC_VDUP_N))]
+  [(set (match_operand:VX 0 "s_register_operand" "=w")
+        (unspec:VX [(match_operand:<V_elem> 1 "s_register_operand" "r")]
+                   UNSPEC_VDUP_N))]
   "TARGET_NEON"
   "vdup%?.<V_sz_elem>\t%<V_reg>0, %1"
   ;; Assume this schedules like vmov.
@@ -2697,6 +2697,19 @@
    (set_attr "neon_type" "neon_bp_simple")]
 )
 
+(define_insn "neon_vdup_n<mode>"
+  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
+        (unspec:V32 [(match_operand:<V_elem> 1 "s_register_operand" "r,t")]
+                    UNSPEC_VDUP_N))]
+  "TARGET_NEON"
+  "@
+   vdup%?.<V_sz_elem>\t%<V_reg>0, %1
+   vdup%?.<V_sz_elem>\t%<V_reg>0, %y1"
+  ;; Assume this schedules like vmov.
+  [(set_attr "predicable" "yes")
+   (set_attr "neon_type" "neon_bp_simple")]
+)
+
 (define_insn "neon_vdup_ndi"
   [(set (match_operand:DI 0 "s_register_operand" "=w")
         (unspec:DI [(match_operand:DI 1 "s_register_operand" "r")]
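For illustration (not from the commit), the second alternative of the new V32 pattern matters when the scalar already lives in a VFP single register. Assuming -mfloat-abi=hard so that a float argument arrives in s0, something like the following can be duplicated directly from d0[0] via the %y operand modifier instead of first being moved to a core register:

#include <arm_neon.h>

float32x4_t
splat_float (float32_t arg)
{
  /* May assemble as "vdup.32 qN, d0[0]" rather than "vdup.32 qN, rM";
     the regenerated tests below accept either operand form.  */
  return vdupq_n_f32 (arg);
}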

gcc/config/arm/neon.ml

@@ -68,6 +68,7 @@ type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
                | Element_of_dreg        (* Used for "lane" variants.  *)
                | Element_of_qreg        (* Likewise.  *)
                | All_elements_of_dreg   (* Used for "dup" variants.  *)
+               | Alternatives of shape_elt list (* Used for multiple valid operands *)
 
 type shape_form = All of int * shape_elt
                 | Long
@@ -1008,7 +1009,10 @@ let ops =
       pf_su_8_64;
 
     (* Set all lanes to the same value.  *)
-    Vdup_n, [],
+    Vdup_n,
+      [Disassembles_as [Use_operands [| Dreg;
+                                        Alternatives [ Corereg;
+                                                       Element_of_dreg ] |]]],
       Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
       pf_su_8_32;
     Vdup_n,
@@ -1016,7 +1020,10 @@ let ops =
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
       Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
       [S64; U64];
-    Vdup_n, [],
+    Vdup_n,
+      [Disassembles_as [Use_operands [| Qreg;
+                                        Alternatives [ Corereg;
+                                                       Element_of_dreg ] |]]],
       Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
       pf_su_8_32;
     Vdup_n,
@@ -1028,7 +1035,10 @@ let ops =
 
     (* These are just aliases for the above.  *)
     Vmov_n,
-      [Builtin_name "vdup_n"],
+      [Builtin_name "vdup_n";
+       Disassembles_as [Use_operands [| Dreg;
+                                        Alternatives [ Corereg;
+                                                       Element_of_dreg ] |]]],
       Use_operands [| Dreg; Corereg |],
       "vmov_n", bits_1, pf_su_8_32;
     Vmov_n,
@@ -1038,7 +1048,10 @@ let ops =
       Use_operands [| Dreg; Corereg |],
       "vmov_n", notype_1, [S64; U64];
     Vmov_n,
-      [Builtin_name "vdupQ_n"],
+      [Builtin_name "vdupQ_n";
+       Disassembles_as [Use_operands [| Qreg;
+                                        Alternatives [ Corereg;
+                                                       Element_of_dreg ] |]]],
       Use_operands [| Qreg; Corereg |],
       "vmovQ_n", bits_1, pf_su_8_32;
     Vmov_n,

gcc/config/arm/vec-common.md

@@ -42,6 +42,11 @@
 {
   if (GET_CODE (operands[0]) != REG)
     operands[1] = force_reg (<MODE>mode, operands[1]);
+  else if (TARGET_NEON && CONSTANT_P (operands[1]))
+    {
+      operands[1] = neon_make_constant (operands[1]);
+      gcc_assert (operands[1] != NULL_RTX);
+    }
 }
 })
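A hypothetical example (not from the commit) of the constant case that neon_make_constant now handles in the mov<mode> expander: every lane is equal but the value is not encodable as a NEON move immediate, so instead of a literal-pool load the vector can be built from a core register plus a vdup:

#include <arm_neon.h>

uint32x4_t
broadcast_constant (void)
{
  /* 0x12345678 is not a valid vmov immediate; the whole vector can be
     materialised as movw/movt into a core register followed by
     vdup.32 qN, rM.  */
  uint32x4_t v = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 };
  return v;
}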

gcc/testsuite/ChangeLog

@@ -1,3 +1,7 @@
+2009-11-11  Daniel Jacobowitz  <dan@codesourcery.com>
+
+	* gcc.target/arm/neon: Regenerate generated tests.
+
 2009-11-10  Jason Merrill  <jason@redhat.com>
 
 	* g++.dg/init/placement5.C: New.


@ -15,5 +15,5 @@ void test_vdupQ_nf32 (void)
out_float32x4_t = vdupq_n_f32 (arg0_float32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_np16 (void)
out_poly16x8_t = vdupq_n_p16 (arg0_poly16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_np8 (void)
out_poly8x16_t = vdupq_n_p8 (arg0_poly8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_ns16 (void)
out_int16x8_t = vdupq_n_s16 (arg0_int16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_ns32 (void)
out_int32x4_t = vdupq_n_s32 (arg0_int32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_ns8 (void)
out_int8x16_t = vdupq_n_s8 (arg0_int8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_nu16 (void)
out_uint16x8_t = vdupq_n_u16 (arg0_uint16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_nu32 (void)
out_uint32x4_t = vdupq_n_u32 (arg0_uint32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdupQ_nu8 (void)
out_uint8x16_t = vdupq_n_u8 (arg0_uint8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_nf32 (void)
out_float32x2_t = vdup_n_f32 (arg0_float32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_np16 (void)
out_poly16x4_t = vdup_n_p16 (arg0_poly16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_np8 (void)
out_poly8x8_t = vdup_n_p8 (arg0_poly8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_ns16 (void)
out_int16x4_t = vdup_n_s16 (arg0_int16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_ns32 (void)
out_int32x2_t = vdup_n_s32 (arg0_int32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_ns8 (void)
out_int8x8_t = vdup_n_s8 (arg0_int8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_nu16 (void)
out_uint16x4_t = vdup_n_u16 (arg0_uint16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_nu32 (void)
out_uint32x2_t = vdup_n_u32 (arg0_uint32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vdup_nu8 (void)
out_uint8x8_t = vdup_n_u8 (arg0_uint8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_nf32 (void)
out_float32x4_t = vmovq_n_f32 (arg0_float32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_np16 (void)
out_poly16x8_t = vmovq_n_p16 (arg0_poly16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_np8 (void)
out_poly8x16_t = vmovq_n_p8 (arg0_poly8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_ns16 (void)
out_int16x8_t = vmovq_n_s16 (arg0_int16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_ns32 (void)
out_int32x4_t = vmovq_n_s32 (arg0_int32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_ns8 (void)
out_int8x16_t = vmovq_n_s8 (arg0_int8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_nu16 (void)
out_uint16x8_t = vmovq_n_u16 (arg0_uint16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_nu32 (void)
out_uint32x4_t = vmovq_n_u32 (arg0_uint32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmovQ_nu8 (void)
out_uint8x16_t = vmovq_n_u8 (arg0_uint8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_nf32 (void)
out_float32x2_t = vmov_n_f32 (arg0_float32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_np16 (void)
out_poly16x4_t = vmov_n_p16 (arg0_poly16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_np8 (void)
out_poly8x8_t = vmov_n_p8 (arg0_poly8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_ns16 (void)
out_int16x4_t = vmov_n_s16 (arg0_int16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_ns32 (void)
out_int32x2_t = vmov_n_s32 (arg0_int32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_ns8 (void)
out_int8x8_t = vmov_n_s8 (arg0_int8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_nu16 (void)
out_uint16x4_t = vmov_n_u16 (arg0_uint16_t);
}
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_nu32 (void)
out_uint32x2_t = vmov_n_u32 (arg0_uint32_t);
}
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */


@ -15,5 +15,5 @@ void test_vmov_nu8 (void)
out_uint8x8_t = vmov_n_u8 (arg0_uint8_t);
}
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */