re PR target/55701 (Inline some instances of memset for ARM)
PR target/55701 * config/arm/arm.md (setmem): New pattern. * config/arm/arm-protos.h (struct tune_params): New fields. (arm_gen_setmem): New prototype. * config/arm/arm.c (arm_slowmul_tune): Initialize new fields. (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto. (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto. (arm_cortex_a8_tune, arm_cortex_a7_tune): Ditto. (arm_cortex_a15_tune, arm_cortex_a53_tune): Ditto. (arm_cortex_a57_tune, arm_cortex_a5_tune): Ditto. (arm_cortex_a9_tune, arm_cortex_a12_tune): Ditto. (arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune): Ditto. (arm_const_inline_cost): New function. (arm_block_set_max_insns): New function. (arm_block_set_non_vect_profit_p): New function. (arm_block_set_vect_profit_p): New function. (arm_block_set_unaligned_vect): New function. (arm_block_set_aligned_vect): New function. (arm_block_set_unaligned_non_vect): New function. (arm_block_set_aligned_non_vect): New function. (arm_block_set_vect, arm_gen_setmem): New functions. testsuite * gcc.target/arm/memset-inline-1.c: New test. * gcc.target/arm/memset-inline-2.c: New test. * gcc.target/arm/memset-inline-3.c: New test. * gcc.target/arm/memset-inline-4.c: New test. * gcc.target/arm/memset-inline-5.c: New test. * gcc.target/arm/memset-inline-6.c: New test. * gcc.target/arm/memset-inline-7.c: New test. * gcc.target/arm/memset-inline-8.c: New test. * gcc.target/arm/memset-inline-9.c: New test. * gcc.target/arm/memset-inline-10.c: New test. From-SVN: r214937
This commit is contained in:
parent
6f22122491
commit
ad42115965
@ -1,3 +1,27 @@
|
||||
2014-09-05 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR target/55701
|
||||
* config/arm/arm.md (setmem): New pattern.
|
||||
* config/arm/arm-protos.h (struct tune_params): New fields.
|
||||
(arm_gen_setmem): New prototype.
|
||||
* config/arm/arm.c (arm_slowmul_tune): Initialize new fields.
|
||||
(arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
|
||||
(arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
|
||||
(arm_cortex_a8_tune, arm_cortex_a7_tune): Ditto.
|
||||
(arm_cortex_a15_tune, arm_cortex_a53_tune): Ditto.
|
||||
(arm_cortex_a57_tune, arm_cortex_a5_tune): Ditto.
|
||||
(arm_cortex_a9_tune, arm_cortex_a12_tune): Ditto.
|
||||
(arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
|
||||
(arm_const_inline_cost): New function.
|
||||
(arm_block_set_max_insns): New function.
|
||||
(arm_block_set_non_vect_profit_p): New function.
|
||||
(arm_block_set_vect_profit_p): New function.
|
||||
(arm_block_set_unaligned_vect): New function.
|
||||
(arm_block_set_aligned_vect): New function.
|
||||
(arm_block_set_unaligned_non_vect): New function.
|
||||
(arm_block_set_aligned_non_vect): New function.
|
||||
(arm_block_set_vect, arm_gen_setmem): New functions.
|
||||
|
||||
2014-09-05 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
* config/arm/arm.md (arm_movqi_insn): Use Uh instead of m constraint.
|
||||
|
@ -278,6 +278,10 @@ struct tune_params
|
||||
/* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags
|
||||
would be set. */
|
||||
bool disparage_partial_flag_setting_t16_encodings;
|
||||
/* Prefer to inline string operations like memset by using Neon. */
|
||||
bool string_ops_prefer_neon;
|
||||
/* Maximum number of instructions to inline calls to memset. */
|
||||
int max_insns_inline_memset;
|
||||
};
|
||||
|
||||
extern const struct tune_params *current_tune;
|
||||
@ -290,6 +294,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
|
||||
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
|
||||
#endif /* RTX_CODE */
|
||||
|
||||
extern bool arm_gen_setmem (rtx *);
|
||||
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
|
||||
extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
|
||||
|
||||
|
@ -1698,7 +1698,9 @@ const struct tune_params arm_slowmul_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_fastmul_tune =
|
||||
@ -1715,7 +1717,9 @@ const struct tune_params arm_fastmul_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
/* StrongARM has early execution of branches, so a sequence that is worth
|
||||
@ -1735,7 +1739,9 @@ const struct tune_params arm_strongarm_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_xscale_tune =
|
||||
@ -1752,7 +1758,9 @@ const struct tune_params arm_xscale_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_9e_tune =
|
||||
@ -1769,7 +1777,9 @@ const struct tune_params arm_9e_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_v6t2_tune =
|
||||
@ -1786,7 +1796,9 @@ const struct tune_params arm_v6t2_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
|
||||
@ -1804,7 +1816,9 @@ const struct tune_params arm_cortex_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a8_tune =
|
||||
@ -1821,7 +1835,9 @@ const struct tune_params arm_cortex_a8_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
true, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a7_tune =
|
||||
@ -1838,7 +1854,9 @@ const struct tune_params arm_cortex_a7_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
true, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a15_tune =
|
||||
@ -1855,7 +1873,9 @@ const struct tune_params arm_cortex_a15_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
true, true /* Prefer 32-bit encodings. */
|
||||
true, true, /* Prefer 32-bit encodings. */
|
||||
true, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a53_tune =
|
||||
@ -1872,7 +1892,9 @@ const struct tune_params arm_cortex_a53_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a57_tune =
|
||||
@ -1889,7 +1911,9 @@ const struct tune_params arm_cortex_a57_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
true, true /* Prefer 32-bit encodings. */
|
||||
true, true, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
|
||||
@ -1909,7 +1933,9 @@ const struct tune_params arm_cortex_a5_tune =
|
||||
{false, false}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
true, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a9_tune =
|
||||
@ -1926,7 +1952,9 @@ const struct tune_params arm_cortex_a9_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a12_tune =
|
||||
@ -1943,7 +1971,9 @@ const struct tune_params arm_cortex_a12_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
true, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
|
||||
@ -1967,7 +1997,9 @@ const struct tune_params arm_v7m_tune =
|
||||
{false, false}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
|
||||
@ -1986,7 +2018,9 @@ const struct tune_params arm_v6m_tune =
|
||||
{false, false}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_fa726te_tune =
|
||||
@ -2003,7 +2037,9 @@ const struct tune_params arm_fa726te_tune =
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false, /* Prefer Neon for 64-bits bitops. */
|
||||
false, false /* Prefer 32-bit encodings. */
|
||||
false, false, /* Prefer 32-bit encodings. */
|
||||
false, /* Prefer Neon for stringops. */
|
||||
8 /* Maximum insns to inline memset. */
|
||||
};
|
||||
|
||||
|
||||
@ -16903,6 +16939,14 @@ arm_const_double_inline_cost (rtx val)
|
||||
NULL_RTX, NULL_RTX, 0, 0));
|
||||
}
|
||||
|
||||
/* Cost of loading a SImode constant. */
|
||||
static inline int
|
||||
arm_const_inline_cost (enum rtx_code code, rtx val)
|
||||
{
|
||||
return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
|
||||
NULL_RTX, NULL_RTX, 1, 0);
|
||||
}
|
||||
|
||||
/* Return true if it is worthwhile to split a 64-bit constant into two
|
||||
32-bit operations. This is the case if optimizing for size, or
|
||||
if we have load delay slots, or if one 32-bit part can be done with
|
||||
@ -31587,6 +31631,519 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
|
||||
|
||||
}
|
||||
|
||||
/* Maximum number of instructions to set block of memory. */
|
||||
static int
|
||||
arm_block_set_max_insns (void)
|
||||
{
|
||||
if (optimize_function_for_size_p (cfun))
|
||||
return 4;
|
||||
else
|
||||
return current_tune->max_insns_inline_memset;
|
||||
}
|
||||
|
||||
/* Return TRUE if it's profitable to set block of memory for
|
||||
non-vectorized case. VAL is the value to set the memory
|
||||
with. LENGTH is the number of bytes to set. ALIGN is the
|
||||
alignment of the destination memory in bytes. UNALIGNED_P
|
||||
is TRUE if we can only set the memory with instructions
|
||||
meeting alignment requirements. USE_STRD_P is TRUE if we
|
||||
can use strd to set the memory. */
|
||||
static bool
|
||||
arm_block_set_non_vect_profit_p (rtx val,
|
||||
unsigned HOST_WIDE_INT length,
|
||||
unsigned HOST_WIDE_INT align,
|
||||
bool unaligned_p, bool use_strd_p)
|
||||
{
|
||||
int num = 0;
|
||||
/* For leftovers in bytes of 0-7, we can set the memory block using
|
||||
strb/strh/str with minimum instruction number. */
|
||||
const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
|
||||
|
||||
if (unaligned_p)
|
||||
{
|
||||
num = arm_const_inline_cost (SET, val);
|
||||
num += length / align + length % align;
|
||||
}
|
||||
else if (use_strd_p)
|
||||
{
|
||||
num = arm_const_double_inline_cost (val);
|
||||
num += (length >> 3) + leftover[length & 7];
|
||||
}
|
||||
else
|
||||
{
|
||||
num = arm_const_inline_cost (SET, val);
|
||||
num += (length >> 2) + leftover[length & 3];
|
||||
}
|
||||
|
||||
/* We may be able to combine last pair STRH/STRB into a single STR
|
||||
by shifting one byte back. */
|
||||
if (unaligned_access && length > 3 && (length & 3) == 3)
|
||||
num--;
|
||||
|
||||
return (num <= arm_block_set_max_insns ());
|
||||
}
|
||||
|
||||
/* Return TRUE if it's profitable to set block of memory for
|
||||
vectorized case. LENGTH is the number of bytes to set.
|
||||
ALIGN is the alignment of destination memory in bytes.
|
||||
MODE is the vector mode used to set the memory. */
|
||||
static bool
|
||||
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
|
||||
unsigned HOST_WIDE_INT align,
|
||||
enum machine_mode mode)
|
||||
{
|
||||
int num;
|
||||
bool unaligned_p = ((align & 3) != 0);
|
||||
unsigned int nelt = GET_MODE_NUNITS (mode);
|
||||
|
||||
/* Instruction loading constant value. */
|
||||
num = 1;
|
||||
/* Instructions storing the memory. */
|
||||
num += (length + nelt - 1) / nelt;
|
||||
/* Instructions adjusting the address expression. Only need to
|
||||
adjust address expression if it's 4 bytes aligned and bytes
|
||||
leftover can only be stored by mis-aligned store instruction. */
|
||||
if (!unaligned_p && (length & 3) != 0)
|
||||
num++;
|
||||
|
||||
/* Store the first 16 bytes using vst1:v16qi for the aligned case. */
|
||||
if (!unaligned_p && mode == V16QImode)
|
||||
num--;
|
||||
|
||||
return (num <= arm_block_set_max_insns ());
|
||||
}
|
||||
|
||||
/* Set a block of memory using vectorization instructions for the
|
||||
unaligned case. We fill the first LENGTH bytes of the memory
|
||||
area starting from DSTBASE with byte constant VALUE. ALIGN is
|
||||
the alignment requirement of memory. Return TRUE if succeeded. */
|
||||
static bool
|
||||
arm_block_set_unaligned_vect (rtx dstbase,
|
||||
unsigned HOST_WIDE_INT length,
|
||||
unsigned HOST_WIDE_INT value,
|
||||
unsigned HOST_WIDE_INT align)
|
||||
{
|
||||
unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
|
||||
rtx dst, mem;
|
||||
rtx val_elt, val_vec, reg;
|
||||
rtx rval[MAX_VECT_LEN];
|
||||
rtx (*gen_func) (rtx, rtx);
|
||||
enum machine_mode mode;
|
||||
unsigned HOST_WIDE_INT v = value;
|
||||
|
||||
gcc_assert ((align & 0x3) != 0);
|
||||
nelt_v8 = GET_MODE_NUNITS (V8QImode);
|
||||
nelt_v16 = GET_MODE_NUNITS (V16QImode);
|
||||
if (length >= nelt_v16)
|
||||
{
|
||||
mode = V16QImode;
|
||||
gen_func = gen_movmisalignv16qi;
|
||||
}
|
||||
else
|
||||
{
|
||||
mode = V8QImode;
|
||||
gen_func = gen_movmisalignv8qi;
|
||||
}
|
||||
nelt_mode = GET_MODE_NUNITS (mode);
|
||||
gcc_assert (length >= nelt_mode);
|
||||
/* Skip if it isn't profitable. */
|
||||
if (!arm_block_set_vect_profit_p (length, align, mode))
|
||||
return false;
|
||||
|
||||
dst = copy_addr_to_reg (XEXP (dstbase, 0));
|
||||
mem = adjust_automodify_address (dstbase, mode, dst, 0);
|
||||
|
||||
v = sext_hwi (v, BITS_PER_WORD);
|
||||
val_elt = GEN_INT (v);
|
||||
for (j = 0; j < nelt_mode; j++)
|
||||
rval[j] = val_elt;
|
||||
|
||||
reg = gen_reg_rtx (mode);
|
||||
val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
|
||||
/* Emit instruction loading the constant value. */
|
||||
emit_move_insn (reg, val_vec);
|
||||
|
||||
/* Handle nelt_mode bytes in a vector. */
|
||||
for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
|
||||
{
|
||||
emit_insn ((*gen_func) (mem, reg));
|
||||
if (i + 2 * nelt_mode <= length)
|
||||
emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
|
||||
}
|
||||
|
||||
/* If there are not less than nelt_v8 bytes leftover, we must be in
|
||||
V16QI mode. */
|
||||
gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
|
||||
|
||||
/* Handle (8, 16) bytes leftover. */
|
||||
if (i + nelt_v8 < length)
|
||||
{
|
||||
emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
|
||||
/* We are shifting bytes back, set the alignment accordingly. */
|
||||
if ((length & 1) != 0 && align >= 2)
|
||||
set_mem_align (mem, BITS_PER_UNIT);
|
||||
|
||||
emit_insn (gen_movmisalignv16qi (mem, reg));
|
||||
}
|
||||
/* Handle (0, 8] bytes leftover. */
|
||||
else if (i < length && i + nelt_v8 >= length)
|
||||
{
|
||||
if (mode == V16QImode)
|
||||
{
|
||||
reg = gen_lowpart (V8QImode, reg);
|
||||
mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
|
||||
}
|
||||
emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
|
||||
+ (nelt_mode - nelt_v8))));
|
||||
/* We are shifting bytes back, set the alignment accordingly. */
|
||||
if ((length & 1) != 0 && align >= 2)
|
||||
set_mem_align (mem, BITS_PER_UNIT);
|
||||
|
||||
emit_insn (gen_movmisalignv8qi (mem, reg));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Set a block of memory using vectorization instructions for the
|
||||
aligned case. We fill the first LENGTH bytes of the memory area
|
||||
starting from DSTBASE with byte constant VALUE. ALIGN is the
|
||||
alignment requirement of memory. Return TRUE if succeeded. */
|
||||
static bool
|
||||
arm_block_set_aligned_vect (rtx dstbase,
|
||||
unsigned HOST_WIDE_INT length,
|
||||
unsigned HOST_WIDE_INT value,
|
||||
unsigned HOST_WIDE_INT align)
|
||||
{
|
||||
unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
|
||||
rtx dst, addr, mem;
|
||||
rtx val_elt, val_vec, reg;
|
||||
rtx rval[MAX_VECT_LEN];
|
||||
enum machine_mode mode;
|
||||
unsigned HOST_WIDE_INT v = value;
|
||||
|
||||
gcc_assert ((align & 0x3) == 0);
|
||||
nelt_v8 = GET_MODE_NUNITS (V8QImode);
|
||||
nelt_v16 = GET_MODE_NUNITS (V16QImode);
|
||||
if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
|
||||
mode = V16QImode;
|
||||
else
|
||||
mode = V8QImode;
|
||||
|
||||
nelt_mode = GET_MODE_NUNITS (mode);
|
||||
gcc_assert (length >= nelt_mode);
|
||||
/* Skip if it isn't profitable. */
|
||||
if (!arm_block_set_vect_profit_p (length, align, mode))
|
||||
return false;
|
||||
|
||||
dst = copy_addr_to_reg (XEXP (dstbase, 0));
|
||||
|
||||
v = sext_hwi (v, BITS_PER_WORD);
|
||||
val_elt = GEN_INT (v);
|
||||
for (j = 0; j < nelt_mode; j++)
|
||||
rval[j] = val_elt;
|
||||
|
||||
reg = gen_reg_rtx (mode);
|
||||
val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
|
||||
/* Emit instruction loading the constant value. */
|
||||
emit_move_insn (reg, val_vec);
|
||||
|
||||
i = 0;
|
||||
/* Handle first 16 bytes specially using vst1:v16qi instruction. */
|
||||
if (mode == V16QImode)
|
||||
{
|
||||
mem = adjust_automodify_address (dstbase, mode, dst, 0);
|
||||
emit_insn (gen_movmisalignv16qi (mem, reg));
|
||||
i += nelt_mode;
|
||||
/* Handle (8, 16) bytes leftover using vst1:v16qi again. */
|
||||
if (i + nelt_v8 < length && i + nelt_v16 > length)
|
||||
{
|
||||
emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
|
||||
mem = adjust_automodify_address (dstbase, mode, dst, 0);
|
||||
/* We are shifting bytes back, set the alignment accordingly. */
|
||||
if ((length & 0x3) == 0)
|
||||
set_mem_align (mem, BITS_PER_UNIT * 4);
|
||||
else if ((length & 0x1) == 0)
|
||||
set_mem_align (mem, BITS_PER_UNIT * 2);
|
||||
else
|
||||
set_mem_align (mem, BITS_PER_UNIT);
|
||||
|
||||
emit_insn (gen_movmisalignv16qi (mem, reg));
|
||||
return true;
|
||||
}
|
||||
/* Fall through for bytes leftover. */
|
||||
mode = V8QImode;
|
||||
nelt_mode = GET_MODE_NUNITS (mode);
|
||||
reg = gen_lowpart (V8QImode, reg);
|
||||
}
|
||||
|
||||
/* Handle 8 bytes in a vector. */
|
||||
for (; (i + nelt_mode <= length); i += nelt_mode)
|
||||
{
|
||||
addr = plus_constant (Pmode, dst, i);
|
||||
mem = adjust_automodify_address (dstbase, mode, addr, i);
|
||||
emit_move_insn (mem, reg);
|
||||
}
|
||||
|
||||
/* Handle single word leftover by shifting 4 bytes back. We can
|
||||
use aligned access for this case. */
|
||||
if (i + UNITS_PER_WORD == length)
|
||||
{
|
||||
addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
|
||||
mem = adjust_automodify_address (dstbase, mode,
|
||||
addr, i - UNITS_PER_WORD);
|
||||
/* We are shifting 4 bytes back, set the alignment accordingly. */
|
||||
if (align > UNITS_PER_WORD)
|
||||
set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
|
||||
|
||||
emit_move_insn (mem, reg);
|
||||
}
|
||||
/* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
|
||||
We have to use unaligned access for this case. */
|
||||
else if (i < length)
|
||||
{
|
||||
emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
|
||||
mem = adjust_automodify_address (dstbase, mode, dst, 0);
|
||||
/* We are shifting bytes back, set the alignment accordingly. */
|
||||
if ((length & 1) == 0)
|
||||
set_mem_align (mem, BITS_PER_UNIT * 2);
|
||||
else
|
||||
set_mem_align (mem, BITS_PER_UNIT);
|
||||
|
||||
emit_insn (gen_movmisalignv8qi (mem, reg));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Set a block of memory using plain strh/strb instructions, only
|
||||
using instructions allowed by ALIGN on processor. We fill the
|
||||
first LENGTH bytes of the memory area starting from DSTBASE
|
||||
with byte constant VALUE. ALIGN is the alignment requirement
|
||||
of memory. */
|
||||
static bool
|
||||
arm_block_set_unaligned_non_vect (rtx dstbase,
|
||||
unsigned HOST_WIDE_INT length,
|
||||
unsigned HOST_WIDE_INT value,
|
||||
unsigned HOST_WIDE_INT align)
|
||||
{
|
||||
unsigned int i;
|
||||
rtx dst, addr, mem;
|
||||
rtx val_exp, val_reg, reg;
|
||||
enum machine_mode mode;
|
||||
HOST_WIDE_INT v = value;
|
||||
|
||||
gcc_assert (align == 1 || align == 2);
|
||||
|
||||
if (align == 2)
|
||||
v |= (value << BITS_PER_UNIT);
|
||||
|
||||
v = sext_hwi (v, BITS_PER_WORD);
|
||||
val_exp = GEN_INT (v);
|
||||
/* Skip if it isn't profitable. */
|
||||
if (!arm_block_set_non_vect_profit_p (val_exp, length,
|
||||
align, true, false))
|
||||
return false;
|
||||
|
||||
dst = copy_addr_to_reg (XEXP (dstbase, 0));
|
||||
mode = (align == 2 ? HImode : QImode);
|
||||
val_reg = force_reg (SImode, val_exp);
|
||||
reg = gen_lowpart (mode, val_reg);
|
||||
|
||||
for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
|
||||
{
|
||||
addr = plus_constant (Pmode, dst, i);
|
||||
mem = adjust_automodify_address (dstbase, mode, addr, i);
|
||||
emit_move_insn (mem, reg);
|
||||
}
|
||||
|
||||
/* Handle single byte leftover. */
|
||||
if (i + 1 == length)
|
||||
{
|
||||
reg = gen_lowpart (QImode, val_reg);
|
||||
addr = plus_constant (Pmode, dst, i);
|
||||
mem = adjust_automodify_address (dstbase, QImode, addr, i);
|
||||
emit_move_insn (mem, reg);
|
||||
i++;
|
||||
}
|
||||
|
||||
gcc_assert (i == length);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Set a block of memory using plain strd/str/strh/strb instructions,
|
||||
to permit unaligned copies on processors which support unaligned
|
||||
semantics for those instructions. We fill the first LENGTH bytes
|
||||
of the memory area starting from DSTBASE with byte constant VALUE.
|
||||
ALIGN is the alignment requirement of memory. */
|
||||
static bool
|
||||
arm_block_set_aligned_non_vect (rtx dstbase,
|
||||
unsigned HOST_WIDE_INT length,
|
||||
unsigned HOST_WIDE_INT value,
|
||||
unsigned HOST_WIDE_INT align)
|
||||
{
|
||||
unsigned int i;
|
||||
rtx dst, addr, mem;
|
||||
rtx val_exp, val_reg, reg;
|
||||
unsigned HOST_WIDE_INT v;
|
||||
bool use_strd_p;
|
||||
|
||||
use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
|
||||
&& TARGET_LDRD && current_tune->prefer_ldrd_strd);
|
||||
|
||||
v = (value | (value << 8) | (value << 16) | (value << 24));
|
||||
if (length < UNITS_PER_WORD)
|
||||
v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
|
||||
|
||||
if (use_strd_p)
|
||||
v |= (v << BITS_PER_WORD);
|
||||
else
|
||||
v = sext_hwi (v, BITS_PER_WORD);
|
||||
|
||||
val_exp = GEN_INT (v);
|
||||
/* Skip if it isn't profitable. */
|
||||
if (!arm_block_set_non_vect_profit_p (val_exp, length,
|
||||
align, false, use_strd_p))
|
||||
{
|
||||
if (!use_strd_p)
|
||||
return false;
|
||||
|
||||
/* Try without strd. */
|
||||
v = (v >> BITS_PER_WORD);
|
||||
v = sext_hwi (v, BITS_PER_WORD);
|
||||
val_exp = GEN_INT (v);
|
||||
use_strd_p = false;
|
||||
if (!arm_block_set_non_vect_profit_p (val_exp, length,
|
||||
align, false, use_strd_p))
|
||||
return false;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
dst = copy_addr_to_reg (XEXP (dstbase, 0));
|
||||
/* Handle double words using strd if possible. */
|
||||
if (use_strd_p)
|
||||
{
|
||||
val_reg = force_reg (DImode, val_exp);
|
||||
reg = val_reg;
|
||||
for (; (i + 8 <= length); i += 8)
|
||||
{
|
||||
addr = plus_constant (Pmode, dst, i);
|
||||
mem = adjust_automodify_address (dstbase, DImode, addr, i);
|
||||
emit_move_insn (mem, reg);
|
||||
}
|
||||
}
|
||||
else
|
||||
val_reg = force_reg (SImode, val_exp);
|
||||
|
||||
/* Handle words. */
|
||||
reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
|
||||
for (; (i + 4 <= length); i += 4)
|
||||
{
|
||||
addr = plus_constant (Pmode, dst, i);
|
||||
mem = adjust_automodify_address (dstbase, SImode, addr, i);
|
||||
if ((align & 3) == 0)
|
||||
emit_move_insn (mem, reg);
|
||||
else
|
||||
emit_insn (gen_unaligned_storesi (mem, reg));
|
||||
}
|
||||
|
||||
/* Merge last pair of STRH and STRB into a STR if possible. */
|
||||
if (unaligned_access && i > 0 && (i + 3) == length)
|
||||
{
|
||||
addr = plus_constant (Pmode, dst, i - 1);
|
||||
mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
|
||||
/* We are shifting one byte back, set the alignment accordingly. */
|
||||
if ((align & 1) == 0)
|
||||
set_mem_align (mem, BITS_PER_UNIT);
|
||||
|
||||
/* Most likely this is an unaligned access, and we can't tell at
|
||||
compilation time. */
|
||||
emit_insn (gen_unaligned_storesi (mem, reg));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Handle half word leftover. */
|
||||
if (i + 2 <= length)
|
||||
{
|
||||
reg = gen_lowpart (HImode, val_reg);
|
||||
addr = plus_constant (Pmode, dst, i);
|
||||
mem = adjust_automodify_address (dstbase, HImode, addr, i);
|
||||
if ((align & 1) == 0)
|
||||
emit_move_insn (mem, reg);
|
||||
else
|
||||
emit_insn (gen_unaligned_storehi (mem, reg));
|
||||
|
||||
i += 2;
|
||||
}
|
||||
|
||||
/* Handle single byte leftover. */
|
||||
if (i + 1 == length)
|
||||
{
|
||||
reg = gen_lowpart (QImode, val_reg);
|
||||
addr = plus_constant (Pmode, dst, i);
|
||||
mem = adjust_automodify_address (dstbase, QImode, addr, i);
|
||||
emit_move_insn (mem, reg);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Set a block of memory using vectorization instructions for both
|
||||
aligned and unaligned cases. We fill the first LENGTH bytes of
|
||||
the memory area starting from DSTBASE with byte constant VALUE.
|
||||
ALIGN is the alignment requirement of memory. */
|
||||
static bool
|
||||
arm_block_set_vect (rtx dstbase,
|
||||
unsigned HOST_WIDE_INT length,
|
||||
unsigned HOST_WIDE_INT value,
|
||||
unsigned HOST_WIDE_INT align)
|
||||
{
|
||||
/* Check whether we need to use unaligned store instruction. */
|
||||
if (((align & 3) != 0 || (length & 3) != 0)
|
||||
/* Check whether unaligned store instruction is available. */
|
||||
&& (!unaligned_access || BYTES_BIG_ENDIAN))
|
||||
return false;
|
||||
|
||||
if ((align & 3) == 0)
|
||||
return arm_block_set_aligned_vect (dstbase, length, value, align);
|
||||
else
|
||||
return arm_block_set_unaligned_vect (dstbase, length, value, align);
|
||||
}
|
||||
|
||||
/* Expand string store operation. Firstly we try to do that by using
|
||||
vectorization instructions, then try with ARM unaligned access and
|
||||
double-word store if profitable. OPERANDS[0] is the destination,
|
||||
OPERANDS[1] is the number of bytes, operands[2] is the value to
|
||||
initialize the memory, OPERANDS[3] is the known alignment of the
|
||||
destination. */
|
||||
bool
|
||||
arm_gen_setmem (rtx *operands)
|
||||
{
|
||||
rtx dstbase = operands[0];
|
||||
unsigned HOST_WIDE_INT length;
|
||||
unsigned HOST_WIDE_INT value;
|
||||
unsigned HOST_WIDE_INT align;
|
||||
|
||||
if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
|
||||
return false;
|
||||
|
||||
length = UINTVAL (operands[1]);
|
||||
if (length > 64)
|
||||
return false;
|
||||
|
||||
value = (UINTVAL (operands[2]) & 0xFF);
|
||||
align = UINTVAL (operands[3]);
|
||||
if (TARGET_NEON && length >= 8
|
||||
&& current_tune->string_ops_prefer_neon
|
||||
&& arm_block_set_vect (dstbase, length, value, align))
|
||||
return true;
|
||||
|
||||
if (!unaligned_access && (align & 3) != 0)
|
||||
return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
|
||||
|
||||
return arm_block_set_aligned_non_vect (dstbase, length, value, align);
|
||||
}
|
||||
|
||||
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
|
||||
|
||||
static unsigned HOST_WIDE_INT
|
||||
|
@ -6716,6 +6716,20 @@
|
||||
})
|
||||
|
||||
|
||||
;; Expander for the "setmemsi" pattern, enabled when TARGET_32BIT.
;; Operand 0 is a BLK-mode general operand; operands 1, 2 and 3 must be
;; SImode constant integers.  All the work is delegated to
;; arm_gen_setmem: when it returns true the expansion is DONE, otherwise
;; the expander FAILs.
(define_expand "setmemsi"
|
||||
[(match_operand:BLK 0 "general_operand" "")
|
||||
(match_operand:SI 1 "const_int_operand" "")
|
||||
(match_operand:SI 2 "const_int_operand" "")
|
||||
(match_operand:SI 3 "const_int_operand" "")]
|
||||
"TARGET_32BIT"
|
||||
{
|
||||
if (arm_gen_setmem (operands))
|
||||
DONE;
|
||||
|
|
||||
FAIL;
|
||||
})
|
||||
|
||||
|
||||
;; Move a block of memory if it is word aligned and MORE than 2 words long.
|
||||
;; We could let this apply for blocks of less than this, but it clobbers so
|
||||
;; many registers that there is then probably a better way.
|
||||
|
@ -1,3 +1,17 @@
|
||||
2014-09-05 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR target/55701
|
||||
* gcc.target/arm/memset-inline-1.c: New test.
|
||||
* gcc.target/arm/memset-inline-2.c: New test.
|
||||
* gcc.target/arm/memset-inline-3.c: New test.
|
||||
* gcc.target/arm/memset-inline-4.c: New test.
|
||||
* gcc.target/arm/memset-inline-5.c: New test.
|
||||
* gcc.target/arm/memset-inline-6.c: New test.
|
||||
* gcc.target/arm/memset-inline-7.c: New test.
|
||||
* gcc.target/arm/memset-inline-8.c: New test.
|
||||
* gcc.target/arm/memset-inline-9.c: New test.
|
||||
* gcc.target/arm/memset-inline-10.c: New test.
|
||||
|
||||
2014-09-04 Kaz Kojima <kkojima@gcc.gnu.org>
|
||||
|
||||
* gcc.c-torture/execute/pr44683.c: Remove dg-options for sh*-*-*.
|
||||
|
39
gcc/testsuite/gcc.target/arm/memset-inline-1.c
Normal file
39
gcc/testsuite/gcc.target/arm/memset-inline-1.c
Normal file
@ -0,0 +1,39 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
short a[LEN];
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
memset (a, -1, 14);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int pos = 0;

  /* Filled prefix: every byte must hold the fill value.  */
  while (pos < idx)
    {
      if (arr[pos] != v)
        abort ();
      pos++;
    }

  /* Remaining tail: every byte must still be zero.  */
  for (pos = idx; pos < len; pos++)
    {
      if (arr[pos] != 0)
        abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
foo ();
|
||||
check ((signed char *)a, 14, sizeof (a), -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "bl?\[ \t\]*memset" { target { arm_thumb2_ok } } } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
95
gcc/testsuite/gcc.target/arm/memset-inline-10.c
Normal file
95
gcc/testsuite/gcc.target/arm/memset-inline-10.c
Normal file
@ -0,0 +1,95 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=armv7-a -mfloat-abi=hard -mfpu=neon -O2" } */
|
||||
|
||||
#define BUF 100
|
||||
long a[BUF];
|
||||
|
||||
typedef unsigned int size_t;
|
||||
typedef unsigned int wchar_t;
|
||||
void *memset (void *s, int c, size_t n);
|
||||
struct printf_info
|
||||
{
|
||||
int prec;
|
||||
int width;
|
||||
wchar_t spec;
|
||||
unsigned int is_long_double:1;
|
||||
unsigned int is_short:1;
|
||||
unsigned int is_long:1;
|
||||
unsigned int alt:1;
|
||||
unsigned int space:1;
|
||||
unsigned int left:1;
|
||||
unsigned int showsign:1;
|
||||
unsigned int group:1;
|
||||
unsigned int extra:1;
|
||||
unsigned int is_char:1;
|
||||
unsigned int wide:1;
|
||||
unsigned int i18n:1;
|
||||
unsigned int __pad:4;
|
||||
unsigned short int user;
|
||||
wchar_t pad;
|
||||
};
|
||||
|
||||
void bar (int *alt, int *space, int *left, int *showsign,
|
||||
int *group,
|
||||
int *is_long_double,
|
||||
int *is_short,
|
||||
int *is_long,
|
||||
int *width,
|
||||
int *prec,
|
||||
int *use_outdigits,
|
||||
unsigned int *pad,
|
||||
wchar_t *spec);
|
||||
void __printf_fp (char *s, struct printf_info *pinfo);
|
||||
int foo(char *s)
|
||||
{
|
||||
int alt = 0;
|
||||
int space = 0;
|
||||
int left = 0;
|
||||
int showsign = 0;
|
||||
int group = 0;
|
||||
int is_long_double = 0;
|
||||
int is_short = 0;
|
||||
int is_long = 0;
|
||||
int width = 0;
|
||||
int prec = -1;
|
||||
int use_outdigits = 0;
|
||||
unsigned int pad = L' ';
|
||||
wchar_t spec;
|
||||
|
||||
bar (&alt, &space, &left, &showsign, &group, &is_long_double,
|
||||
&is_short, &is_long, &width, &prec, &use_outdigits, &pad, &spec);
|
||||
|
||||
a[1] = a[0] + a[2] + a[3] + a[4] + a[5] + a[6];
|
||||
a[2] = a[1] + a[3] + a[5] + a[5] + a[6] + a[7];
|
||||
a[3] = a[2] + a[5] + a[7] + a[6] + a[7] + a[8];
|
||||
a[4] = a[3] + a[7] + a[11] + a[7] + a[8] + a[9];
|
||||
a[5] = a[5] + a[11] + a[13] + a[8] + a[9] + a[10];
|
||||
a[6] = a[7] + a[13] + a[17] + a[9] + a[10] + a[11];
|
||||
a[7] = a[11] + a[17] + a[19] + a[10] + a[11] + a[12];
|
||||
a[8] = a[17] + a[19] + a[23] + a[29] + a[31] + a[37];
|
||||
|
||||
{
|
||||
struct printf_info info;
|
||||
memset (&info, 0, sizeof (struct printf_info));
|
||||
info.prec = prec;
|
||||
info.width = width;
|
||||
info.spec = spec;
|
||||
info.is_long_double = is_long_double;
|
||||
info.is_short = is_short;
|
||||
info.is_long = is_long;
|
||||
info.alt = alt;
|
||||
info.space = space;
|
||||
info.left = left;
|
||||
info.showsign = showsign;
|
||||
info.group = group;
|
||||
info.pad = pad;
|
||||
info.extra = 0;
|
||||
info.i18n = use_outdigits;
|
||||
info.wide = sizeof (wchar_t) != 1;
|
||||
|
||||
__printf_fp (s, &info);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
38
gcc/testsuite/gcc.target/arm/memset-inline-2.c
Normal file
38
gcc/testsuite/gcc.target/arm/memset-inline-2.c
Normal file
@ -0,0 +1,38 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-save-temps -Os -fno-inline" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
short a[LEN];
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
memset (a, -1, 14);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int head, tail;

  /* Bytes that should have been filled.  */
  for (head = 0; head < idx; ++head)
    {
      if (arr[head] == v)
        continue;
      abort ();
    }

  /* Bytes that should have been left alone.  */
  for (tail = idx; tail < len; ++tail)
    {
      if (arr[tail] == 0)
        continue;
      abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
foo ();
|
||||
check ((signed char *)a, 14, sizeof (a), -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/* { dg-final { scan-assembler "bl?\[ \t\]*memset" { target { ! arm_neon } } } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
40
gcc/testsuite/gcc.target/arm/memset-inline-3.c
Normal file
40
gcc/testsuite/gcc.target/arm/memset-inline-3.c
Normal file
@ -0,0 +1,40 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
short a[LEN];
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
memset (a, -1, 7);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int pos = 0;

  /* Filled prefix: every byte must hold the fill value.  */
  while (pos < idx)
    {
      if (arr[pos] != v)
        abort ();
      pos++;
    }

  /* Remaining tail: every byte must still be zero.  */
  for (pos = idx; pos < len; pos++)
    {
      if (arr[pos] != 0)
        abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
foo ();
|
||||
check ((signed char *)a, 7, sizeof (a), -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "bl?\[ \t\]*memset" { target { ! arm_thumb1_ok } } } } */
|
||||
/* { dg-final { scan-assembler-not "strh" { target { arm_unaligned } } } } */
|
||||
/* { dg-final { scan-assembler-not "strb" { target { arm_unaligned } } } } */
|
68
gcc/testsuite/gcc.target/arm/memset-inline-4.c
Normal file
68
gcc/testsuite/gcc.target/arm/memset-inline-4.c
Normal file
@ -0,0 +1,68 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mcpu=cortex-a9" } { "" } } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mtune=cortex-a9" } { "" } } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
/* { dg-add-options "arm_neon" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
int a[LEN];
|
||||
int b[LEN];
|
||||
int c[LEN];
|
||||
void
|
||||
foo1 (void)
|
||||
{
|
||||
memset (a, -1, 8);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
foo2 (void)
|
||||
{
|
||||
memset (b, 1, 12);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
foo3 (void)
|
||||
{
|
||||
memset (c, 1, 13);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int head, tail;

  /* Bytes that should have been filled.  */
  for (head = 0; head < idx; ++head)
    {
      if (arr[head] == v)
        continue;
      abort ();
    }

  /* Bytes that should have been left alone.  */
  for (tail = idx; tail < len; ++tail)
    {
      if (arr[tail] == 0)
        continue;
      abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
foo1 ();
|
||||
check ((signed char *)a, 8, sizeof (a), -1);
|
||||
|
||||
foo2 ();
|
||||
check ((signed char *)b, 12, sizeof (b), 1);
|
||||
|
||||
foo3 ();
|
||||
check ((signed char *)c, 13, sizeof (c), 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "bl?\[ \t\]+memset" { target { ! arm_thumb1_ok } } } } */
|
||||
/* { dg-final { scan-assembler-times "vst1\.8" 1 { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { scan-assembler "vstr" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
78
gcc/testsuite/gcc.target/arm/memset-inline-5.c
Normal file
78
gcc/testsuite/gcc.target/arm/memset-inline-5.c
Normal file
@ -0,0 +1,78 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mcpu=cortex-a9" } { "" } } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mtune=cortex-a9" } { "" } } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
/* { dg-add-options "arm_neon" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
int a[LEN];
|
||||
int b[LEN];
|
||||
int c[LEN];
|
||||
int d[LEN];
|
||||
void
|
||||
foo1 (void)
|
||||
{
|
||||
memset (a, -1, 16);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
foo2 (void)
|
||||
{
|
||||
memset (b, 1, 25);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
foo3 (void)
|
||||
{
|
||||
memset (c, -1, 19);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
foo4 (void)
|
||||
{
|
||||
memset (d, 1, 23);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int pos = 0;

  /* Filled prefix: every byte must hold the fill value.  */
  while (pos < idx)
    {
      if (arr[pos] != v)
        abort ();
      pos++;
    }

  /* Remaining tail: every byte must still be zero.  */
  for (pos = idx; pos < len; pos++)
    {
      if (arr[pos] != 0)
        abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
foo1 ();
|
||||
check ((signed char *)a, 16, sizeof (a), -1);
|
||||
|
||||
foo2 ();
|
||||
check ((signed char *)b, 25, sizeof (b), 1);
|
||||
|
||||
foo3 ();
|
||||
check ((signed char *)c, 19, sizeof (c), -1);
|
||||
|
||||
foo4 ();
|
||||
check ((signed char *)d, 23, sizeof (d), 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "bl?\[ \t\]+memset" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { scan-assembler "vst1" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { scan-assembler-not "vstr" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
68
gcc/testsuite/gcc.target/arm/memset-inline-6.c
Normal file
68
gcc/testsuite/gcc.target/arm/memset-inline-6.c
Normal file
@ -0,0 +1,68 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mcpu=cortex-a9" } { "" } } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mtune=cortex-a9" } { "" } } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
/* { dg-add-options "arm_neon" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
int a[LEN];
|
||||
int b[LEN];
|
||||
int c[LEN];
|
||||
void
|
||||
foo1 (void)
|
||||
{
|
||||
memset (a, -1, 20);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
foo2 (void)
|
||||
{
|
||||
memset (b, 1, 24);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
foo3 (void)
|
||||
{
|
||||
memset (c, -1, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int head, tail;

  /* Bytes that should have been filled.  */
  for (head = 0; head < idx; ++head)
    {
      if (arr[head] == v)
        continue;
      abort ();
    }

  /* Bytes that should have been left alone.  */
  for (tail = idx; tail < len; ++tail)
    {
      if (arr[tail] == 0)
        continue;
      abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
foo1 ();
|
||||
check ((signed char *)a, 20, sizeof (a), -1);
|
||||
|
||||
foo2 ();
|
||||
check ((signed char *)b, 24, sizeof (b), 1);
|
||||
|
||||
foo3 ();
|
||||
check ((signed char *)c, 32, sizeof (c), -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "bl?\[ \t\]+memset" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { scan-assembler-times "vst1" 3 { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { scan-assembler-times "vstr" 4 { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
||||
|
171
gcc/testsuite/gcc.target/arm/memset-inline-7.c
Normal file
171
gcc/testsuite/gcc.target/arm/memset-inline-7.c
Normal file
@ -0,0 +1,171 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
short a[LEN];
|
||||
int b[LEN];
|
||||
|
||||
/* Clear the first LEN bytes of ARR to zero, one byte at a time.  */
void
init (signed char *arr, int len)
{
  int pos = 0;

  while (pos < len)
    {
      arr[pos] = 0;
      pos++;
    }
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int pos = 0;

  /* Filled prefix: every byte must hold the fill value.  */
  while (pos < idx)
    {
      if (arr[pos] != v)
        abort ();
      pos++;
    }

  /* Remaining tail: every byte must still be zero.  */
  for (pos = idx; pos < len; pos++)
    {
      if (arr[pos] != 0)
        abort ();
    }
}
|
||||
|
||||
/* Zero the whole array A with init, memset its first L bytes to V, then
   let check verify the result.  The body is wrapped in do/while (0) so
   that `TEST (...);' is exactly one statement wherever it is used.  */
#define TEST(a,l,v)                                           \
  do                                                          \
    {                                                         \
      init ((signed char*)(a), sizeof (a));                   \
      memset ((a), (v), (l));                                 \
      check ((signed char *)(a), (l), sizeof (a), (v));       \
    }                                                         \
  while (0)
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
TEST (a, 1, -1);
|
||||
TEST (a, 2, -1);
|
||||
TEST (a, 3, -1);
|
||||
TEST (a, 4, -1);
|
||||
TEST (a, 5, -1);
|
||||
TEST (a, 6, -1);
|
||||
TEST (a, 7, -1);
|
||||
TEST (a, 8, -1);
|
||||
TEST (a, 9, 1);
|
||||
TEST (a, 10, -1);
|
||||
TEST (a, 11, 1);
|
||||
TEST (a, 12, -1);
|
||||
TEST (a, 13, 1);
|
||||
TEST (a, 14, -1);
|
||||
TEST (a, 15, 1);
|
||||
TEST (a, 16, -1);
|
||||
TEST (a, 17, 1);
|
||||
TEST (a, 18, -1);
|
||||
TEST (a, 19, 1);
|
||||
TEST (a, 20, -1);
|
||||
TEST (a, 21, 1);
|
||||
TEST (a, 22, -1);
|
||||
TEST (a, 23, 1);
|
||||
TEST (a, 24, -1);
|
||||
TEST (a, 25, 1);
|
||||
TEST (a, 26, -1);
|
||||
TEST (a, 27, 1);
|
||||
TEST (a, 28, -1);
|
||||
TEST (a, 29, 1);
|
||||
TEST (a, 30, -1);
|
||||
TEST (a, 31, 1);
|
||||
TEST (a, 32, -1);
|
||||
TEST (a, 33, 1);
|
||||
TEST (a, 34, -1);
|
||||
TEST (a, 35, 1);
|
||||
TEST (a, 36, -1);
|
||||
TEST (a, 37, 1);
|
||||
TEST (a, 38, -1);
|
||||
TEST (a, 39, 1);
|
||||
TEST (a, 40, -1);
|
||||
TEST (a, 41, 1);
|
||||
TEST (a, 42, -1);
|
||||
TEST (a, 43, 1);
|
||||
TEST (a, 44, -1);
|
||||
TEST (a, 45, 1);
|
||||
TEST (a, 46, -1);
|
||||
TEST (a, 47, 1);
|
||||
TEST (a, 48, -1);
|
||||
TEST (a, 49, 1);
|
||||
TEST (a, 50, -1);
|
||||
TEST (a, 51, 1);
|
||||
TEST (a, 52, -1);
|
||||
TEST (a, 53, 1);
|
||||
TEST (a, 54, -1);
|
||||
TEST (a, 55, 1);
|
||||
TEST (a, 56, -1);
|
||||
TEST (a, 57, 1);
|
||||
TEST (a, 58, -1);
|
||||
TEST (a, 59, 1);
|
||||
TEST (a, 60, -1);
|
||||
TEST (a, 61, 1);
|
||||
TEST (a, 62, -1);
|
||||
TEST (a, 63, 1);
|
||||
TEST (a, 64, -1);
|
||||
|
||||
TEST (b, 1, -1);
|
||||
TEST (b, 2, -1);
|
||||
TEST (b, 3, -1);
|
||||
TEST (b, 4, -1);
|
||||
TEST (b, 5, -1);
|
||||
TEST (b, 6, -1);
|
||||
TEST (b, 7, -1);
|
||||
TEST (b, 8, -1);
|
||||
TEST (b, 9, 1);
|
||||
TEST (b, 10, -1);
|
||||
TEST (b, 11, 1);
|
||||
TEST (b, 12, -1);
|
||||
TEST (b, 13, 1);
|
||||
TEST (b, 14, -1);
|
||||
TEST (b, 15, 1);
|
||||
TEST (b, 16, -1);
|
||||
TEST (b, 17, 1);
|
||||
TEST (b, 18, -1);
|
||||
TEST (b, 19, 1);
|
||||
TEST (b, 20, -1);
|
||||
TEST (b, 21, 1);
|
||||
TEST (b, 22, -1);
|
||||
TEST (b, 23, 1);
|
||||
TEST (b, 24, -1);
|
||||
TEST (b, 25, 1);
|
||||
TEST (b, 26, -1);
|
||||
TEST (b, 27, 1);
|
||||
TEST (b, 28, -1);
|
||||
TEST (b, 29, 1);
|
||||
TEST (b, 30, -1);
|
||||
TEST (b, 31, 1);
|
||||
TEST (b, 32, -1);
|
||||
TEST (b, 33, 1);
|
||||
TEST (b, 34, -1);
|
||||
TEST (b, 35, 1);
|
||||
TEST (b, 36, -1);
|
||||
TEST (b, 37, 1);
|
||||
TEST (b, 38, -1);
|
||||
TEST (b, 39, 1);
|
||||
TEST (b, 40, -1);
|
||||
TEST (b, 41, 1);
|
||||
TEST (b, 42, -1);
|
||||
TEST (b, 43, 1);
|
||||
TEST (b, 44, -1);
|
||||
TEST (b, 45, 1);
|
||||
TEST (b, 46, -1);
|
||||
TEST (b, 47, 1);
|
||||
TEST (b, 48, -1);
|
||||
TEST (b, 49, 1);
|
||||
TEST (b, 50, -1);
|
||||
TEST (b, 51, 1);
|
||||
TEST (b, 52, -1);
|
||||
TEST (b, 53, 1);
|
||||
TEST (b, 54, -1);
|
||||
TEST (b, 55, 1);
|
||||
TEST (b, 56, -1);
|
||||
TEST (b, 57, 1);
|
||||
TEST (b, 58, -1);
|
||||
TEST (b, 59, 1);
|
||||
TEST (b, 60, -1);
|
||||
TEST (b, 61, 1);
|
||||
TEST (b, 62, -1);
|
||||
TEST (b, 63, 1);
|
||||
TEST (b, 64, -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
44
gcc/testsuite/gcc.target/arm/memset-inline-8.c
Normal file
44
gcc/testsuite/gcc.target/arm/memset-inline-8.c
Normal file
@ -0,0 +1,44 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mcpu=cortex-a9" } { "" } } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mtune=cortex-a9" } { "" } } */
|
||||
/* { dg-options "-save-temps -O2 -fno-inline" } */
|
||||
/* { dg-add-options "arm_neon" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
short a[LEN];
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
memset (a, -1, 14);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int head, tail;

  /* Bytes that should have been filled.  */
  for (head = 0; head < idx; ++head)
    {
      if (arr[head] == v)
        continue;
      abort ();
    }

  /* Bytes that should have been left alone.  */
  for (tail = idx; tail < len; ++tail)
    {
      if (arr[tail] == 0)
        continue;
      abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
foo ();
|
||||
check ((signed char *)a, 14, sizeof (a), -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "bl?\[ \t\]*memset" { target { arm_thumb2_ok } } } } */
|
||||
/* { dg-final { scan-assembler "vst1" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { scan-assembler-not "vstr" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
42
gcc/testsuite/gcc.target/arm/memset-inline-9.c
Normal file
42
gcc/testsuite/gcc.target/arm/memset-inline-9.c
Normal file
@ -0,0 +1,42 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mcpu=cortex-a9" } { "" } } */
|
||||
/* { dg-skip-if "Don't inline memset using neon instructions on cortex-a9" { *-*-* } { "-mtune=cortex-a9" } { "" } } */
|
||||
/* { dg-options "-save-temps -Os -fno-inline" } */
|
||||
/* { dg-add-options "arm_neon" } */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define LEN (100)
|
||||
short a[LEN];
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
memset (a, -1, 14);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Abort unless bytes 0 .. IDX-1 of ARR all equal V and bytes
   IDX .. LEN-1 are all zero.  */
void
check (signed char *arr, int idx, int len, int v)
{
  int pos = 0;

  /* Filled prefix: every byte must hold the fill value.  */
  while (pos < idx)
    {
      if (arr[pos] != v)
        abort ();
      pos++;
    }

  /* Remaining tail: every byte must still be zero.  */
  for (pos = idx; pos < len; pos++)
    {
      if (arr[pos] != 0)
        abort ();
    }
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
foo ();
|
||||
check ((signed char *)a, 14, sizeof (a), -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/* { dg-final { scan-assembler-not "bl?\[ \t\]*memset" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { scan-assembler "vst1" { target { arm_little_endian && arm_neon } } } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
Loading…
Reference in New Issue
Block a user