i386.h (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES_EPILOGUES): New tuning flag.
* i386.h (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES_EPILOGUES): New tuning flag. * x86-tune.def (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES): Define it. * i386.c (expand_small_movmem_or_setmem): New function. (expand_set_or_movmem_prologue_epilogue_by_misaligned_moves): New function (alg_usable_p): Add support for value ranges; cleanup. (ix86_expand_set_or_movmem): Add support for misaligned moves. From-SVN: r203937
This commit is contained in:
parent
7a1dd0fab3
commit
561400f0d1
|
@ -1,3 +1,12 @@
|
||||||
|
2013-10-22 Jan Hubicka <jh@suse.cz>
|
||||||
|
|
||||||
|
* i386.h (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES_EPILOGUES): New tuning flag.
|
||||||
|
* x86-tune.def (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES): Define it.
|
||||||
|
* i386.c (expand_small_movmem_or_setmem): New function.
|
||||||
|
(expand_set_or_movmem_prologue_epilogue_by_misaligned_moves): New function
|
||||||
|
(alg_usable_p): Add support for value ranges; cleanup.
|
||||||
|
(ix86_expand_set_or_movmem): Add support for misaligned moves.
|
||||||
|
|
||||||
2013-10-22 Sterling Augustine <saugustine@google.com>
|
2013-10-22 Sterling Augustine <saugustine@google.com>
|
||||||
|
|
||||||
* doc/invoke.texi: Document -ggnu-pubnames.
|
* doc/invoke.texi: Document -ggnu-pubnames.
|
||||||
|
|
|
@ -22731,6 +22731,315 @@ expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
|
||||||
return destmem;
|
return destmem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Test if COUNT&SIZE is nonzero and if so, expand movme
|
||||||
|
or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
|
||||||
|
and jump to DONE_LABEL. */
|
||||||
|
static void
|
||||||
|
expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
|
||||||
|
rtx destptr, rtx srcptr,
|
||||||
|
rtx value, rtx vec_value,
|
||||||
|
rtx count, int size,
|
||||||
|
rtx done_label, bool issetmem)
|
||||||
|
{
|
||||||
|
rtx label = ix86_expand_aligntest (count, size, false);
|
||||||
|
enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
|
||||||
|
rtx modesize;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
/* If we do not have vector value to copy, we must reduce size. */
|
||||||
|
if (issetmem)
|
||||||
|
{
|
||||||
|
if (!vec_value)
|
||||||
|
{
|
||||||
|
if (GET_MODE (value) == VOIDmode && size > 8)
|
||||||
|
mode = Pmode;
|
||||||
|
else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
|
||||||
|
mode = GET_MODE (value);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
mode = GET_MODE (vec_value), value = vec_value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Choose appropriate vector mode. */
|
||||||
|
if (size >= 32)
|
||||||
|
mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
|
||||||
|
else if (size >= 16)
|
||||||
|
mode = TARGET_SSE ? V16QImode : DImode;
|
||||||
|
srcmem = change_address (srcmem, mode, srcptr);
|
||||||
|
}
|
||||||
|
destmem = change_address (destmem, mode, destptr);
|
||||||
|
modesize = GEN_INT (GET_MODE_SIZE (mode));
|
||||||
|
gcc_assert (GET_MODE_SIZE (mode) <= size);
|
||||||
|
for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
|
||||||
|
{
|
||||||
|
if (issetmem)
|
||||||
|
emit_move_insn (destmem, gen_lowpart (mode, value));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_move_insn (destmem, srcmem);
|
||||||
|
srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
|
||||||
|
}
|
||||||
|
destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
|
||||||
|
}
|
||||||
|
|
||||||
|
destmem = offset_address (destmem, count, 1);
|
||||||
|
destmem = offset_address (destmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
|
||||||
|
GET_MODE_SIZE (mode));
|
||||||
|
if (issetmem)
|
||||||
|
emit_move_insn (destmem, gen_lowpart (mode, value));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
srcmem = offset_address (srcmem, count, 1);
|
||||||
|
srcmem = offset_address (srcmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
|
||||||
|
GET_MODE_SIZE (mode));
|
||||||
|
emit_move_insn (destmem, srcmem);
|
||||||
|
}
|
||||||
|
emit_jump_insn (gen_jump (done_label));
|
||||||
|
emit_barrier ();
|
||||||
|
|
||||||
|
emit_label (label);
|
||||||
|
LABEL_NUSES (label) = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
|
||||||
|
and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
|
||||||
|
bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
|
||||||
|
proceed with an loop copying SIZE bytes at once. Do moves in MODE.
|
||||||
|
DONE_LABEL is a label after the whole copying sequence. The label is created
|
||||||
|
on demand if *DONE_LABEL is NULL.
|
||||||
|
MIN_SIZE is minimal size of block copied. This value gets adjusted for new
|
||||||
|
bounds after the initial copies.
|
||||||
|
|
||||||
|
DESTMEM/SRCMEM are memory expressions pointing to the copies block,
|
||||||
|
DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
|
||||||
|
we will dispatch to a library call for large blocks.
|
||||||
|
|
||||||
|
In pseudocode we do:
|
||||||
|
|
||||||
|
if (COUNT < SIZE)
|
||||||
|
{
|
||||||
|
Assume that SIZE is 4. Bigger sizes are handled analogously
|
||||||
|
if (COUNT & 4)
|
||||||
|
{
|
||||||
|
copy 4 bytes from SRCPTR to DESTPTR
|
||||||
|
copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
|
||||||
|
goto done_label
|
||||||
|
}
|
||||||
|
if (!COUNT)
|
||||||
|
goto done_label;
|
||||||
|
copy 1 byte from SRCPTR to DESTPTR
|
||||||
|
if (COUNT & 2)
|
||||||
|
{
|
||||||
|
copy 2 bytes from SRCPTR to DESTPTR
|
||||||
|
copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
|
||||||
|
copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
|
||||||
|
|
||||||
|
OLD_DESPTR = DESTPTR;
|
||||||
|
Align DESTPTR up to DESIRED_ALIGN
|
||||||
|
SRCPTR += DESTPTR - OLD_DESTPTR
|
||||||
|
COUNT -= DEST_PTR - OLD_DESTPTR
|
||||||
|
if (DYNAMIC_CHECK)
|
||||||
|
Round COUNT down to multiple of SIZE
|
||||||
|
<< optional caller supplied zero size guard is here >>
|
||||||
|
<< optional caller suppplied dynamic check is here >>
|
||||||
|
<< caller supplied main copy loop is here >>
|
||||||
|
}
|
||||||
|
done_label:
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
|
||||||
|
rtx *destptr, rtx *srcptr,
|
||||||
|
enum machine_mode mode,
|
||||||
|
rtx value, rtx vec_value,
|
||||||
|
rtx *count,
|
||||||
|
rtx *done_label,
|
||||||
|
int size,
|
||||||
|
int desired_align,
|
||||||
|
int align,
|
||||||
|
unsigned HOST_WIDE_INT *min_size,
|
||||||
|
bool dynamic_check,
|
||||||
|
bool issetmem)
|
||||||
|
{
|
||||||
|
rtx loop_label = NULL, label;
|
||||||
|
int n;
|
||||||
|
rtx modesize;
|
||||||
|
int prolog_size = 0;
|
||||||
|
rtx mode_value;
|
||||||
|
|
||||||
|
/* Chose proper value to copy. */
|
||||||
|
if (issetmem && VECTOR_MODE_P (mode))
|
||||||
|
mode_value = vec_value;
|
||||||
|
else
|
||||||
|
mode_value = value;
|
||||||
|
gcc_assert (GET_MODE_SIZE (mode) <= size);
|
||||||
|
|
||||||
|
/* See if block is big or small, handle small blocks. */
|
||||||
|
if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
|
||||||
|
{
|
||||||
|
int size2 = size;
|
||||||
|
loop_label = gen_label_rtx ();
|
||||||
|
|
||||||
|
if (!*done_label)
|
||||||
|
*done_label = gen_label_rtx ();
|
||||||
|
|
||||||
|
emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
|
||||||
|
1, loop_label);
|
||||||
|
size2 >>= 1;
|
||||||
|
|
||||||
|
/* Handle sizes > 3. */
|
||||||
|
for (;size2 > 2; size2 >>= 1)
|
||||||
|
expand_small_movmem_or_setmem (destmem, srcmem,
|
||||||
|
*destptr, *srcptr,
|
||||||
|
value, vec_value,
|
||||||
|
*count,
|
||||||
|
size2, *done_label, issetmem);
|
||||||
|
/* Nothing to copy? Jump to DONE_LABEL if so */
|
||||||
|
emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
|
||||||
|
1, *done_label);
|
||||||
|
|
||||||
|
/* Do a byte copy. */
|
||||||
|
destmem = change_address (destmem, QImode, *destptr);
|
||||||
|
if (issetmem)
|
||||||
|
emit_move_insn (destmem, gen_lowpart (QImode, value));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
srcmem = change_address (srcmem, QImode, *srcptr);
|
||||||
|
emit_move_insn (destmem, srcmem);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle sizes 2 and 3. */
|
||||||
|
label = ix86_expand_aligntest (*count, 2, false);
|
||||||
|
destmem = change_address (destmem, HImode, *destptr);
|
||||||
|
destmem = offset_address (destmem, *count, 1);
|
||||||
|
destmem = offset_address (destmem, GEN_INT (-2), 2);
|
||||||
|
if (issetmem)
|
||||||
|
emit_move_insn (destmem, gen_lowpart (HImode, value));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
srcmem = change_address (srcmem, HImode, *srcptr);
|
||||||
|
srcmem = offset_address (srcmem, *count, 1);
|
||||||
|
srcmem = offset_address (srcmem, GEN_INT (-2), 2);
|
||||||
|
emit_move_insn (destmem, srcmem);
|
||||||
|
}
|
||||||
|
|
||||||
|
emit_label (label);
|
||||||
|
LABEL_NUSES (label) = 1;
|
||||||
|
emit_jump_insn (gen_jump (*done_label));
|
||||||
|
emit_barrier ();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
|
||||||
|
|| UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
|
||||||
|
|
||||||
|
/* Start memcpy for COUNT >= SIZE. */
|
||||||
|
if (loop_label)
|
||||||
|
{
|
||||||
|
emit_label (loop_label);
|
||||||
|
LABEL_NUSES (loop_label) = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy first desired_align bytes. */
|
||||||
|
if (!issetmem)
|
||||||
|
srcmem = change_address (srcmem, mode, *srcptr);
|
||||||
|
destmem = change_address (destmem, mode, *destptr);
|
||||||
|
modesize = GEN_INT (GET_MODE_SIZE (mode));
|
||||||
|
for (n = 0; prolog_size < desired_align - align; n++)
|
||||||
|
{
|
||||||
|
if (issetmem)
|
||||||
|
emit_move_insn (destmem, mode_value);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
emit_move_insn (destmem, srcmem);
|
||||||
|
srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
|
||||||
|
}
|
||||||
|
destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
|
||||||
|
prolog_size += GET_MODE_SIZE (mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Copy last SIZE bytes. */
|
||||||
|
destmem = offset_address (destmem, *count, 1);
|
||||||
|
destmem = offset_address (destmem,
|
||||||
|
GEN_INT (-size - prolog_size),
|
||||||
|
1);
|
||||||
|
if (issetmem)
|
||||||
|
emit_move_insn (destmem, mode_value);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
srcmem = offset_address (srcmem, *count, 1);
|
||||||
|
srcmem = offset_address (srcmem,
|
||||||
|
GEN_INT (-size - prolog_size),
|
||||||
|
1);
|
||||||
|
emit_move_insn (destmem, srcmem);
|
||||||
|
}
|
||||||
|
for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
|
||||||
|
{
|
||||||
|
destmem = offset_address (destmem, modesize, 1);
|
||||||
|
if (issetmem)
|
||||||
|
emit_move_insn (destmem, mode_value);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
srcmem = offset_address (srcmem, modesize, 1);
|
||||||
|
emit_move_insn (destmem, srcmem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Align destination. */
|
||||||
|
if (desired_align > 1 && desired_align > align)
|
||||||
|
{
|
||||||
|
rtx saveddest = *destptr;
|
||||||
|
|
||||||
|
gcc_assert (desired_align <= size);
|
||||||
|
/* Align destptr up, place it to new register. */
|
||||||
|
*destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
|
||||||
|
GEN_INT (prolog_size),
|
||||||
|
NULL_RTX, 1, OPTAB_DIRECT);
|
||||||
|
*destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
|
||||||
|
GEN_INT (-desired_align),
|
||||||
|
*destptr, 1, OPTAB_DIRECT);
|
||||||
|
/* See how many bytes we skipped. */
|
||||||
|
saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
|
||||||
|
*destptr,
|
||||||
|
saveddest, 1, OPTAB_DIRECT);
|
||||||
|
/* Adjust srcptr and count. */
|
||||||
|
if (!issetmem)
|
||||||
|
*srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
|
||||||
|
*srcptr, 1, OPTAB_DIRECT);
|
||||||
|
*count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
|
||||||
|
saveddest, *count, 1, OPTAB_DIRECT);
|
||||||
|
/* We copied at most size + prolog_size. */
|
||||||
|
if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
|
||||||
|
*min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
|
||||||
|
else
|
||||||
|
*min_size = 0;
|
||||||
|
|
||||||
|
/* Our loops always round down the bock size, but for dispatch to library
|
||||||
|
we need precise value. */
|
||||||
|
if (dynamic_check)
|
||||||
|
*count = expand_simple_binop (GET_MODE (*count), AND, *count,
|
||||||
|
GEN_INT (-size), *count, 1, OPTAB_DIRECT);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gcc_assert (prolog_size == 0);
|
||||||
|
/* Decrease count, so we won't end up copying last word twice. */
|
||||||
|
if (!CONST_INT_P (*count))
|
||||||
|
*count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
|
||||||
|
constm1_rtx, *count, 1, OPTAB_DIRECT);
|
||||||
|
else
|
||||||
|
*count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
|
||||||
|
if (*min_size)
|
||||||
|
*min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* This function is like the previous one, except here we know how many bytes
|
/* This function is like the previous one, except here we know how many bytes
|
||||||
need to be copied. That allows us to update alignment not only of DST, which
|
need to be copied. That allows us to update alignment not only of DST, which
|
||||||
is returned, but also of SRC, which is passed as a pointer for that
|
is returned, but also of SRC, which is passed as a pointer for that
|
||||||
|
@ -22805,62 +23114,99 @@ expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
|
/* Return true if ALG can be used in current context.
|
||||||
static enum stringop_alg
|
Assume we expand memset if MEMSET is true. */
|
||||||
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
|
static bool
|
||||||
int *dynamic_check, bool *noalign)
|
alg_usable_p (enum stringop_alg alg, bool memset)
|
||||||
{
|
{
|
||||||
const struct stringop_algs * algs;
|
if (alg == no_stringop)
|
||||||
bool optimize_for_speed;
|
return false;
|
||||||
|
if (alg == vector_loop)
|
||||||
|
return TARGET_SSE || TARGET_AVX;
|
||||||
/* Algorithms using the rep prefix want at least edi and ecx;
|
/* Algorithms using the rep prefix want at least edi and ecx;
|
||||||
additionally, memset wants eax and memcpy wants esi. Don't
|
additionally, memset wants eax and memcpy wants esi. Don't
|
||||||
consider such algorithms if the user has appropriated those
|
consider such algorithms if the user has appropriated those
|
||||||
registers for their own purposes. */
|
registers for their own purposes. */
|
||||||
bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
|
if (alg == rep_prefix_1_byte
|
||||||
|| (memset
|
|| alg == rep_prefix_4_byte
|
||||||
? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
|
|| alg == rep_prefix_8_byte)
|
||||||
*noalign = false;
|
return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
|
||||||
|
|| (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
#define ALG_USABLE_P(alg) (rep_prefix_usable \
|
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
|
||||||
|| (alg != rep_prefix_1_byte \
|
static enum stringop_alg
|
||||||
&& alg != rep_prefix_4_byte \
|
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
|
||||||
&& alg != rep_prefix_8_byte))
|
unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
|
||||||
|
bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
|
||||||
|
{
|
||||||
|
const struct stringop_algs * algs;
|
||||||
|
bool optimize_for_speed;
|
||||||
|
int max = -1;
|
||||||
const struct processor_costs *cost;
|
const struct processor_costs *cost;
|
||||||
|
int i;
|
||||||
|
bool any_alg_usable_p = false;
|
||||||
|
|
||||||
|
*noalign = false;
|
||||||
|
*dynamic_check = -1;
|
||||||
|
|
||||||
/* Even if the string operation call is cold, we still might spend a lot
|
/* Even if the string operation call is cold, we still might spend a lot
|
||||||
of time processing large blocks. */
|
of time processing large blocks. */
|
||||||
if (optimize_function_for_size_p (cfun)
|
if (optimize_function_for_size_p (cfun)
|
||||||
|| (optimize_insn_for_size_p ()
|
|| (optimize_insn_for_size_p ()
|
||||||
&& expected_size != -1 && expected_size < 256))
|
&& (max_size < 256
|
||||||
|
|| (expected_size != -1 && expected_size < 256))))
|
||||||
optimize_for_speed = false;
|
optimize_for_speed = false;
|
||||||
else
|
else
|
||||||
optimize_for_speed = true;
|
optimize_for_speed = true;
|
||||||
|
|
||||||
cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
|
cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
|
||||||
|
|
||||||
*dynamic_check = -1;
|
|
||||||
if (memset)
|
if (memset)
|
||||||
algs = &cost->memset[TARGET_64BIT != 0];
|
algs = &cost->memset[TARGET_64BIT != 0];
|
||||||
else
|
else
|
||||||
algs = &cost->memcpy[TARGET_64BIT != 0];
|
algs = &cost->memcpy[TARGET_64BIT != 0];
|
||||||
if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
|
|
||||||
|
/* See maximal size for user defined algorithm. */
|
||||||
|
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
|
||||||
|
{
|
||||||
|
enum stringop_alg candidate = algs->size[i].alg;
|
||||||
|
bool usable = alg_usable_p (candidate, memset);
|
||||||
|
any_alg_usable_p |= usable;
|
||||||
|
|
||||||
|
if (candidate != libcall && candidate && usable)
|
||||||
|
max = algs->size[i].max;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If expected size is not known but max size is small enough
|
||||||
|
so inline version is a win, set expected size into
|
||||||
|
the range. */
|
||||||
|
if (max > 1 && (unsigned HOST_WIDE_INT)max >= max_size && expected_size == -1)
|
||||||
|
expected_size = min_size / 2 + max_size / 2;
|
||||||
|
|
||||||
|
/* If user specified the algorithm, honnor it if possible. */
|
||||||
|
if (ix86_stringop_alg != no_stringop
|
||||||
|
&& alg_usable_p (ix86_stringop_alg, memset))
|
||||||
return ix86_stringop_alg;
|
return ix86_stringop_alg;
|
||||||
/* rep; movq or rep; movl is the smallest variant. */
|
/* rep; movq or rep; movl is the smallest variant. */
|
||||||
else if (!optimize_for_speed)
|
else if (!optimize_for_speed)
|
||||||
{
|
{
|
||||||
if (!count || (count & 3))
|
*noalign = true;
|
||||||
return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
|
if (!count || (count & 3) || (memset && !zero_memset))
|
||||||
|
return alg_usable_p (rep_prefix_1_byte, memset)
|
||||||
|
? rep_prefix_1_byte : loop_1_byte;
|
||||||
else
|
else
|
||||||
return rep_prefix_usable ? rep_prefix_4_byte : loop;
|
return alg_usable_p (rep_prefix_4_byte, memset)
|
||||||
|
? rep_prefix_4_byte : loop;
|
||||||
}
|
}
|
||||||
/* Very tiny blocks are best handled via the loop, REP is expensive to setup.
|
/* Very tiny blocks are best handled via the loop, REP is expensive to
|
||||||
*/
|
setup. */
|
||||||
else if (expected_size != -1 && expected_size < 4)
|
else if (expected_size != -1 && expected_size < 4)
|
||||||
return loop_1_byte;
|
return loop_1_byte;
|
||||||
else if (expected_size != -1)
|
else if (expected_size != -1)
|
||||||
{
|
{
|
||||||
unsigned int i;
|
|
||||||
enum stringop_alg alg = libcall;
|
enum stringop_alg alg = libcall;
|
||||||
|
bool alg_noalign = false;
|
||||||
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
|
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
|
||||||
{
|
{
|
||||||
/* We get here if the algorithms that were not libcall-based
|
/* We get here if the algorithms that were not libcall-based
|
||||||
|
@ -22873,8 +23219,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
|
||||||
{
|
{
|
||||||
enum stringop_alg candidate = algs->size[i].alg;
|
enum stringop_alg candidate = algs->size[i].alg;
|
||||||
|
|
||||||
if (candidate != libcall && ALG_USABLE_P (candidate))
|
if (candidate != libcall && alg_usable_p (candidate, memset))
|
||||||
alg = candidate;
|
{
|
||||||
|
alg = candidate;
|
||||||
|
alg_noalign = algs->size[i].noalign;
|
||||||
|
}
|
||||||
/* Honor TARGET_INLINE_ALL_STRINGOPS by picking
|
/* Honor TARGET_INLINE_ALL_STRINGOPS by picking
|
||||||
last non-libcall inline algorithm. */
|
last non-libcall inline algorithm. */
|
||||||
if (TARGET_INLINE_ALL_STRINGOPS)
|
if (TARGET_INLINE_ALL_STRINGOPS)
|
||||||
|
@ -22883,17 +23232,19 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
|
||||||
but we are still forced to inline, run the heuristic below
|
but we are still forced to inline, run the heuristic below
|
||||||
that will pick code for medium sized blocks. */
|
that will pick code for medium sized blocks. */
|
||||||
if (alg != libcall)
|
if (alg != libcall)
|
||||||
return alg;
|
{
|
||||||
|
*noalign = alg_noalign;
|
||||||
|
return alg;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (ALG_USABLE_P (candidate))
|
else if (alg_usable_p (candidate, memset))
|
||||||
{
|
{
|
||||||
*noalign = algs->size[i].noalign;
|
*noalign = algs->size[i].noalign;
|
||||||
return candidate;
|
return candidate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
|
|
||||||
}
|
}
|
||||||
/* When asked to inline the call anyway, try to pick meaningful choice.
|
/* When asked to inline the call anyway, try to pick meaningful choice.
|
||||||
We look for maximal size of block that is faster to copy by hand and
|
We look for maximal size of block that is faster to copy by hand and
|
||||||
|
@ -22903,22 +23254,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
|
||||||
If this turns out to be bad, we might simply specify the preferred
|
If this turns out to be bad, we might simply specify the preferred
|
||||||
choice in ix86_costs. */
|
choice in ix86_costs. */
|
||||||
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
|
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
|
||||||
&& (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
|
&& (algs->unknown_size == libcall
|
||||||
|
|| !alg_usable_p (algs->unknown_size, memset)))
|
||||||
{
|
{
|
||||||
int max = -1;
|
|
||||||
enum stringop_alg alg;
|
enum stringop_alg alg;
|
||||||
int i;
|
|
||||||
bool any_alg_usable_p = true;
|
|
||||||
|
|
||||||
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
|
|
||||||
{
|
|
||||||
enum stringop_alg candidate = algs->size[i].alg;
|
|
||||||
any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
|
|
||||||
|
|
||||||
if (candidate != libcall && candidate
|
|
||||||
&& ALG_USABLE_P (candidate))
|
|
||||||
max = algs->size[i].max;
|
|
||||||
}
|
|
||||||
/* If there aren't any usable algorithms, then recursing on
|
/* If there aren't any usable algorithms, then recursing on
|
||||||
smaller sizes isn't going to find anything. Just return the
|
smaller sizes isn't going to find anything. Just return the
|
||||||
simple byte-at-a-time copy loop. */
|
simple byte-at-a-time copy loop. */
|
||||||
|
@ -22931,15 +23271,16 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
|
||||||
}
|
}
|
||||||
if (max == -1)
|
if (max == -1)
|
||||||
max = 4096;
|
max = 4096;
|
||||||
alg = decide_alg (count, max / 2, memset, dynamic_check, noalign);
|
alg = decide_alg (count, max / 2, min_size, max_size, memset,
|
||||||
|
zero_memset, dynamic_check, noalign);
|
||||||
gcc_assert (*dynamic_check == -1);
|
gcc_assert (*dynamic_check == -1);
|
||||||
gcc_assert (alg != libcall);
|
gcc_assert (alg != libcall);
|
||||||
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
|
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
|
||||||
*dynamic_check = max;
|
*dynamic_check = max;
|
||||||
return alg;
|
return alg;
|
||||||
}
|
}
|
||||||
return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
|
return (alg_usable_p (algs->unknown_size, memset)
|
||||||
#undef ALG_USABLE_P
|
? algs->unknown_size : libcall);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Decide on alignment. We know that the operand is already aligned to ALIGN
|
/* Decide on alignment. We know that the operand is already aligned to ALIGN
|
||||||
|
@ -23073,27 +23414,46 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
|
||||||
|
|
||||||
/* Expand string move (memcpy) ot store (memset) operation. Use i386 string
|
/* Expand string move (memcpy) ot store (memset) operation. Use i386 string
|
||||||
operations when profitable. The code depends upon architecture, block size
|
operations when profitable. The code depends upon architecture, block size
|
||||||
and alignment, but always has the same overall structure:
|
and alignment, but always has one of the following overall structures:
|
||||||
|
|
||||||
1) Prologue guard: Conditional that jumps up to epilogues for small
|
Aligned move sequence:
|
||||||
blocks that can be handled by epilogue alone. This is faster
|
|
||||||
but also needed for correctness, since prologue assume the block
|
|
||||||
is larger than the desired alignment.
|
|
||||||
|
|
||||||
Optional dynamic check for size and libcall for large
|
1) Prologue guard: Conditional that jumps up to epilogues for small
|
||||||
blocks is emitted here too, with -minline-stringops-dynamically.
|
blocks that can be handled by epilogue alone. This is faster
|
||||||
|
but also needed for correctness, since prologue assume the block
|
||||||
|
is larger than the desired alignment.
|
||||||
|
|
||||||
2) Prologue: copy/set first few bytes in order to get destination
|
Optional dynamic check for size and libcall for large
|
||||||
aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
|
blocks is emitted here too, with -minline-stringops-dynamically.
|
||||||
than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
|
|
||||||
copied/set. We emit either a jump tree on power of two sized
|
|
||||||
blocks, or a byte loop.
|
|
||||||
|
|
||||||
3) Main body: the copying/storing loop itself, copying/storing in SIZE_NEEDED
|
2) Prologue: copy first few bytes in order to get destination
|
||||||
chunks with specified algorithm.
|
aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
|
||||||
|
than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
|
||||||
|
copied. We emit either a jump tree on power of two sized
|
||||||
|
blocks, or a byte loop.
|
||||||
|
|
||||||
4) Epilogue: code copying/storing tail of the block that is too small to be
|
3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
|
||||||
handled by main body (or up to size guarded by prologue guard). */
|
with specified algorithm.
|
||||||
|
|
||||||
|
4) Epilogue: code copying tail of the block that is too small to be
|
||||||
|
handled by main body (or up to size guarded by prologue guard).
|
||||||
|
|
||||||
|
Misaligned move sequence
|
||||||
|
|
||||||
|
1) missaligned move prologue/epilogue containing:
|
||||||
|
a) Prologue handling small memory blocks and jumping to done_label
|
||||||
|
(skipped if blocks are known to be large enough)
|
||||||
|
b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
|
||||||
|
needed by single possibly misaligned move
|
||||||
|
(skipped if alignment is not needed)
|
||||||
|
c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
|
||||||
|
|
||||||
|
2) Zero size guard dispatching to done_label, if needed
|
||||||
|
|
||||||
|
3) dispatch to library call, if needed,
|
||||||
|
|
||||||
|
3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
|
||||||
|
with specified algorithm. */
|
||||||
static bool
|
static bool
|
||||||
ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
rtx align_exp, rtx expected_align_exp,
|
rtx align_exp, rtx expected_align_exp,
|
||||||
|
@ -23118,6 +23478,10 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
bool noalign;
|
bool noalign;
|
||||||
enum machine_mode move_mode = VOIDmode;
|
enum machine_mode move_mode = VOIDmode;
|
||||||
int unroll_factor = 1;
|
int unroll_factor = 1;
|
||||||
|
/* TODO: Once vlaue ranges are available, fill in proper data. */
|
||||||
|
unsigned HOST_WIDE_INT min_size = 0;
|
||||||
|
unsigned HOST_WIDE_INT max_size = -1;
|
||||||
|
bool misaligned_prologue_used = false;
|
||||||
|
|
||||||
if (CONST_INT_P (align_exp))
|
if (CONST_INT_P (align_exp))
|
||||||
align = INTVAL (align_exp);
|
align = INTVAL (align_exp);
|
||||||
|
@ -23132,7 +23496,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
align = MEM_ALIGN (dst) / BITS_PER_UNIT;
|
align = MEM_ALIGN (dst) / BITS_PER_UNIT;
|
||||||
|
|
||||||
if (CONST_INT_P (count_exp))
|
if (CONST_INT_P (count_exp))
|
||||||
count = expected_size = INTVAL (count_exp);
|
min_size = max_size = count = expected_size = INTVAL (count_exp);
|
||||||
if (CONST_INT_P (expected_size_exp) && count == 0)
|
if (CONST_INT_P (expected_size_exp) && count == 0)
|
||||||
expected_size = INTVAL (expected_size_exp);
|
expected_size = INTVAL (expected_size_exp);
|
||||||
|
|
||||||
|
@ -23142,7 +23506,9 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
|
|
||||||
/* Step 0: Decide on preferred algorithm, desired alignment and
|
/* Step 0: Decide on preferred algorithm, desired alignment and
|
||||||
size of chunks to be copied by main loop. */
|
size of chunks to be copied by main loop. */
|
||||||
alg = decide_alg (count, expected_size, issetmem, &dynamic_check, &noalign);
|
alg = decide_alg (count, expected_size, min_size, max_size, issetmem,
|
||||||
|
issetmem && val_exp == const0_rtx,
|
||||||
|
&dynamic_check, &noalign);
|
||||||
if (alg == libcall)
|
if (alg == libcall)
|
||||||
return false;
|
return false;
|
||||||
gcc_assert (alg != no_stringop);
|
gcc_assert (alg != no_stringop);
|
||||||
|
@ -23234,10 +23600,20 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
}
|
}
|
||||||
gcc_assert (desired_align >= 1 && align >= 1);
|
gcc_assert (desired_align >= 1 && align >= 1);
|
||||||
|
|
||||||
|
/* Misaligned move sequences handles both prologues and epilogues at once.
|
||||||
|
Default code generation results in smaller code for large alignments and
|
||||||
|
also avoids redundant job when sizes are known precisely. */
|
||||||
|
misaligned_prologue_used = (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES
|
||||||
|
&& MAX (desired_align, epilogue_size_needed) <= 32
|
||||||
|
&& ((desired_align > align && !align_bytes)
|
||||||
|
|| (!count && epilogue_size_needed > 1)));
|
||||||
|
|
||||||
/* Do the cheap promotion to allow better CSE across the
|
/* Do the cheap promotion to allow better CSE across the
|
||||||
main loop and epilogue (ie one load of the big constant in the
|
main loop and epilogue (ie one load of the big constant in the
|
||||||
front of all code. */
|
front of all code.
|
||||||
if (issetmem && CONST_INT_P (val_exp))
|
For now the misaligned move sequences do not have fast path
|
||||||
|
without broadcasting. */
|
||||||
|
if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
|
||||||
{
|
{
|
||||||
if (alg == vector_loop)
|
if (alg == vector_loop)
|
||||||
{
|
{
|
||||||
|
@ -23253,8 +23629,45 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
desired_align, align);
|
desired_align, align);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* Misaligned move sequences handles both prologues and epilogues at once.
|
||||||
|
Default code generation results in smaller code for large alignments and
|
||||||
|
also avoids redundant job when sizes are known precisely. */
|
||||||
|
if (misaligned_prologue_used)
|
||||||
|
{
|
||||||
|
/* Misaligned move prologue handled small blocks by itself. */
|
||||||
|
misaligned_prologue_used = true;
|
||||||
|
expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
|
||||||
|
(dst, src, &destreg, &srcreg,
|
||||||
|
move_mode, promoted_val, vec_promoted_val,
|
||||||
|
&count_exp,
|
||||||
|
&jump_around_label,
|
||||||
|
desired_align < align
|
||||||
|
? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
|
||||||
|
desired_align, align, &min_size, dynamic_check, issetmem);
|
||||||
|
if (!issetmem)
|
||||||
|
src = change_address (src, BLKmode, srcreg);
|
||||||
|
dst = change_address (dst, BLKmode, destreg);
|
||||||
|
set_mem_align (dst, desired_align * BITS_PER_UNIT);
|
||||||
|
epilogue_size_needed = 0;
|
||||||
|
if (need_zero_guard && !min_size)
|
||||||
|
{
|
||||||
|
/* It is possible that we copied enough so the main loop will not
|
||||||
|
execute. */
|
||||||
|
gcc_assert (size_needed > 1);
|
||||||
|
if (jump_around_label == NULL_RTX)
|
||||||
|
jump_around_label = gen_label_rtx ();
|
||||||
|
emit_cmp_and_jump_insns (count_exp,
|
||||||
|
GEN_INT (size_needed),
|
||||||
|
LTU, 0, counter_mode (count_exp), 1, jump_around_label);
|
||||||
|
if (expected_size == -1
|
||||||
|
|| expected_size < (desired_align - align) / 2 + size_needed)
|
||||||
|
predict_jump (REG_BR_PROB_BASE * 20 / 100);
|
||||||
|
else
|
||||||
|
predict_jump (REG_BR_PROB_BASE * 60 / 100);
|
||||||
|
}
|
||||||
|
}
|
||||||
/* Ensure that alignment prologue won't copy past end of block. */
|
/* Ensure that alignment prologue won't copy past end of block. */
|
||||||
if (size_needed > 1 || (desired_align > 1 && desired_align > align))
|
else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
|
||||||
{
|
{
|
||||||
epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
|
epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
|
||||||
/* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
|
/* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
|
||||||
|
@ -23279,8 +23692,9 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
goto epilogue;
|
goto epilogue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (min_size < (unsigned HOST_WIDE_INT)epilogue_size_needed)
|
||||||
{
|
{
|
||||||
|
gcc_assert (max_size >= (unsigned HOST_WIDE_INT)epilogue_size_needed);
|
||||||
label = gen_label_rtx ();
|
label = gen_label_rtx ();
|
||||||
emit_cmp_and_jump_insns (count_exp,
|
emit_cmp_and_jump_insns (count_exp,
|
||||||
GEN_INT (epilogue_size_needed),
|
GEN_INT (epilogue_size_needed),
|
||||||
|
@ -23327,11 +23741,11 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
|
promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
|
||||||
desired_align, align);
|
desired_align, align);
|
||||||
|
|
||||||
if (desired_align > align)
|
if (desired_align > align && !misaligned_prologue_used)
|
||||||
{
|
{
|
||||||
if (align_bytes == 0)
|
if (align_bytes == 0)
|
||||||
{
|
{
|
||||||
/* Except for the first move in epilogue, we no longer know
|
/* Except for the first move in prologue, we no longer know
|
||||||
constant offset in aliasing info. It don't seems to worth
|
constant offset in aliasing info. It don't seems to worth
|
||||||
the pain to maintain it for the first move, so throw away
|
the pain to maintain it for the first move, so throw away
|
||||||
the info early. */
|
the info early. */
|
||||||
|
@ -23342,6 +23756,11 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
promoted_val, vec_promoted_val,
|
promoted_val, vec_promoted_val,
|
||||||
count_exp, align, desired_align,
|
count_exp, align, desired_align,
|
||||||
issetmem);
|
issetmem);
|
||||||
|
/* At most desired_align - align bytes are copied. */
|
||||||
|
if (min_size < (unsigned)(desired_align - align))
|
||||||
|
min_size = 0;
|
||||||
|
else
|
||||||
|
min_size -= desired_align - align;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -23358,8 +23777,11 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
count_exp = plus_constant (counter_mode (count_exp),
|
count_exp = plus_constant (counter_mode (count_exp),
|
||||||
count_exp, -align_bytes);
|
count_exp, -align_bytes);
|
||||||
count -= align_bytes;
|
count -= align_bytes;
|
||||||
|
min_size -= align_bytes;
|
||||||
|
max_size -= align_bytes;
|
||||||
}
|
}
|
||||||
if (need_zero_guard
|
if (need_zero_guard
|
||||||
|
&& !min_size
|
||||||
&& (count < (unsigned HOST_WIDE_INT) size_needed
|
&& (count < (unsigned HOST_WIDE_INT) size_needed
|
||||||
|| (align_bytes == 0
|
|| (align_bytes == 0
|
||||||
&& count < ((unsigned HOST_WIDE_INT) size_needed
|
&& count < ((unsigned HOST_WIDE_INT) size_needed
|
||||||
|
@ -23389,7 +23811,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
|
||||||
if (issetmem)
|
if (issetmem)
|
||||||
promoted_val = val_exp;
|
promoted_val = val_exp;
|
||||||
}
|
}
|
||||||
else if (label == NULL_RTX)
|
else if (label == NULL_RTX && !misaligned_prologue_used)
|
||||||
epilogue_size_needed = size_needed;
|
epilogue_size_needed = size_needed;
|
||||||
|
|
||||||
/* Step 3: Main loop. */
|
/* Step 3: Main loop. */
|
||||||
|
|
|
@ -350,6 +350,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
||||||
#define TARGET_PROMOTE_QImode ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
|
#define TARGET_PROMOTE_QImode ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
|
||||||
#define TARGET_FAST_PREFIX ix86_tune_features[X86_TUNE_FAST_PREFIX]
|
#define TARGET_FAST_PREFIX ix86_tune_features[X86_TUNE_FAST_PREFIX]
|
||||||
#define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
|
#define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
|
||||||
|
#define TARGET_MISALIGNED_MOVE_STRING_PROLOGUES_EPILOGUES \
|
||||||
|
ix86_tune_features[TARGET_MISALIGNED_MOVE_STRING_PROLOGUES]
|
||||||
#define TARGET_QIMODE_MATH ix86_tune_features[X86_TUNE_QIMODE_MATH]
|
#define TARGET_QIMODE_MATH ix86_tune_features[X86_TUNE_QIMODE_MATH]
|
||||||
#define TARGET_HIMODE_MATH ix86_tune_features[X86_TUNE_HIMODE_MATH]
|
#define TARGET_HIMODE_MATH ix86_tune_features[X86_TUNE_HIMODE_MATH]
|
||||||
#define TARGET_PROMOTE_QI_REGS ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
|
#define TARGET_PROMOTE_QI_REGS ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
|
||||||
|
|
|
@ -239,6 +239,15 @@ DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
|
||||||
as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
|
as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
|
||||||
DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA)
|
DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA)
|
||||||
|
|
||||||
|
/* TARGET_MISALIGNED_MOVE_STRING_PROLOGUES: Enable generation of compace
|
||||||
|
prologues and epilogues by issuing a misaligned moves. This require
|
||||||
|
target to handle misaligned moves and partial memory stalls resonably
|
||||||
|
well.
|
||||||
|
FIXME: This actualy may be a win on more targets than listed here. */
|
||||||
|
DEF_TUNE (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES,
|
||||||
|
"misaligned_move_string_prologues",
|
||||||
|
m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC)
|
||||||
|
|
||||||
/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
|
/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
|
||||||
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
|
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
|
||||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
|
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
|
||||||
|
|
Loading…
Reference in New Issue