rs6000.h (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED): Add macro to say we can efficiently handle overlapping unaligned loads.
2016-10-09  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	* config/rs6000/rs6000.h (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED):
	Add macro to say we can efficiently handle overlapping unaligned
	loads.
	* config/rs6000/rs6000.c (expand_block_compare): Avoid generating
	poor code for processors older than p8.

From-SVN: r240908
This commit is contained in:
parent
4815e7d405
commit
87b44b83c0
|
@ -1,3 +1,11 @@
|
||||||
|
2016-10-09 Aaron Sawdey <acsawdey@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
* config/rs6000/rs6000.h (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED):
|
||||||
|
Add macro to say we can efficiently handle overlapping unaligned
|
||||||
|
loads.
|
||||||
|
* config/rs6000/rs6000.c (expand_block_compare): Avoid generating
|
||||||
|
poor code for processors older than p8.
|
||||||
|
|
||||||
2016-10-09 Eric Botcazou <ebotcazou@adacore.com>
|
2016-10-09 Eric Botcazou <ebotcazou@adacore.com>
|
||||||
|
|
||||||
* gen-pass-instances.awk: Remove GNUism.
|
* gen-pass-instances.awk: Remove GNUism.
|
||||||
|
|
|
@ -18771,6 +18771,14 @@ expand_block_compare (rtx operands[])
|
||||||
if (bytes <= 0)
|
if (bytes <= 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
/* The code generated for p7 and older is not faster than glibc
|
||||||
|
memcmp if alignment is small and length is not short, so bail
|
||||||
|
out to avoid those conditions. */
|
||||||
|
if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
|
||||||
|
&& ((base_align == 1 && bytes > 16)
|
||||||
|
|| (base_align == 2 && bytes > 32)))
|
||||||
|
return false;
|
||||||
|
|
||||||
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
||||||
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
||||||
|
|
||||||
|
@ -18820,13 +18828,18 @@ expand_block_compare (rtx operands[])
|
||||||
while (bytes > 0)
|
while (bytes > 0)
|
||||||
{
|
{
|
||||||
int align = compute_current_alignment (base_align, offset);
|
int align = compute_current_alignment (base_align, offset);
|
||||||
load_mode = select_block_compare_mode(offset, bytes, align, word_mode_ok);
|
if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||||
|
load_mode = select_block_compare_mode (offset, bytes, align,
|
||||||
|
word_mode_ok);
|
||||||
|
else
|
||||||
|
load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
|
||||||
load_mode_size = GET_MODE_SIZE (load_mode);
|
load_mode_size = GET_MODE_SIZE (load_mode);
|
||||||
if (bytes >= load_mode_size)
|
if (bytes >= load_mode_size)
|
||||||
cmp_bytes = load_mode_size;
|
cmp_bytes = load_mode_size;
|
||||||
else
|
else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
||||||
{
|
{
|
||||||
/* Move this load back so it doesn't go past the end. */
|
/* Move this load back so it doesn't go past the end.
|
||||||
|
P8/P9 can do this efficiently. */
|
||||||
int extra_bytes = load_mode_size - bytes;
|
int extra_bytes = load_mode_size - bytes;
|
||||||
cmp_bytes = bytes;
|
cmp_bytes = bytes;
|
||||||
if (extra_bytes < offset)
|
if (extra_bytes < offset)
|
||||||
|
@ -18836,6 +18849,11 @@ expand_block_compare (rtx operands[])
|
||||||
bytes = cmp_bytes;
|
bytes = cmp_bytes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
/* P7 and earlier can't do the overlapping load trick fast,
|
||||||
|
so this forces a non-overlapping load and a shift to get
|
||||||
|
rid of the extra bytes. */
|
||||||
|
cmp_bytes = bytes;
|
||||||
|
|
||||||
src1 = adjust_address (orig_src1, load_mode, offset);
|
src1 = adjust_address (orig_src1, load_mode, offset);
|
||||||
src2 = adjust_address (orig_src2, load_mode, offset);
|
src2 = adjust_address (orig_src2, load_mode, offset);
|
||||||
|
|
|
@ -607,6 +607,9 @@ extern int rs6000_vector_align[];
|
||||||
&& TARGET_POWERPC64)
|
&& TARGET_POWERPC64)
|
||||||
#define TARGET_VEXTRACTUB (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
|
#define TARGET_VEXTRACTUB (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
|
||||||
&& TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
|
&& TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
|
||||||
|
/* This wants to be set for p8 and newer. On p7, overlapping unaligned
|
||||||
|
loads are slow. */
|
||||||
|
#define TARGET_EFFICIENT_OVERLAPPING_UNALIGNED TARGET_EFFICIENT_UNALIGNED_VSX
|
||||||
|
|
||||||
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
|
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
|
||||||
in power7, so conditionalize them on p8 features. TImode syncs need quad
|
in power7, so conditionalize them on p8 features. TImode syncs need quad
|
||||||
|
|
Loading…
Reference in New Issue