rs6000.h (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED): Add macro to say we can efficiently handle overlapping unaligned loads.

2016-10-09  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	* config/rs6000/rs6000.h (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED): 
	Add macro to say we can efficiently handle overlapping unaligned
	loads.
	* config/rs6000/rs6000.c (expand_block_compare): Avoid generating
	poor code for processors older than p8.

From-SVN: r240908
This commit is contained in:
Aaron Sawdey 2016-10-10 04:42:08 +00:00 committed by Aaron Sawdey
parent 4815e7d405
commit 87b44b83c0
3 changed files with 33 additions and 4 deletions

View File

@ -1,3 +1,11 @@
2016-10-09 Aaron Sawdey <acsawdey@linux.vnet.ibm.com>
* config/rs6000/rs6000.h (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED):
Add macro to say we can efficiently handle overlapping unaligned
loads.
* config/rs6000/rs6000.c (expand_block_compare): Avoid generating
poor code for processors older than p8.
2016-10-09 Eric Botcazou <ebotcazou@adacore.com>
* gen-pass-instances.awk: Remove GNUism.

View File

@ -18771,6 +18771,14 @@ expand_block_compare (rtx operands[])
if (bytes <= 0) if (bytes <= 0)
return true; return true;
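/* On p7 and older, the code generated here is not faster than glibc
memcmp when the alignment is small and the length is not short
(byte-aligned and longer than 16 bytes, or 2-byte-aligned and
longer than 32 bytes), so bail out by returning false in those
cases. */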
if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
&& ((base_align == 1 && bytes > 16)
|| (base_align == 2 && bytes > 32)))
return false;
rtx tmp_reg_src1 = gen_reg_rtx (word_mode); rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
rtx tmp_reg_src2 = gen_reg_rtx (word_mode); rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
@ -18820,13 +18828,18 @@ expand_block_compare (rtx operands[])
while (bytes > 0) while (bytes > 0)
{ {
int align = compute_current_alignment (base_align, offset); int align = compute_current_alignment (base_align, offset);
load_mode = select_block_compare_mode(offset, bytes, align, word_mode_ok); if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
load_mode = select_block_compare_mode (offset, bytes, align,
word_mode_ok);
else
load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
load_mode_size = GET_MODE_SIZE (load_mode); load_mode_size = GET_MODE_SIZE (load_mode);
if (bytes >= load_mode_size) if (bytes >= load_mode_size)
cmp_bytes = load_mode_size; cmp_bytes = load_mode_size;
else else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
{ {
/* Move this load back so it doesn't go past the end. */ /* Move this load back so it doesn't go past the end.
P8/P9 can do this efficiently. */
int extra_bytes = load_mode_size - bytes; int extra_bytes = load_mode_size - bytes;
cmp_bytes = bytes; cmp_bytes = bytes;
if (extra_bytes < offset) if (extra_bytes < offset)
@ -18836,7 +18849,12 @@ expand_block_compare (rtx operands[])
bytes = cmp_bytes; bytes = cmp_bytes;
} }
} }
else
/* P7 and earlier can't do the overlapping load trick fast,
so this forces a non-overlapping load and a shift to get
rid of the extra bytes. */
cmp_bytes = bytes;
src1 = adjust_address (orig_src1, load_mode, offset); src1 = adjust_address (orig_src1, load_mode, offset);
src2 = adjust_address (orig_src2, load_mode, offset); src2 = adjust_address (orig_src2, load_mode, offset);

View File

@ -607,6 +607,9 @@ extern int rs6000_vector_align[];
&& TARGET_POWERPC64) && TARGET_POWERPC64)
#define TARGET_VEXTRACTUB (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \ #define TARGET_VEXTRACTUB (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
&& TARGET_UPPER_REGS_DI && TARGET_POWERPC64) && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
/* Nonzero when the target can efficiently perform overlapping unaligned
loads. This wants to be set for p8 and newer. On p7, overlapping
unaligned loads are slow. expand_block_compare in rs6000.c tests this
to decide whether a final partial load may be moved back so that it
overlaps bytes already compared, instead of loading and shifting away
the extra bytes. It is currently an alias for
TARGET_EFFICIENT_UNALIGNED_VSX. */
#define TARGET_EFFICIENT_OVERLAPPING_UNALIGNED TARGET_EFFICIENT_UNALIGNED_VSX
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present /* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
in power7, so conditionalize them on p8 features. TImode syncs need quad in power7, so conditionalize them on p8 features. TImode syncs need quad