x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES

1. Add TARGET_AVX256_MOVE_BY_PIECES to perform move by-pieces operations
with 256-bit AVX instructions.
2. Add TARGET_AVX256_STORE_BY_PIECES to perform move and store by-pieces
operations with 256-bit AVX instructions.

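They are enabled by default only when tuning for Intel Alder Lake and
Intel processors with AVX512; other tunings can opt in with
-mtune-ctrl=avx256_move_by_pieces or -mtune-ctrl=avx256_store_by_pieces.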

gcc/

	PR target/101935
	* config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New.
	(TARGET_AVX256_STORE_BY_PIECES): Likewise.
	(MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and
	TARGET_AVX256_STORE_BY_PIECES instead of
	TARGET_AVX256_SPLIT_UNALIGNED_LOAD and
	TARGET_AVX256_SPLIT_UNALIGNED_STORE.
	(STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead
	of TARGET_AVX256_SPLIT_UNALIGNED_STORE.
	* config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New.
	(X86_TUNE_AVX256_STORE_BY_PIECES): Likewise.

gcc/testsuite/

	PR target/101935
	* g++.target/i386/pr80566-1.C: Add
	-mtune-ctrl=avx256_store_by_pieces.
	* gcc.target/i386/pr100865-4a.c: Likewise.
	* gcc.target/i386/pr100865-10a.c: Likewise.
	* gcc.target/i386/pr90773-20.c: Likewise.
	* gcc.target/i386/pr90773-21.c: Likewise.
	* gcc.target/i386/pr90773-22.c: Likewise.
	* gcc.target/i386/pr90773-23.c: Likewise.
	* g++.target/i386/pr80566-2.C: Add
	-mtune-ctrl=avx256_move_by_pieces.
	* gcc.target/i386/eh_return-1.c: Likewise.
	* gcc.target/i386/pr90773-26.c: Likewise.
	* gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell
	with -mtune-ctrl=avx256_move_by_pieces.
	* gcc.target/i386/pieces-memcpy-15.c: Likewise.
	* gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell
	with -mtune-ctrl=avx256_store_by_pieces.
	* gcc.target/i386/pieces-memset-5.c: Likewise.
	* gcc.target/i386/pieces-memset-11.c: Likewise.
	* gcc.target/i386/pieces-memset-14.c: Likewise.
	* gcc.target/i386/pieces-memset-20.c: Likewise.
	* gcc.target/i386/pieces-memset-23.c: Likewise.
	* gcc.target/i386/pieces-memset-29.c: Likewise.
	* gcc.target/i386/pieces-memset-30.c: Likewise.
	* gcc.target/i386/pieces-memset-33.c: Likewise.
	* gcc.target/i386/pieces-memset-34.c: Likewise.
	* gcc.target/i386/pieces-memset-44.c: Likewise.
	* gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic
	with -mtune-ctrl=avx256_store_by_pieces.
This commit is contained in:
H.J. Lu 2021-08-26 05:31:50 -07:00 committed by liuhongt
parent c8e4cb8adf
commit 5b01bfeb87
26 changed files with 42 additions and 27 deletions

View File

@ -403,6 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
#define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
#define TARGET_AVX256_MOVE_BY_PIECES \
ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES]
#define TARGET_AVX256_STORE_BY_PIECES \
ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES]
#define TARGET_AVX256_SPLIT_REGS \
ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
#define TARGET_GENERAL_REGS_SSE_SPILL \
@ -1793,8 +1797,8 @@ typedef struct ix86_args {
? 64 \
: ((TARGET_AVX \
&& !TARGET_PREFER_AVX128 \
- && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
- && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+ && (TARGET_AVX256_MOVE_BY_PIECES \
+ || TARGET_AVX256_STORE_BY_PIECES)) \
? 32 \
: ((TARGET_SSE2 \
&& TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
@ -1811,7 +1815,7 @@ typedef struct ix86_args {
? 64 \
: ((TARGET_AVX \
&& !TARGET_PREFER_AVX128 \
- && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+ && TARGET_AVX256_STORE_BY_PIECES) \
? 32 \
: ((TARGET_SSE2 \
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \

View File

@ -484,6 +484,17 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
instructions in the auto-vectorizer. */
DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
/* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit
AVX instructions. */
DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces",
m_ALDERLAKE | m_CORE_AVX512)
/* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit
AVX instructions. */
DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces",
m_ALDERLAKE | m_CORE_AVX512)
/*****************************************************************************/
/*****************************************************************************/
/* Historical relics: tuning flags that help specific old CPU designs */
/*****************************************************************************/

View File

@ -1,5 +1,5 @@
// { dg-do compile }
// { dg-options "-O2 -march=haswell" }
// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_store_by_pieces" }
#include <cstring>

View File

@ -1,5 +1,5 @@
// { dg-do compile }
// { dg-options "-O2 -march=haswell" }
// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_move_by_pieces" }
#include <cstring>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=haswell -mno-avx512f" } */
/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
struct _Unwind_Context
{

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
extern char *dst, *src;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
extern char *dst, *src;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
void
foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile { target int128 } } */
/* { dg-options "-O3 -march=skylake" } */
/* { dg-options "-O3 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern __int128 array[16];

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char array[64];

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake" } */
/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_move_by_pieces" } */
struct S
{