* config/i386/i386-builtin-types.def
	(VOID_PFLOAT_HI_V8DI_V16SF_INT): New.
	(VOID_PDOUBLE_QI_V16SI_V8DF_INT): Ditto.
	(VOID_PINT_HI_V8DI_V16SI_INT): Ditto.
	(VOID_PLONGLONG_QI_V16SI_V8DI_INT): Ditto.
	* config/i386/i386.c
	(ix86_builtins): Add IX86_BUILTIN_SCATTERALTSIV8DF,
	IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
	IX86_BUILTIN_SCATTERALTDIV16SI.
	(ix86_init_mmx_sse_builtins): Define __builtin_ia32_scatteraltsiv8df,
	__builtin_ia32_scatteraltdiv8sf, __builtin_ia32_scatteraltsiv8di,
	__builtin_ia32_scatteraltdiv8si.
	(ix86_expand_builtin): Handle IX86_BUILTIN_SCATTERALTSIV8DF,
	IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
	IX86_BUILTIN_SCATTERALTDIV16SI.
	(ix86_vectorize_builtin_scatter): New.
	(TARGET_VECTORIZE_BUILTIN_SCATTER): Define as
	ix86_vectorize_builtin_scatter.

Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>
Co-Authored-By: Petr Murzin <petr.murzin@intel.com>

From-SVN: r227482
This commit is contained in:
Andrey Turetskiy 2015-09-04 08:54:14 +00:00 committed by Kirill Yukhin
parent 3bab634221
commit 624dcfd62b
3 changed files with 147 additions and 0 deletions

View File

@ -1,3 +1,26 @@
2015-09-04 Andrey Turetskiy <andrey.turetskiy@intel.com>
Petr Murzin <petr.murzin@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
* config/i386/i386-builtin-types.def
(VOID_PFLOAT_HI_V8DI_V16SF_INT): New.
(VOID_PDOUBLE_QI_V16SI_V8DF_INT): Ditto.
(VOID_PINT_HI_V8DI_V16SI_INT): Ditto.
(VOID_PLONGLONG_QI_V16SI_V8DI_INT): Ditto.
* config/i386/i386.c
(ix86_builtins): Add IX86_BUILTIN_SCATTERALTSIV8DF,
IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
IX86_BUILTIN_SCATTERALTDIV16SI.
(ix86_init_mmx_sse_builtins): Define __builtin_ia32_scatteraltsiv8df,
__builtin_ia32_scatteraltdiv8sf, __builtin_ia32_scatteraltsiv8di,
__builtin_ia32_scatteraltdiv8si.
(ix86_expand_builtin): Handle IX86_BUILTIN_SCATTERALTSIV8DF,
IX86_BUILTIN_SCATTERALTDIV16SF, IX86_BUILTIN_SCATTERALTSIV8DI,
IX86_BUILTIN_SCATTERALTDIV16SI.
(ix86_vectorize_builtin_scatter): New.
(TARGET_VECTORIZE_BUILTIN_SCATTER): Define as
ix86_vectorize_builtin_scatter.
2015-09-04 Andrey Turetskiy <andrey.turetskiy@intel.com> 2015-09-04 Andrey Turetskiy <andrey.turetskiy@intel.com>
Petr Murzin <petr.murzin@intel.com> Petr Murzin <petr.murzin@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com> Kirill Yukhin <kirill.yukhin@intel.com>

View File

@ -1021,6 +1021,10 @@ DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V4SI, INT) DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V4SI, INT)
DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT) DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT) DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V8DI, V16SF, INT)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V16SI, V8DF, INT)
DEF_FUNCTION_TYPE (VOID, PINT, HI, V8DI, V16SI, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V16SI, V8DI, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT) DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4DI, V4DI, INT) DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4DI, V4DI, INT)

View File

@ -30388,6 +30388,10 @@ enum ix86_builtins
IX86_BUILTIN_GATHER3SIV16SI, IX86_BUILTIN_GATHER3SIV16SI,
IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_GATHER3SIV8DF,
IX86_BUILTIN_GATHER3SIV8DI, IX86_BUILTIN_GATHER3SIV8DI,
IX86_BUILTIN_SCATTERALTSIV8DF,
IX86_BUILTIN_SCATTERALTDIV16SF,
IX86_BUILTIN_SCATTERALTSIV8DI,
IX86_BUILTIN_SCATTERALTDIV16SI,
IX86_BUILTIN_SCATTERDIV16SF, IX86_BUILTIN_SCATTERDIV16SF,
IX86_BUILTIN_SCATTERDIV16SI, IX86_BUILTIN_SCATTERDIV16SI,
IX86_BUILTIN_SCATTERDIV8DF, IX86_BUILTIN_SCATTERDIV8DF,
@ -34204,6 +34208,21 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di", def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT, VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
IX86_BUILTIN_SCATTERDIV2DI); IX86_BUILTIN_SCATTERDIV2DI);
def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
IX86_BUILTIN_SCATTERALTSIV8DF);
def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
IX86_BUILTIN_SCATTERALTDIV16SF);
def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
IX86_BUILTIN_SCATTERALTSIV8DI);
def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
IX86_BUILTIN_SCATTERALTDIV16SI);
/* AVX512PF */ /* AVX512PF */
def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd", def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
@ -39860,6 +39879,18 @@ rdseed_step:
case IX86_BUILTIN_GATHERPFDPD: case IX86_BUILTIN_GATHERPFDPD:
icode = CODE_FOR_avx512pf_gatherpfv8sidf; icode = CODE_FOR_avx512pf_gatherpfv8sidf;
goto vec_prefetch_gen; goto vec_prefetch_gen;
case IX86_BUILTIN_SCATTERALTSIV8DF:
icode = CODE_FOR_avx512f_scattersiv8df;
goto scatter_gen;
case IX86_BUILTIN_SCATTERALTDIV16SF:
icode = CODE_FOR_avx512f_scatterdiv16sf;
goto scatter_gen;
case IX86_BUILTIN_SCATTERALTSIV8DI:
icode = CODE_FOR_avx512f_scattersiv8di;
goto scatter_gen;
case IX86_BUILTIN_SCATTERALTDIV16SI:
icode = CODE_FOR_avx512f_scatterdiv16si;
goto scatter_gen;
case IX86_BUILTIN_GATHERPFDPS: case IX86_BUILTIN_GATHERPFDPS:
icode = CODE_FOR_avx512pf_gatherpfv16sisf; icode = CODE_FOR_avx512pf_gatherpfv16sisf;
goto vec_prefetch_gen; goto vec_prefetch_gen;
@ -40123,6 +40154,36 @@ rdseed_step:
mode3 = insn_data[icode].operand[3].mode; mode3 = insn_data[icode].operand[3].mode;
mode4 = insn_data[icode].operand[4].mode; mode4 = insn_data[icode].operand[4].mode;
/* Scatter instruction stores operand op3 to memory with
indices from op2 and scale from op4 under writemask op1.
If index operand op2 has more elements than source operand
op3, only its low half is used, and vice versa. */
switch (fcode)
{
case IX86_BUILTIN_SCATTERALTSIV8DF:
case IX86_BUILTIN_SCATTERALTSIV8DI:
half = gen_reg_rtx (V8SImode);
if (!nonimmediate_operand (op2, V16SImode))
op2 = copy_to_mode_reg (V16SImode, op2);
emit_insn (gen_vec_extract_lo_v16si (half, op2));
op2 = half;
break;
case IX86_BUILTIN_SCATTERALTDIV16SF:
case IX86_BUILTIN_SCATTERALTDIV16SI:
half = gen_reg_rtx (mode3);
if (mode3 == V8SFmode)
gen = gen_vec_extract_lo_v16sf;
else
gen = gen_vec_extract_lo_v16si;
if (!nonimmediate_operand (op3, GET_MODE (op3)))
op3 = copy_to_mode_reg (GET_MODE (op3), op3);
emit_insn (gen (half, op3));
op3 = half;
break;
default:
break;
}
/* Force memory operand only with base register here. But we /* Force memory operand only with base register here. But we
don't want to do it on memory operand for other builtin don't want to do it on memory operand for other builtin
functions. */ functions. */
@ -41202,6 +41263,62 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
return ix86_get_builtin (code); return ix86_get_builtin (code);
} }
/* Select the scatter-store builtin offered to the vectorizer.

   VECTYPE is the vector type of the data being stored, INDEX_TYPE is
   the scalar type of one index element and SCALE is the multiplier
   applied to each index.  Return the decl of the matching builtin, or
   NULL_TREE when none is available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
                                const_tree index_type, int scale)
{
  enum ix86_builtins code;
  bool index_is_si;

  /* Nothing is offered without AVX-512F.  */
  if (!TARGET_AVX512F)
    return NULL_TREE;

  /* The index must be an integer or a pointer ...  */
  if (TREE_CODE (index_type) != INTEGER_TYPE
      && !POINTER_TYPE_P (index_type))
    return NULL_TREE;

  /* ... whose mode is SImode or DImode ...  */
  if (TYPE_MODE (index_type) != SImode
      && TYPE_MODE (index_type) != DImode)
    return NULL_TREE;

  /* ... and which is no wider than a pointer.  */
  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insns sign extend the index to pointer mode, so an
     unsigned index narrower than a pointer cannot be used.  */
  if (TYPE_UNSIGNED (index_type)
      && TYPE_PRECISION (index_type) < POINTER_SIZE)
    return NULL_TREE;

  /* Only scales of 1, 2, 4 and 8 are accepted.  */
  switch (scale)
    {
    case 1:
    case 2:
    case 4:
    case 8:
      break;
    default:
      return NULL_TREE;
    }

  index_is_si = TYPE_MODE (index_type) == SImode;

  /* Choose by data vector mode.  The ALT builtins are the ones picked
     when the index element width differs from the data element
     width.  */
  switch (TYPE_MODE (vectype))
    {
    case V8DFmode:
      if (index_is_si)
        code = IX86_BUILTIN_SCATTERALTSIV8DF;
      else
        code = IX86_BUILTIN_SCATTERDIV8DF;
      break;

    case V8DImode:
      if (index_is_si)
        code = IX86_BUILTIN_SCATTERALTSIV8DI;
      else
        code = IX86_BUILTIN_SCATTERDIV8DI;
      break;

    case V16SFmode:
      if (index_is_si)
        code = IX86_BUILTIN_SCATTERSIV16SF;
      else
        code = IX86_BUILTIN_SCATTERALTDIV16SF;
      break;

    case V16SImode:
      if (index_is_si)
        code = IX86_BUILTIN_SCATTERSIV16SI;
      else
        code = IX86_BUILTIN_SCATTERALTDIV16SI;
      break;

    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements /* Returns a code for a target-specific builtin that implements
reciprocal of the function, or NULL_TREE if not available. */ reciprocal of the function, or NULL_TREE if not available. */
@ -52332,6 +52449,9 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
#undef TARGET_VECTORIZE_BUILTIN_GATHER #undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
#undef TARGET_BUILTIN_RECIPROCAL #undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal