Correct mask operand for AVX mask load/store.
gcc/ 2011-01-17 H.J. Lu <hongjiu.lu@intel.com> Backport from mainline 2011-01-17 H.J. Lu <hongjiu.lu@intel.com> PR target/47318 * config/i386/avxintrin.h (_mm_maskload_pd): Change mask to __m128i. (_mm_maskstore_pd): Likewise. (_mm_maskload_ps): Likewise. (_mm_maskstore_ps): Likewise. (_mm256_maskload_pd): Change mask to __m256i. (_mm256_maskstore_pd): Likewise. (_mm256_maskload_ps): Likewise. (_mm256_maskstore_ps): Likewise. * config/i386/i386-builtin-types.def: Updated. (ix86_expand_special_args_builtin): Likewise. * config/i386/i386.c (ix86_special_builtin_type): Remove V8SF_FTYPE_PCV8SF_V8SF, V4DF_FTYPE_PCV4DF_V4DF, V4SF_FTYPE_PCV4SF_V4SF, V2DF_FTYPE_PCV2DF_V2DF, VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF, VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF. Add V8SF_FTYPE_PCV8SF_V8SI, V4DF_FTYPE_PCV4DF_V4DI, V4SF_FTYPE_PCV4SF_V4SI, V2DF_FTYPE_PCV2DF_V2DI, VOID_FTYPE_PV8SF_V8SI_V8SF, VOID_FTYPE_PV4DF_V4DI_V4DF, VOID_FTYPE_PV4SF_V4SI_V4SF and VOID_FTYPE_PV2DF_V2DI_V2DF. (bdesc_special_args): Update __builtin_ia32_maskloadpd, __builtin_ia32_maskloadps, __builtin_ia32_maskloadpd256, __builtin_ia32_maskloadps256, __builtin_ia32_maskstorepd, __builtin_ia32_maskstoreps, __builtin_ia32_maskstorepd256 and __builtin_ia32_maskstoreps256. (ix86_init_mmx_sse_builtins): Updated. * config/i386/sse.md (avx_maskload<ssemodesuffix><avxmodesuffix>): Use <avxpermvecmode> on mask register. (avx_maskstore<ssemodesuffix><avxmodesuffix>): Likewise. gcc/testsuite/ 2011-01-17 H.J. Lu <hongjiu.lu@intel.com> Backport from mainline 2011-01-17 H.J. Lu <hongjiu.lu@intel.com> PR target/47318 * gcc.target/i386/avx-vmaskmovpd-1.c: New. * gcc.target/i386/avx-vmaskmovpd-2.c: Likewise. * gcc.target/i386/avx-vmaskmovps-1.c: Likewise. * gcc.target/i386/avx-vmaskmovps-2.c: Likewise. * gcc.target/i386/avx-vmaskmovpd-256-1.c (avx_test): Load mask as __m256i. * gcc.target/i386/avx-vmaskmovpd-256-2.c (avx_test): Likewise. * gcc.target/i386/avx-vmaskmovps-256-1.c (avx_test): Likewise. 
* gcc.target/i386/avx-vmaskmovps-256-2.c (avx_test): Likewise. From-SVN: r168904
This commit is contained in:
parent
b089a91fe6
commit
3ef2922603
|
@ -1,3 +1,42 @@
|
|||
2011-01-17 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
Backport from mainline
|
||||
2011-01-17 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/47318
|
||||
* config/i386/avxintrin.h (_mm_maskload_pd): Change mask to
|
||||
__m128i.
|
||||
(_mm_maskstore_pd): Likewise.
|
||||
(_mm_maskload_ps): Likewise.
|
||||
(_mm_maskstore_ps): Likewise.
|
||||
(_mm256_maskload_pd): Change mask to __m256i.
|
||||
(_mm256_maskstore_pd): Likewise.
|
||||
(_mm256_maskload_ps): Likewise.
|
||||
(_mm256_maskstore_ps): Likewise.
|
||||
|
||||
* config/i386/i386-builtin-types.def: Updated.
|
||||
(ix86_expand_special_args_builtin): Likewise.
|
||||
|
||||
* config/i386/i386.c (ix86_special_builtin_type): Remove
|
||||
V8SF_FTYPE_PCV8SF_V8SF, V4DF_FTYPE_PCV4DF_V4DF,
|
||||
V4SF_FTYPE_PCV4SF_V4SF, V2DF_FTYPE_PCV2DF_V2DF,
|
||||
VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF,
|
||||
VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF.
|
||||
Add V8SF_FTYPE_PCV8SF_V8SI, V4DF_FTYPE_PCV4DF_V4DI,
|
||||
V4SF_FTYPE_PCV4SF_V4SI, V2DF_FTYPE_PCV2DF_V2DI,
|
||||
VOID_FTYPE_PV8SF_V8SI_V8SF, VOID_FTYPE_PV4DF_V4DI_V4DF,
|
||||
VOID_FTYPE_PV4SF_V4SI_V4SF and VOID_FTYPE_PV2DF_V2DI_V2DF.
|
||||
(bdesc_special_args): Update
|
||||
__builtin_ia32_maskloadpd, __builtin_ia32_maskloadps,
|
||||
__builtin_ia32_maskloadpd256, __builtin_ia32_maskloadps256,
|
||||
__builtin_ia32_maskstorepd, __builtin_ia32_maskstoreps,
|
||||
__builtin_ia32_maskstorepd256 and __builtin_ia32_maskstoreps256.
|
||||
(ix86_init_mmx_sse_builtins): Updated.
|
||||
|
||||
* config/i386/sse.md (avx_maskload<ssemodesuffix><avxmodesuffix>):
|
||||
Use <avxpermvecmode> on mask register.
|
||||
(avx_maskstore<ssemodesuffix><avxmodesuffix>): Likewise.
|
||||
|
||||
2011-01-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
Backport from mainline
|
||||
|
|
|
@ -890,55 +890,55 @@ _mm256_storeu_si256 (__m256i *__P, __m256i __A)
|
|||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskload_pd (double const *__P, __m128d __M)
|
||||
_mm_maskload_pd (double const *__P, __m128i __M)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
|
||||
(__v2df)__M);
|
||||
(__v2di)__M);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskstore_pd (double *__P, __m128d __M, __m128d __A)
|
||||
_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
|
||||
{
|
||||
__builtin_ia32_maskstorepd ((__v2df *)__P, (__v2df)__M, (__v2df)__A);
|
||||
__builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskload_pd (double const *__P, __m256d __M)
|
||||
_mm256_maskload_pd (double const *__P, __m256i __M)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
|
||||
(__v4df)__M);
|
||||
(__v4di)__M);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskstore_pd (double *__P, __m256d __M, __m256d __A)
|
||||
_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
|
||||
{
|
||||
__builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4df)__M, (__v4df)__A);
|
||||
__builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskload_ps (float const *__P, __m128 __M)
|
||||
_mm_maskload_ps (float const *__P, __m128i __M)
|
||||
{
|
||||
return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
|
||||
(__v4sf)__M);
|
||||
(__v4si)__M);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskstore_ps (float *__P, __m128 __M, __m128 __A)
|
||||
_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
|
||||
{
|
||||
__builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4sf)__M, (__v4sf)__A);
|
||||
__builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskload_ps (float const *__P, __m256 __M)
|
||||
_mm256_maskload_ps (float const *__P, __m256i __M)
|
||||
{
|
||||
return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
|
||||
(__v8sf)__M);
|
||||
(__v8si)__M);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskstore_ps (float *__P, __m256 __M, __m256 __A)
|
||||
_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
|
||||
{
|
||||
__builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8sf)__M, (__v8sf)__A);
|
||||
__builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
|
|
|
@ -20806,12 +20806,12 @@ enum ix86_special_builtin_type
|
|||
V4DF_FTYPE_PCDOUBLE,
|
||||
V4SF_FTYPE_PCFLOAT,
|
||||
V2DF_FTYPE_PCDOUBLE,
|
||||
V8SF_FTYPE_PCV8SF_V8SF,
|
||||
V4DF_FTYPE_PCV4DF_V4DF,
|
||||
V8SF_FTYPE_PCV8SF_V8SI,
|
||||
V4DF_FTYPE_PCV4DF_V4DI,
|
||||
V4SF_FTYPE_V4SF_PCV2SF,
|
||||
V4SF_FTYPE_PCV4SF_V4SF,
|
||||
V4SF_FTYPE_PCV4SF_V4SI,
|
||||
V2DF_FTYPE_V2DF_PCDOUBLE,
|
||||
V2DF_FTYPE_PCV2DF_V2DF,
|
||||
V2DF_FTYPE_PCV2DF_V2DI,
|
||||
V2DI_FTYPE_PV2DI,
|
||||
VOID_FTYPE_PV2SF_V4SF,
|
||||
VOID_FTYPE_PV4DI_V4DI,
|
||||
|
@ -20824,10 +20824,10 @@ enum ix86_special_builtin_type
|
|||
VOID_FTYPE_PDOUBLE_V2DF,
|
||||
VOID_FTYPE_PDI_DI,
|
||||
VOID_FTYPE_PINT_INT,
|
||||
VOID_FTYPE_PV8SF_V8SF_V8SF,
|
||||
VOID_FTYPE_PV4DF_V4DF_V4DF,
|
||||
VOID_FTYPE_PV4SF_V4SF_V4SF,
|
||||
VOID_FTYPE_PV2DF_V2DF_V2DF
|
||||
VOID_FTYPE_PV8SF_V8SI_V8SF,
|
||||
VOID_FTYPE_PV4DF_V4DI_V4DF,
|
||||
VOID_FTYPE_PV4SF_V4SI_V4SF,
|
||||
VOID_FTYPE_PV2DF_V2DI_V2DF
|
||||
};
|
||||
|
||||
/* Builtin types */
|
||||
|
@ -21058,14 +21058,14 @@ static const struct builtin_description bdesc_special_args[] =
|
|||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
|
||||
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
|
||||
};
|
||||
|
||||
/* Builtins with variable number of arguments. */
|
||||
|
@ -22620,40 +22620,40 @@ ix86_init_mmx_sse_builtins (void)
|
|||
= build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
|
||||
tree pcv4df_type_node
|
||||
= build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
|
||||
tree v8sf_ftype_pcv8sf_v8sf
|
||||
tree v8sf_ftype_pcv8sf_v8si
|
||||
= build_function_type_list (V8SF_type_node,
|
||||
pcv8sf_type_node, V8SF_type_node,
|
||||
pcv8sf_type_node, V8SI_type_node,
|
||||
NULL_TREE);
|
||||
tree v4df_ftype_pcv4df_v4df
|
||||
tree v4df_ftype_pcv4df_v4di
|
||||
= build_function_type_list (V4DF_type_node,
|
||||
pcv4df_type_node, V4DF_type_node,
|
||||
pcv4df_type_node, V4DI_type_node,
|
||||
NULL_TREE);
|
||||
tree v4sf_ftype_pcv4sf_v4sf
|
||||
tree v4sf_ftype_pcv4sf_v4si
|
||||
= build_function_type_list (V4SF_type_node,
|
||||
pcv4sf_type_node, V4SF_type_node,
|
||||
pcv4sf_type_node, V4SI_type_node,
|
||||
NULL_TREE);
|
||||
tree v2df_ftype_pcv2df_v2df
|
||||
tree v2df_ftype_pcv2df_v2di
|
||||
= build_function_type_list (V2DF_type_node,
|
||||
pcv2df_type_node, V2DF_type_node,
|
||||
pcv2df_type_node, V2DI_type_node,
|
||||
NULL_TREE);
|
||||
tree void_ftype_pv8sf_v8sf_v8sf
|
||||
tree void_ftype_pv8sf_v8si_v8sf
|
||||
= build_function_type_list (void_type_node,
|
||||
pv8sf_type_node, V8SF_type_node,
|
||||
pv8sf_type_node, V8SI_type_node,
|
||||
V8SF_type_node,
|
||||
NULL_TREE);
|
||||
tree void_ftype_pv4df_v4df_v4df
|
||||
tree void_ftype_pv4df_v4di_v4df
|
||||
= build_function_type_list (void_type_node,
|
||||
pv4df_type_node, V4DF_type_node,
|
||||
pv4df_type_node, V4DI_type_node,
|
||||
V4DF_type_node,
|
||||
NULL_TREE);
|
||||
tree void_ftype_pv4sf_v4sf_v4sf
|
||||
tree void_ftype_pv4sf_v4si_v4sf
|
||||
= build_function_type_list (void_type_node,
|
||||
pv4sf_type_node, V4SF_type_node,
|
||||
pv4sf_type_node, V4SI_type_node,
|
||||
V4SF_type_node,
|
||||
NULL_TREE);
|
||||
tree void_ftype_pv2df_v2df_v2df
|
||||
tree void_ftype_pv2df_v2di_v2df
|
||||
= build_function_type_list (void_type_node,
|
||||
pv2df_type_node, V2DF_type_node,
|
||||
pv2df_type_node, V2DI_type_node,
|
||||
V2DF_type_node,
|
||||
NULL_TREE);
|
||||
tree v4df_ftype_v2df
|
||||
|
@ -22759,23 +22759,23 @@ ix86_init_mmx_sse_builtins (void)
|
|||
case V2DF_FTYPE_PCDOUBLE:
|
||||
type = v2df_ftype_pcdouble;
|
||||
break;
|
||||
case V8SF_FTYPE_PCV8SF_V8SF:
|
||||
type = v8sf_ftype_pcv8sf_v8sf;
|
||||
case V8SF_FTYPE_PCV8SF_V8SI:
|
||||
type = v8sf_ftype_pcv8sf_v8si;
|
||||
break;
|
||||
case V4DF_FTYPE_PCV4DF_V4DF:
|
||||
type = v4df_ftype_pcv4df_v4df;
|
||||
case V4DF_FTYPE_PCV4DF_V4DI:
|
||||
type = v4df_ftype_pcv4df_v4di;
|
||||
break;
|
||||
case V4SF_FTYPE_V4SF_PCV2SF:
|
||||
type = v4sf_ftype_v4sf_pcv2sf;
|
||||
break;
|
||||
case V4SF_FTYPE_PCV4SF_V4SF:
|
||||
type = v4sf_ftype_pcv4sf_v4sf;
|
||||
case V4SF_FTYPE_PCV4SF_V4SI:
|
||||
type = v4sf_ftype_pcv4sf_v4si;
|
||||
break;
|
||||
case V2DF_FTYPE_V2DF_PCDOUBLE:
|
||||
type = v2df_ftype_v2df_pcdouble;
|
||||
break;
|
||||
case V2DF_FTYPE_PCV2DF_V2DF:
|
||||
type = v2df_ftype_pcv2df_v2df;
|
||||
case V2DF_FTYPE_PCV2DF_V2DI:
|
||||
type = v2df_ftype_pcv2df_v2di;
|
||||
break;
|
||||
case VOID_FTYPE_PV2SF_V4SF:
|
||||
type = void_ftype_pv2sf_v4sf;
|
||||
|
@ -22810,17 +22810,17 @@ ix86_init_mmx_sse_builtins (void)
|
|||
case VOID_FTYPE_PINT_INT:
|
||||
type = void_ftype_pint_int;
|
||||
break;
|
||||
case VOID_FTYPE_PV8SF_V8SF_V8SF:
|
||||
type = void_ftype_pv8sf_v8sf_v8sf;
|
||||
case VOID_FTYPE_PV8SF_V8SI_V8SF:
|
||||
type = void_ftype_pv8sf_v8si_v8sf;
|
||||
break;
|
||||
case VOID_FTYPE_PV4DF_V4DF_V4DF:
|
||||
type = void_ftype_pv4df_v4df_v4df;
|
||||
case VOID_FTYPE_PV4DF_V4DI_V4DF:
|
||||
type = void_ftype_pv4df_v4di_v4df;
|
||||
break;
|
||||
case VOID_FTYPE_PV4SF_V4SF_V4SF:
|
||||
type = void_ftype_pv4sf_v4sf_v4sf;
|
||||
case VOID_FTYPE_PV4SF_V4SI_V4SF:
|
||||
type = void_ftype_pv4sf_v4si_v4sf;
|
||||
break;
|
||||
case VOID_FTYPE_PV2DF_V2DF_V2DF:
|
||||
type = void_ftype_pv2df_v2df_v2df;
|
||||
case VOID_FTYPE_PV2DF_V2DI_V2DF:
|
||||
type = void_ftype_pv2df_v2di_v2df;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
@ -24650,18 +24650,18 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
|||
klass = load;
|
||||
memory = 1;
|
||||
break;
|
||||
case V8SF_FTYPE_PCV8SF_V8SF:
|
||||
case V4DF_FTYPE_PCV4DF_V4DF:
|
||||
case V4SF_FTYPE_PCV4SF_V4SF:
|
||||
case V2DF_FTYPE_PCV2DF_V2DF:
|
||||
case V8SF_FTYPE_PCV8SF_V8SI:
|
||||
case V4DF_FTYPE_PCV4DF_V4DI:
|
||||
case V4SF_FTYPE_PCV4SF_V4SI:
|
||||
case V2DF_FTYPE_PCV2DF_V2DI:
|
||||
nargs = 2;
|
||||
klass = load;
|
||||
memory = 0;
|
||||
break;
|
||||
case VOID_FTYPE_PV8SF_V8SF_V8SF:
|
||||
case VOID_FTYPE_PV4DF_V4DF_V4DF:
|
||||
case VOID_FTYPE_PV4SF_V4SF_V4SF:
|
||||
case VOID_FTYPE_PV2DF_V2DF_V2DF:
|
||||
case VOID_FTYPE_PV8SF_V8SI_V8SF:
|
||||
case VOID_FTYPE_PV4DF_V4DI_V4DF:
|
||||
case VOID_FTYPE_PV4SF_V4SI_V4SF:
|
||||
case VOID_FTYPE_PV2DF_V2DI_V2DF:
|
||||
nargs = 2;
|
||||
klass = store;
|
||||
/* Reserve memory operand for target. */
|
||||
|
|
|
@ -11657,7 +11657,7 @@
|
|||
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
|
||||
(unspec:AVXMODEF2P
|
||||
[(match_operand:AVXMODEF2P 1 "memory_operand" "m")
|
||||
(match_operand:AVXMODEF2P 2 "register_operand" "x")
|
||||
(match_operand:<avxpermvecmode> 2 "register_operand" "x")
|
||||
(match_dup 0)]
|
||||
UNSPEC_MASKLOAD))]
|
||||
"TARGET_AVX"
|
||||
|
@ -11669,7 +11669,7 @@
|
|||
(define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
|
||||
[(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
|
||||
(unspec:AVXMODEF2P
|
||||
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
|
||||
[(match_operand:<avxpermvecmode> 1 "register_operand" "x")
|
||||
(match_operand:AVXMODEF2P 2 "register_operand" "x")
|
||||
(match_dup 0)]
|
||||
UNSPEC_MASKSTORE))]
|
||||
|
|
|
@ -1,3 +1,20 @@
|
|||
2011-01-17 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
Backport from mainline
|
||||
2011-01-17 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/47318
|
||||
* gcc.target/i386/avx-vmaskmovpd-1.c: New.
|
||||
* gcc.target/i386/avx-vmaskmovpd-2.c: Likewise.
|
||||
* gcc.target/i386/avx-vmaskmovps-1.c: Likewise.
|
||||
* gcc.target/i386/avx-vmaskmovps-2.c: Likewise.
|
||||
|
||||
* gcc.target/i386/avx-vmaskmovpd-256-1.c (avx_test): Load mask
|
||||
as __m256i.
|
||||
* gcc.target/i386/avx-vmaskmovpd-256-2.c (avx_test): Likewise.
|
||||
* gcc.target/i386/avx-vmaskmovps-256-1.c (avx_test): Likewise.
|
||||
* gcc.target/i386/avx-vmaskmovps-256-2.c (avx_test): Likewise.
|
||||
|
||||
2011-01-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
Backport from mainline
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef MASK
|
||||
#define MASK 7
|
||||
#endif
|
||||
|
||||
#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63)
|
||||
|
||||
void static
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
long long m[2] = {mask_v(0), mask_v(1)};
|
||||
double s[2] = {1.1, 2.2};
|
||||
union128d u;
|
||||
union128i_q mask;
|
||||
double e[2] = {0.0};
|
||||
|
||||
mask.x = _mm_loadu_si128 ((__m128i *)m);
|
||||
u.x = _mm_maskload_pd (s, mask.x);
|
||||
|
||||
for (i = 0 ; i < 2; i++)
|
||||
e[i] = m[i] ? s[i] : 0;
|
||||
|
||||
if (check_union128d (u, e))
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef MASK
|
||||
#define MASK 6
|
||||
#endif
|
||||
|
||||
#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63)
|
||||
|
||||
void static
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
long long m[2] = {mask_v(0), mask_v(1)};
|
||||
double s[2] = {1.1, 2.2};
|
||||
double e[2] = {0.0};
|
||||
double d[2] = {0.0};
|
||||
union128d src;
|
||||
union128i_q mask;
|
||||
|
||||
src.x = _mm_loadu_pd (s);
|
||||
mask.x = _mm_loadu_si128 ((__m128i *)m);
|
||||
_mm_maskstore_pd (d, mask.x, src.x);
|
||||
|
||||
for (i = 0 ; i < 2; i++)
|
||||
e[i] = m[i] ? s[i] : 0;
|
||||
|
||||
if (checkVd (d, e, 2))
|
||||
abort ();
|
||||
}
|
|
@ -14,12 +14,13 @@ void static
|
|||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
long long m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
|
||||
long long m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
|
||||
double s[4] = {1.1, 2.2, 3.3, 4.4};
|
||||
union256d u, mask;
|
||||
union256d u;
|
||||
union256i_q mask;
|
||||
double e [4] = {0.0};
|
||||
|
||||
mask.x = _mm256_loadu_pd ((double*)m);
|
||||
mask.x = _mm256_loadu_si256 ((__m256i *)m);
|
||||
u.x = _mm256_maskload_pd (s, mask.x);
|
||||
|
||||
for (i = 0 ; i < 4; i++)
|
||||
|
|
|
@ -18,10 +18,11 @@ avx_test (void)
|
|||
double s[4] = {1.1, 2.2, 3.3, 4.4};
|
||||
double e [4] = {0.0};
|
||||
double d [4] = {0.0};
|
||||
union256d src, mask;
|
||||
union256d src;
|
||||
union256i_q mask;
|
||||
|
||||
src.x = _mm256_loadu_pd (s);
|
||||
mask.x = _mm256_loadu_pd ((double*)m);
|
||||
mask.x = _mm256_loadu_si256 ((__m256i *)m);
|
||||
_mm256_maskstore_pd (d, mask.x, src.x);
|
||||
|
||||
for (i = 0 ; i < 4; i++)
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef MASK
|
||||
#define MASK 134
|
||||
#endif
|
||||
|
||||
#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31)
|
||||
|
||||
void static
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
int m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
|
||||
float s[4] = {1,2,3,4};
|
||||
union128 u;
|
||||
union128i_d mask;
|
||||
float e[4] = {0.0};
|
||||
|
||||
mask.x = _mm_loadu_si128 ((__m128i *)m);
|
||||
u.x = _mm_maskload_ps (s, mask.x);
|
||||
|
||||
for (i = 0 ; i < 4; i++)
|
||||
e[i] = m[i] ? s[i] : 0;
|
||||
|
||||
if (check_union128 (u, e))
|
||||
abort ();
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef MASK
|
||||
#define MASK 214
|
||||
#endif
|
||||
|
||||
#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31)
|
||||
|
||||
void static
|
||||
avx_test (void)
|
||||
{
|
||||
int i;
|
||||
int m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
|
||||
float s[4] = {1,2,3,4};
|
||||
union128 src;
|
||||
union128i_d mask;
|
||||
float e[4] = {0.0};
|
||||
float d[4] = {0.0};
|
||||
|
||||
src.x = _mm_loadu_ps (s);
|
||||
mask.x = _mm_loadu_si128 ((__m128i *)m);
|
||||
_mm_maskstore_ps (d, mask.x, src.x);
|
||||
|
||||
for (i = 0 ; i < 4; i++)
|
||||
e[i] = m[i] ? s[i] : 0;
|
||||
|
||||
if (checkVf (d, e, 4))
|
||||
abort ();
|
||||
}
|
|
@ -16,10 +16,11 @@ avx_test (void)
|
|||
int i;
|
||||
int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)};
|
||||
float s[8] = {1,2,3,4,5,6,7,8};
|
||||
union256 u, mask;
|
||||
union256 u;
|
||||
union256i_d mask;
|
||||
float e [8] = {0.0};
|
||||
|
||||
mask.x = _mm256_loadu_ps ((float*)m);
|
||||
mask.x = _mm256_loadu_si256 ((__m256i *)m);
|
||||
u.x = _mm256_maskload_ps (s, mask.x);
|
||||
|
||||
for (i = 0 ; i < 8; i++)
|
||||
|
|
|
@ -16,12 +16,13 @@ avx_test (void)
|
|||
int i;
|
||||
int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)};
|
||||
float s[8] = {1,2,3,4,5,6,7,8};
|
||||
union256 src, mask;
|
||||
union256 src;
|
||||
union256i_d mask;
|
||||
float e [8] = {0.0};
|
||||
float d [8] = {0.0};
|
||||
|
||||
src.x = _mm256_loadu_ps (s);
|
||||
mask.x = _mm256_loadu_ps ((float *)m);
|
||||
mask.x = _mm256_loadu_si256 ((__m256i *)m);
|
||||
_mm256_maskstore_ps (d, mask.x, src.x);
|
||||
|
||||
for (i = 0 ; i < 8; i++)
|
||||
|
|
Loading…
Reference in New Issue