extend.texi: Document AVX built-in functions.
2008-08-28 Joey Ye <joey.ye@intel.com> * doc/extend.texi: Document AVX built-in functions. * doc/invoke.texi: Document -mavx. From-SVN: r139727
This commit is contained in:
parent
95879c728b
commit
31cb596a6b
@ -1,3 +1,8 @@
|
||||
2008-08-28 Joey Ye <joey.ye@intel.com>
|
||||
|
||||
* doc/extend.texi: Document AVX built-in functions.
|
||||
* doc/invoke.texi: Document -mavx.
|
||||
|
||||
2008-08-28 H.J. Lu <hongjiu.lu@intel.com>
|
||||
Joey Ye <joey.ye@intel.com>
|
||||
Xuepeng Guo <xuepeng.guo@intel.com>
|
||||
|
@ -8271,6 +8271,141 @@ depending on the size of @code{unsigned long}.
|
||||
Generates the @code{popcntq} machine instruction.
|
||||
@end table
|
||||
|
||||
The following built-in functions are available when @option{-mavx} is
|
||||
used. All of them generate the machine instruction that is part of the
|
||||
name.
|
||||
|
||||
@smallexample
|
||||
v4df __builtin_ia32_addpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_addps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_addsubpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_addsubps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_andnpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_andnps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_andpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_andps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_blendpd256 (v4df,v4df,int)
|
||||
v8sf __builtin_ia32_blendps256 (v8sf,v8sf,int)
|
||||
v4df __builtin_ia32_blendvpd256 (v4df,v4df,v4df)
|
||||
v8sf __builtin_ia32_blendvps256 (v8sf,v8sf,v8sf)
|
||||
v2df __builtin_ia32_cmppd (v2df,v2df,int)
|
||||
v4df __builtin_ia32_cmppd256 (v4df,v4df,int)
|
||||
v4sf __builtin_ia32_cmpps (v4sf,v4sf,int)
|
||||
v8sf __builtin_ia32_cmpps256 (v8sf,v8sf,int)
|
||||
v2df __builtin_ia32_cmpsd (v2df,v2df,int)
|
||||
v4sf __builtin_ia32_cmpss (v4sf,v4sf,int)
|
||||
v4df __builtin_ia32_cvtdq2pd256 (v4si)
|
||||
v8sf __builtin_ia32_cvtdq2ps256 (v8si)
|
||||
v4si __builtin_ia32_cvtpd2dq256 (v4df)
|
||||
v4sf __builtin_ia32_cvtpd2ps256 (v4df)
|
||||
v8si __builtin_ia32_cvtps2dq256 (v8sf)
|
||||
v4df __builtin_ia32_cvtps2pd256 (v4sf)
|
||||
v4si __builtin_ia32_cvttpd2dq256 (v4df)
|
||||
v8si __builtin_ia32_cvttps2dq256 (v8sf)
|
||||
v4df __builtin_ia32_divpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_divps256 (v8sf,v8sf)
|
||||
v8sf __builtin_ia32_dpps256 (v8sf,v8sf,int)
|
||||
v4df __builtin_ia32_haddpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_haddps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_hsubpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_hsubps256 (v8sf,v8sf)
|
||||
v32qi __builtin_ia32_lddqu256 (pcchar)
|
||||
v32qi __builtin_ia32_loaddqu256 (pcchar)
|
||||
v4df __builtin_ia32_loadupd256 (pcdouble)
|
||||
v8sf __builtin_ia32_loadups256 (pcfloat)
|
||||
v2df __builtin_ia32_maskloadpd (pcv2df,v2df)
|
||||
v4df __builtin_ia32_maskloadpd256 (pcv4df,v4df)
|
||||
v4sf __builtin_ia32_maskloadps (pcv4sf,v4sf)
|
||||
v8sf __builtin_ia32_maskloadps256 (pcv8sf,v8sf)
|
||||
void __builtin_ia32_maskstorepd (pv2df,v2df,v2df)
|
||||
void __builtin_ia32_maskstorepd256 (pv4df,v4df,v4df)
|
||||
void __builtin_ia32_maskstoreps (pv4sf,v4sf,v4sf)
|
||||
void __builtin_ia32_maskstoreps256 (pv8sf,v8sf,v8sf)
|
||||
v4df __builtin_ia32_maxpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_maxps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_minpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_minps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_movddup256 (v4df)
|
||||
int __builtin_ia32_movmskpd256 (v4df)
|
||||
int __builtin_ia32_movmskps256 (v8sf)
|
||||
v8sf __builtin_ia32_movshdup256 (v8sf)
|
||||
v8sf __builtin_ia32_movsldup256 (v8sf)
|
||||
v4df __builtin_ia32_mulpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_mulps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_orpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_orps256 (v8sf,v8sf)
|
||||
v2df __builtin_ia32_pd_pd256 (v4df)
|
||||
v4df __builtin_ia32_pd256_pd (v2df)
|
||||
v4sf __builtin_ia32_ps_ps256 (v8sf)
|
||||
v8sf __builtin_ia32_ps256_ps (v4sf)
|
||||
int __builtin_ia32_ptestc256 (v4di,v4di,ptest)
|
||||
int __builtin_ia32_ptestnzc256 (v4di,v4di,ptest)
|
||||
int __builtin_ia32_ptestz256 (v4di,v4di,ptest)
|
||||
v8sf __builtin_ia32_rcpps256 (v8sf)
|
||||
v4df __builtin_ia32_roundpd256 (v4df,int)
|
||||
v8sf __builtin_ia32_roundps256 (v8sf,int)
|
||||
v8sf __builtin_ia32_rsqrtps_nr256 (v8sf)
|
||||
v8sf __builtin_ia32_rsqrtps256 (v8sf)
|
||||
v4df __builtin_ia32_shufpd256 (v4df,v4df,int)
|
||||
v8sf __builtin_ia32_shufps256 (v8sf,v8sf,int)
|
||||
v4si __builtin_ia32_si_si256 (v8si)
|
||||
v8si __builtin_ia32_si256_si (v4si)
|
||||
v4df __builtin_ia32_sqrtpd256 (v4df)
|
||||
v8sf __builtin_ia32_sqrtps_nr256 (v8sf)
|
||||
v8sf __builtin_ia32_sqrtps256 (v8sf)
|
||||
void __builtin_ia32_storedqu256 (pchar,v32qi)
|
||||
void __builtin_ia32_storeupd256 (pdouble,v4df)
|
||||
void __builtin_ia32_storeups256 (pfloat,v8sf)
|
||||
v4df __builtin_ia32_subpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_subps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_unpckhpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_unpckhps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_unpcklpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_unpcklps256 (v8sf,v8sf)
|
||||
v4df __builtin_ia32_vbroadcastf128_pd256 (pcv2df)
|
||||
v8sf __builtin_ia32_vbroadcastf128_ps256 (pcv4sf)
|
||||
v4df __builtin_ia32_vbroadcastsd256 (pcdouble)
|
||||
v4sf __builtin_ia32_vbroadcastss (pcfloat)
|
||||
v8sf __builtin_ia32_vbroadcastss256 (pcfloat)
|
||||
v2df __builtin_ia32_vextractf128_pd256 (v4df,int)
|
||||
v4sf __builtin_ia32_vextractf128_ps256 (v8sf,int)
|
||||
v4si __builtin_ia32_vextractf128_si256 (v8si,int)
|
||||
v4df __builtin_ia32_vinsertf128_pd256 (v4df,v2df,int)
|
||||
v8sf __builtin_ia32_vinsertf128_ps256 (v8sf,v4sf,int)
|
||||
v8si __builtin_ia32_vinsertf128_si256 (v8si,v4si,int)
|
||||
v4df __builtin_ia32_vperm2f128_pd256 (v4df,v4df,int)
|
||||
v8sf __builtin_ia32_vperm2f128_ps256 (v8sf,v8sf,int)
|
||||
v8si __builtin_ia32_vperm2f128_si256 (v8si,v8si,int)
|
||||
v2df __builtin_ia32_vpermil2pd (v2df,v2df,v2di,int)
|
||||
v4df __builtin_ia32_vpermil2pd256 (v4df,v4df,v4di,int)
|
||||
v4sf __builtin_ia32_vpermil2ps (v4sf,v4sf,v4si,int)
|
||||
v8sf __builtin_ia32_vpermil2ps256 (v8sf,v8sf,v8si,int)
|
||||
v2df __builtin_ia32_vpermilpd (v2df,int)
|
||||
v4df __builtin_ia32_vpermilpd256 (v4df,int)
|
||||
v4sf __builtin_ia32_vpermilps (v4sf,int)
|
||||
v8sf __builtin_ia32_vpermilps256 (v8sf,int)
|
||||
v2df __builtin_ia32_vpermilvarpd (v2df,v2di)
|
||||
v4df __builtin_ia32_vpermilvarpd256 (v4df,v4di)
|
||||
v4sf __builtin_ia32_vpermilvarps (v4sf,v4si)
|
||||
v8sf __builtin_ia32_vpermilvarps256 (v8sf,v8si)
|
||||
int __builtin_ia32_vtestcpd (v2df,v2df,ptest)
|
||||
int __builtin_ia32_vtestcpd256 (v4df,v4df,ptest)
|
||||
int __builtin_ia32_vtestcps (v4sf,v4sf,ptest)
|
||||
int __builtin_ia32_vtestcps256 (v8sf,v8sf,ptest)
|
||||
int __builtin_ia32_vtestnzcpd (v2df,v2df,ptest)
|
||||
int __builtin_ia32_vtestnzcpd256 (v4df,v4df,ptest)
|
||||
int __builtin_ia32_vtestnzcps (v4sf,v4sf,ptest)
|
||||
int __builtin_ia32_vtestnzcps256 (v8sf,v8sf,ptest)
|
||||
int __builtin_ia32_vtestzpd (v2df,v2df,ptest)
|
||||
int __builtin_ia32_vtestzpd256 (v4df,v4df,ptest)
|
||||
int __builtin_ia32_vtestzps (v4sf,v4sf,ptest)
|
||||
int __builtin_ia32_vtestzps256 (v8sf,v8sf,ptest)
|
||||
void __builtin_ia32_vzeroall (void)
|
||||
void __builtin_ia32_vzeroupper (void)
|
||||
v4df __builtin_ia32_xorpd256 (v4df,v4df)
|
||||
v8sf __builtin_ia32_xorps256 (v8sf,v8sf)
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-maes} is
|
||||
used. All of them generate the machine instruction that is part of the
|
||||
name.
|
||||
|
@ -563,7 +563,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mpreferred-stack-boundary=@var{num}
|
||||
-mincoming-stack-boundary=@var{num}
|
||||
-mcld -mcx16 -msahf -mrecip @gol
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
|
||||
-maes -mpclmul @gol
|
||||
-msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
|
||||
-mthreads -mno-align-stringops -minline-all-stringops @gol
|
||||
@ -10865,6 +10865,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@itemx -mno-sse4.2
|
||||
@itemx -msse4
|
||||
@itemx -mno-sse4
|
||||
@itemx -mavx
|
||||
@itemx -mno-avx
|
||||
@itemx -maes
|
||||
@itemx -mno-aes
|
||||
@itemx -mpclmul
|
||||
@ -10886,7 +10888,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@opindex m3dnow
|
||||
@opindex mno-3dnow
|
||||
These switches enable or disable the use of instructions in the MMX,
|
||||
SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or
|
||||
SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, SSE5, ABM or
|
||||
3DNow!@: extended instruction sets.
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{X86 Built-in Functions}, for details of the functions enabled and
|
||||
@ -10895,6 +10897,10 @@ disabled by these switches.
|
||||
To have SSE/SSE2 instructions generated automatically from floating-point
|
||||
code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
|
||||
|
||||
GCC suppresses SSEx instructions when @option{-mavx} is used. Instead, it
|
||||
generates new AVX instructions or AVX equivalents for all SSEx instructions
|
||||
when needed.
|
||||
|
||||
These options will enable GCC to use these extended instructions in
|
||||
generated code, even without @option{-mfpmath=sse}. Applications which
|
||||
perform runtime CPU detection must compile separate files for each
|
||||
|
Loading…
Reference in New Issue
Block a user