re PR target/34000 (GCC pedwarns about use of static inline functions from system headers in extern inline functions)

PR target/34000
	PR target/35553
	* config/i386/xmmintrin.h: Change all static inline functions to
	extern inline and add __gnu_inline__ attribute.
	* config/i386/bmintrin.h: Ditto.
	* config/i386/smmintrin.h: Ditto.
	* config/i386/tmmintrin.h: Ditto.
	* config/i386/mmintrin-common.h: Ditto.
	* config/i386/ammintrin.h: Ditto.
	* config/i386/emmintrin.h: Ditto.
	* config/i386/pmmintrin.h: Ditto.
	* config/i386/mmintrin.h: Ditto.
	* config/i386/mm3dnow.h: Ditto.

testsuite/ChangeLog:

	PR target/34000
	PR target/35553
	* g++.dg/other/i386-3.C: New test.
	* gcc.target/i386/sse-13.c: Redefine extern instead of static.
	* gcc.target/i386/sse-14.c: Ditto.
	* gcc.target/i386/mmx-1.c: Ditto.
	* gcc.target/i386/mmx-2.c: Ditto.
	* gcc.target/i386/3dnow-1.c: Ditto.
	* gcc.target/i386/3dnow-2.c: Ditto.
	* gcc.target/i386/3dnowA-1.c: Ditto.
	* gcc.target/i386/3dnowA-2.c: Ditto.

From-SVN: r133169
This commit is contained in:
Uros Bizjak 2008-03-13 14:33:47 +01:00 committed by Uros Bizjak
parent 5d2edb29da
commit 1359ef3975
21 changed files with 909 additions and 873 deletions

View File

@ -1,3 +1,19 @@
2008-03-13 Uros Bizjak <ubizjak@gmail.com>
PR target/34000
PR target/35553
* config/i386/xmmintrin.h: Change all static inline functions to
extern inline and add __gnu_inline__ attribute.
* config/i386/bmintrin.h: Ditto.
* config/i386/smmintrin.h: Ditto.
* config/i386/tmmintrin.h: Ditto.
* config/i386/mmintrin-common.h: Ditto.
* config/i386/ammintrin.h: Ditto.
* config/i386/emmintrin.h: Ditto.
* config/i386/pmmintrin.h: Ditto.
* config/i386/mmintrin.h: Ditto.
* config/i386/mm3dnow.h: Ditto.
2008-03-13 Jakub Jelinek <jakub@redhat.com>
PR middle-end/35185

View File

@ -37,26 +37,26 @@
/* We need definitions from the SSE3, SSE2 and SSE header files. */
#include <pmmintrin.h>
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_sd (double * __P, __m128d __Y)
{
__builtin_ia32_movntsd (__P, (__v2df) __Y);
}
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_ss (float * __P, __m128 __Y)
{
__builtin_ia32_movntss (__P, (__v4sf) __Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_si64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
}
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
{
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
@ -67,14 +67,14 @@ _mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
(unsigned int)(I), (unsigned int)(L)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_si64 (__m128i __X,__m128i __Y)
{
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
}
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
{
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -37,145 +37,145 @@
/* Internal data types for implementing the intrinsics. */
typedef float __v2sf __attribute__ ((__vector_size__ (8)));
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms (void)
{
__builtin_ia32_femms();
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgusb (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2id (__m64 __A)
{
return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfadd (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpeq (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpge (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpgt (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmax (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmin (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmul (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcp (__m64 __A)
{
return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit2 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqrt (__m64 __A)
{
return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsub (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsubr (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fd (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhrw (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
}
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetch (void *__P)
{
__builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchw (void *__P)
{
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_float (float __A)
{
return __extension__ (__m64)(__v2sf){ __A, 0.0f };
}
static __inline float __attribute__((__always_inline__, __artificial__))
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_float (__m64 __A)
{
union { __v2sf v; float a[2]; } __tmp;
@ -185,31 +185,31 @@ _m_to_float (__m64 __A)
#ifdef __3dNOW_A__
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2iw (__m64 __A)
{
return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfpnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fw (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pswapd (__m64 __A)
{
return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);

View File

@ -60,7 +60,7 @@
/* Test Instruction */
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & __M) == 0. */
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
@ -68,7 +68,7 @@ _mm_testz_si128 (__m128i __M, __m128i __V)
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & ~__M) == 0. */
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
@ -76,7 +76,7 @@ _mm_testc_si128 (__m128i __M, __m128i __V)
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & __M) != 0 && (__V & ~__M) != 0. */
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
@ -93,13 +93,13 @@ _mm_testnzc_si128 (__m128i __M, __m128i __V)
/* Packed/scalar double precision floating point rounding. */
#ifdef __OPTIMIZE__
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_pd (__m128d __V, const int __M)
{
return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
}
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_sd(__m128d __D, __m128d __V, const int __M)
{
return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
@ -118,13 +118,13 @@ _mm_round_sd(__m128d __D, __m128d __V, const int __M)
/* Packed/scalar single precision floating point rounding. */
#ifdef __OPTIMIZE__
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ps (__m128 __V, const int __M)
{
return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
}
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ss (__m128 __D, __m128 __V, const int __M)
{
return (__m128) __builtin_ia32_roundss ((__v4sf)__D,

View File

@ -45,26 +45,26 @@ typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
/* Empty the multimedia state. */
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
__builtin_ia32_emms ();
}
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
_mm_empty ();
}
/* Convert I to a __m64 object. The integer is zero-extended to 64 bits. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64 (int __i)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int (int __i)
{
return _mm_cvtsi32_si64 (__i);
@ -74,26 +74,26 @@ _m_from_int (int __i)
/* Convert I to a __m64 object. */
/* Intel intrinsic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64 (long long __i)
{
return (__m64) __i;
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64 (long long __i)
{
return (__m64) __i;
}
/* Microsoft intrinsic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64 (long long __i)
{
return (__m64) __i;
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x (long long __i)
{
return (__m64) __i;
@ -101,13 +101,13 @@ _mm_set_pi64x (long long __i)
#endif
/* Convert the lower 32 bits of the __m64 object into an integer. */
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32 (__m64 __i)
{
return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int (__m64 __i)
{
return _mm_cvtsi64_si32 (__i);
@ -117,20 +117,20 @@ _m_to_int (__m64 __i)
/* Convert the __m64 object to a 64-bit integer. */
/* Intel intrinsic. */
static __inline long long __attribute__((__always_inline__, __artificial__))
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int64 (__m64 __i)
{
return (long long)__i;
}
static __inline long long __attribute__((__always_inline__, __artificial__))
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtm64_si64 (__m64 __i)
{
return (long long)__i;
}
/* Microsoft intrinsic. */
static __inline long long __attribute__((__always_inline__, __artificial__))
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si64x (__m64 __i)
{
return (long long)__i;
@ -140,13 +140,13 @@ _mm_cvtsi64_si64x (__m64 __i)
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with signed saturation. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packsswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi16 (__m1, __m2);
@ -155,13 +155,13 @@ _m_packsswb (__m64 __m1, __m64 __m2)
/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
the result, and the two 32-bit values from M2 into the upper two 16-bit
values of the result, all with signed saturation. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packssdw (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi32 (__m1, __m2);
@ -170,13 +170,13 @@ _m_packssdw (__m64 __m1, __m64 __m2)
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with unsigned saturation. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packuswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pu16 (__m1, __m2);
@ -184,13 +184,13 @@ _m_packuswb (__m64 __m1, __m64 __m2)
/* Interleave the four 8-bit values from the high half of M1 with the four
8-bit values from the high half of M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhbw (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi8 (__m1, __m2);
@ -198,13 +198,13 @@ _m_punpckhbw (__m64 __m1, __m64 __m2)
/* Interleave the two 16-bit values from the high half of M1 with the two
16-bit values from the high half of M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhwd (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi16 (__m1, __m2);
@ -212,13 +212,13 @@ _m_punpckhwd (__m64 __m1, __m64 __m2)
/* Interleave the 32-bit value from the high half of M1 with the 32-bit
value from the high half of M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi32 (__m1, __m2);
@ -226,13 +226,13 @@ _m_punpckhdq (__m64 __m1, __m64 __m2)
/* Interleave the four 8-bit values from the low half of M1 with the four
8-bit values from the low half of M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklbw (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi8 (__m1, __m2);
@ -240,13 +240,13 @@ _m_punpcklbw (__m64 __m1, __m64 __m2)
/* Interleave the two 16-bit values from the low half of M1 with the two
16-bit values from the low half of M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklwd (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi16 (__m1, __m2);
@ -254,52 +254,52 @@ _m_punpcklwd (__m64 __m1, __m64 __m2)
/* Interleave the 32-bit value from the low half of M1 with the 32-bit
value from the low half of M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckldq (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi32 (__m1, __m2);
}
/* Add the 8-bit values in M1 to the 8-bit values in M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddb (__m64 __m1, __m64 __m2)
{
return _mm_add_pi8 (__m1, __m2);
}
/* Add the 16-bit values in M1 to the 16-bit values in M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddw (__m64 __m1, __m64 __m2)
{
return _mm_add_pi16 (__m1, __m2);
}
/* Add the 32-bit values in M1 to the 32-bit values in M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddd (__m64 __m1, __m64 __m2)
{
return _mm_add_pi32 (__m1, __m2);
@ -307,7 +307,7 @@ _m_paddd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifdef __SSE2__
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
@ -316,13 +316,13 @@ _mm_add_si64 (__m64 __m1, __m64 __m2)
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
saturated arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi8 (__m1, __m2);
@ -330,13 +330,13 @@ _m_paddsb (__m64 __m1, __m64 __m2)
/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
saturated arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi16 (__m1, __m2);
@ -344,13 +344,13 @@ _m_paddsw (__m64 __m1, __m64 __m2)
/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
saturated arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu8 (__m1, __m2);
@ -358,52 +358,52 @@ _m_paddusb (__m64 __m1, __m64 __m2)
/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
saturated arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu16 (__m1, __m2);
}
/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubb (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi8 (__m1, __m2);
}
/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubw (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi16 (__m1, __m2);
}
/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubd (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi32 (__m1, __m2);
@ -411,7 +411,7 @@ _m_psubd (__m64 __m1, __m64 __m2)
/* Subtract the 64-bit values in M2 from the 64-bit values in M1. */
#ifdef __SSE2__
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
@ -420,13 +420,13 @@ _mm_sub_si64 (__m64 __m1, __m64 __m2)
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
saturating arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi8 (__m1, __m2);
@ -434,13 +434,13 @@ _m_psubsb (__m64 __m1, __m64 __m2)
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
signed saturating arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi16 (__m1, __m2);
@ -448,13 +448,13 @@ _m_psubsw (__m64 __m1, __m64 __m2)
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
unsigned saturating arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu8 (__m1, __m2);
@ -462,13 +462,13 @@ _m_psubusb (__m64 __m1, __m64 __m2)
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
unsigned saturating arithmetic. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu16 (__m1, __m2);
@ -477,13 +477,13 @@ _m_psubusw (__m64 __m1, __m64 __m2)
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
four 32-bit intermediate results, which are then summed by pairs to
produce two 32-bit results. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaddwd (__m64 __m1, __m64 __m2)
{
return _mm_madd_pi16 (__m1, __m2);
@ -491,13 +491,13 @@ _m_pmaddwd (__m64 __m1, __m64 __m2)
/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
M2 and produce the high 16 bits of the 32-bit results. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhw (__m64 __m1, __m64 __m2)
{
return _mm_mulhi_pi16 (__m1, __m2);
@ -505,226 +505,226 @@ _m_pmulhw (__m64 __m1, __m64 __m2)
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
the low 16 bits of the results. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmullw (__m64 __m1, __m64 __m2)
{
return _mm_mullo_pi16 (__m1, __m2);
}
/* Shift four 16-bit values in M left by COUNT. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllw (__m64 __m, __m64 __count)
{
return _mm_sll_pi16 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllwi (__m64 __m, int __count)
{
return _mm_slli_pi16 (__m, __count);
}
/* Shift two 32-bit values in M left by COUNT. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslld (__m64 __m, __m64 __count)
{
return _mm_sll_pi32 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslldi (__m64 __m, int __count)
{
return _mm_slli_pi32 (__m, __count);
}
/* Shift the 64-bit value in M left by COUNT. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllq (__m64 __m, __m64 __count)
{
return _mm_sll_si64 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllqi (__m64 __m, int __count)
{
return _mm_slli_si64 (__m, __count);
}
/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psraw (__m64 __m, __m64 __count)
{
return _mm_sra_pi16 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrawi (__m64 __m, int __count)
{
return _mm_srai_pi16 (__m, __count);
}
/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrad (__m64 __m, __m64 __count)
{
return _mm_sra_pi32 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psradi (__m64 __m, int __count)
{
return _mm_srai_pi32 (__m, __count);
}
/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlw (__m64 __m, __m64 __count)
{
return _mm_srl_pi16 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlwi (__m64 __m, int __count)
{
return _mm_srli_pi16 (__m, __count);
}
/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrld (__m64 __m, __m64 __count)
{
return _mm_srl_pi32 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrldi (__m64 __m, int __count)
{
return _mm_srli_pi32 (__m, __count);
}
/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlq (__m64 __m, __m64 __count)
{
return _mm_srl_si64 (__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlqi (__m64 __m, int __count)
{
return _mm_srli_si64 (__m, __count);
}
/* Bit-wise AND the 64-bit values in M1 and M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pand (__m1, __m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pand (__m64 __m1, __m64 __m2)
{
return _mm_and_si64 (__m1, __m2);
@ -732,39 +732,39 @@ _m_pand (__m64 __m1, __m64 __m2)
/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
64-bit value in M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pandn (__m1, __m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pandn (__m64 __m1, __m64 __m2)
{
return _mm_andnot_si64 (__m1, __m2);
}
/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_por (__m1, __m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_por (__m64 __m1, __m64 __m2)
{
return _mm_or_si64 (__m1, __m2);
}
/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pxor (__m1, __m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pxor (__m64 __m1, __m64 __m2)
{
return _mm_xor_si64 (__m1, __m2);
@ -772,25 +772,25 @@ _m_pxor (__m64 __m1, __m64 __m2)
/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
test is true and zero if false. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqb (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi8 (__m1, __m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtb (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi8 (__m1, __m2);
@ -798,25 +798,25 @@ _m_pcmpgtb (__m64 __m1, __m64 __m2)
/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
the test is true and zero if false. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqw (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi16 (__m1, __m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtw (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi16 (__m1, __m2);
@ -824,53 +824,53 @@ _m_pcmpgtw (__m64 __m1, __m64 __m2)
/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
the test is true and zero if false. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqd (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi32 (__m1, __m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtd (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi32 (__m1, __m2);
}
/* Creates a 64-bit zero. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64 (void)
{
return (__m64)0LL;
}
/* Creates a vector of two 32-bit values; I0 is least significant. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32 (int __i1, int __i0)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
}
/* Creates a vector of four 16-bit values; W0 is least significant. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
{
return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
}
/* Creates a vector of eight 8-bit values; B0 is least significant. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
char __b3, char __b2, char __b1, char __b0)
{
@ -879,19 +879,19 @@ _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
}
/* Similar, but with the arguments in reverse order. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi32 (int __i0, int __i1)
{
return _mm_set_pi32 (__i1, __i0);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16 (__w3, __w2, __w1, __w0);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
char __b4, char __b5, char __b6, char __b7)
{
@ -899,21 +899,21 @@ _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
}
/* Creates a vector of two 32-bit values, both elements containing I. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi32 (int __i)
{
return _mm_set_pi32 (__i, __i);
}
/* Creates a vector of four 16-bit values, all elements containing W. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi16 (short __w)
{
return _mm_set_pi16 (__w, __w, __w, __w);
}
/* Creates a vector of eight 8-bit values, all elements containing B. */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi8 (char __b)
{
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);

View File

@ -47,79 +47,79 @@
#define _MM_GET_DENORMALS_ZERO_MODE() \
(_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
}
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
}
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
}
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
}
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
}
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
}
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
}
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
}
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
return _mm_load1_pd (__P);
}
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lddqu_si128 (__m128i const *__P)
{
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
}
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
__builtin_ia32_monitor (__P, __E, __H);
}
static __inline void __attribute__((__always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwait (unsigned int __E, unsigned int __H)
{
__builtin_ia32_mwait (__E, __H);

View File

@ -45,7 +45,7 @@
constant/variable mask. */
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
@ -58,7 +58,7 @@ _mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
(__v8hi)(__m128i)(Y), (int)(M)))
#endif
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
{
return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
@ -70,7 +70,7 @@ _mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
from 2 sources using constant/variable mask. */
#ifdef __OPTIMIZE__
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
{
return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
@ -83,7 +83,7 @@ _mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
(__v4sf)(__m128)(Y), (int)(M)))
#endif
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
{
return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
@ -95,7 +95,7 @@ _mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
from 2 sources using constant/variable mask. */
#ifdef __OPTIMIZE__
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
{
return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
@ -108,7 +108,7 @@ _mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
(__v2df)(__m128d)(Y), (int)(M)))
#endif
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
{
return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
@ -120,7 +120,7 @@ _mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
of result. */
#ifdef __OPTIMIZE__
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
{
return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
@ -128,7 +128,7 @@ _mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
__M);
}
static __inline __m128d __attribute__((__always_inline__, __artificial__))
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
{
return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
@ -147,7 +147,7 @@ _mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
@ -155,49 +155,49 @@ _mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
/* Min/max packed integer instructions. */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
@ -205,7 +205,7 @@ _mm_max_epu32 (__m128i __X, __m128i __Y)
/* Packed integer 32-bit multiplication with truncation of upper
halves of results. */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
@ -213,7 +213,7 @@ _mm_mullo_epi32 (__m128i __X, __m128i __Y)
/* Packed integer 32-bit multiplication of 2 pairs of operands
with two 64-bit results. */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
@ -225,7 +225,7 @@ _mm_mul_epi32 (__m128i __X, __m128i __Y)
zeroing mask for D. */
#ifdef __OPTIMIZE__
static __inline __m128 __attribute__((__always_inline__, __artificial__))
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
{
return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
@ -245,7 +245,7 @@ _mm_insert_ps (__m128 __D, __m128 __S, const int __N)
single precision array element of X selected by index N. */
#ifdef __OPTIMIZE__
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_ps (__m128 __X, const int __N)
{
union { int i; float f; } __tmp;
@ -278,14 +278,14 @@ _mm_extract_ps (__m128 __X, const int __N)
selected by index N. */
#ifdef __OPTIMIZE__
/* Return __builtin_ia32_vec_set_v16qi applied to __D viewed as
   __v16qi, the value __S and the index __N (forwarded unchanged),
   with the result cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi8 (__m128i __D, int __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
__S, __N);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32 (__m128i __D, int __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
@ -293,7 +293,7 @@ _mm_insert_epi32 (__m128i __D, int __S, const int __N)
}
#ifdef __x86_64__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
@ -320,20 +320,20 @@ _mm_insert_epi64 (__m128i __D, long long __S, const int __N)
index N. */
#ifdef __OPTIMIZE__
/* Return __builtin_ia32_vec_ext_v16qi applied to __X viewed as
   __v16qi and the index __N (forwarded unchanged).  The `static' line
   is the declaration this change removes; the `extern' line replaces
   it.  */
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
}
/* Return __builtin_ia32_vec_ext_v4si applied to __X viewed as __v4si
   and the index __N (forwarded unchanged).  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
}
#ifdef __x86_64__
static __inline long long __attribute__((__always_inline__, __artificial__))
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi64 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
@ -353,7 +353,7 @@ _mm_extract_epi64 (__m128i __X, const int __N)
/* Return horizontal packed word minimum and its index in bits [15:0]
and bits [18:16] respectively. */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_minpos_epu16 (__m128i __X)
{
return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
@ -361,37 +361,37 @@ _mm_minpos_epu16 (__m128i __X)
/* Packed integer sign-extension. */
/* Sign-extension group: return __builtin_ia32_pmovsxbd128 applied to
   __X viewed as __v16qi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
}
/* Sign-extension group: return __builtin_ia32_pmovsxwd128 applied to
   __X viewed as __v8hi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
}
/* Sign-extension group: return __builtin_ia32_pmovsxbq128 applied to
   __X viewed as __v16qi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
}
/* Sign-extension group: return __builtin_ia32_pmovsxdq128 applied to
   __X viewed as __v4si, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
}
/* Sign-extension group: return __builtin_ia32_pmovsxwq128 applied to
   __X viewed as __v8hi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
@ -399,37 +399,37 @@ _mm_cvtepi8_epi16 (__m128i __X)
/* Packed integer zero-extension. */
/* Zero-extension group: return __builtin_ia32_pmovzxbd128 applied to
   __X viewed as __v16qi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
}
/* Zero-extension group: return __builtin_ia32_pmovzxwd128 applied to
   __X viewed as __v8hi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
}
/* Zero-extension group: return __builtin_ia32_pmovzxbq128 applied to
   __X viewed as __v16qi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
}
/* Zero-extension group: return __builtin_ia32_pmovzxdq128 applied to
   __X viewed as __v4si, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
}
/* Zero-extension group: return __builtin_ia32_pmovzxwq128 applied to
   __X viewed as __v8hi, cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
@ -437,7 +437,7 @@ _mm_cvtepu8_epi16 (__m128i __X)
/* Pack 8 double words from 2 operands into 8 words of result with
unsigned saturation. */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
@ -448,7 +448,7 @@ _mm_packus_epi32 (__m128i __X, __m128i __Y)
operands are determined by the 3rd mask operand. */
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
@ -461,7 +461,7 @@ _mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
#endif
/* Load double quadword using non-temporal aligned hint. */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_load_si128 (__m128i *__X)
{
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
@ -498,7 +498,7 @@ _mm_stream_load_si128 (__m128i *__X)
/* Intrinsics for text/string processing. */
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
@ -506,7 +506,7 @@ _mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
@ -514,7 +514,7 @@ _mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
__M);
}
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
@ -522,7 +522,7 @@ _mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
@ -551,7 +551,7 @@ _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
EFlags. */
#ifdef __OPTIMIZE__
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
@ -559,7 +559,7 @@ _mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
@ -567,7 +567,7 @@ _mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
@ -575,7 +575,7 @@ _mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
@ -583,7 +583,7 @@ _mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
@ -591,7 +591,7 @@ _mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
@ -599,7 +599,7 @@ _mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
@ -607,7 +607,7 @@ _mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
@ -615,7 +615,7 @@ _mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
@ -623,7 +623,7 @@ _mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
__M);
}
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
@ -671,21 +671,21 @@ _mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
/* Return __builtin_ia32_pcmpgtq applied to __X and __Y, both viewed
   as __v2di, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
}
/* Calculate the number of bits set to 1 in __X by returning
   __builtin_popcount (__X).  The `static' line is the declaration
   this change removes; the `extern' line replaces it.  */
static __inline int __attribute__((__always_inline__, __artificial__))
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u32 (unsigned int __X)
{
return __builtin_popcount (__X);
}
#ifdef __x86_64__
static __inline long long __attribute__((__always_inline__, __artificial__))
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u64 (unsigned long long __X)
{
return __builtin_popcountll (__X);
@ -693,26 +693,26 @@ _mm_popcnt_u64 (unsigned long long __X)
#endif
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value.  This variant
   returns __builtin_ia32_crc32qi (__C, __V) for an unsigned char __V.
   The `static' line is the declaration this change removes; the
   `extern' line replaces it.  */
static __inline unsigned int __attribute__((__always_inline__, __artificial__))
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u8 (unsigned int __C, unsigned char __V)
{
return __builtin_ia32_crc32qi (__C, __V);
}
/* CRC32 accumulation group: return __builtin_ia32_crc32hi (__C, __V)
   for an unsigned short __V.  The `static' line is the declaration
   this change removes; the `extern' line replaces it.  */
static __inline unsigned int __attribute__((__always_inline__, __artificial__))
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u16 (unsigned int __C, unsigned short __V)
{
return __builtin_ia32_crc32hi (__C, __V);
}
/* CRC32 accumulation group: return __builtin_ia32_crc32si (__C, __V)
   for an unsigned int __V.  The `static' line is the declaration this
   change removes; the `extern' line replaces it.  */
static __inline unsigned int __attribute__((__always_inline__, __artificial__))
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u32 (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V);
}
#ifdef __x86_64__
static __inline unsigned long long __attribute__((__always_inline__, __artificial__))
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
{
return __builtin_ia32_crc32di (__C, __V);

View File

@ -37,159 +37,159 @@
/* We need definitions from the SSE3, SSE2 and SSE header files. */
#include <pmmintrin.h>
/* Return __builtin_ia32_phaddw128 applied to __X and __Y, both viewed
   as __v8hi, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
}
/* Return __builtin_ia32_phaddd128 applied to __X and __Y, both viewed
   as __v4si, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
}
/* Return __builtin_ia32_phaddsw128 applied to __X and __Y, both viewed
   as __v8hi, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
}
/* Return __builtin_ia32_phaddw applied to __X and __Y, both viewed as
   __v4hi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
}
/* Return __builtin_ia32_phaddd applied to __X and __Y, both viewed as
   __v2si, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
}
/* Return __builtin_ia32_phaddsw applied to __X and __Y, both viewed as
   __v4hi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
}
/* Return __builtin_ia32_phsubw128 applied to __X and __Y, both viewed
   as __v8hi, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
}
/* Return __builtin_ia32_phsubd128 applied to __X and __Y, both viewed
   as __v4si, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
}
/* Return __builtin_ia32_phsubsw128 applied to __X and __Y, both viewed
   as __v8hi, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
}
/* Return __builtin_ia32_phsubw applied to __X and __Y, both viewed as
   __v4hi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
}
/* Return __builtin_ia32_phsubd applied to __X and __Y, both viewed as
   __v2si, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
}
/* Return __builtin_ia32_phsubsw applied to __X and __Y, both viewed as
   __v4hi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
}
/* Return __builtin_ia32_pmaddubsw128 applied to __X and __Y, both
   viewed as __v16qi, with the result cast to __m128i.  The `static'
   line is the declaration this change removes; the `extern' line
   replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
}
/* Return __builtin_ia32_pmaddubsw applied to __X and __Y, both viewed
   as __v8qi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
}
/* Return __builtin_ia32_pmulhrsw128 applied to __X and __Y, both
   viewed as __v8hi, with the result cast to __m128i.  The `static'
   line is the declaration this change removes; the `extern' line
   replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
}
/* Return __builtin_ia32_pmulhrsw applied to __X and __Y, both viewed
   as __v4hi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
}
/* Return __builtin_ia32_pshufb128 applied to __X and __Y, both viewed
   as __v16qi, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
}
/* Return __builtin_ia32_pshufb applied to __X and __Y, both viewed as
   __v8qi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
}
/* Return __builtin_ia32_psignb128 applied to __X and __Y, both viewed
   as __v16qi, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
}
/* Return __builtin_ia32_psignw128 applied to __X and __Y, both viewed
   as __v8hi, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
}
/* Return __builtin_ia32_psignd128 applied to __X and __Y, both viewed
   as __v4si, with the result cast to __m128i.  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
}
/* Return __builtin_ia32_psignb applied to __X and __Y, both viewed as
   __v8qi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
}
/* Return __builtin_ia32_psignw applied to __X and __Y, both viewed as
   __v4hi, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
}
/* Return __builtin_ia32_psignd applied to __X and __Y, both viewed as
   __v2si, with the result cast to __m64.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
}
#ifdef __OPTIMIZE__
/* Return __builtin_ia32_palignr128 applied to __X and __Y, both viewed
   as __v2di, cast to __m128i.  The builtin receives __N * 8 rather
   than __N itself (presumably a byte count converted to bits --
   confirm against the builtin's documentation).  The `static' line is
   the declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
{
return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
(__v2di)__Y, __N * 8);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
{
return (__m64) __builtin_ia32_palignr ((long long)__X,
@ -206,37 +206,37 @@ _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
(int)(N) * 8))
#endif
/* Return __builtin_ia32_pabsb128 applied to __X viewed as __v16qi,
   with the result cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
}
/* Return __builtin_ia32_pabsw128 applied to __X viewed as __v8hi,
   with the result cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
}
/* Return __builtin_ia32_pabsd128 applied to __X viewed as __v4si,
   with the result cast to __m128i.  The `static' line is the
   declaration this change removes; the `extern' line replaces it.  */
static __inline __m128i __attribute__((__always_inline__, __artificial__))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
}
/* Return __builtin_ia32_pabsb applied to __X viewed as __v8qi, with
   the result cast to __m64.  The `static' line is the declaration
   this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
}
/* Return __builtin_ia32_pabsw applied to __X viewed as __v4hi, with
   the result cast to __m64.  The `static' line is the declaration
   this change removes; the `extern' line replaces it.  */
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
}
static __inline __m64 __attribute__((__always_inline__, __artificial__))
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsd ((__v2si)__X);

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,17 @@
2008-03-13 Uros Bizjak <ubizjak@gmail.com>
PR target/34000
PR target/35553
* g++.dg/other/i386-3.C: New test.
* gcc.target/i386/sse-13.c: Redefine extern instead of static.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/mmx-1.c: Ditto.
* gcc.target/i386/mmx-2.c: Ditto.
* gcc.target/i386/3dnow-1.c: Ditto.
* gcc.target/i386/3dnow-2.c: Ditto.
* gcc.target/i386/3dnowA-1.c: Ditto.
* gcc.target/i386/3dnowA-2.c: Ditto.
2008-03-13 Paolo Bonzini <bonzini@gnu.org>
PR tree-opt/35422

View File

@ -0,0 +1,8 @@
/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
usable with -O -fkeep-inline-functions. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -msse4 -msse5" } */
#include <bmmintrin.h>
#include <smmintrin.h>
#include <mm3dnow.h>

View File

@ -3,10 +3,10 @@
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in mm3dnow.h that reference the proper
builtin functions. Defining away "static" and "__inline" results in
builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
#include <mm3dnow.h>

View File

@ -3,10 +3,10 @@
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in mm3dnow.h that reference the proper
builtin functions. Defining away "static" and "__inline" results in
builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
#include <mm3dnow.h>

View File

@ -1,13 +1,12 @@
/* { dg-do assemble } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -m3dnow -march=athlon" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow" } */
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in mm3dnow.h that reference the proper
builtin functions. Defining away "static" and "__inline" results in
builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
#include <mm3dnow.h>

View File

@ -1,13 +1,12 @@
/* { dg-do assemble } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -m3dnow -march=athlon" } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow" } */
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in mm3dnow.h that reference the proper
builtin functions. Defining away "static" and "__inline" results in
builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
#include <mm3dnow.h>

View File

@ -3,10 +3,10 @@
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
builtin functions. Defining away "static" and "__inline" results in
builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
#include <mmintrin.h>

View File

@ -3,10 +3,10 @@
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
builtin functions. Defining away "static" and "__inline" results in
builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
#include <mmintrin.h>

View File

@ -3,10 +3,10 @@
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "static" and
that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
/* Following intrinsics require immediate arguments. */

View File

@ -3,10 +3,10 @@
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "static" and
that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */
#define static
#define extern
#define __inline
#include <bmmintrin.h>