util/bufferiszero: Use __attribute__((target)) for avx2/avx512
Use the attribute, which is supported by clang, instead of the #pragma, which is not supported and, for some reason, also not detected by the meson probe, so we fail by -Werror. Include only <immintrin.h> as that is the outermost "official" header for these intrinsics -- emmintrin.h and smmintrin -- are older SSE2 and SSE4 specific headers, while the immintrin.h includes all of the Intel intrinsics. Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
5584e2dbe8
commit
701ea5870d
@ -2338,11 +2338,9 @@ config_host_data.set('CONFIG_CPUID_H', have_cpuid_h)
|
||||
config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
|
||||
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \
|
||||
.require(cc.links('''
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx2")
|
||||
#include <cpuid.h>
|
||||
#include <immintrin.h>
|
||||
static int bar(void *a) {
|
||||
static int __attribute__((target("avx2"))) bar(void *a) {
|
||||
__m256i x = *(__m256i *)a;
|
||||
return _mm256_testz_si256(x, x);
|
||||
}
|
||||
@ -2352,11 +2350,9 @@ config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
|
||||
config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
|
||||
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \
|
||||
.require(cc.links('''
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512f")
|
||||
#include <cpuid.h>
|
||||
#include <immintrin.h>
|
||||
static int bar(void *a) {
|
||||
static int __attribute__((target("avx512f"))) bar(void *a) {
|
||||
__m512i x = *(__m512i *)a;
|
||||
return _mm512_test_epi64_mask(x, x);
|
||||
}
|
||||
|
@ -64,18 +64,11 @@ buffer_zero_int(const void *buf, size_t len)
|
||||
}
|
||||
|
||||
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
|
||||
/* Do not use push_options pragmas unnecessarily, because clang
|
||||
* does not support them.
|
||||
*/
|
||||
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse2")
|
||||
#endif
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
/* Note that each of these vectorized functions require len >= 64. */
|
||||
|
||||
static bool
|
||||
static bool __attribute__((target("sse2")))
|
||||
buffer_zero_sse2(const void *buf, size_t len)
|
||||
{
|
||||
__m128i t = _mm_loadu_si128(buf);
|
||||
@ -104,20 +97,9 @@ buffer_zero_sse2(const void *buf, size_t len)
|
||||
|
||||
return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
|
||||
}
|
||||
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_AVX2_OPT
|
||||
/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
|
||||
* the includes have to be within the corresponding push_options region, and
|
||||
* therefore the regions themselves have to be ordered with increasing ISA.
|
||||
*/
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4")
|
||||
#include <smmintrin.h>
|
||||
|
||||
static bool
|
||||
static bool __attribute__((target("sse4")))
|
||||
buffer_zero_sse4(const void *buf, size_t len)
|
||||
{
|
||||
__m128i t = _mm_loadu_si128(buf);
|
||||
@ -145,12 +127,7 @@ buffer_zero_sse4(const void *buf, size_t len)
|
||||
return _mm_testz_si128(t, t);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx2")
|
||||
#include <immintrin.h>
|
||||
|
||||
static bool
|
||||
static bool __attribute__((target("avx2")))
|
||||
buffer_zero_avx2(const void *buf, size_t len)
|
||||
{
|
||||
/* Begin with an unaligned head of 32 bytes. */
|
||||
@ -176,15 +153,10 @@ buffer_zero_avx2(const void *buf, size_t len)
|
||||
|
||||
return _mm256_testz_si256(t, t);
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#endif /* CONFIG_AVX2_OPT */
|
||||
|
||||
#ifdef CONFIG_AVX512F_OPT
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512f")
|
||||
#include <immintrin.h>
|
||||
|
||||
static bool
|
||||
static bool __attribute__((target("avx512f")))
|
||||
buffer_zero_avx512(const void *buf, size_t len)
|
||||
{
|
||||
/* Begin with an unaligned head of 64 bytes. */
|
||||
@ -210,8 +182,7 @@ buffer_zero_avx512(const void *buf, size_t len)
|
||||
return !_mm512_test_epi64_mask(t, t);
|
||||
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif /* CONFIG_AVX512F_OPT */
|
||||
|
||||
|
||||
/* Note that for test_buffer_is_zero_next_accel, the most preferred
|
||||
|
Loading…
Reference in New Issue
Block a user