libstdc++: Add std::experimental::simd from the Parallelism TS 2
Adds <experimental/simd>. This implements the simd and simd_mask class templates via [[gnu::vector_size(N)]] data members. It implements overloads for all of <cmath> for simd. Explicit vectorization of the <cmath> functions is not finished. The majority of functions are marked as [[gnu::always_inline]] to enable quasi-ODR-conforming linking of TUs with different -m flags. Performance optimization was done for x86_64. ARM, Aarch64, and POWER rely on the compiler to recognize reduction, conversion, and shuffle patterns. Besides verification using many different machine flags, the code was also verified with different fast-math flags. libstdc++-v3/ChangeLog: * doc/xml/manual/status_cxx2017.xml: Add implementation status of the Parallelism TS 2. Document implementation-defined types and behavior. * include/Makefile.am: Add new headers. * include/Makefile.in: Regenerate. * include/experimental/simd: New file. New header for Parallelism TS 2. * include/experimental/bits/numeric_traits.h: New file. Implementation of P1841R1 using internal naming. Addition of missing IEC559 functionality query. * include/experimental/bits/simd.h: New file. Definition of the public simd interfaces and general implementation helpers. * include/experimental/bits/simd_builtin.h: New file. Implementation of the _VecBuiltin simd_abi. * include/experimental/bits/simd_converter.h: New file. Generic simd conversions. * include/experimental/bits/simd_detail.h: New file. Internal macros for the simd implementation. * include/experimental/bits/simd_fixed_size.h: New file. Simd fixed_size ABI specific implementations. * include/experimental/bits/simd_math.h: New file. Math overloads for simd. * include/experimental/bits/simd_neon.h: New file. Simd NEON specific implementations. * include/experimental/bits/simd_ppc.h: New file. Implement bit shifts to avoid invalid results for integral types smaller than int. * include/experimental/bits/simd_scalar.h: New file. Simd scalar ABI specific implementations. 
* include/experimental/bits/simd_x86.h: New file. Simd x86 specific implementations. * include/experimental/bits/simd_x86_conversions.h: New file. x86 specific conversion optimizations. The conversion patterns work around missing conversion patterns in the compiler and should be removed as soon as PR85048 is resolved. * testsuite/experimental/simd/standard_abi_usable.cc: New file. Test that all (not all fixed_size<N>, though) standard simd and simd_mask types are usable. * testsuite/experimental/simd/standard_abi_usable_2.cc: New file. As above but with -ffast-math. * testsuite/libstdc++-dg/conformance.exp: Don't build simd tests from the standard test loop. Instead use check_vect_support_and_set_flags to build simd tests with the relevant machine flags.
This commit is contained in:
parent
c91db798ec
commit
2bcceb6fc5
@ -2869,6 +2869,17 @@ since C++14 and the implementation is complete.
|
||||
<entry>Library Fundamentals 2 TS</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0214r9.pdf">
|
||||
P0214R9
|
||||
</link>
|
||||
</entry>
|
||||
<entry>Data-Parallel Types</entry>
|
||||
<entry>Y</entry>
|
||||
<entry>Parallelism 2 TS</entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
@ -3014,6 +3025,211 @@ since C++14 and the implementation is complete.
|
||||
If <code>!is_regular_file(p)</code>, an error is reported.
|
||||
</para>
|
||||
|
||||
<section xml:id="iso.2017.par2ts" xreflabel="Implementation Specific Behavior of the Parallelism 2 TS"><info><title>Parallelism 2 TS</title></info>
|
||||
|
||||
<para>
|
||||
<emphasis>9.3 [parallel.simd.abi]</emphasis>
|
||||
<code>max_fixed_size<T></code> is 32, except when targeting
|
||||
AVX512BW and <code>sizeof(T)</code> is 1.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
When targeting 32-bit x86,
|
||||
<classname>simd_abi::compatible<T></classname> is an alias for
|
||||
<classname>simd_abi::scalar</classname>.
|
||||
When targeting 64-bit x86 (including x32) or Aarch64,
|
||||
<classname>simd_abi::compatible<T></classname> is an alias for
|
||||
<classname>simd_abi::_VecBuiltin<16></classname>,
|
||||
unless <code>T</code> is <code>long double</code>, in which case it is
|
||||
an alias for <classname>simd_abi::scalar</classname>.
|
||||
When targeting ARM (but not Aarch64) with NEON support,
|
||||
<classname>simd_abi::compatible<T></classname> is an alias for
|
||||
<classname>simd_abi::_VecBuiltin<16></classname>,
|
||||
unless <code>sizeof(T) > 4</code>, in which case it is
|
||||
an alias for <classname>simd_abi::scalar</classname>. Additionally,
|
||||
<classname>simd_abi::compatible<float></classname> is an alias for
|
||||
<classname>simd_abi::scalar</classname> unless compiling with
|
||||
-ffast-math.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
When targeting x86 (both 32-bit and 64-bit),
|
||||
<classname>simd_abi::native<T></classname> is an alias for one of
|
||||
<classname>simd_abi::scalar</classname>,
|
||||
<classname>simd_abi::_VecBuiltin<16></classname>,
|
||||
<classname>simd_abi::_VecBuiltin<32></classname>, or
|
||||
<classname>simd_abi::_VecBltnBtmsk<64></classname>, depending on
|
||||
<code>T</code> and the machine options the compiler was invoked with.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
When targeting ARM/Aarch64 or POWER,
|
||||
<classname>simd_abi::native<T></classname> is an alias for
|
||||
<classname>simd_abi::scalar</classname> or
|
||||
<classname>simd_abi::_VecBuiltin<16></classname>, depending on
|
||||
<code>T</code> and the machine options the compiler was invoked with.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For any other targeted machine
|
||||
<classname>simd_abi::compatible<T></classname> and
|
||||
<classname>simd_abi::native<T></classname> are aliases for
|
||||
<classname>simd_abi::scalar</classname>. (subject to change)
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The extended ABI tag types defined in the
|
||||
<code>std::experimental::parallelism_v2::simd_abi</code> namespace are:
|
||||
<classname>simd_abi::_VecBuiltin<Bytes></classname>, and
|
||||
<classname>simd_abi::_VecBltnBtmsk<Bytes></classname>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<classname>simd_abi::deduce<T, N, Abis...>::type</classname>,
|
||||
with <code>N > 1</code> is an alias for an extended ABI tag, if a
|
||||
supported extended ABI tag exists. Otherwise it is an alias for
|
||||
<classname>simd_abi::fixed_size<N></classname>. The <classname>
|
||||
simd_abi::_VecBltnBtmsk</classname> ABI tag is preferred over
|
||||
<classname>simd_abi::_VecBuiltin</classname>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>9.4 [parallel.simd.traits]</emphasis>
|
||||
<classname>memory_alignment<T, U>::value</classname> is
|
||||
<code>sizeof(U) * T::size()</code> rounded up to the next power-of-two
|
||||
value.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>9.6.1 [parallel.simd.overview]</emphasis>
|
||||
On ARM, <classname>simd<T, _VecBuiltin<Bytes>></classname>
|
||||
is supported if <code>__ARM_NEON</code> is defined and
|
||||
<code>sizeof(T) <= 4</code>. Additionally,
|
||||
<code>sizeof(T) == 8</code> with integral <code>T</code> is supported if
|
||||
<code>__ARM_ARCH >= 8</code>, and <code>double</code> is supported if
|
||||
<code>__aarch64__</code> is defined.
|
||||
|
||||
On POWER, <classname>simd<T, _VecBuiltin<Bytes>></classname>
|
||||
is supported if <code>__ALTIVEC__</code> is defined and <code>sizeof(T)
|
||||
< 8</code>. Additionally, <code>double</code> is supported if
|
||||
<code>__VSX__</code> is defined, and any <code>T</code> with <code>
|
||||
sizeof(T) ≤ 8</code> is supported if <code>__POWER8_VECTOR__</code>
|
||||
is defined.
|
||||
|
||||
On x86, given an extended ABI tag <code>Abi</code>,
|
||||
<classname>simd<T, Abi></classname> is supported according to the
|
||||
following table:
|
||||
<table frame="all" xml:id="table.par2ts_simd_support">
|
||||
<title>Support for Extended ABI Tags</title>
|
||||
|
||||
<tgroup cols="4" align="left" colsep="0" rowsep="1">
|
||||
<colspec colname="c1"/>
|
||||
<colspec colname="c2"/>
|
||||
<colspec colname="c3"/>
|
||||
<colspec colname="c4"/>
|
||||
<thead>
|
||||
<row>
|
||||
<entry>ABI tag <code>Abi</code></entry>
|
||||
<entry>value type <code>T</code></entry>
|
||||
<entry>values for <code>Bytes</code></entry>
|
||||
<entry>required machine option</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry morerows="5">
|
||||
<classname>_VecBuiltin<Bytes></classname>
|
||||
</entry>
|
||||
<entry morerows="1"><code>float</code></entry>
|
||||
<entry>8, 12, 16</entry>
|
||||
<entry>"-msse"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>20, 24, 28, 32</entry>
|
||||
<entry>"-mavx"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry morerows="1"><code>double</code></entry>
|
||||
<entry>16</entry>
|
||||
<entry>"-msse2"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>24, 32</entry>
|
||||
<entry>"-mavx"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry morerows="1">
|
||||
integral types other than <code>bool</code>
|
||||
</entry>
|
||||
<entry>
|
||||
<code>Bytes</code> ≤ 16 and <code>Bytes</code> divisible by
|
||||
<code>sizeof(T)</code>
|
||||
</entry>
|
||||
<entry>"-msse2"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
16 < <code>Bytes</code> ≤ 32 and <code>Bytes</code>
|
||||
divisible by <code>sizeof(T)</code>
|
||||
</entry>
|
||||
<entry>"-mavx2"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry morerows="1">
|
||||
<classname>_VecBuiltin<Bytes></classname> and
|
||||
<classname>_VecBltnBtmsk<Bytes></classname>
|
||||
</entry>
|
||||
<entry>
|
||||
vectorizable types with <code>sizeof(T)</code> ≥ 4
|
||||
</entry>
|
||||
<entry morerows="1">
|
||||
32 < <code>Bytes</code> ≤ 64 and <code>Bytes</code>
|
||||
divisible by <code>sizeof(T)</code>
|
||||
</entry>
|
||||
<entry>"-mavx512f"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
vectorizable types with <code>sizeof(T)</code> < 4
|
||||
</entry>
|
||||
<entry>"-mavx512bw"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry morerows="1">
|
||||
<classname>_VecBltnBtmsk<Bytes></classname>
|
||||
</entry>
|
||||
<entry>
|
||||
vectorizable types with <code>sizeof(T)</code> ≥ 4
|
||||
</entry>
|
||||
<entry morerows="1">
|
||||
<code>Bytes</code> ≤ 32 and <code>Bytes</code> divisible by
|
||||
<code>sizeof(T)</code>
|
||||
</entry>
|
||||
<entry>"-mavx512vl"</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
vectorizable types with <code>sizeof(T)</code> < 4
|
||||
</entry>
|
||||
<entry>"-mavx512bw" and "-mavx512vl"</entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
</para>
|
||||
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
||||
|
@ -747,6 +747,7 @@ experimental_headers = \
|
||||
${experimental_srcdir}/ratio \
|
||||
${experimental_srcdir}/regex \
|
||||
${experimental_srcdir}/set \
|
||||
${experimental_srcdir}/simd \
|
||||
${experimental_srcdir}/socket \
|
||||
${experimental_srcdir}/source_location \
|
||||
${experimental_srcdir}/string \
|
||||
@ -766,7 +767,19 @@ experimental_bits_builddir = ./experimental/bits
|
||||
experimental_bits_headers = \
|
||||
${experimental_bits_srcdir}/lfts_config.h \
|
||||
${experimental_bits_srcdir}/net.h \
|
||||
${experimental_bits_srcdir}/numeric_traits.h \
|
||||
${experimental_bits_srcdir}/shared_ptr.h \
|
||||
${experimental_bits_srcdir}/simd.h \
|
||||
${experimental_bits_srcdir}/simd_builtin.h \
|
||||
${experimental_bits_srcdir}/simd_converter.h \
|
||||
${experimental_bits_srcdir}/simd_detail.h \
|
||||
${experimental_bits_srcdir}/simd_fixed_size.h \
|
||||
${experimental_bits_srcdir}/simd_math.h \
|
||||
${experimental_bits_srcdir}/simd_neon.h \
|
||||
${experimental_bits_srcdir}/simd_ppc.h \
|
||||
${experimental_bits_srcdir}/simd_scalar.h \
|
||||
${experimental_bits_srcdir}/simd_x86.h \
|
||||
${experimental_bits_srcdir}/simd_x86_conversions.h \
|
||||
${experimental_bits_srcdir}/string_view.tcc \
|
||||
${experimental_bits_filesystem_headers}
|
||||
|
||||
|
@ -1097,6 +1097,7 @@ experimental_headers = \
|
||||
${experimental_srcdir}/ratio \
|
||||
${experimental_srcdir}/regex \
|
||||
${experimental_srcdir}/set \
|
||||
${experimental_srcdir}/simd \
|
||||
${experimental_srcdir}/socket \
|
||||
${experimental_srcdir}/source_location \
|
||||
${experimental_srcdir}/string \
|
||||
@ -1116,7 +1117,19 @@ experimental_bits_builddir = ./experimental/bits
|
||||
experimental_bits_headers = \
|
||||
${experimental_bits_srcdir}/lfts_config.h \
|
||||
${experimental_bits_srcdir}/net.h \
|
||||
${experimental_bits_srcdir}/numeric_traits.h \
|
||||
${experimental_bits_srcdir}/shared_ptr.h \
|
||||
${experimental_bits_srcdir}/simd.h \
|
||||
${experimental_bits_srcdir}/simd_builtin.h \
|
||||
${experimental_bits_srcdir}/simd_converter.h \
|
||||
${experimental_bits_srcdir}/simd_detail.h \
|
||||
${experimental_bits_srcdir}/simd_fixed_size.h \
|
||||
${experimental_bits_srcdir}/simd_math.h \
|
||||
${experimental_bits_srcdir}/simd_neon.h \
|
||||
${experimental_bits_srcdir}/simd_ppc.h \
|
||||
${experimental_bits_srcdir}/simd_scalar.h \
|
||||
${experimental_bits_srcdir}/simd_x86.h \
|
||||
${experimental_bits_srcdir}/simd_x86_conversions.h \
|
||||
${experimental_bits_srcdir}/string_view.tcc \
|
||||
${experimental_bits_filesystem_headers}
|
||||
|
||||
|
567
libstdc++-v3/include/experimental/bits/numeric_traits.h
Normal file
567
libstdc++-v3/include/experimental/bits/numeric_traits.h
Normal file
@ -0,0 +1,567 @@
|
||||
// Definition of numeric_limits replacement traits P1841R1 -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace std {
|
||||
|
||||
// Detection helper used by __value_exists. The primary template derives
// from false_type; the partial specialization is selected instead, and
// derives from true_type, whenever _Trait<_Tp>::value is well-formed.
template <template <typename> class _Trait, typename _Tp, typename = void>
|
||||
struct __value_exists_impl : false_type {};
|
||||
|
||||
template <template <typename> class _Trait, typename _Tp>
|
||||
struct __value_exists_impl<_Trait, _Tp, void_t<decltype(_Trait<_Tp>::value)>>
|
||||
: true_type {};
|
||||
|
||||
template <typename _Tp, bool = is_arithmetic_v<_Tp>>
|
||||
struct __digits_impl {};
|
||||
|
||||
// Generic __digits_impl for arithmetic types: the size of _Tp in bits
// (sizeof(_Tp) * __CHAR_BIT__), minus one if is_signed_v<_Tp> is true.
template <typename _Tp>
|
||||
struct __digits_impl<_Tp, true>
|
||||
{
|
||||
static inline constexpr int value
|
||||
= sizeof(_Tp) * __CHAR_BIT__ - is_signed_v<_Tp>;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct __digits_impl<float, true>
|
||||
{ static inline constexpr int value = __FLT_MANT_DIG__; };
|
||||
|
||||
template <>
|
||||
struct __digits_impl<double, true>
|
||||
{ static inline constexpr int value = __DBL_MANT_DIG__; };
|
||||
|
||||
template <>
|
||||
struct __digits_impl<long double, true>
|
||||
{ static inline constexpr int value = __LDBL_MANT_DIG__; };
|
||||
|
||||
template <typename _Tp, bool = is_arithmetic_v<_Tp>>
|
||||
struct __digits10_impl {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __digits10_impl<_Tp, true>
|
||||
{
|
||||
// The fraction 643/2136 approximates log10(2) to 7 significant digits.
|
||||
static inline constexpr int value = __digits_impl<_Tp>::value * 643L / 2136;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct __digits10_impl<float, true>
|
||||
{ static inline constexpr int value = __FLT_DIG__; };
|
||||
|
||||
template <>
|
||||
struct __digits10_impl<double, true>
|
||||
{ static inline constexpr int value = __DBL_DIG__; };
|
||||
|
||||
template <>
|
||||
struct __digits10_impl<long double, true>
|
||||
{ static inline constexpr int value = __LDBL_DIG__; };
|
||||
|
||||
template <typename _Tp, bool = is_arithmetic_v<_Tp>>
|
||||
struct __max_digits10_impl {};
|
||||
|
||||
// __max_digits10_impl for arithmetic types. For floating-point types the
// value is 2 + digits * 643 / 2136 (integer arithmetic, using the same
// 643/2136 fraction as __digits10_impl); for all other arithmetic types it
// is __digits10_impl<_Tp>::value + 1.
template <typename _Tp>
|
||||
struct __max_digits10_impl<_Tp, true>
|
||||
{
|
||||
static inline constexpr int value
|
||||
= is_floating_point_v<_Tp> ? 2 + __digits_impl<_Tp>::value * 643L / 2136
|
||||
: __digits10_impl<_Tp>::value + 1;
|
||||
};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __max_exponent_impl {};
|
||||
|
||||
template <>
|
||||
struct __max_exponent_impl<float>
|
||||
{ static inline constexpr int value = __FLT_MAX_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __max_exponent_impl<double>
|
||||
{ static inline constexpr int value = __DBL_MAX_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __max_exponent_impl<long double>
|
||||
{ static inline constexpr int value = __LDBL_MAX_EXP__; };
|
||||
|
||||
template <typename _Tp>
|
||||
struct __max_exponent10_impl {};
|
||||
|
||||
template <>
|
||||
struct __max_exponent10_impl<float>
|
||||
{ static inline constexpr int value = __FLT_MAX_10_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __max_exponent10_impl<double>
|
||||
{ static inline constexpr int value = __DBL_MAX_10_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __max_exponent10_impl<long double>
|
||||
{ static inline constexpr int value = __LDBL_MAX_10_EXP__; };
|
||||
|
||||
template <typename _Tp>
|
||||
struct __min_exponent_impl {};
|
||||
|
||||
template <>
|
||||
struct __min_exponent_impl<float>
|
||||
{ static inline constexpr int value = __FLT_MIN_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __min_exponent_impl<double>
|
||||
{ static inline constexpr int value = __DBL_MIN_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __min_exponent_impl<long double>
|
||||
{ static inline constexpr int value = __LDBL_MIN_EXP__; };
|
||||
|
||||
template <typename _Tp>
|
||||
struct __min_exponent10_impl {};
|
||||
|
||||
template <>
|
||||
struct __min_exponent10_impl<float>
|
||||
{ static inline constexpr int value = __FLT_MIN_10_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __min_exponent10_impl<double>
|
||||
{ static inline constexpr int value = __DBL_MIN_10_EXP__; };
|
||||
|
||||
template <>
|
||||
struct __min_exponent10_impl<long double>
|
||||
{ static inline constexpr int value = __LDBL_MIN_10_EXP__; };
|
||||
|
||||
template <typename _Tp, bool = is_arithmetic_v<_Tp>>
|
||||
struct __radix_impl {};
|
||||
|
||||
// __radix_impl for arithmetic types: __FLT_RADIX__ for floating-point
// types, 2 for every other arithmetic type.
template <typename _Tp>
|
||||
struct __radix_impl<_Tp, true>
|
||||
{
|
||||
static inline constexpr int value
|
||||
= is_floating_point_v<_Tp> ? __FLT_RADIX__ : 2;
|
||||
};
|
||||
|
||||
// [num.traits.util], numeric utility traits
|
||||
template <template <typename> class _Trait, typename _Tp>
|
||||
struct __value_exists : __value_exists_impl<_Trait, _Tp> {};
|
||||
|
||||
template <template <typename> class _Trait, typename _Tp>
|
||||
inline constexpr bool __value_exists_v = __value_exists<_Trait, _Tp>::value;
|
||||
|
||||
// Returns _Trait<_Tp>::value converted to _Up if that member exists (as
// reported by __value_exists_v); otherwise returns __def, which defaults to
// a value-initialized _Up. _Up itself defaults to _Tp.
template <template <typename> class _Trait, typename _Tp, typename _Up = _Tp>
|
||||
inline constexpr _Up
|
||||
__value_or(_Up __def = _Up()) noexcept
|
||||
{
|
||||
if constexpr (__value_exists_v<_Trait, _Tp>)
|
||||
return static_cast<_Up>(_Trait<_Tp>::value);
|
||||
else
|
||||
return __def;
|
||||
}
|
||||
|
||||
template <typename _Tp, bool = is_arithmetic_v<_Tp>>
|
||||
struct __norm_min_impl {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __norm_min_impl<_Tp, true>
|
||||
{ static inline constexpr _Tp value = 1; };
|
||||
|
||||
template <>
|
||||
struct __norm_min_impl<float, true>
|
||||
{ static inline constexpr float value = __FLT_MIN__; };
|
||||
|
||||
template <>
|
||||
struct __norm_min_impl<double, true>
|
||||
{ static inline constexpr double value = __DBL_MIN__; };
|
||||
|
||||
template <>
|
||||
struct __norm_min_impl<long double, true>
|
||||
{ static inline constexpr long double value = __LDBL_MIN__; };
|
||||
|
||||
// Primary template: inherits its value member (if any) from
// __norm_min_impl<_Tp>. Explicit specializations for float, double and
// long double replace it only when the matching __*_HAS_DENORM__ macro is
// nonzero.
template <typename _Tp>
|
||||
struct __denorm_min_impl : __norm_min_impl<_Tp> {};
|
||||
|
||||
#if __FLT_HAS_DENORM__
|
||||
template <>
|
||||
struct __denorm_min_impl<float>
|
||||
{ static inline constexpr float value = __FLT_DENORM_MIN__; };
|
||||
#endif
|
||||
|
||||
#if __DBL_HAS_DENORM__
|
||||
template <>
|
||||
struct __denorm_min_impl<double>
|
||||
{ static inline constexpr double value = __DBL_DENORM_MIN__; };
|
||||
#endif
|
||||
|
||||
#if __LDBL_HAS_DENORM__
|
||||
template <>
|
||||
struct __denorm_min_impl<long double>
|
||||
{ static inline constexpr long double value = __LDBL_DENORM_MIN__; };
|
||||
#endif
|
||||
|
||||
template <typename _Tp>
|
||||
struct __epsilon_impl {};
|
||||
|
||||
template <>
|
||||
struct __epsilon_impl<float>
|
||||
{ static inline constexpr float value = __FLT_EPSILON__; };
|
||||
|
||||
template <>
|
||||
struct __epsilon_impl<double>
|
||||
{ static inline constexpr double value = __DBL_EPSILON__; };
|
||||
|
||||
template <>
|
||||
struct __epsilon_impl<long double>
|
||||
{ static inline constexpr long double value = __LDBL_EPSILON__; };
|
||||
|
||||
template <typename _Tp, bool = is_arithmetic_v<_Tp>>
|
||||
struct __finite_min_impl {};
|
||||
|
||||
// Generic __finite_min_impl for arithmetic types: zero (_Tp()) for unsigned
// types, otherwise -2 * (_Tp(1) << (digits - 1)).
// Binary minus binds tighter than <<, so the shift count below is
// __digits_impl<_Tp>::value - 1.
// NOTE(review): presumably written as -2 * 2^(digits-1) instead of a single
// shift by `digits` to keep the intermediate shift in range — confirm.
template <typename _Tp>
|
||||
struct __finite_min_impl<_Tp, true>
|
||||
{
|
||||
static inline constexpr _Tp value
|
||||
= is_unsigned_v<_Tp> ? _Tp()
|
||||
: -2 * (_Tp(1) << __digits_impl<_Tp>::value - 1);
|
||||
};
|
||||
|
||||
template <>
|
||||
struct __finite_min_impl<float, true>
|
||||
{ static inline constexpr float value = -__FLT_MAX__; };
|
||||
|
||||
template <>
|
||||
struct __finite_min_impl<double, true>
|
||||
{ static inline constexpr double value = -__DBL_MAX__; };
|
||||
|
||||
template <>
|
||||
struct __finite_min_impl<long double, true>
|
||||
{ static inline constexpr long double value = -__LDBL_MAX__; };
|
||||
|
||||
template <typename _Tp, bool = is_arithmetic_v<_Tp>>
|
||||
struct __finite_max_impl {};
|
||||
|
||||
// Generic __finite_max_impl for arithmetic types: the bitwise complement of
// __finite_min_impl<_Tp>::value. float, double and long double have their
// own explicit specializations.
template <typename _Tp>
|
||||
struct __finite_max_impl<_Tp, true>
|
||||
{ static inline constexpr _Tp value = ~__finite_min_impl<_Tp>::value; };
|
||||
|
||||
template <>
|
||||
struct __finite_max_impl<float, true>
|
||||
{ static inline constexpr float value = __FLT_MAX__; };
|
||||
|
||||
template <>
|
||||
struct __finite_max_impl<double, true>
|
||||
{ static inline constexpr double value = __DBL_MAX__; };
|
||||
|
||||
template <>
|
||||
struct __finite_max_impl<long double, true>
|
||||
{ static inline constexpr long double value = __LDBL_MAX__; };
|
||||
|
||||
template <typename _Tp>
|
||||
struct __infinity_impl {};
|
||||
|
||||
#if __FLT_HAS_INFINITY__
|
||||
template <>
|
||||
struct __infinity_impl<float>
|
||||
{ static inline constexpr float value = __builtin_inff(); };
|
||||
#endif
|
||||
|
||||
#if __DBL_HAS_INFINITY__
|
||||
template <>
|
||||
struct __infinity_impl<double>
|
||||
{ static inline constexpr double value = __builtin_inf(); };
|
||||
#endif
|
||||
|
||||
#if __LDBL_HAS_INFINITY__
|
||||
template <>
|
||||
struct __infinity_impl<long double>
|
||||
{ static inline constexpr long double value = __builtin_infl(); };
|
||||
#endif
|
||||
|
||||
template <typename _Tp>
|
||||
struct __quiet_NaN_impl {};
|
||||
|
||||
#if __FLT_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __quiet_NaN_impl<float>
|
||||
{ static inline constexpr float value = __builtin_nanf(""); };
|
||||
#endif
|
||||
|
||||
#if __DBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __quiet_NaN_impl<double>
|
||||
{ static inline constexpr double value = __builtin_nan(""); };
|
||||
#endif
|
||||
|
||||
#if __LDBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __quiet_NaN_impl<long double>
|
||||
{ static inline constexpr long double value = __builtin_nanl(""); };
|
||||
#endif
|
||||
|
||||
template <typename _Tp, bool = is_floating_point_v<_Tp>>
|
||||
struct __reciprocal_overflow_threshold_impl {};
|
||||
|
||||
// Floating-point case. `value` is computed at compile time by the recursive
// bisection _S_search, started with the bounds 1.01 / finite_max and
// 0.99 / finite_max.
template <typename _Tp>
|
||||
struct __reciprocal_overflow_threshold_impl<_Tp, true>
|
||||
{
|
||||
// This typically yields a subnormal value. Is this incorrect for
|
||||
// flush-to-zero configurations?
|
||||
// Bisection step: a midpoint that passes the comparison below replaces
|
||||
// __ok, otherwise it replaces __overflows. The recursion ends, returning
|
||||
// __ok, once the midpoint compares equal to either bound.
static constexpr _Tp _S_search(_Tp __ok, _Tp __overflows)
|
||||
{
|
||||
const _Tp __mid = (__ok + __overflows) / 2;
|
||||
// 1/__mid without -ffast-math is not a constant expression if it
|
||||
// overflows. Therefore divide 1 by the radix before division.
|
||||
// Consequently finite_max (the threshold) must be scaled by the
|
||||
// same value.
|
||||
if (__mid == __ok || __mid == __overflows)
|
||||
return __ok;
|
||||
else if (_Tp(1) / (__radix_impl<_Tp>::value * __mid)
|
||||
<= __finite_max_impl<_Tp>::value / __radix_impl<_Tp>::value)
|
||||
return _S_search(__mid, __overflows);
|
||||
else
|
||||
return _S_search(__ok, __mid);
|
||||
}
|
||||
|
||||
static inline constexpr _Tp value
|
||||
= _S_search(_Tp(1.01) / __finite_max_impl<_Tp>::value,
|
||||
_Tp(0.99) / __finite_max_impl<_Tp>::value);
|
||||
};
|
||||
|
||||
template <typename _Tp, bool = is_floating_point_v<_Tp>>
|
||||
struct __round_error_impl {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __round_error_impl<_Tp, true>
|
||||
{ static inline constexpr _Tp value = 0.5; };
|
||||
|
||||
template <typename _Tp>
|
||||
struct __signaling_NaN_impl {};
|
||||
|
||||
#if __FLT_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __signaling_NaN_impl<float>
|
||||
{ static inline constexpr float value = __builtin_nansf(""); };
|
||||
#endif
|
||||
|
||||
#if __DBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __signaling_NaN_impl<double>
|
||||
{ static inline constexpr double value = __builtin_nans(""); };
|
||||
#endif
|
||||
|
||||
#if __LDBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __signaling_NaN_impl<long double>
|
||||
{ static inline constexpr long double value = __builtin_nansl(""); };
|
||||
#endif
|
||||
|
||||
// [num.traits.val], numeric distinguished value traits
|
||||
template <typename _Tp>
|
||||
struct __denorm_min : __denorm_min_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __epsilon : __epsilon_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __finite_max : __finite_max_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __finite_min : __finite_min_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __infinity : __infinity_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __norm_min : __norm_min_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __quiet_NaN : __quiet_NaN_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __reciprocal_overflow_threshold
|
||||
: __reciprocal_overflow_threshold_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __round_error : __round_error_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __signaling_NaN : __signaling_NaN_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __denorm_min_v = __denorm_min<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __epsilon_v = __epsilon<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __finite_max_v = __finite_max<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __finite_min_v = __finite_min<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __infinity_v = __infinity<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __norm_min_v = __norm_min<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __quiet_NaN_v = __quiet_NaN<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __reciprocal_overflow_threshold_v
|
||||
= __reciprocal_overflow_threshold<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __round_error_v = __round_error<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __signaling_NaN_v = __signaling_NaN<_Tp>::value;
|
||||
|
||||
// [num.traits.char], numeric characteristics traits
|
||||
template <typename _Tp>
|
||||
struct __digits : __digits_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __digits10 : __digits10_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __max_digits10 : __max_digits10_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __max_exponent : __max_exponent_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __max_exponent10 : __max_exponent10_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __min_exponent : __min_exponent_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __min_exponent10 : __min_exponent10_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __radix : __radix_impl<remove_cv_t<_Tp>> {};
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __digits_v = __digits<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __digits10_v = __digits10<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __max_digits10_v = __max_digits10<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __max_exponent_v = __max_exponent<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __max_exponent10_v = __max_exponent10<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __min_exponent_v = __min_exponent<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __min_exponent10_v = __min_exponent10<_Tp>::value;
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr auto __radix_v = __radix<_Tp>::value;
|
||||
|
||||
// mkretz's extensions
|
||||
// TODO: does GCC tell me? __GCC_IEC_559 >= 2 is not the right answer
|
||||
// Derives from true_type for every _Tp; this header provides no
// specializations that would make it false.
template <typename _Tp>
|
||||
struct __has_iec559_storage_format : true_type {};
|
||||
|
||||
template <typename _Tp>
|
||||
inline constexpr bool __has_iec559_storage_format_v
|
||||
= __has_iec559_storage_format<_Tp>::value;
|
||||
|
||||
/* To propose:
|
||||
If __has_iec559_behavior<__quiet_NaN, T> is true the following holds:
|
||||
- nan == nan is false
|
||||
- isnan(nan) is true
|
||||
- isnan(nan + x) is true
|
||||
- isnan(inf/inf) is true
|
||||
- isnan(0/0) is true
|
||||
- isunordered(nan, x) is true
|
||||
|
||||
If __has_iec559_behavior<__infinity, T> is true the following holds (x is
|
||||
neither nan nor inf):
|
||||
- isinf(inf) is true
|
||||
- isinf(inf + x) is true
|
||||
- isinf(1/0) is true
|
||||
*/
|
||||
// Primary template: false_type for every trait/type pair. The explicit
// specializations that follow (compiled only when __FINITE_MATH_ONLY__ is
// zero) opt in specific combinations of trait and floating-point type.
template <template <typename> class _Trait, typename _Tp>
|
||||
struct __has_iec559_behavior : false_type {};
|
||||
|
||||
template <template <typename> class _Trait, typename _Tp>
|
||||
inline constexpr bool __has_iec559_behavior_v
|
||||
= __has_iec559_behavior<_Trait, _Tp>::value;
|
||||
|
||||
#if !__FINITE_MATH_ONLY__
|
||||
#if __FLT_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__quiet_NaN, float> : true_type {};
|
||||
#endif
|
||||
|
||||
#if __DBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__quiet_NaN, double> : true_type {};
|
||||
#endif
|
||||
|
||||
#if __LDBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__quiet_NaN, long double> : true_type {};
|
||||
#endif
|
||||
|
||||
#if __FLT_HAS_INFINITY__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__infinity, float> : true_type {};
|
||||
#endif
|
||||
|
||||
#if __DBL_HAS_INFINITY__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__infinity, double> : true_type {};
|
||||
#endif
|
||||
|
||||
#if __LDBL_HAS_INFINITY__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__infinity, long double> : true_type {};
|
||||
#endif
|
||||
|
||||
#ifdef __SUPPORT_SNAN__
|
||||
#if __FLT_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__signaling_NaN, float> : true_type {};
|
||||
#endif
|
||||
|
||||
#if __DBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__signaling_NaN, double> : true_type {};
|
||||
#endif
|
||||
|
||||
#if __LDBL_HAS_QUIET_NAN__
|
||||
template <>
|
||||
struct __has_iec559_behavior<__signaling_NaN, long double> : true_type {};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // __FINITE_MATH_ONLY__
|
||||
|
||||
} // namespace std
|
5051
libstdc++-v3/include/experimental/bits/simd.h
Normal file
5051
libstdc++-v3/include/experimental/bits/simd.h
Normal file
File diff suppressed because it is too large
Load Diff
2949
libstdc++-v3/include/experimental/bits/simd_builtin.h
Normal file
2949
libstdc++-v3/include/experimental/bits/simd_builtin.h
Normal file
File diff suppressed because it is too large
Load Diff
354
libstdc++-v3/include/experimental/bits/simd_converter.h
Normal file
354
libstdc++-v3/include/experimental/bits/simd_converter.h
Normal file
@ -0,0 +1,354 @@
|
||||
// Generic simd conversions -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
|
||||
#define _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
|
||||
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
_GLIBCXX_SIMD_BEGIN_NAMESPACE
|
||||
// _SimdConverter scalar -> scalar {{{
|
||||
template <typename _From, typename _To>
|
||||
struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::scalar,
|
||||
enable_if_t<!is_same_v<_From, _To>>>
|
||||
{
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _To operator()(_From __a) const noexcept
|
||||
{ return static_cast<_To>(__a); }
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdConverter scalar -> "native" {{{
|
||||
template <typename _From, typename _To, typename _Abi>
|
||||
struct _SimdConverter<_From, simd_abi::scalar, _To, _Abi,
|
||||
enable_if_t<!is_same_v<_Abi, simd_abi::scalar>>>
|
||||
{
|
||||
using _Ret = typename _Abi::template __traits<_To>::_SimdMember;
|
||||
|
||||
template <typename... _More>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _Ret
|
||||
operator()(_From __a, _More... __more) const noexcept
|
||||
{
|
||||
static_assert(sizeof...(_More) + 1 == _Abi::template _S_size<_To>);
|
||||
static_assert(conjunction_v<is_same<_From, _More>...>);
|
||||
return __make_vector<_To>(__a, __more...);
|
||||
}
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdConverter "native 1" -> "native 2" {{{
|
||||
template <typename _From, typename _To, typename _AFrom, typename _ATo>
|
||||
struct _SimdConverter<
|
||||
_From, _AFrom, _To, _ATo,
|
||||
enable_if_t<!disjunction_v<
|
||||
__is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
|
||||
is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
|
||||
conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>>>
|
||||
{
|
||||
using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
|
||||
using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
|
||||
using _V = __vector_type_t<_To, simd_size_v<_To, _ATo>>;
|
||||
|
||||
template <typename... _More>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _Ret
|
||||
operator()(_Arg __a, _More... __more) const noexcept
|
||||
{ return __vector_convert<_V>(__a, __more...); }
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdConverter scalar -> fixed_size<1> {{{1
|
||||
template <typename _From, typename _To>
|
||||
struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::fixed_size<1>,
|
||||
void>
|
||||
{
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_To, simd_abi::scalar>
|
||||
operator()(_From __x) const noexcept
|
||||
{ return {static_cast<_To>(__x)}; }
|
||||
};
|
||||
|
||||
// _SimdConverter fixed_size<1> -> scalar {{{1
|
||||
template <typename _From, typename _To>
|
||||
struct _SimdConverter<_From, simd_abi::fixed_size<1>, _To, simd_abi::scalar,
|
||||
void>
|
||||
{
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _To
|
||||
operator()(_SimdTuple<_From, simd_abi::scalar> __x) const noexcept
|
||||
{ return {static_cast<_To>(__x.first)}; }
|
||||
};
|
||||
|
||||
// _SimdConverter fixed_size<_Np> -> fixed_size<_Np> {{{1
|
||||
template <typename _From, typename _To, int _Np>
|
||||
struct _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To,
|
||||
simd_abi::fixed_size<_Np>,
|
||||
enable_if_t<!is_same_v<_From, _To>>>
|
||||
{
|
||||
using _Ret = __fixed_size_storage_t<_To, _Np>;
|
||||
using _Arg = __fixed_size_storage_t<_From, _Np>;
|
||||
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _Ret
|
||||
operator()(const _Arg& __x) const noexcept
|
||||
{
|
||||
if constexpr (is_same_v<_From, _To>)
|
||||
return __x;
|
||||
|
||||
// special case (optimize) int signedness casts
|
||||
else if constexpr (sizeof(_From) == sizeof(_To)
|
||||
&& is_integral_v<_From> && is_integral_v<_To>)
|
||||
return __bit_cast<_Ret>(__x);
|
||||
|
||||
// special case if all ABI tags in _Ret are scalar
|
||||
else if constexpr (__is_scalar_abi<typename _Ret::_FirstAbi>())
|
||||
{
|
||||
return __call_with_subscripts(
|
||||
__x, make_index_sequence<_Np>(),
|
||||
[](auto... __values) constexpr->_Ret {
|
||||
return __make_simd_tuple<_To, decltype((void) __values,
|
||||
simd_abi::scalar())...>(
|
||||
static_cast<_To>(__values)...);
|
||||
});
|
||||
}
|
||||
|
||||
// from one vector to one vector
|
||||
else if constexpr (_Arg::_S_first_size == _Ret::_S_first_size)
|
||||
{
|
||||
_SimdConverter<_From, typename _Arg::_FirstAbi, _To,
|
||||
typename _Ret::_FirstAbi>
|
||||
__native_cvt;
|
||||
if constexpr (_Arg::_S_tuple_size == 1)
|
||||
return {__native_cvt(__x.first)};
|
||||
else
|
||||
{
|
||||
constexpr size_t _NRemain = _Np - _Arg::_S_first_size;
|
||||
_SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To,
|
||||
simd_abi::fixed_size<_NRemain>>
|
||||
__remainder_cvt;
|
||||
return {__native_cvt(__x.first), __remainder_cvt(__x.second)};
|
||||
}
|
||||
}
|
||||
|
||||
// from one vector to multiple vectors
|
||||
else if constexpr (_Arg::_S_first_size > _Ret::_S_first_size)
|
||||
{
|
||||
const auto __multiple_return_chunks
|
||||
= __convert_all<__vector_type_t<_To, _Ret::_S_first_size>>(
|
||||
__x.first);
|
||||
constexpr auto __converted = __multiple_return_chunks.size()
|
||||
* _Ret::_FirstAbi::template _S_size<_To>;
|
||||
constexpr auto __remaining = _Np - __converted;
|
||||
if constexpr (_Arg::_S_tuple_size == 1 && __remaining == 0)
|
||||
return __to_simd_tuple<_To, _Np>(__multiple_return_chunks);
|
||||
else if constexpr (_Arg::_S_tuple_size == 1)
|
||||
{ // e.g. <int, 3> -> <double, 2, 1> or <short, 7> -> <double, 4, 2,
|
||||
// 1>
|
||||
using _RetRem
|
||||
= __remove_cvref_t<decltype(__simd_tuple_pop_front<__converted>(
|
||||
_Ret()))>;
|
||||
const auto __return_chunks2
|
||||
= __convert_all<__vector_type_t<_To, _RetRem::_S_first_size>, 0,
|
||||
__converted>(__x.first);
|
||||
constexpr auto __converted2
|
||||
= __converted
|
||||
+ __return_chunks2.size() * _RetRem::_S_first_size;
|
||||
if constexpr (__converted2 == _Np)
|
||||
return __to_simd_tuple<_To, _Np>(__multiple_return_chunks,
|
||||
__return_chunks2);
|
||||
else
|
||||
{
|
||||
using _RetRem2 = __remove_cvref_t<
|
||||
decltype(__simd_tuple_pop_front<__return_chunks2.size()
|
||||
* _RetRem::_S_first_size>(
|
||||
_RetRem()))>;
|
||||
const auto __return_chunks3 = __convert_all<
|
||||
__vector_type_t<_To, _RetRem2::_S_first_size>, 0,
|
||||
__converted2>(__x.first);
|
||||
constexpr auto __converted3
|
||||
= __converted2
|
||||
+ __return_chunks3.size() * _RetRem2::_S_first_size;
|
||||
if constexpr (__converted3 == _Np)
|
||||
return __to_simd_tuple<_To, _Np>(__multiple_return_chunks,
|
||||
__return_chunks2,
|
||||
__return_chunks3);
|
||||
else
|
||||
{
|
||||
using _RetRem3
|
||||
= __remove_cvref_t<decltype(__simd_tuple_pop_front<
|
||||
__return_chunks3.size()
|
||||
* _RetRem2::_S_first_size>(
|
||||
_RetRem2()))>;
|
||||
const auto __return_chunks4 = __convert_all<
|
||||
__vector_type_t<_To, _RetRem3::_S_first_size>, 0,
|
||||
__converted3>(__x.first);
|
||||
constexpr auto __converted4
|
||||
= __converted3
|
||||
+ __return_chunks4.size() * _RetRem3::_S_first_size;
|
||||
if constexpr (__converted4 == _Np)
|
||||
return __to_simd_tuple<_To, _Np>(
|
||||
__multiple_return_chunks, __return_chunks2,
|
||||
__return_chunks3, __return_chunks4);
|
||||
else
|
||||
__assert_unreachable<_To>();
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
constexpr size_t _NRemain = _Np - _Arg::_S_first_size;
|
||||
_SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To,
|
||||
simd_abi::fixed_size<_NRemain>>
|
||||
__remainder_cvt;
|
||||
return __simd_tuple_concat(
|
||||
__to_simd_tuple<_To, _Arg::_S_first_size>(
|
||||
__multiple_return_chunks),
|
||||
__remainder_cvt(__x.second));
|
||||
}
|
||||
}
|
||||
|
||||
// from multiple vectors to one vector
|
||||
// _Arg::_S_first_size < _Ret::_S_first_size
|
||||
// a) heterogeneous input at the end of the tuple (possible with partial
|
||||
// native registers in _Ret)
|
||||
else if constexpr (_Ret::_S_tuple_size == 1
|
||||
&& _Np % _Arg::_S_first_size != 0)
|
||||
{
|
||||
static_assert(_Ret::_FirstAbi::template _S_is_partial<_To>);
|
||||
return _Ret{__generate_from_n_evaluations<
|
||||
_Np, typename _VectorTraits<typename _Ret::_FirstType>::type>(
|
||||
[&](auto __i) { return static_cast<_To>(__x[__i]); })};
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(_Arg::_S_tuple_size > 1);
|
||||
constexpr auto __n
|
||||
= __div_roundup(_Ret::_S_first_size, _Arg::_S_first_size);
|
||||
return __call_with_n_evaluations<__n>(
|
||||
[&__x](auto... __uncvted) {
|
||||
// assuming _Arg Abi tags for all __i are _Arg::_FirstAbi
|
||||
_SimdConverter<_From, typename _Arg::_FirstAbi, _To,
|
||||
typename _Ret::_FirstAbi>
|
||||
__native_cvt;
|
||||
if constexpr (_Ret::_S_tuple_size == 1)
|
||||
return _Ret{__native_cvt(__uncvted...)};
|
||||
else
|
||||
return _Ret{
|
||||
__native_cvt(__uncvted...),
|
||||
_SimdConverter<
|
||||
_From, simd_abi::fixed_size<_Np - _Ret::_S_first_size>, _To,
|
||||
simd_abi::fixed_size<_Np - _Ret::_S_first_size>>()(
|
||||
__simd_tuple_pop_front<_Ret::_S_first_size>(__x))};
|
||||
},
|
||||
[&__x](auto __i) { return __get_tuple_at<__i>(__x); });
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// _SimdConverter "native" -> fixed_size<_Np> {{{1
|
||||
// i.e. 1 register to ? registers
|
||||
template <typename _From, typename _Ap, typename _To, int _Np>
|
||||
struct _SimdConverter<_From, _Ap, _To, simd_abi::fixed_size<_Np>,
|
||||
enable_if_t<!__is_fixed_size_abi_v<_Ap>>>
|
||||
{
|
||||
static_assert(
|
||||
_Np == simd_size_v<_From, _Ap>,
|
||||
"_SimdConverter to fixed_size only works for equal element counts");
|
||||
|
||||
using _Ret = __fixed_size_storage_t<_To, _Np>;
|
||||
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr _Ret
|
||||
operator()(typename _SimdTraits<_From, _Ap>::_SimdMember __x) const noexcept
|
||||
{
|
||||
if constexpr (_Ret::_S_tuple_size == 1)
|
||||
return {__vector_convert<typename _Ret::_FirstType::_BuiltinType>(__x)};
|
||||
else
|
||||
{
|
||||
using _FixedNp = simd_abi::fixed_size<_Np>;
|
||||
_SimdConverter<_From, _FixedNp, _To, _FixedNp> __fixed_cvt;
|
||||
using _FromFixedStorage = __fixed_size_storage_t<_From, _Np>;
|
||||
if constexpr (_FromFixedStorage::_S_tuple_size == 1)
|
||||
return __fixed_cvt(_FromFixedStorage{__x});
|
||||
else if constexpr (_FromFixedStorage::_S_tuple_size == 2)
|
||||
{
|
||||
_FromFixedStorage __tmp;
|
||||
static_assert(sizeof(__tmp) <= sizeof(__x));
|
||||
__builtin_memcpy(&__tmp.first, &__x, sizeof(__tmp.first));
|
||||
__builtin_memcpy(&__tmp.second.first,
|
||||
reinterpret_cast<const char*>(&__x)
|
||||
+ sizeof(__tmp.first),
|
||||
sizeof(__tmp.second.first));
|
||||
return __fixed_cvt(__tmp);
|
||||
}
|
||||
else
|
||||
__assert_unreachable<_From>();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// _SimdConverter fixed_size<_Np> -> "native" {{{1
|
||||
// i.e. ? registers to 1 register
|
||||
template <typename _From, int _Np, typename _To, typename _Ap>
|
||||
struct _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To, _Ap,
|
||||
enable_if_t<!__is_fixed_size_abi_v<_Ap>>>
|
||||
{
|
||||
static_assert(
|
||||
_Np == simd_size_v<_To, _Ap>,
|
||||
"_SimdConverter to fixed_size only works for equal element counts");
|
||||
|
||||
using _Arg = __fixed_size_storage_t<_From, _Np>;
|
||||
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr
|
||||
typename _SimdTraits<_To, _Ap>::_SimdMember
|
||||
operator()(_Arg __x) const noexcept
|
||||
{
|
||||
if constexpr (_Arg::_S_tuple_size == 1)
|
||||
return __vector_convert<__vector_type_t<_To, _Np>>(__x.first);
|
||||
else if constexpr (_Arg::_S_is_homogeneous)
|
||||
return __call_with_n_evaluations<_Arg::_S_tuple_size>(
|
||||
[](auto... __members) {
|
||||
if constexpr ((is_convertible_v<decltype(__members), _To> && ...))
|
||||
return __vector_type_t<_To, _Np>{static_cast<_To>(__members)...};
|
||||
else
|
||||
return __vector_convert<__vector_type_t<_To, _Np>>(__members...);
|
||||
},
|
||||
[&](auto __i) { return __get_tuple_at<__i>(__x); });
|
||||
else if constexpr (__fixed_size_storage_t<_To, _Np>::_S_tuple_size == 1)
|
||||
{
|
||||
_SimdConverter<_From, simd_abi::fixed_size<_Np>, _To,
|
||||
simd_abi::fixed_size<_Np>>
|
||||
__fixed_cvt;
|
||||
return __fixed_cvt(__x).first;
|
||||
}
|
||||
else
|
||||
{
|
||||
const _SimdWrapper<_From, _Np> __xv
|
||||
= __generate_from_n_evaluations<_Np, __vector_type_t<_From, _Np>>(
|
||||
[&](auto __i) { return __x[__i]; });
|
||||
return __vector_convert<__vector_type_t<_To, _Np>>(__xv);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// }}}1
|
||||
_GLIBCXX_SIMD_END_NAMESPACE
|
||||
#endif // __cplusplus >= 201703L
|
||||
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
|
||||
|
||||
// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
|
306
libstdc++-v3/include/experimental/bits/simd_detail.h
Normal file
306
libstdc++-v3/include/experimental/bits/simd_detail.h
Normal file
@ -0,0 +1,306 @@
|
||||
// Internal macros for the simd implementation -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
|
||||
#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
|
||||
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
// Opens namespace std (with default visibility and the versioning macro),
// then std::experimental and the inline namespace parallelism_v2.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE                                          \
  namespace std _GLIBCXX_VISIBILITY(default) {                                 \
  _GLIBCXX_BEGIN_NAMESPACE_VERSION                                             \
  namespace experimental {                                                     \
  inline namespace parallelism_v2 {

// Closes everything _GLIBCXX_SIMD_BEGIN_NAMESPACE opened, in reverse order.
#define _GLIBCXX_SIMD_END_NAMESPACE                                            \
  } /* parallelism_v2 */                                                       \
  } /* experimental */                                                         \
  _GLIBCXX_END_NAMESPACE_VERSION                                               \
  } /* std */
|
||||
|
||||
// ISA extension detection. The following defines all the _GLIBCXX_SIMD_HAVE_XXX
|
||||
// macros ARM{{{
|
||||
// Each _GLIBCXX_SIMD_HAVE_NEON* macro is always defined, to either 1 or 0.
//   _GLIBCXX_SIMD_HAVE_NEON     : __ARM_NEON is defined
//   _GLIBCXX_SIMD_HAVE_NEON_A32 : NEON and (__ARM_ARCH >= 8 or __aarch64__)
//   _GLIBCXX_SIMD_HAVE_NEON_A64 : NEON and __aarch64__
#ifdef __ARM_NEON
#  define _GLIBCXX_SIMD_HAVE_NEON 1
#  if __ARM_ARCH >= 8 || defined __aarch64__
#    define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#  else
#    define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#  endif
#  ifdef __aarch64__
#    define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#  else
#    define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#  endif
#else
#  define _GLIBCXX_SIMD_HAVE_NEON 0
#  define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#  define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
|
||||
//}}}
|
||||
// x86{{{
|
||||
// One _GLIBCXX_SIMD_HAVE_<ISA> macro per x86 extension.  Each is always
// defined, to 1 when the compiler's corresponding predefined macro is present
// and to 0 otherwise.  SSE and SSE2 are also reported whenever __x86_64__ is
// defined.
#if defined __MMX__
#  define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#  define _GLIBCXX_SIMD_HAVE_MMX 0
#endif

#if defined __SSE__ || defined __x86_64__
#  define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE 0
#endif

#if defined __SSE2__ || defined __x86_64__
#  define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif

#if defined __SSE3__
#  define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif

#if defined __SSSE3__
#  define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif

#if defined __SSE4_1__
#  define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif

#if defined __SSE4_2__
#  define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif

#if defined __XOP__
#  define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#  define _GLIBCXX_SIMD_HAVE_XOP 0
#endif

#if defined __AVX__
#  define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX 0
#endif

#if defined __AVX2__
#  define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif

#if defined __BMI__
#  define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#  define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif

#if defined __BMI2__
#  define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#  define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif

#if defined __LZCNT__
#  define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#  define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif

#if defined __SSE4A__
#  define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif

#if defined __FMA__
#  define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#  define _GLIBCXX_SIMD_HAVE_FMA 0
#endif

#if defined __FMA4__
#  define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#  define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif

#if defined __F16C__
#  define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#  define _GLIBCXX_SIMD_HAVE_F16C 0
#endif

#if defined __POPCNT__
#  define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#  define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif

#if defined __AVX512F__
#  define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif

#if defined __AVX512DQ__
#  define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif

#if defined __AVX512VL__
#  define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif

#if defined __AVX512BW__
#  define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif

// ABI availability, derived from the ISA macros above.  For every register
// family there is a basic macro and a "FULL" macro keyed on a later ISA
// extension:
//   SSE_ABI    <- SSE        FULL_SSE_ABI    <- SSE2
//   AVX_ABI    <- AVX        FULL_AVX_ABI    <- AVX2
//   AVX512_ABI <- AVX512F    FULL_AVX512_ABI <- AVX512BW
#if _GLIBCXX_SIMD_HAVE_SSE
#  define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#  define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX
#  define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX512F
#  define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif

// Sanity check: SSE2 is reported unconditionally on x86-64 above, so this
// can only fire if that invariant is broken.
#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
#  error "Use of SSE2 is required on AMD64"
#endif
|
||||
//}}}
|
||||
|
||||
// Function attribute that requests the "finite-math-only,no-signed-zeros"
// optimize options; expands to nothing when __clang__ is defined.
#ifndef __clang__
#  define _GLIBCXX_SIMD_NORMAL_MATH                                            \
     [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#else
#  define _GLIBCXX_SIMD_NORMAL_MATH
#endif

// Inlining control for function declarations.
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
#define _GLIBCXX_SIMD_INTRINSIC                                                \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline

// Branch-prediction hints; both evaluate to the value of __x.
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
|
||||
|
||||
// With a nonzero __STRICT_ANSI__ the API falls back from constexpr to
// nothing (_GLIBCXX_SIMD_CONSTEXPR) or const (_GLIBCXX_SIMD_USE_CONSTEXPR_API).
#if !defined __STRICT_ANSI__ || !__STRICT_ANSI__
#  define _GLIBCXX_SIMD_CONSTEXPR constexpr
#  define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#else
#  define _GLIBCXX_SIMD_CONSTEXPR
#  define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#endif

// Qualifier for internal constants: plain const when __clang__ is defined,
// constexpr otherwise.
#ifndef __clang__
#  define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#else
#  define _GLIBCXX_SIMD_USE_CONSTEXPR const
#endif
|
||||
|
||||
// X-macro lists: each _LIST_ macro invokes __m once per operator token.
#define _GLIBCXX_SIMD_LIST_BINARY(__m) __m(|) __m(&) __m(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__m) __m(<<) __m(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__m)                                    \
  __m(+) __m(-) __m(*) __m(/) __m(%)

// The _ALL_ variants append static_assert(true) so that the invocation can be
// followed by a semicolon.
#define _GLIBCXX_SIMD_ALL_BINARY(__m)                                          \
  _GLIBCXX_SIMD_LIST_BINARY(__m) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__m)                                          \
  _GLIBCXX_SIMD_LIST_SHIFTS(__m) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__m)                                     \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__m) static_assert(true)
|
||||
|
||||
// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, both
// inlining macros are reduced to a plain `inline`.
#if defined _GLIBCXX_SIMD_NO_ALWAYS_INLINE
#  undef _GLIBCXX_SIMD_ALWAYS_INLINE
#  define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#  undef _GLIBCXX_SIMD_INTRINSIC
#  define _GLIBCXX_SIMD_INTRINSIC inline
#endif
|
||||
|
||||
// 1 when either SSE or MMX was detected, 0 otherwise.
#if !(_GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX)
#  define _GLIBCXX_SIMD_X86INTRIN 0
#else
#  define _GLIBCXX_SIMD_X86INTRIN 1
#endif
|
||||
|
||||
// workaround macros {{{
|
||||
// --- workarounds that apply everywhere --------------------------------------

// use aliasing loads to help GCC understand the data accesses better
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// integer division not optimized
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1

// bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// --- workarounds that are only enabled with x86 intrinsics -------------------
#if _GLIBCXX_SIMD_X86INTRIN

// vector conversions on x86 not optimized:
#  define _GLIBCXX_SIMD_WORKAROUND_PR85048 1

// very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#  define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1

// bad codegen for zero-extend using simple concat(__x, 0)
#  define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1

#endif // _GLIBCXX_SIMD_X86INTRIN

// --- fixes for open Parallelism TS 2 issues ----------------------------------

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
|
||||
// }}}
|
||||
|
||||
#endif // __cplusplus >= 201703L
|
||||
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
|
||||
|
||||
// vim: foldmethod=marker
|
2066
libstdc++-v3/include/experimental/bits/simd_fixed_size.h
Normal file
2066
libstdc++-v3/include/experimental/bits/simd_fixed_size.h
Normal file
File diff suppressed because it is too large
Load Diff
1500
libstdc++-v3/include/experimental/bits/simd_math.h
Normal file
1500
libstdc++-v3/include/experimental/bits/simd_math.h
Normal file
File diff suppressed because it is too large
Load Diff
519
libstdc++-v3/include/experimental/bits/simd_neon.h
Normal file
519
libstdc++-v3/include/experimental/bits/simd_neon.h
Normal file
@ -0,0 +1,519 @@
|
||||
// Simd NEON specific implementations -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_NEON_H_
|
||||
#define _GLIBCXX_EXPERIMENTAL_SIMD_NEON_H_
|
||||
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
#if !_GLIBCXX_SIMD_HAVE_NEON
|
||||
#error "simd_neon.h may only be included when NEON on ARM is available"
|
||||
#endif
|
||||
|
||||
_GLIBCXX_SIMD_BEGIN_NAMESPACE
|
||||
|
||||
// _CommonImplNeon {{{
|
||||
struct _CommonImplNeon : _CommonImplBuiltin
|
||||
{
|
||||
// _S_store {{{
|
||||
using _CommonImplBuiltin::_S_store;
|
||||
|
||||
// }}}
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdImplNeon {{{
|
||||
template <typename _Abi>
|
||||
struct _SimdImplNeon : _SimdImplBuiltin<_Abi>
|
||||
{
|
||||
using _Base = _SimdImplBuiltin<_Abi>;
|
||||
|
||||
template <typename _Tp>
|
||||
using _MaskMember = typename _Base::template _MaskMember<_Tp>;
|
||||
|
||||
template <typename _Tp>
|
||||
static constexpr size_t _S_max_store_size = 16;
|
||||
|
||||
// _S_masked_load {{{
|
||||
template <typename _Tp, size_t _Np, typename _Up>
|
||||
static inline _SimdWrapper<_Tp, _Np>
|
||||
_S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
|
||||
const _Up* __mem) noexcept
|
||||
{
|
||||
__execute_n_times<_Np>([&](auto __i) {
|
||||
if (__k[__i] != 0)
|
||||
__merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
|
||||
});
|
||||
return __merge;
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_masked_store_nocvt {{{
|
||||
template <typename _Tp, size_t _Np>
|
||||
_GLIBCXX_SIMD_INTRINSIC static void
|
||||
_S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
|
||||
_MaskMember<_Tp> __k)
|
||||
{
|
||||
__execute_n_times<_Np>([&](auto __i) {
|
||||
if (__k[__i] != 0)
|
||||
__mem[__i] = __v[__i];
|
||||
});
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_reduce {{{
|
||||
template <typename _Tp, typename _BinaryOperation>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp
|
||||
_S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
|
||||
{
|
||||
constexpr size_t _Np = __x.size();
|
||||
if constexpr (sizeof(__x) == 16 && _Np >= 4
|
||||
&& !_Abi::template _S_is_partial<_Tp>)
|
||||
{
|
||||
const auto __halves = split<simd<_Tp, simd_abi::_Neon<8>>>(__x);
|
||||
const auto __y = __binary_op(__halves[0], __halves[1]);
|
||||
return _SimdImplNeon<simd_abi::_Neon<8>>::_S_reduce(
|
||||
__y, static_cast<_BinaryOperation&&>(__binary_op));
|
||||
}
|
||||
else if constexpr (_Np == 8)
|
||||
{
|
||||
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
|
||||
__vector_permute<1, 0, 3, 2, 5, 4, 7, 6>(
|
||||
__x._M_data)));
|
||||
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
|
||||
__vector_permute<3, 2, 1, 0, 7, 6, 5, 4>(
|
||||
__x._M_data)));
|
||||
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
|
||||
__vector_permute<7, 6, 5, 4, 3, 2, 1, 0>(
|
||||
__x._M_data)));
|
||||
return __x[0];
|
||||
}
|
||||
else if constexpr (_Np == 4)
|
||||
{
|
||||
__x
|
||||
= __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
|
||||
__vector_permute<1, 0, 3, 2>(__x._M_data)));
|
||||
__x
|
||||
= __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
|
||||
__vector_permute<3, 2, 1, 0>(__x._M_data)));
|
||||
return __x[0];
|
||||
}
|
||||
else if constexpr (_Np == 2)
|
||||
{
|
||||
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
|
||||
__vector_permute<1, 0>(__x._M_data)));
|
||||
return __x[0];
|
||||
}
|
||||
else
|
||||
return _Base::_S_reduce(__x,
|
||||
static_cast<_BinaryOperation&&>(__binary_op));
|
||||
}
|
||||
|
||||
// }}}
|
||||
// math {{{
|
||||
// _S_sqrt {{{
|
||||
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_sqrt(_Tp __x)
|
||||
{
|
||||
if constexpr (__have_neon_a64)
|
||||
{
|
||||
const auto __intrin = __to_intrin(__x);
|
||||
if constexpr (_TVT::template _S_is<float, 2>)
|
||||
return vsqrt_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<float, 4>)
|
||||
return vsqrtq_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 1>)
|
||||
return vsqrt_f64(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 2>)
|
||||
return vsqrtq_f64(__intrin);
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else
|
||||
return _Base::_S_sqrt(__x);
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_trunc {{{
|
||||
template <typename _TW, typename _TVT = _VectorTraits<_TW>>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _TW _S_trunc(_TW __x)
|
||||
{
|
||||
using _Tp = typename _TVT::value_type;
|
||||
if constexpr (__have_neon_a32)
|
||||
{
|
||||
const auto __intrin = __to_intrin(__x);
|
||||
if constexpr (_TVT::template _S_is<float, 2>)
|
||||
return vrnd_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<float, 4>)
|
||||
return vrndq_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 1>)
|
||||
return vrnd_f64(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 2>)
|
||||
return vrndq_f64(__intrin);
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else if constexpr (is_same_v<_Tp, float>)
|
||||
{
|
||||
auto __intrin = __to_intrin(__x);
|
||||
if constexpr (sizeof(__x) == 16)
|
||||
__intrin = vcvtq_f32_s32(vcvtq_s32_f32(__intrin));
|
||||
else
|
||||
__intrin = vcvt_f32_s32(vcvt_s32_f32(__intrin));
|
||||
return _Base::_S_abs(__x)._M_data < 0x1p23f
|
||||
? __vector_bitcast<float>(__intrin)
|
||||
: __x._M_data;
|
||||
}
|
||||
else
|
||||
return _Base::_S_trunc(__x);
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_round {{{
|
||||
template <typename _Tp, size_t _Np>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
|
||||
_S_round(_SimdWrapper<_Tp, _Np> __x)
|
||||
{
|
||||
if constexpr (__have_neon_a32)
|
||||
{
|
||||
const auto __intrin = __to_intrin(__x);
|
||||
if constexpr (sizeof(_Tp) == 4 && sizeof(__x) == 8)
|
||||
return vrnda_f32(__intrin);
|
||||
else if constexpr (sizeof(_Tp) == 4 && sizeof(__x) == 16)
|
||||
return vrndaq_f32(__intrin);
|
||||
else if constexpr (sizeof(_Tp) == 8 && sizeof(__x) == 8)
|
||||
return vrnda_f64(__intrin);
|
||||
else if constexpr (sizeof(_Tp) == 8 && sizeof(__x) == 16)
|
||||
return vrndaq_f64(__intrin);
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else
|
||||
return _Base::_S_round(__x);
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_floor {{{
|
||||
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_floor(_Tp __x)
|
||||
{
|
||||
if constexpr (__have_neon_a32)
|
||||
{
|
||||
const auto __intrin = __to_intrin(__x);
|
||||
if constexpr (_TVT::template _S_is<float, 2>)
|
||||
return vrndm_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<float, 4>)
|
||||
return vrndmq_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 1>)
|
||||
return vrndm_f64(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 2>)
|
||||
return vrndmq_f64(__intrin);
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else
|
||||
return _Base::_S_floor(__x);
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_ceil {{{
|
||||
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_ceil(_Tp __x)
|
||||
{
|
||||
if constexpr (__have_neon_a32)
|
||||
{
|
||||
const auto __intrin = __to_intrin(__x);
|
||||
if constexpr (_TVT::template _S_is<float, 2>)
|
||||
return vrndp_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<float, 4>)
|
||||
return vrndpq_f32(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 1>)
|
||||
return vrndp_f64(__intrin);
|
||||
else if constexpr (_TVT::template _S_is<double, 2>)
|
||||
return vrndpq_f64(__intrin);
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else
|
||||
return _Base::_S_ceil(__x);
|
||||
}
|
||||
|
||||
//}}} }}}
|
||||
}; // }}}
|
||||
// _MaskImplNeonMixin {{{
|
||||
struct _MaskImplNeonMixin
|
||||
{
|
||||
using _Base = _MaskImplBuiltinMixin;
|
||||
|
||||
template <typename _Tp, size_t _Np>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
|
||||
_S_to_bits(_SimdWrapper<_Tp, _Np> __x)
|
||||
{
|
||||
if (__builtin_is_constant_evaluated())
|
||||
return _Base::_S_to_bits(__x);
|
||||
|
||||
using _I = __int_for_sizeof_t<_Tp>;
|
||||
if constexpr (sizeof(__x) == 16)
|
||||
{
|
||||
auto __asint = __vector_bitcast<_I>(__x);
|
||||
#ifdef __aarch64__
|
||||
[[maybe_unused]] constexpr auto __zero = decltype(__asint)();
|
||||
#else
|
||||
[[maybe_unused]] constexpr auto __zero = decltype(__lo64(__asint))();
|
||||
#endif
|
||||
if constexpr (sizeof(_Tp) == 1)
|
||||
{
|
||||
constexpr auto __bitsel
|
||||
= __generate_from_n_evaluations<16, __vector_type_t<_I, 16>>(
|
||||
[&](auto __i) {
|
||||
return static_cast<_I>(
|
||||
__i < _Np ? (__i < 8 ? 1 << __i : 1 << (__i - 8)) : 0);
|
||||
});
|
||||
__asint &= __bitsel;
|
||||
#ifdef __aarch64__
|
||||
return __vector_bitcast<_UShort>(
|
||||
vpaddq_s8(vpaddq_s8(vpaddq_s8(__asint, __zero), __zero),
|
||||
__zero))[0];
|
||||
#else
|
||||
return __vector_bitcast<_UShort>(
|
||||
vpadd_s8(vpadd_s8(vpadd_s8(__lo64(__asint), __hi64(__asint)),
|
||||
__zero),
|
||||
__zero))[0];
|
||||
#endif
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 2)
|
||||
{
|
||||
constexpr auto __bitsel
|
||||
= __generate_from_n_evaluations<8, __vector_type_t<_I, 8>>(
|
||||
[&](auto __i) {
|
||||
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
|
||||
});
|
||||
__asint &= __bitsel;
|
||||
#ifdef __aarch64__
|
||||
return vpaddq_s16(vpaddq_s16(vpaddq_s16(__asint, __zero), __zero),
|
||||
__zero)[0];
|
||||
#else
|
||||
return vpadd_s16(
|
||||
vpadd_s16(vpadd_s16(__lo64(__asint), __hi64(__asint)), __zero),
|
||||
__zero)[0];
|
||||
#endif
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 4)
|
||||
{
|
||||
constexpr auto __bitsel
|
||||
= __generate_from_n_evaluations<4, __vector_type_t<_I, 4>>(
|
||||
[&](auto __i) {
|
||||
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
|
||||
});
|
||||
__asint &= __bitsel;
|
||||
#ifdef __aarch64__
|
||||
return vpaddq_s32(vpaddq_s32(__asint, __zero), __zero)[0];
|
||||
#else
|
||||
return vpadd_s32(vpadd_s32(__lo64(__asint), __hi64(__asint)),
|
||||
__zero)[0];
|
||||
#endif
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 8)
|
||||
return (__asint[0] & 1) | (__asint[1] & 2);
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else if constexpr (sizeof(__x) == 8)
|
||||
{
|
||||
auto __asint = __vector_bitcast<_I>(__x);
|
||||
[[maybe_unused]] constexpr auto __zero = decltype(__asint)();
|
||||
if constexpr (sizeof(_Tp) == 1)
|
||||
{
|
||||
constexpr auto __bitsel
|
||||
= __generate_from_n_evaluations<8, __vector_type_t<_I, 8>>(
|
||||
[&](auto __i) {
|
||||
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
|
||||
});
|
||||
__asint &= __bitsel;
|
||||
return vpadd_s8(vpadd_s8(vpadd_s8(__asint, __zero), __zero),
|
||||
__zero)[0];
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 2)
|
||||
{
|
||||
constexpr auto __bitsel
|
||||
= __generate_from_n_evaluations<4, __vector_type_t<_I, 4>>(
|
||||
[&](auto __i) {
|
||||
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
|
||||
});
|
||||
__asint &= __bitsel;
|
||||
return vpadd_s16(vpadd_s16(__asint, __zero), __zero)[0];
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 4)
|
||||
{
|
||||
__asint &= __make_vector<_I>(0x1, 0x2);
|
||||
return vpadd_s32(__asint, __zero)[0];
|
||||
}
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
else
|
||||
return _Base::_S_to_bits(__x);
|
||||
}
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _MaskImplNeon {{{
|
||||
template <typename _Abi>
|
||||
struct _MaskImplNeon : _MaskImplNeonMixin, _MaskImplBuiltin<_Abi>
|
||||
{
|
||||
using _MaskImplBuiltinMixin::_S_to_maskvector;
|
||||
using _MaskImplNeonMixin::_S_to_bits;
|
||||
using _Base = _MaskImplBuiltin<_Abi>;
|
||||
using _Base::_S_convert;
|
||||
|
||||
// _S_all_of {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static bool _S_all_of(simd_mask<_Tp, _Abi> __k)
|
||||
{
|
||||
const auto __kk
|
||||
= __vector_bitcast<char>(__k._M_data)
|
||||
| ~__vector_bitcast<char>(_Abi::template _S_implicit_mask<_Tp>());
|
||||
if constexpr (sizeof(__k) == 16)
|
||||
{
|
||||
const auto __x = __vector_bitcast<long long>(__kk);
|
||||
return __x[0] + __x[1] == -2;
|
||||
}
|
||||
else if constexpr (sizeof(__k) <= 8)
|
||||
return __bit_cast<__int_for_sizeof_t<decltype(__kk)>>(__kk) == -1;
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_any_of {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static bool _S_any_of(simd_mask<_Tp, _Abi> __k)
|
||||
{
|
||||
const auto __kk
|
||||
= __vector_bitcast<char>(__k._M_data)
|
||||
| ~__vector_bitcast<char>(_Abi::template _S_implicit_mask<_Tp>());
|
||||
if constexpr (sizeof(__k) == 16)
|
||||
{
|
||||
const auto __x = __vector_bitcast<long long>(__kk);
|
||||
return (__x[0] | __x[1]) != 0;
|
||||
}
|
||||
else if constexpr (sizeof(__k) <= 8)
|
||||
return __bit_cast<__int_for_sizeof_t<decltype(__kk)>>(__kk) != 0;
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_none_of {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static bool _S_none_of(simd_mask<_Tp, _Abi> __k)
|
||||
{
|
||||
const auto __kk = _Abi::_S_masked(__k._M_data);
|
||||
if constexpr (sizeof(__k) == 16)
|
||||
{
|
||||
const auto __x = __vector_bitcast<long long>(__kk);
|
||||
return (__x[0] | __x[1]) == 0;
|
||||
}
|
||||
else if constexpr (sizeof(__k) <= 8)
|
||||
return __bit_cast<__int_for_sizeof_t<decltype(__kk)>>(__kk) == 0;
|
||||
else
|
||||
__assert_unreachable<_Tp>();
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_some_of {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static bool _S_some_of(simd_mask<_Tp, _Abi> __k)
|
||||
{
|
||||
if constexpr (sizeof(__k) <= 8)
|
||||
{
|
||||
const auto __kk = __vector_bitcast<char>(__k._M_data)
|
||||
| ~__vector_bitcast<char>(
|
||||
_Abi::template _S_implicit_mask<_Tp>());
|
||||
using _Up = make_unsigned_t<__int_for_sizeof_t<decltype(__kk)>>;
|
||||
return __bit_cast<_Up>(__kk) + 1 > 1;
|
||||
}
|
||||
else
|
||||
return _Base::_S_some_of(__k);
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_popcount {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k)
|
||||
{
|
||||
if constexpr (sizeof(_Tp) == 1)
|
||||
{
|
||||
const auto __s8 = __vector_bitcast<_SChar>(__k._M_data);
|
||||
int8x8_t __tmp = __lo64(__s8) + __hi64z(__s8);
|
||||
return -vpadd_s8(vpadd_s8(vpadd_s8(__tmp, int8x8_t()), int8x8_t()),
|
||||
int8x8_t())[0];
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 2)
|
||||
{
|
||||
const auto __s16 = __vector_bitcast<short>(__k._M_data);
|
||||
int16x4_t __tmp = __lo64(__s16) + __hi64z(__s16);
|
||||
return -vpadd_s16(vpadd_s16(__tmp, int16x4_t()), int16x4_t())[0];
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 4)
|
||||
{
|
||||
const auto __s32 = __vector_bitcast<int>(__k._M_data);
|
||||
int32x2_t __tmp = __lo64(__s32) + __hi64z(__s32);
|
||||
return -vpadd_s32(__tmp, int32x2_t())[0];
|
||||
}
|
||||
else if constexpr (sizeof(_Tp) == 8)
|
||||
{
|
||||
static_assert(sizeof(__k) == 16);
|
||||
const auto __s64 = __vector_bitcast<long>(__k._M_data);
|
||||
return -(__s64[0] + __s64[1]);
|
||||
}
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_find_first_set {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static int
|
||||
_S_find_first_set(simd_mask<_Tp, _Abi> __k)
|
||||
{
|
||||
// TODO: the _Base implementation is not optimal for NEON
|
||||
return _Base::_S_find_first_set(__k);
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_find_last_set {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static int
|
||||
_S_find_last_set(simd_mask<_Tp, _Abi> __k)
|
||||
{
|
||||
// TODO: the _Base implementation is not optimal for NEON
|
||||
return _Base::_S_find_last_set(__k);
|
||||
}
|
||||
|
||||
// }}}
|
||||
}; // }}}
|
||||
|
||||
_GLIBCXX_SIMD_END_NAMESPACE
|
||||
#endif // __cplusplus >= 201703L
|
||||
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_NEON_H_
|
||||
// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
|
123
libstdc++-v3/include/experimental/bits/simd_ppc.h
Normal file
123
libstdc++-v3/include/experimental/bits/simd_ppc.h
Normal file
@ -0,0 +1,123 @@
|
||||
// Simd PowerPC specific implementations -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
|
||||
#define _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
|
||||
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
#ifndef __ALTIVEC__
|
||||
#error "simd_ppc.h may only be included when AltiVec/VMX is available"
|
||||
#endif
|
||||
|
||||
_GLIBCXX_SIMD_BEGIN_NAMESPACE
|
||||
|
||||
// _SimdImplPpc {{{
|
||||
template <typename _Abi>
|
||||
struct _SimdImplPpc : _SimdImplBuiltin<_Abi>
|
||||
{
|
||||
using _Base = _SimdImplBuiltin<_Abi>;
|
||||
|
||||
// Byte and halfword shift instructions on PPC only consider the low 3 or 4
|
||||
// bits of the RHS. Consequently, shifting by sizeof(_Tp)*CHAR_BIT (or more)
|
||||
// is UB without extra measures. To match scalar behavior, byte and halfword
|
||||
// shifts need an extra fixup step.
|
||||
|
||||
// _S_bit_shift_left {{{
|
||||
template <typename _Tp, size_t _Np>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
||||
_S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
|
||||
{
|
||||
__x = _Base::_S_bit_shift_left(__x, __y);
|
||||
if constexpr (sizeof(_Tp) < sizeof(int))
|
||||
__x._M_data
|
||||
= (__y._M_data < sizeof(_Tp) * __CHAR_BIT__) & __x._M_data;
|
||||
return __x;
|
||||
}
|
||||
|
||||
template <typename _Tp, size_t _Np>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
||||
_S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
|
||||
{
|
||||
__x = _Base::_S_bit_shift_left(__x, __y);
|
||||
if constexpr (sizeof(_Tp) < sizeof(int))
|
||||
{
|
||||
if (__y >= sizeof(_Tp) * __CHAR_BIT__)
|
||||
return {};
|
||||
}
|
||||
return __x;
|
||||
}
|
||||
|
||||
// }}}
|
||||
// _S_bit_shift_right {{{
|
||||
template <typename _Tp, size_t _Np>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
||||
_S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
|
||||
{
|
||||
if constexpr (sizeof(_Tp) < sizeof(int))
|
||||
{
|
||||
constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
|
||||
if constexpr (is_unsigned_v<_Tp>)
|
||||
return (__y._M_data < __nbits)
|
||||
& _Base::_S_bit_shift_right(__x, __y)._M_data;
|
||||
else
|
||||
{
|
||||
_Base::_S_masked_assign(_SimdWrapper<_Tp, _Np>(__y._M_data
|
||||
>= __nbits),
|
||||
__y, __nbits - 1);
|
||||
return _Base::_S_bit_shift_right(__x, __y);
|
||||
}
|
||||
}
|
||||
else
|
||||
return _Base::_S_bit_shift_right(__x, __y);
|
||||
}
|
||||
|
||||
template <typename _Tp, size_t _Np>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
||||
_S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
|
||||
{
|
||||
if constexpr (sizeof(_Tp) < sizeof(int))
|
||||
{
|
||||
constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
|
||||
if (__y >= __nbits)
|
||||
{
|
||||
if constexpr (is_unsigned_v<_Tp>)
|
||||
return {};
|
||||
else
|
||||
return _Base::_S_bit_shift_right(__x, __nbits - 1);
|
||||
}
|
||||
}
|
||||
return _Base::_S_bit_shift_right(__x, __y);
|
||||
}
|
||||
|
||||
// }}}
|
||||
};
|
||||
|
||||
// }}}
|
||||
|
||||
_GLIBCXX_SIMD_END_NAMESPACE
|
||||
#endif // __cplusplus >= 201703L
|
||||
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
|
||||
|
||||
// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
|
772
libstdc++-v3/include/experimental/bits/simd_scalar.h
Normal file
772
libstdc++-v3/include/experimental/bits/simd_scalar.h
Normal file
@ -0,0 +1,772 @@
|
||||
// Simd scalar ABI specific implementations -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_SCALAR_H_
|
||||
#define _GLIBCXX_EXPERIMENTAL_SIMD_SCALAR_H_
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
#include <cmath>
|
||||
|
||||
_GLIBCXX_SIMD_BEGIN_NAMESPACE
|
||||
|
||||
// __promote_preserving_unsigned{{{
|
||||
// work around crazy semantics of unsigned integers of lower rank than int:
|
||||
// Before applying an operator the operands are promoted to int. In which case
|
||||
// over- or underflow is UB, even though the operand types were unsigned.
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto)
|
||||
__promote_preserving_unsigned(const _Tp& __x)
|
||||
{
|
||||
if constexpr (is_signed_v<decltype(+__x)> && is_unsigned_v<_Tp>)
|
||||
return static_cast<unsigned int>(__x);
|
||||
else
|
||||
return __x;
|
||||
}
|
||||
|
||||
// }}}
|
||||
|
||||
struct _CommonImplScalar;
|
||||
struct _CommonImplBuiltin;
|
||||
struct _SimdImplScalar;
|
||||
struct _MaskImplScalar;
|
||||
|
||||
// simd_abi::_Scalar {{{
|
||||
struct simd_abi::_Scalar
|
||||
{
|
||||
template <typename _Tp>
|
||||
static constexpr size_t _S_size = 1;
|
||||
|
||||
template <typename _Tp>
|
||||
static constexpr size_t _S_full_size = 1;
|
||||
|
||||
template <typename _Tp>
|
||||
static constexpr bool _S_is_partial = false;
|
||||
|
||||
struct _IsValidAbiTag : true_type {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct _IsValidSizeFor : true_type {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct _IsValid : __is_vectorizable<_Tp> {};
|
||||
|
||||
template <typename _Tp>
|
||||
static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
|
||||
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_masked(bool __x)
|
||||
{ return __x; }
|
||||
|
||||
using _CommonImpl = _CommonImplScalar;
|
||||
using _SimdImpl = _SimdImplScalar;
|
||||
using _MaskImpl = _MaskImplScalar;
|
||||
|
||||
template <typename _Tp, bool = _S_is_valid_v<_Tp>>
|
||||
struct __traits : _InvalidTraits {};
|
||||
|
||||
template <typename _Tp>
|
||||
struct __traits<_Tp, true>
|
||||
{
|
||||
using _IsValid = true_type;
|
||||
using _SimdImpl = _SimdImplScalar;
|
||||
using _MaskImpl = _MaskImplScalar;
|
||||
using _SimdMember = _Tp;
|
||||
using _MaskMember = bool;
|
||||
|
||||
static constexpr size_t _S_simd_align = alignof(_SimdMember);
|
||||
static constexpr size_t _S_mask_align = alignof(_MaskMember);
|
||||
|
||||
// nothing the user can spell converts to/from simd/simd_mask
|
||||
struct _SimdCastType { _SimdCastType() = delete; };
|
||||
struct _MaskCastType { _MaskCastType() = delete; };
|
||||
struct _SimdBase {};
|
||||
struct _MaskBase {};
|
||||
};
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _CommonImplScalar {{{
|
||||
struct _CommonImplScalar
|
||||
{
|
||||
// _S_store {{{
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static void _S_store(_Tp __x, void* __addr)
|
||||
{ __builtin_memcpy(__addr, &__x, sizeof(_Tp)); }
|
||||
|
||||
// }}}
|
||||
// _S_store_bool_array(_BitMask) {{{
|
||||
template <size_t _Np, bool _Sanitized>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr void
|
||||
_S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
|
||||
{
|
||||
__make_dependent_t<decltype(__x), _CommonImplBuiltin>::_S_store_bool_array(
|
||||
__x, __mem);
|
||||
}
|
||||
|
||||
// }}}
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _SimdImplScalar {{{
|
||||
struct _SimdImplScalar
|
||||
{
|
||||
// member types {{{2
|
||||
using abi_type = simd_abi::scalar;
|
||||
|
||||
template <typename _Tp>
|
||||
using _TypeTag = _Tp*;
|
||||
|
||||
// _S_broadcast {{{2
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _Tp _S_broadcast(_Tp __x) noexcept
|
||||
{ return __x; }
|
||||
|
||||
// _S_generator {{{2
|
||||
template <typename _Fp, typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _Tp _S_generator(_Fp&& __gen,
|
||||
_TypeTag<_Tp>)
|
||||
{ return __gen(_SizeConstant<0>()); }
|
||||
|
||||
// _S_load {{{2
|
||||
template <typename _Tp, typename _Up>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_load(const _Up* __mem,
|
||||
_TypeTag<_Tp>) noexcept
|
||||
{ return static_cast<_Tp>(__mem[0]); }
|
||||
|
||||
// _S_masked_load {{{2
|
||||
// Returns *__mem converted to _Tp when __k is set, otherwise __merge.
// __mem is only read when __k is true.
template <typename _Tp, typename _Up>
  static inline _Tp
  _S_masked_load(_Tp __merge, bool __k, const _Up* __mem) noexcept
  {
    return __k ? static_cast<_Tp>(*__mem) : __merge;
  }
|
||||
|
||||
// _S_store {{{2
|
||||
template <typename _Tp, typename _Up>
|
||||
static inline void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept
|
||||
{ __mem[0] = static_cast<_Up>(__v); }
|
||||
|
||||
// _S_masked_store {{{2
|
||||
// Writes __v to *__mem only when __k is set; otherwise memory is untouched.
template <typename _Tp, typename _Up>
  static inline void
  _S_masked_store(const _Tp __v, _Up* __mem, const bool __k) noexcept
  {
    if (!__k)
      return;
    *__mem = __v;
  }
|
||||
|
||||
// _S_negate {{{2
|
||||
// Logical negation: true iff __x converts to false.
template <typename _Tp>
  static constexpr inline bool
  _S_negate(_Tp __x) noexcept
  {
    return !static_cast<bool>(__x);
  }
|
||||
|
||||
// _S_reduce {{{2
|
||||
template <typename _Tp, typename _BinaryOperation>
|
||||
static constexpr inline _Tp
|
||||
_S_reduce(const simd<_Tp, simd_abi::scalar>& __x, _BinaryOperation&)
|
||||
{ return __x._M_data; }
|
||||
|
||||
// _S_min, _S_max {{{2
|
||||
// Smaller of the two values; __a is returned when neither compares less
// than the other.
template <typename _Tp>
  static constexpr inline _Tp
  _S_min(const _Tp __a, const _Tp __b)
  {
    return __b < __a ? __b : __a;
  }

// Larger of the two values; __a is returned when neither compares less
// than the other.
template <typename _Tp>
  static constexpr inline _Tp
  _S_max(const _Tp __a, const _Tp __b)
  {
    return __a < __b ? __b : __a;
  }
|
||||
|
||||
// _S_complement {{{2
|
||||
// Bitwise complement, converted back to _Tp after integral promotion.
template <typename _Tp>
  static constexpr inline _Tp
  _S_complement(_Tp __x) noexcept
  {
    const auto __flipped = ~__x;
    return static_cast<_Tp>(__flipped);
  }
|
||||
|
||||
// _S_unary_minus {{{2
|
||||
// Arithmetic negation, converted back to _Tp after promotion.
template <typename _Tp>
  static constexpr inline _Tp
  _S_unary_minus(_Tp __x) noexcept
  {
    const auto __negated = -__x;
    return static_cast<_Tp>(__negated);
  }
|
||||
|
||||
// arithmetic operators {{{2
|
||||
// The five arithmetic operators below share one pattern: both operands go
// through __promote_preserving_unsigned, the operator is applied, and the
// result is converted back to _Tp.

// __x + __y
template <typename _Tp>
  static constexpr inline _Tp
  _S_plus(_Tp __x, _Tp __y)
  {
    const auto __lhs = __promote_preserving_unsigned(__x);
    const auto __rhs = __promote_preserving_unsigned(__y);
    return static_cast<_Tp>(__lhs + __rhs);
  }

// __x - __y
template <typename _Tp>
  static constexpr inline _Tp
  _S_minus(_Tp __x, _Tp __y)
  {
    const auto __lhs = __promote_preserving_unsigned(__x);
    const auto __rhs = __promote_preserving_unsigned(__y);
    return static_cast<_Tp>(__lhs - __rhs);
  }

// __x * __y
template <typename _Tp>
  static constexpr inline _Tp
  _S_multiplies(_Tp __x, _Tp __y)
  {
    const auto __lhs = __promote_preserving_unsigned(__x);
    const auto __rhs = __promote_preserving_unsigned(__y);
    return static_cast<_Tp>(__lhs * __rhs);
  }

// __x / __y
template <typename _Tp>
  static constexpr inline _Tp
  _S_divides(_Tp __x, _Tp __y)
  {
    const auto __lhs = __promote_preserving_unsigned(__x);
    const auto __rhs = __promote_preserving_unsigned(__y);
    return static_cast<_Tp>(__lhs / __rhs);
  }

// __x % __y
template <typename _Tp>
  static constexpr inline _Tp
  _S_modulus(_Tp __x, _Tp __y)
  {
    const auto __lhs = __promote_preserving_unsigned(__x);
    const auto __rhs = __promote_preserving_unsigned(__y);
    return static_cast<_Tp>(__lhs % __rhs);
  }
|
||||
|
||||
template <typename _Tp>
|
||||
static constexpr inline _Tp _S_bit_and(_Tp __x, _Tp __y)
|
||||
{
|
||||
if constexpr (is_floating_point_v<_Tp>)
|
||||
{
|
||||
using _Ip = __int_for_sizeof_t<_Tp>;
|
||||
return __bit_cast<_Tp>(__bit_cast<_Ip>(__x) & __bit_cast<_Ip>(__y));
|
||||
}
|
||||
else
|
||||
return static_cast<_Tp>(__promote_preserving_unsigned(__x)
|
||||
& __promote_preserving_unsigned(__y));
|
||||
}
|
||||
|
||||
template <typename _Tp>
|
||||
static constexpr inline _Tp _S_bit_or(_Tp __x, _Tp __y)
|
||||
{
|
||||
if constexpr (is_floating_point_v<_Tp>)
|
||||
{
|
||||
using _Ip = __int_for_sizeof_t<_Tp>;
|
||||
return __bit_cast<_Tp>(__bit_cast<_Ip>(__x) | __bit_cast<_Ip>(__y));
|
||||
}
|
||||
else
|
||||
return static_cast<_Tp>(__promote_preserving_unsigned(__x)
|
||||
| __promote_preserving_unsigned(__y));
|
||||
}
|
||||
|
||||
template <typename _Tp>
|
||||
static constexpr inline _Tp _S_bit_xor(_Tp __x, _Tp __y)
|
||||
{
|
||||
if constexpr (is_floating_point_v<_Tp>)
|
||||
{
|
||||
using _Ip = __int_for_sizeof_t<_Tp>;
|
||||
return __bit_cast<_Tp>(__bit_cast<_Ip>(__x) ^ __bit_cast<_Ip>(__y));
|
||||
}
|
||||
else
|
||||
return static_cast<_Tp>(__promote_preserving_unsigned(__x)
|
||||
^ __promote_preserving_unsigned(__y));
|
||||
}
|
||||
|
||||
// Shifts: the value goes through __promote_preserving_unsigned, is shifted
// by __y bits, and the result is converted back to _Tp.

template <typename _Tp>
  static constexpr inline _Tp
  _S_bit_shift_left(_Tp __x, int __y)
  {
    const auto __wide = __promote_preserving_unsigned(__x);
    return static_cast<_Tp>(__wide << __y);
  }

template <typename _Tp>
  static constexpr inline _Tp
  _S_bit_shift_right(_Tp __x, int __y)
  {
    const auto __wide = __promote_preserving_unsigned(__x);
    return static_cast<_Tp>(__wide >> __y);
  }
|
||||
|
||||
// math {{{2
|
||||
// frexp, modf and copysign implemented in simd_math.h
|
||||
template <typename _Tp>
|
||||
using _ST = _SimdTuple<_Tp, simd_abi::scalar>;
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_acos(_Tp __x)
|
||||
{ return std::acos(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_asin(_Tp __x)
|
||||
{ return std::asin(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_atan(_Tp __x)
|
||||
{ return std::atan(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_cos(_Tp __x)
|
||||
{ return std::cos(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_sin(_Tp __x)
|
||||
{ return std::sin(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_tan(_Tp __x)
|
||||
{ return std::tan(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_acosh(_Tp __x)
|
||||
{ return std::acosh(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_asinh(_Tp __x)
|
||||
{ return std::asinh(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_atanh(_Tp __x)
|
||||
{ return std::atanh(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_cosh(_Tp __x)
|
||||
{ return std::cosh(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_sinh(_Tp __x)
|
||||
{ return std::sinh(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_tanh(_Tp __x)
|
||||
{ return std::tanh(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_atan2(_Tp __x, _Tp __y)
|
||||
{ return std::atan2(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_exp(_Tp __x)
|
||||
{ return std::exp(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_exp2(_Tp __x)
|
||||
{ return std::exp2(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_expm1(_Tp __x)
|
||||
{ return std::expm1(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_log(_Tp __x)
|
||||
{ return std::log(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_log10(_Tp __x)
|
||||
{ return std::log10(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_log1p(_Tp __x)
|
||||
{ return std::log1p(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_log2(_Tp __x)
|
||||
{ return std::log2(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_logb(_Tp __x)
|
||||
{ return std::logb(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _ST<int> _S_ilogb(_Tp __x)
|
||||
{ return {std::ilogb(__x)}; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_pow(_Tp __x, _Tp __y)
|
||||
{ return std::pow(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_abs(_Tp __x)
|
||||
{ return std::abs(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_fabs(_Tp __x)
|
||||
{ return std::fabs(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_sqrt(_Tp __x)
|
||||
{ return std::sqrt(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_cbrt(_Tp __x)
|
||||
{ return std::cbrt(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_erf(_Tp __x)
|
||||
{ return std::erf(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_erfc(_Tp __x)
|
||||
{ return std::erfc(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_lgamma(_Tp __x)
|
||||
{ return std::lgamma(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_tgamma(_Tp __x)
|
||||
{ return std::tgamma(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_trunc(_Tp __x)
|
||||
{ return std::trunc(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_floor(_Tp __x)
|
||||
{ return std::floor(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_ceil(_Tp __x)
|
||||
{ return std::ceil(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_nearbyint(_Tp __x)
|
||||
{ return std::nearbyint(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_rint(_Tp __x)
|
||||
{ return std::rint(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _ST<long> _S_lrint(_Tp __x)
|
||||
{ return {std::lrint(__x)}; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _ST<long long> _S_llrint(_Tp __x)
|
||||
{ return {std::llrint(__x)}; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_round(_Tp __x)
|
||||
{ return std::round(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _ST<long> _S_lround(_Tp __x)
|
||||
{ return {std::lround(__x)}; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _ST<long long> _S_llround(_Tp __x)
|
||||
{ return {std::llround(__x)}; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_ldexp(_Tp __x, _ST<int> __y)
|
||||
{ return std::ldexp(__x, __y.first); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_scalbn(_Tp __x, _ST<int> __y)
|
||||
{ return std::scalbn(__x, __y.first); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_scalbln(_Tp __x, _ST<long> __y)
|
||||
{ return std::scalbln(__x, __y.first); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_fmod(_Tp __x, _Tp __y)
|
||||
{ return std::fmod(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_remainder(_Tp __x, _Tp __y)
|
||||
{ return std::remainder(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_nextafter(_Tp __x, _Tp __y)
|
||||
{ return std::nextafter(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_fdim(_Tp __x, _Tp __y)
|
||||
{ return std::fdim(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_fmax(_Tp __x, _Tp __y)
|
||||
{ return std::fmax(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_fmin(_Tp __x, _Tp __y)
|
||||
{ return std::fmin(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_fma(_Tp __x, _Tp __y, _Tp __z)
|
||||
{ return std::fma(__x, __y, __z); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_remquo(_Tp __x, _Tp __y, _ST<int>* __z)
|
||||
{ return std::remquo(__x, __y, &__z->first); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static _ST<int> _S_fpclassify(_Tp __x)
|
||||
{ return {std::fpclassify(__x)}; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isfinite(_Tp __x)
|
||||
{ return std::isfinite(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isinf(_Tp __x)
|
||||
{ return std::isinf(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isnan(_Tp __x)
|
||||
{ return std::isnan(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isnormal(_Tp __x)
|
||||
{ return std::isnormal(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_signbit(_Tp __x)
|
||||
{ return std::signbit(__x); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isgreater(_Tp __x, _Tp __y)
|
||||
{ return std::isgreater(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isgreaterequal(_Tp __x,
|
||||
_Tp __y)
|
||||
{ return std::isgreaterequal(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isless(_Tp __x, _Tp __y)
|
||||
{ return std::isless(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_islessequal(_Tp __x, _Tp __y)
|
||||
{ return std::islessequal(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_islessgreater(_Tp __x,
|
||||
_Tp __y)
|
||||
{ return std::islessgreater(__x, __y); }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_isunordered(_Tp __x,
|
||||
_Tp __y)
|
||||
{ return std::isunordered(__x, __y); }
|
||||
|
||||
// _S_increment & _S_decrement {{{2
|
||||
// Pre-increments the referenced value in place.
template <typename _Tp>
  static constexpr inline void
  _S_increment(_Tp& __x)
  {
    ++__x;
  }
|
||||
|
||||
// Pre-decrements the referenced value in place.
template <typename _Tp>
  static constexpr inline void
  _S_decrement(_Tp& __x)
  {
    --__x;
  }
|
||||
|
||||
|
||||
// compares {{{2
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_equal_to(_Tp __x, _Tp __y)
|
||||
{ return __x == __y; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_not_equal_to(_Tp __x,
|
||||
_Tp __y)
|
||||
{ return __x != __y; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_less(_Tp __x, _Tp __y)
|
||||
{ return __x < __y; }
|
||||
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool _S_less_equal(_Tp __x,
|
||||
_Tp __y)
|
||||
{ return __x <= __y; }
|
||||
|
||||
// smart_reference access {{{2
|
||||
template <typename _Tp, typename _Up>
|
||||
constexpr static void _S_set(_Tp& __v, [[maybe_unused]] int __i,
|
||||
_Up&& __x) noexcept
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(__i == 0);
|
||||
__v = static_cast<_Up&&>(__x);
|
||||
}
|
||||
|
||||
// _S_masked_assign {{{2
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static void
|
||||
_S_masked_assign(bool __k, _Tp& __lhs, _Tp __rhs)
|
||||
{ if (__k) __lhs = __rhs; }
|
||||
|
||||
// _S_masked_cassign {{{2
|
||||
template <typename _Op, typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static void
|
||||
_S_masked_cassign(const bool __k, _Tp& __lhs, const _Tp __rhs, _Op __op)
|
||||
{ if (__k) __lhs = __op(_SimdImplScalar{}, __lhs, __rhs); }
|
||||
|
||||
// _S_masked_unary {{{2
|
||||
template <template <typename> class _Op, typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static _Tp _S_masked_unary(const bool __k,
|
||||
const _Tp __v)
|
||||
{ return static_cast<_Tp>(__k ? _Op<_Tp>{}(__v) : __v); }
|
||||
|
||||
// }}}2
|
||||
};
|
||||
|
||||
// }}}
|
||||
// _MaskImplScalar {{{
|
||||
struct _MaskImplScalar
|
||||
{
|
||||
// member types {{{
|
||||
template <typename _Tp>
|
||||
using _TypeTag = _Tp*;
|
||||
|
||||
// }}}
|
||||
// _S_broadcast {{{
|
||||
template <typename>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_broadcast(bool __x)
|
||||
{ return __x; }
|
||||
|
||||
// }}}
|
||||
// _S_load {{{
|
||||
template <typename>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_load(const bool* __mem)
|
||||
{ return __mem[0]; }
|
||||
|
||||
// }}}
|
||||
// _S_to_bits {{{
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<1>
|
||||
_S_to_bits(bool __x)
|
||||
{ return __x; }
|
||||
|
||||
// }}}
|
||||
// _S_convert {{{
|
||||
template <typename, bool _Sanitized>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr bool
|
||||
_S_convert(_BitMask<1, _Sanitized> __x)
|
||||
{ return __x[0]; }
|
||||
|
||||
template <typename, typename _Up, typename _UAbi>
|
||||
_GLIBCXX_SIMD_INTRINSIC static constexpr bool
|
||||
_S_convert(simd_mask<_Up, _UAbi> __x)
|
||||
{ return __x[0]; }
|
||||
|
||||
// }}}
|
||||
// _S_from_bitmask {{{2
|
||||
template <typename _Tp>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool
|
||||
_S_from_bitmask(_SanitizedBitMask<1> __bits, _TypeTag<_Tp>) noexcept
|
||||
{ return __bits[0]; }
|
||||
|
||||
// _S_masked_load {{{2
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool
|
||||
_S_masked_load(bool __merge, bool __mask, const bool* __mem) noexcept
|
||||
{
|
||||
if (__mask)
|
||||
__merge = __mem[0];
|
||||
return __merge;
|
||||
}
|
||||
|
||||
// _S_store {{{2
|
||||
_GLIBCXX_SIMD_INTRINSIC static void _S_store(bool __v, bool* __mem) noexcept
|
||||
{ __mem[0] = __v; }
|
||||
|
||||
// _S_masked_store {{{2
|
||||
_GLIBCXX_SIMD_INTRINSIC static void
|
||||
_S_masked_store(const bool __v, bool* __mem, const bool __k) noexcept
|
||||
{
|
||||
if (__k)
|
||||
__mem[0] = __v;
|
||||
}
|
||||
|
||||
// logical and bitwise operators {{{2
|
||||
static constexpr bool _S_logical_and(bool __x, bool __y)
|
||||
{ return __x && __y; }
|
||||
|
||||
static constexpr bool _S_logical_or(bool __x, bool __y)
|
||||
{ return __x || __y; }
|
||||
|
||||
static constexpr bool _S_bit_not(bool __x)
|
||||
{ return !__x; }
|
||||
|
||||
static constexpr bool _S_bit_and(bool __x, bool __y)
|
||||
{ return __x && __y; }
|
||||
|
||||
static constexpr bool _S_bit_or(bool __x, bool __y)
|
||||
{ return __x || __y; }
|
||||
|
||||
static constexpr bool _S_bit_xor(bool __x, bool __y)
|
||||
{ return __x != __y; }
|
||||
|
||||
// smart_reference access {{{2
|
||||
constexpr static void _S_set(bool& __k, [[maybe_unused]] int __i,
|
||||
bool __x) noexcept
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(__i == 0);
|
||||
__k = __x;
|
||||
}
|
||||
|
||||
// _S_masked_assign {{{2
|
||||
_GLIBCXX_SIMD_INTRINSIC static void _S_masked_assign(bool __k, bool& __lhs,
|
||||
bool __rhs)
|
||||
{
|
||||
if (__k)
|
||||
__lhs = __rhs;
|
||||
}
|
||||
|
||||
// }}}2
|
||||
// _S_all_of {{{
|
||||
template <typename _Tp, typename _Abi>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool
|
||||
_S_all_of(simd_mask<_Tp, _Abi> __k)
|
||||
{ return __k._M_data; }
|
||||
|
||||
// }}}
|
||||
// _S_any_of {{{
|
||||
template <typename _Tp, typename _Abi>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool
|
||||
_S_any_of(simd_mask<_Tp, _Abi> __k)
|
||||
{ return __k._M_data; }
|
||||
|
||||
// }}}
|
||||
// _S_none_of {{{
|
||||
template <typename _Tp, typename _Abi>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool
|
||||
_S_none_of(simd_mask<_Tp, _Abi> __k)
|
||||
{ return !__k._M_data; }
|
||||
|
||||
// }}}
|
||||
// _S_some_of {{{
|
||||
template <typename _Tp, typename _Abi>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static bool
|
||||
_S_some_of(simd_mask<_Tp, _Abi>)
|
||||
{ return false; }
|
||||
|
||||
// }}}
|
||||
// _S_popcount {{{
|
||||
template <typename _Tp, typename _Abi>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static int
|
||||
_S_popcount(simd_mask<_Tp, _Abi> __k)
|
||||
{ return __k._M_data; }
|
||||
|
||||
// }}}
|
||||
// _S_find_first_set {{{
|
||||
template <typename _Tp, typename _Abi>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static int
|
||||
_S_find_first_set(simd_mask<_Tp, _Abi>)
|
||||
{ return 0; }
|
||||
|
||||
// }}}
|
||||
// _S_find_last_set {{{
|
||||
template <typename _Tp, typename _Abi>
|
||||
_GLIBCXX_SIMD_INTRINSIC constexpr static int
|
||||
_S_find_last_set(simd_mask<_Tp, _Abi>)
|
||||
{ return 0; }
|
||||
|
||||
// }}}
|
||||
};
|
||||
|
||||
// }}}
|
||||
|
||||
_GLIBCXX_SIMD_END_NAMESPACE
|
||||
#endif // __cplusplus >= 201703L
|
||||
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_SCALAR_H_
|
||||
|
||||
// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
|
5169
libstdc++-v3/include/experimental/bits/simd_x86.h
Normal file
5169
libstdc++-v3/include/experimental/bits/simd_x86.h
Normal file
File diff suppressed because it is too large
Load Diff
2029
libstdc++-v3/include/experimental/bits/simd_x86_conversions.h
Normal file
2029
libstdc++-v3/include/experimental/bits/simd_x86_conversions.h
Normal file
File diff suppressed because it is too large
Load Diff
70
libstdc++-v3/include/experimental/simd
Normal file
70
libstdc++-v3/include/experimental/simd
Normal file
@ -0,0 +1,70 @@
|
||||
// Components for element-wise operations on data-parallel objects -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/** @file experimental/simd
|
||||
* This is a TS C++ Library header.
|
||||
*/
|
||||
|
||||
//
|
||||
// N4773 §9 data-parallel types library
|
||||
//
|
||||
|
||||
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD
|
||||
#define _GLIBCXX_EXPERIMENTAL_SIMD
|
||||
|
||||
// Only advertise the feature-test macro when compiling as C++17 or later.
// bits/simd_scalar.h wraps its contents in `__cplusplus >= 201703L`
// (NOTE(review): presumably the other bits/simd_*.h headers do the same --
// confirm), so in earlier modes the macro would claim support although the
// header declares nothing.
#if __cplusplus >= 201703L
#define __cpp_lib_experimental_parallel_simd 201803
#endif
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
// Many [[gnu::vector_size(N)]] types might lead to a -Wpsabi warning which is
|
||||
// irrelevant as those functions never appear on ABI borders
|
||||
#ifndef __clang__
|
||||
#pragma GCC diagnostic ignored "-Wpsabi"
|
||||
#endif
|
||||
|
||||
// If __OPTIMIZE__ is not defined some intrinsics are defined as macros, making
|
||||
// use of C casts internally. This requires us to disable the warning as it
|
||||
// would otherwise yield many false positives.
|
||||
#ifndef __OPTIMIZE__
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
|
||||
#include "bits/simd_detail.h"
|
||||
#include "bits/simd.h"
|
||||
#include "bits/simd_fixed_size.h"
|
||||
#include "bits/simd_scalar.h"
|
||||
#include "bits/simd_builtin.h"
|
||||
#include "bits/simd_converter.h"
|
||||
#if _GLIBCXX_SIMD_X86INTRIN
|
||||
#include "bits/simd_x86.h"
|
||||
#elif _GLIBCXX_SIMD_HAVE_NEON
|
||||
#include "bits/simd_neon.h"
|
||||
#elif __ALTIVEC__
|
||||
#include "bits/simd_ppc.h"
|
||||
#endif
|
||||
#include "bits/simd_math.h"
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif // _GLIBCXX_EXPERIMENTAL_SIMD
|
||||
// vim: ft=cpp
|
@ -0,0 +1,64 @@
|
||||
// { dg-options "-std=c++17 -fno-fast-math" }
|
||||
// { dg-do compile { target c++17 } }
|
||||
|
||||
// Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include <experimental/simd>
|
||||
|
||||
// Compile-time check that V and its nested mask_type are both default
// constructible and destructible.
template <typename V>
  void
  is_usable()
  {
    using mask_t = typename V::mask_type;

    static_assert(std::is_default_constructible_v<V>);
    static_assert(std::is_destructible_v<V>);
    static_assert(std::is_default_constructible_v<mask_t>);
    static_assert(std::is_destructible_v<mask_t>);
  }
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
test01()
|
||||
{
|
||||
namespace stdx = std::experimental;
|
||||
is_usable<stdx::simd<T>>();
|
||||
is_usable<stdx::native_simd<T>>();
|
||||
is_usable<stdx::fixed_size_simd<T, 3>>();
|
||||
is_usable<stdx::fixed_size_simd<T, stdx::simd_abi::max_fixed_size<T>>>();
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test01<char>();
|
||||
test01<wchar_t>();
|
||||
test01<char16_t>();
|
||||
test01<char32_t>();
|
||||
|
||||
test01<signed char>();
|
||||
test01<unsigned char>();
|
||||
test01<short>();
|
||||
test01<unsigned short>();
|
||||
test01<int>();
|
||||
test01<unsigned int>();
|
||||
test01<long>();
|
||||
test01<unsigned long>();
|
||||
test01<long long>();
|
||||
test01<unsigned long long>();
|
||||
test01<float>();
|
||||
test01<double>();
|
||||
test01<long double>();
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
// { dg-options "-std=c++17 -ffast-math" }
|
||||
// { dg-do compile }
|
||||
|
||||
#include "standard_abi_usable.cc"
|
@ -89,12 +89,14 @@ if {[info exists tests_file] && [file exists $tests_file]} {
|
||||
# 3. wchar_t tests, if not supported.
|
||||
# 4. thread tests, if not supported.
|
||||
# 5. *_filebuf, if file I/O is not supported.
|
||||
# 6. simd tests.
|
||||
if { [string first _xin $t] == -1
|
||||
&& [string first performance $t] == -1
|
||||
&& (${v3-wchar_t} || [string first wchar_t $t] == -1)
|
||||
&& (${v3-threads} || [string first thread $t] == -1)
|
||||
&& ([string first "_filebuf" $t] == -1
|
||||
|| [check_v3_target_fileio]) } {
|
||||
|| [check_v3_target_fileio])
|
||||
&& [string first "/experimental/simd/" $t] == -1 } {
|
||||
lappend tests $t
|
||||
}
|
||||
}
|
||||
@ -107,5 +109,19 @@ global DEFAULT_CXXFLAGS
|
||||
global PCH_CXXFLAGS
|
||||
dg-runtest $tests "" "$DEFAULT_CXXFLAGS $PCH_CXXFLAGS"
|
||||
|
||||
# Finally, run the simd tests with extra SIMD-relevant flags.  Both globals
# are reset to empty before check_vect_support_and_set_flags is consulted;
# the tests are only run when it reports success.
global DEFAULT_VECTCFLAGS EFFECTIVE_TARGETS
set DEFAULT_VECTCFLAGS ""
set EFFECTIVE_TARGETS ""

if {[check_vect_support_and_set_flags]} {
    lappend DEFAULT_VECTCFLAGS "-O2" "-Wno-psabi"
    et-dg-runtest dg-runtest \
	[lsort [glob -nocomplain $srcdir/experimental/simd/*.cc]] \
	"$DEFAULT_VECTCFLAGS" "$DEFAULT_CXXFLAGS $PCH_CXXFLAGS"
}
|
||||
|
||||
# All done.
|
||||
dg-finish
|
||||
|
Loading…
Reference in New Issue
Block a user