matmul.m4 (matmul_'rtype_code`): Avoid race condition on storing function pointer.

2017-03-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
	    Jakub Jelinek  <jakub@redhat.com>

	* m4/matmul.m4 (matmul_'rtype_code`): Avoid race condition
	on storing function pointer.
	* generated/matmul_c10.c: Regenerated.
	* generated/matmul_c16.c: Regenerated.
	* generated/matmul_c4.c: Regenerated.
	* generated/matmul_c8.c: Regenerated.
	* generated/matmul_i1.c: Regenerated.
	* generated/matmul_i16.c: Regenerated.
	* generated/matmul_i2.c: Regenerated.
	* generated/matmul_i4.c: Regenerated.
	* generated/matmul_i8.c: Regenerated.
	* generated/matmul_r10.c: Regenerated.
	* generated/matmul_r16.c: Regenerated.
	* generated/matmul_r4.c: Regenerated.
	* generated/matmul_r8.c: Regenerated.


Co-Authored-By: Jakub Jelinek <jakub@redhat.com>

From-SVN: r245839
commit f03e92172a (parent eb0e7c34d7)
Thomas Koenig, 2017-03-02 12:54:27 +00:00
15 changed files with 257 additions and 154 deletions
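The race in outline: each matmul entry point lazily picks a CPU-specific implementation on first call and caches it in a static function pointer. The old code read and wrote that pointer with plain accesses, and wrote it twice (vanilla fallback first, then the best SIMD variant), which is a data race when matmul is called from several threads at once. The fix selects into a local matmul_fn, publishes it with a single atomic store, and always calls through the local copy. A minimal sketch of the before/after pattern, illustrative only (simplified signatures; matmul_vanilla, matmul_avx2 and cpu_has_avx2 are stand-ins, not libgfortran's real symbols):

    /* Illustrative stand-ins, not libgfortran's real symbols.  */
    extern void matmul_vanilla (void);
    extern void matmul_avx2 (void);
    extern int cpu_has_avx2 (void);

    static void (*matmul_p) (void);   /* cached choice, shared by all threads */

    /* BEFORE (racy): the static pointer is read and then written twice with
       plain accesses; the compiler may also reload matmul_p between the
       NULL check and the indirect call.  */
    void dispatch_racy (void)
    {
      if (matmul_p == NULL)             /* unsynchronized read */
        {
          matmul_p = matmul_vanilla;    /* unsynchronized write #1 */
          if (cpu_has_avx2 ())
            matmul_p = matmul_avx2;     /* unsynchronized write #2 */
        }
      (*matmul_p) ();
    }

    /* AFTER (the pattern this commit installs): select into a local,
       publish once atomically, call through the local copy.  */
    void dispatch_fixed (void)
    {
      void (*matmul_fn) (void);

      matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
      if (matmul_fn == NULL)
        {
          matmul_fn = cpu_has_avx2 () ? matmul_avx2 : matmul_vanilla;
          __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
        }
      (*matmul_fn) ();                  /* never touches matmul_p again */
    }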

libgfortran/ChangeLog:

@@ -1,3 +1,22 @@
+2017-03-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* m4/matmul.m4 (matmul_'rtype_code`): Avoid race condition
+	on storing function pointer.
+	* generated/matmul_c10.c: Regenerated.
+	* generated/matmul_c16.c: Regenerated.
+	* generated/matmul_c4.c: Regenerated.
+	* generated/matmul_c8.c: Regenerated.
+	* generated/matmul_i1.c: Regenerated.
+	* generated/matmul_i16.c: Regenerated.
+	* generated/matmul_i2.c: Regenerated.
+	* generated/matmul_i4.c: Regenerated.
+	* generated/matmul_i8.c: Regenerated.
+	* generated/matmul_r10.c: Regenerated.
+	* generated/matmul_r16.c: Regenerated.
+	* generated/matmul_r4.c: Regenerated.
+	* generated/matmul_r8.c: Regenerated.
+
 2017-03-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
 	PR fortran/78379

generated/matmul_c10.c:

@@ -2256,19 +2256,24 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_c10 * const restrict retarray,
         gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_c10 * const restrict retarray,
+        gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_c10_vanilla;
+      matmul_fn = matmul_c10_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_c10_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_c10_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_c10_avx2;
-              goto tailcall;
+              matmul_fn = matmul_c10_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_c10_avx;
-              goto tailcall;
+              matmul_fn = matmul_c10_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_c16.c:

@@ -2256,19 +2256,24 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_c16 * const restrict retarray,
         gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_c16 * const restrict retarray,
+        gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_c16_vanilla;
+      matmul_fn = matmul_c16_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_c16_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_c16_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_c16_avx2;
-              goto tailcall;
+              matmul_fn = matmul_c16_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_c16_avx;
-              goto tailcall;
+              matmul_fn = matmul_c16_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_c4.c:

@@ -2256,19 +2256,24 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_c4 * const restrict retarray,
         gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_c4 * const restrict retarray,
+        gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_c4_vanilla;
+      matmul_fn = matmul_c4_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_c4_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_c4_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_c4_avx2;
-              goto tailcall;
+              matmul_fn = matmul_c4_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_c4_avx;
-              goto tailcall;
+              matmul_fn = matmul_c4_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_c8.c:

@@ -2256,19 +2256,24 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_c8 * const restrict retarray,
         gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_c8 * const restrict retarray,
+        gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_c8_vanilla;
+      matmul_fn = matmul_c8_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_c8_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_c8_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_c8_avx2;
-              goto tailcall;
+              matmul_fn = matmul_c8_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_c8_avx;
-              goto tailcall;
+              matmul_fn = matmul_c8_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_i1.c:

@@ -2256,19 +2256,24 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_i1 * const restrict retarray,
         gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_i1 * const restrict retarray,
+        gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_i1_vanilla;
+      matmul_fn = matmul_i1_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_i1_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_i1_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_i1_avx2;
-              goto tailcall;
+              matmul_fn = matmul_i1_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_i1_avx;
-              goto tailcall;
+              matmul_fn = matmul_i1_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_i16.c:

@@ -2256,19 +2256,24 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_i16 * const restrict retarray,
         gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_i16 * const restrict retarray,
+        gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_i16_vanilla;
+      matmul_fn = matmul_i16_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_i16_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_i16_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_i16_avx2;
-              goto tailcall;
+              matmul_fn = matmul_i16_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_i16_avx;
-              goto tailcall;
+              matmul_fn = matmul_i16_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_i2.c:

@@ -2256,19 +2256,24 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_i2 * const restrict retarray,
         gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_i2 * const restrict retarray,
+        gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_i2_vanilla;
+      matmul_fn = matmul_i2_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_i2_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_i2_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_i2_avx2;
-              goto tailcall;
+              matmul_fn = matmul_i2_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_i2_avx;
-              goto tailcall;
+              matmul_fn = matmul_i2_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_i4.c:

@@ -2256,19 +2256,24 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_i4 * const restrict retarray,
         gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_i4 * const restrict retarray,
+        gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_i4_vanilla;
+      matmul_fn = matmul_i4_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_i4_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_i4_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_i4_avx2;
-              goto tailcall;
+              matmul_fn = matmul_i4_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_i4_avx;
-              goto tailcall;
+              matmul_fn = matmul_i4_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_i8.c:

@@ -2256,19 +2256,24 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_i8 * const restrict retarray,
         gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_i8 * const restrict retarray,
+        gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_i8_vanilla;
+      matmul_fn = matmul_i8_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_i8_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_i8_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_i8_avx2;
-              goto tailcall;
+              matmul_fn = matmul_i8_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_i8_avx;
-              goto tailcall;
+              matmul_fn = matmul_i8_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_r10.c:

@@ -2256,19 +2256,24 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_r10 * const restrict retarray,
         gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_r10 * const restrict retarray,
+        gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_r10_vanilla;
+      matmul_fn = matmul_r10_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_r10_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_r10_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_r10_avx2;
-              goto tailcall;
+              matmul_fn = matmul_r10_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_r10_avx;
-              goto tailcall;
+              matmul_fn = matmul_r10_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_r16.c:

@@ -2256,19 +2256,24 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_r16 * const restrict retarray,
         gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_r16 * const restrict retarray,
+        gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_r16_vanilla;
+      matmul_fn = matmul_r16_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_r16_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_r16_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_r16_avx2;
-              goto tailcall;
+              matmul_fn = matmul_r16_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_r16_avx;
-              goto tailcall;
+              matmul_fn = matmul_r16_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_r4.c:

@@ -2256,19 +2256,24 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_r4 * const restrict retarray,
         gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_r4 * const restrict retarray,
+        gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_r4_vanilla;
+      matmul_fn = matmul_r4_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_r4_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_r4_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_r4_avx2;
-              goto tailcall;
+              matmul_fn = matmul_r4_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_r4_avx;
-              goto tailcall;
+              matmul_fn = matmul_r4_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

generated/matmul_r8.c:

@@ -2256,19 +2256,24 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
 {
   static void (*matmul_p) (gfc_array_r8 * const restrict retarray,
         gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) (gfc_array_r8 * const restrict retarray,
+        gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_r8_vanilla;
+      matmul_fn = matmul_r8_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_r8_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_r8_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -2277,8 +2282,8 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_r8_avx2;
-              goto tailcall;
+              matmul_fn = matmul_r8_avx2;
+              goto store;
             }
 #endif
 
@@ -2286,15 +2291,16 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_r8_avx;
-              goto tailcall;
+              matmul_fn = matmul_r8_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */

m4/matmul.m4:

@@ -121,19 +121,24 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
 {
   static void (*matmul_p) ('rtype` * const restrict retarray,
         'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
-        int blas_limit, blas_call gemm) = NULL;
+        int blas_limit, blas_call gemm);
+  void (*matmul_fn) ('rtype` * const restrict retarray,
+        'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+        int blas_limit, blas_call gemm);
 
-  if (matmul_p == NULL)
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
     {
-      matmul_p = matmul_'rtype_code`_vanilla;
+      matmul_fn = matmul_'rtype_code`_vanilla;
       if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
         {
           /* Run down the available processors in order of preference.  */
 #ifdef HAVE_AVX512F
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
             {
-              matmul_p = matmul_'rtype_code`_avx512f;
-              goto tailcall;
+              matmul_fn = matmul_'rtype_code`_avx512f;
+              goto store;
             }
 #endif  /* HAVE_AVX512F */
 
@@ -142,8 +147,8 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
           if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
               && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
             {
-              matmul_p = matmul_'rtype_code`_avx2;
-              goto tailcall;
+              matmul_fn = matmul_'rtype_code`_avx2;
+              goto store;
             }
 #endif
 
@@ -151,15 +156,16 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray,
 #ifdef HAVE_AVX
           if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
             {
-              matmul_p = matmul_'rtype_code`_avx;
-              goto tailcall;
+              matmul_fn = matmul_'rtype_code`_avx;
+              goto store;
             }
 #endif  /* HAVE_AVX */
         }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
     }
 
-tailcall:
-  (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
 }
 
 #else  /* Just the vanilla function.  */
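A note on the memory-order choice: __ATOMIC_RELAXED is sufficient here because every thread that races on the first call derives the same pointer value from the same CPU-detection data, nothing else is published through the pointer, and a thread that still loads NULL simply redoes the selection locally and calls through its own matmul_fn. For reference, a hypothetical stand-alone restatement of the same pattern in standard C11 atomics (names are illustrative, not libgfortran's):

    #include <stdatomic.h>

    typedef void (*matmul_impl) (void);

    static _Atomic matmul_impl cached_impl;   /* zero-initialized: NULL */

    extern void matmul_generic (void);
    extern void matmul_avx2 (void);
    extern int cpu_has_avx2 (void);

    void matmul_dispatch (void)
    {
      matmul_impl fn = atomic_load_explicit (&cached_impl, memory_order_relaxed);
      if (fn == NULL)
        {
          /* Every thread reaching this point picks the same implementation,
             so racing stores all write the same value.  */
          fn = cpu_has_avx2 () ? matmul_avx2 : matmul_generic;
          atomic_store_explicit (&cached_impl, fn, memory_order_relaxed);
        }
      fn ();
    }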