From f03e92172a78fbb7beab293ced88b161b2cfb4c5 Mon Sep 17 00:00:00 2001 From: Thomas Koenig Date: Thu, 2 Mar 2017 12:54:27 +0000 Subject: [PATCH] matmul.m4 (matmul_'rtype_code`): Avoid race condition on storing function pointer. 2017-03-02 Thomas Koenig Jakub Jelinek * m4/matmul.m4 (matmul_'rtype_code`): Avoid race condition on storing function pointer. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. Co-Authored-By: Jakub Jelinek From-SVN: r245839 --- libgfortran/ChangeLog | 19 +++++++++++++++++++ libgfortran/generated/matmul_c10.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_c16.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_c4.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_c8.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_i1.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_i16.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_i2.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_i4.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_i8.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_r10.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_r16.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_r4.c | 28 +++++++++++++++++----------- libgfortran/generated/matmul_r8.c | 28 +++++++++++++++++----------- libgfortran/m4/matmul.m4 | 28 +++++++++++++++++----------- 15 files changed, 257 insertions(+), 154 deletions(-) diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog index ec72c6daa84..ec689fd7285 100644 --- a/libgfortran/ChangeLog +++ b/libgfortran/ChangeLog @@ -1,3 +1,22 @@ +2017-03-02 Thomas Koenig + Jakub Jelinek + + * m4/matmul.m4 (matmul_'rtype_code`): Avoid + race condition on storing function pointer. + * generated/matmul_c10.c: Regenerated. + * generated/matmul_c16.c: Regenerated. + * generated/matmul_c4.c: Regenerated. + * generated/matmul_c8.c: Regenerated. + * generated/matmul_i1.c: Regenerated. + * generated/matmul_i16.c: Regenerated. + * generated/matmul_i2.c: Regenerated. + * generated/matmul_i4.c: Regenerated. + * generated/matmul_i8.c: Regenerated. + * generated/matmul_r10.c: Regenerated. + * generated/matmul_r16.c: Regenerated. + * generated/matmul_r4.c: Regenerated. + * generated/matmul_r8.c: Regenerated. + 2017-03-02 Thomas Koenig PR fortran/78379 diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c index b333a844ea5..c8e7a6c37fb 100644 --- a/libgfortran/generated/matmul_c10.c +++ b/libgfortran/generated/matmul_c10.c @@ -2256,19 +2256,24 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray, { static void (*matmul_p) (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_c10 * const restrict retarray, + gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_c10_vanilla; + matmul_fn = matmul_c10_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_c10_avx512f; - goto tailcall; + matmul_fn = matmul_c10_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_c10_avx2; - goto tailcall; + matmul_fn = matmul_c10_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_c10_avx; - goto tailcall; + matmul_fn = matmul_c10_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c index 0ef66c0f4a2..adbf1c81a7a 100644 --- a/libgfortran/generated/matmul_c16.c +++ b/libgfortran/generated/matmul_c16.c @@ -2256,19 +2256,24 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray, { static void (*matmul_p) (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_c16 * const restrict retarray, + gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_c16_vanilla; + matmul_fn = matmul_c16_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_c16_avx512f; - goto tailcall; + matmul_fn = matmul_c16_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_c16_avx2; - goto tailcall; + matmul_fn = matmul_c16_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_c16_avx; - goto tailcall; + matmul_fn = matmul_c16_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c index b30320b37ae..ff0e76a9275 100644 --- a/libgfortran/generated/matmul_c4.c +++ b/libgfortran/generated/matmul_c4.c @@ -2256,19 +2256,24 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray, { static void (*matmul_p) (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_c4 * const restrict retarray, + gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_c4_vanilla; + matmul_fn = matmul_c4_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_c4_avx512f; - goto tailcall; + matmul_fn = matmul_c4_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_c4_avx2; - goto tailcall; + matmul_fn = matmul_c4_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_c4_avx; - goto tailcall; + matmul_fn = matmul_c4_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c index 75b4680c3a0..483ebf114d7 100644 --- a/libgfortran/generated/matmul_c8.c +++ b/libgfortran/generated/matmul_c8.c @@ -2256,19 +2256,24 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray, { static void (*matmul_p) (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_c8 * const restrict retarray, + gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_c8_vanilla; + matmul_fn = matmul_c8_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_c8_avx512f; - goto tailcall; + matmul_fn = matmul_c8_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_c8_avx2; - goto tailcall; + matmul_fn = matmul_c8_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_c8_avx; - goto tailcall; + matmul_fn = matmul_c8_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c index 924826338d8..51cfcc05360 100644 --- a/libgfortran/generated/matmul_i1.c +++ b/libgfortran/generated/matmul_i1.c @@ -2256,19 +2256,24 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray, { static void (*matmul_p) (gfc_array_i1 * const restrict retarray, gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_i1 * const restrict retarray, + gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_i1_vanilla; + matmul_fn = matmul_i1_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_i1_avx512f; - goto tailcall; + matmul_fn = matmul_i1_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_i1_avx2; - goto tailcall; + matmul_fn = matmul_i1_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_i1_avx; - goto tailcall; + matmul_fn = matmul_i1_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c index f10540ed48a..90a422f4290 100644 --- a/libgfortran/generated/matmul_i16.c +++ b/libgfortran/generated/matmul_i16.c @@ -2256,19 +2256,24 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray, { static void (*matmul_p) (gfc_array_i16 * const restrict retarray, gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_i16 * const restrict retarray, + gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_i16_vanilla; + matmul_fn = matmul_i16_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_i16_avx512f; - goto tailcall; + matmul_fn = matmul_i16_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_i16_avx2; - goto tailcall; + matmul_fn = matmul_i16_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_i16_avx; - goto tailcall; + matmul_fn = matmul_i16_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c index 55ad5c614e6..58be84fc9c8 100644 --- a/libgfortran/generated/matmul_i2.c +++ b/libgfortran/generated/matmul_i2.c @@ -2256,19 +2256,24 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray, { static void (*matmul_p) (gfc_array_i2 * const restrict retarray, gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_i2 * const restrict retarray, + gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_i2_vanilla; + matmul_fn = matmul_i2_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_i2_avx512f; - goto tailcall; + matmul_fn = matmul_i2_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_i2_avx2; - goto tailcall; + matmul_fn = matmul_i2_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_i2_avx; - goto tailcall; + matmul_fn = matmul_i2_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c index 97b4a5b6aa0..1e724d54ffa 100644 --- a/libgfortran/generated/matmul_i4.c +++ b/libgfortran/generated/matmul_i4.c @@ -2256,19 +2256,24 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray, { static void (*matmul_p) (gfc_array_i4 * const restrict retarray, gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_i4 * const restrict retarray, + gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_i4_vanilla; + matmul_fn = matmul_i4_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_i4_avx512f; - goto tailcall; + matmul_fn = matmul_i4_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_i4_avx2; - goto tailcall; + matmul_fn = matmul_i4_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_i4_avx; - goto tailcall; + matmul_fn = matmul_i4_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c index ae78ecfccb6..f5ed84fa288 100644 --- a/libgfortran/generated/matmul_i8.c +++ b/libgfortran/generated/matmul_i8.c @@ -2256,19 +2256,24 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray, { static void (*matmul_p) (gfc_array_i8 * const restrict retarray, gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_i8 * const restrict retarray, + gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_i8_vanilla; + matmul_fn = matmul_i8_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_i8_avx512f; - goto tailcall; + matmul_fn = matmul_i8_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_i8_avx2; - goto tailcall; + matmul_fn = matmul_i8_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_i8_avx; - goto tailcall; + matmul_fn = matmul_i8_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c index 11d059198c9..8f76faceabf 100644 --- a/libgfortran/generated/matmul_r10.c +++ b/libgfortran/generated/matmul_r10.c @@ -2256,19 +2256,24 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray, { static void (*matmul_p) (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_r10 * const restrict retarray, + gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_r10_vanilla; + matmul_fn = matmul_r10_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_r10_avx512f; - goto tailcall; + matmul_fn = matmul_r10_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_r10_avx2; - goto tailcall; + matmul_fn = matmul_r10_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_r10_avx; - goto tailcall; + matmul_fn = matmul_r10_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c index 73e7c9877ad..6a7687fdc33 100644 --- a/libgfortran/generated/matmul_r16.c +++ b/libgfortran/generated/matmul_r16.c @@ -2256,19 +2256,24 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray, { static void (*matmul_p) (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_r16 * const restrict retarray, + gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_r16_vanilla; + matmul_fn = matmul_r16_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_r16_avx512f; - goto tailcall; + matmul_fn = matmul_r16_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_r16_avx2; - goto tailcall; + matmul_fn = matmul_r16_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_r16_avx; - goto tailcall; + matmul_fn = matmul_r16_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c index ac7306fe035..dbb31b05c3b 100644 --- a/libgfortran/generated/matmul_r4.c +++ b/libgfortran/generated/matmul_r4.c @@ -2256,19 +2256,24 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray, { static void (*matmul_p) (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_r4 * const restrict retarray, + gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_r4_vanilla; + matmul_fn = matmul_r4_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_r4_avx512f; - goto tailcall; + matmul_fn = matmul_r4_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_r4_avx2; - goto tailcall; + matmul_fn = matmul_r4_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_r4_avx; - goto tailcall; + matmul_fn = matmul_r4_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c index 8d2e784de60..23efed22e2a 100644 --- a/libgfortran/generated/matmul_r8.c +++ b/libgfortran/generated/matmul_r8.c @@ -2256,19 +2256,24 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray, { static void (*matmul_p) (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) (gfc_array_r8 * const restrict retarray, + gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_r8_vanilla; + matmul_fn = matmul_r8_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_r8_avx512f; - goto tailcall; + matmul_fn = matmul_r8_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -2277,8 +2282,8 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_r8_avx2; - goto tailcall; + matmul_fn = matmul_r8_avx2; + goto store; } #endif @@ -2286,15 +2291,16 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_r8_avx; - goto tailcall; + matmul_fn = matmul_r8_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */ diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4 index 812a7e7e571..7976fda8bb4 100644 --- a/libgfortran/m4/matmul.m4 +++ b/libgfortran/m4/matmul.m4 @@ -121,19 +121,24 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray, { static void (*matmul_p) ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, - int blas_limit, blas_call gemm) = NULL; + int blas_limit, blas_call gemm); - if (matmul_p == NULL) + void (*matmul_fn) ('rtype` * const restrict retarray, + 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) { - matmul_p = matmul_'rtype_code`_vanilla; + matmul_fn = matmul_'rtype_code`_vanilla; if (__cpu_model.__cpu_vendor == VENDOR_INTEL) { /* Run down the available processors in order of preference. */ #ifdef HAVE_AVX512F if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F)) { - matmul_p = matmul_'rtype_code`_avx512f; - goto tailcall; + matmul_fn = matmul_'rtype_code`_avx512f; + goto store; } #endif /* HAVE_AVX512F */ @@ -142,8 +147,8 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray, if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2)) && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) { - matmul_p = matmul_'rtype_code`_avx2; - goto tailcall; + matmul_fn = matmul_'rtype_code`_avx2; + goto store; } #endif @@ -151,15 +156,16 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray, #ifdef HAVE_AVX if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) { - matmul_p = matmul_'rtype_code`_avx; - goto tailcall; + matmul_fn = matmul_'rtype_code`_avx; + goto store; } #endif /* HAVE_AVX */ } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -tailcall: - (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm); + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); } #else /* Just the vanilla function. */