nvptx: Add support for subword compare-and-swap
This adds support for __sync_val_compare_and_swap and __sync_bool_compare_and_swap for 1-byte and 2-byte long values, which are not natively supported on nvptx. Build and reg-tested on nvptx. Build and reg-tested libgomp on x86_64 with nvptx accelerator. 2020-07-16 Kwok Cheung Yeung <kcy@codesourcery.com> libgcc/ * config/nvptx/atomic.c: New. * config/nvptx/t-nvptx (LIB2ADD): Add atomic.c. gcc/testsuite/ * gcc.target/nvptx/ia64-sync-5.c: New. libgomp/ * testsuite/libgomp.c-c++-common/reduction-16.c: New.
This commit is contained in:
parent
f91770216e
commit
17dc08edc2
|
@ -0,0 +1,2 @@
|
|||
/* { dg-do run } */
|
||||
#include "../../gcc.dg/ia64-sync-5.c"
|
|
@ -0,0 +1,73 @@
|
|||
/* NVPTX atomic operations
|
||||
Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
Contributed by Mentor Graphics.
|
||||
|
||||
This file is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 3, or (at your option) any
|
||||
later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Implement __sync_val_compare_and_swap and __sync_bool_compare_and_swap
|
||||
for 1 and 2-byte values (which are not natively supported) in terms of
|
||||
__sync_val_compare_and_swap for 4-byte values (which is supported).
|
||||
This assumes that the contents of the word surrounding the subword
|
||||
value that we are interested in are accessible as well (which should
|
||||
normally be the case). Note that if the contents of the word surrounding
|
||||
the subword changes between the __sync_val_compare_and_swap_4 and the
|
||||
preceding load of oldword, while the subword does not, the implementation
|
||||
loops, which may manifest worst-case as a hang. */
|
||||
|
||||
/* Expand to definitions of __sync_val_compare_and_swap_<SIZE> and
   __sync_bool_compare_and_swap_<SIZE> operating on TYPE, which is expected
   to be an unsigned integer type SIZE bytes wide (SIZE is 1 or 2).

   Both are built on __sync_val_compare_and_swap_4 applied to the
   4-byte-aligned word that contains *PTR.  The subword is taken from bit
   offset (PTR & 3) * 8 of that word, i.e. the byte at the lowest address
   occupies the least-significant bits, and it must lie entirely within
   the word.

   The value variant returns the subword found in memory: OLDVAL when
   NEWVAL was stored, otherwise the differing value (in which case nothing
   is stored).  The boolean variant returns true exactly when the value
   variant returns OLDVAL.  */

#define __SYNC_SUBWORD_COMPARE_AND_SWAP(TYPE, SIZE) \
 \
TYPE \
__sync_val_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval) \
{ \
  /* Round PTR down to the enclosing 4-byte word.  */ \
  unsigned int *wordptr = (unsigned int *)((__UINTPTR_TYPE__ ) ptr & ~3UL); \
  /* Bit position of the subword inside that word.  */ \
  int shift = ((__UINTPTR_TYPE__ ) ptr & 3UL) * 8; \
  /* Built from an unsigned constant so no signed shift is involved.  */ \
  unsigned int valmask = (1U << ((SIZE) * 8)) - 1; \
  /* Selects every bit of the word except the subword.  */ \
  unsigned int wordmask = ~(valmask << shift); \
  unsigned int oldword = *wordptr; \
  for (;;) \
    { \
      TYPE prevval = (oldword >> shift) & valmask; \
      /* Exit if the subword value previously read from memory is not */ \
      /* equal to the expected value OLDVAL.  */ \
      if (__builtin_expect (prevval != oldval, 0)) \
	return prevval; \
      /* Splice NEWVAL into an otherwise unchanged copy of the word.  */ \
      unsigned int newword = oldword & wordmask; \
      newword |= ((unsigned int) newval) << shift; \
      unsigned int prevword \
	= __sync_val_compare_and_swap_4 (wordptr, oldword, newword); \
      /* Exit only if the compare-and-swap succeeds on the whole word */ \
      /* (i.e. the contents of *WORDPTR have not changed since the last */ \
      /* memory read).  */ \
      if (__builtin_expect (prevword == oldword, 1)) \
	return oldval; \
      /* Something in the word changed; retry with what is there now.  */ \
      oldword = prevword; \
    } \
} \
 \
bool \
__sync_bool_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval) \
{ \
  return __sync_val_compare_and_swap_##SIZE (ptr, oldval, newval) == oldval; \
}
|
||||
|
||||
/* Provide the 1-byte and 2-byte compare-and-swap entry points.  */
__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned char, 1)
__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned short, 2)
|
|
@ -1,5 +1,6 @@
|
|||
# nvptx-specific sources listed in LIB2ADD; atomic.c supplies the subword
# compare-and-swap routines.
LIB2ADD=$(srcdir)/config/nvptx/reduction.c \
	$(srcdir)/config/nvptx/mgomp.c \
	$(srcdir)/config/nvptx/atomic.c
|
||||
|
||||
LIB2ADDEH=
|
||||
LIB2FUNCS_EXCLUDE=__main
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/* { dg-do run } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 512
|
||||
|
||||
/* Define int test_<T> (void), which exercises the OpenMP `||' and `&&'
   reductions on a variable of type T inside `omp target teams distribute'
   loops.

   An N-element array is filled with alternating 0 and 1.  RES starts at 0;
   the `||' reduction over the array must leave it non-zero, and the
   following `&&' reduction, entered with that non-zero value, must bring it
   back to zero.  The generated function returns 0 on success and a
   non-zero value if either reduction produces the wrong result.  */
#define GENERATE_TEST(T) \
int test_##T (void) \
{ \
  T a[N]; \
  T res = 0; \
 \
  for (int i = 0; i < N; ++i) \
    a[i] = i & 1; \
 \
  _Pragma("omp target teams distribute reduction(||:res) defaultmap(tofrom:scalar)") \
  for (int i = 0; i < N; ++i) \
    res = res || a[i]; \
 \
  /* res should be non-zero.  */ \
  if (!res) \
    return 1; \
 \
  _Pragma("omp target teams distribute reduction(&&:res) defaultmap(tofrom:scalar)") \
  for (int i = 0; i < N; ++i) \
    res = res && a[i]; \
 \
  /* res should be zero.  */ \
  return res; \
}
|
||||
|
||||
GENERATE_TEST(char)
|
||||
GENERATE_TEST(short)
|
||||
GENERATE_TEST(int)
|
||||
GENERATE_TEST(long)
|
||||
#ifdef __SIZEOF_INT128__
|
||||
GENERATE_TEST(__int128)
|
||||
#endif
|
||||
|
||||
/* Run every generated reduction test in turn and abort on the first one
   that returns non-zero.  */
int main(void)
{
  if (test_char ())
    abort ();
  if (test_short ())
    abort ();
  if (test_int ())
    abort ();
  if (test_long ())
    abort ();
#ifdef __SIZEOF_INT128__
  if (test___int128 ())
    abort ();
#endif
  return 0;
}
|
Loading…
Reference in New Issue