nvptx: Add support for subword compare-and-swap

This adds support for __sync_val_compare_and_swap and
__sync_bool_compare_and_swap for 1-byte and 2-byte long
values, which are not natively supported on nvptx.

Built and reg-tested on nvptx.
Built and reg-tested libgomp on x86_64 with nvptx accelerator.

2020-07-16  Kwok Cheung Yeung  <kcy@codesourcery.com>

	libgcc/
	* config/nvptx/atomic.c: New.
	* config/nvptx/t-nvptx (LIB2ADD): Add atomic.c.

	gcc/testsuite/
	* gcc.target/nvptx/ia64-sync-5.c: New.

	libgomp/
	* testsuite/libgomp.c-c++-common/reduction-16.c: New.
This commit is contained in:
Kwok Cheung Yeung 2020-08-03 17:38:13 +02:00 committed by Tom de Vries
parent f91770216e
commit 17dc08edc2
4 changed files with 130 additions and 1 deletions

View File

@ -0,0 +1,2 @@
/* { dg-do run } */
#include "../../gcc.dg/ia64-sync-5.c"

View File

@ -0,0 +1,73 @@
/* NVPTX atomic operations
Copyright (C) 2020 Free Software Foundation, Inc.
Contributed by Mentor Graphics.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include <stdbool.h>
/* Implement __sync_val_compare_and_swap and __sync_bool_compare_and_swap
   for 1 and 2-byte values (which are not natively supported) in terms of
   __sync_val_compare_and_swap for 4-byte values (which is supported).

   The subword is updated by rewriting the whole naturally aligned 4-byte
   word that contains it, so this assumes that the contents of the word
   surrounding the subword are accessible as well (which should normally
   be the case).  The position of the subword inside the word is computed
   as (byte offset within the word) * 8, i.e. the lowest-addressed byte is
   taken to occupy the least significant bits.

   Note that if the contents of the word surrounding the subword change
   between the __sync_val_compare_and_swap_4 and the preceding read of the
   word, while the subword does not, the implementation loops and retries
   with the freshly observed word; under sustained contention this may
   manifest worst-case as a hang.

   The pointer parameters are declared 'volatile void *' so that these
   definitions agree with the prototypes GCC uses for the built-in
   functions of the same names (avoiding -Wbuiltin-declaration-mismatch)
   and so that callers may pass pointers to volatile objects.

   __sync_val_compare_and_swap_SIZE returns the value the subword held
   before the operation; the swap took place iff that value equals OLDVAL.
   __sync_bool_compare_and_swap_SIZE returns true iff the swap took
   place.  */
#define __SYNC_SUBWORD_COMPARE_AND_SWAP(TYPE, SIZE)                         \
                                                                            \
TYPE                                                                        \
__sync_val_compare_and_swap_##SIZE (volatile void *vptr, TYPE oldval,       \
                                    TYPE newval)                            \
{                                                                           \
  /* Aligned word containing *VPTR, and the subword's bit offset in it.  */ \
  volatile unsigned int *wordptr                                            \
    = (volatile unsigned int *) ((__UINTPTR_TYPE__) vptr                    \
                                 & ~(__UINTPTR_TYPE__) 3);                  \
  int shift = ((__UINTPTR_TYPE__) vptr & 3) * 8;                            \
  /* VALMASK selects a subword in the low bits; WORDMASK selects every */   \
  /* bit of the word that does NOT belong to our subword.  */               \
  unsigned int valmask = (1U << ((SIZE) * 8)) - 1;                          \
  unsigned int wordmask = ~(valmask << shift);                              \
  unsigned int oldword = *wordptr;                                          \
  for (;;)                                                                  \
    {                                                                       \
      TYPE prevval = (oldword >> shift) & valmask;                          \
      /* Exit if the subword value previously read from memory is not */    \
      /* equal to the expected value OLDVAL.  */                            \
      if (__builtin_expect (prevval != oldval, 0))                          \
        return prevval;                                                     \
      /* Splice NEWVAL into the word, keeping the surrounding bytes.  */    \
      unsigned int newword                                                  \
        = (oldword & wordmask) | (((unsigned int) newval) << shift);        \
      unsigned int prevword                                                 \
        = __sync_val_compare_and_swap_4 (wordptr, oldword, newword);        \
      /* Exit only if the compare-and-swap succeeds on the whole word */    \
      /* (i.e. the contents of *WORDPTR have not changed since the last */  \
      /* memory read).  */                                                  \
      if (__builtin_expect (prevword == oldword, 1))                        \
        return oldval;                                                      \
      /* Something in the word changed; retry with what is there now.  */   \
      oldword = prevword;                                                   \
    }                                                                       \
}                                                                           \
                                                                            \
bool                                                                        \
__sync_bool_compare_and_swap_##SIZE (volatile void *ptr, TYPE oldval,       \
                                     TYPE newval)                           \
{                                                                           \
  return __sync_val_compare_and_swap_##SIZE (ptr, oldval, newval) == oldval; \
}

__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned char, 1)
__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned short, 2)

View File

@ -1,5 +1,6 @@
LIB2ADD=$(srcdir)/config/nvptx/reduction.c \
$(srcdir)/config/nvptx/mgomp.c
$(srcdir)/config/nvptx/mgomp.c \
$(srcdir)/config/nvptx/atomic.c
LIB2ADDEH=
LIB2FUNCS_EXCLUDE=__main

View File

@ -0,0 +1,53 @@
/* { dg-do run } */
#include <stdlib.h>
/* Number of elements in each test array.  */
#define N 512

/* Define 'int test_<T> (void)', which exercises the logical-OR and
   logical-AND reductions with an accumulator and array of type T.
   The array holds the alternating pattern 0, 1, 0, 1, ...  The OR
   reduction must therefore yield a non-zero value, and the subsequent
   AND reduction (which starts from that non-zero value) must yield
   zero.  The generated function returns zero on success and non-zero
   on failure.  */
#define GENERATE_TEST(T) \
int test_##T (void) \
{ \
  T res = 0; \
  T input[N]; \
  \
  for (int k = 0; k < N; k++) \
    input[k] = k % 2; \
  \
  _Pragma("omp target teams distribute reduction(||:res) defaultmap(tofrom:scalar)") \
  for (int k = 0; k < N; k++) \
    res = res || input[k]; \
  \
  /* At least one element is set, so the OR must be non-zero.  */ \
  if (res == 0) \
    return 1; \
  \
  _Pragma("omp target teams distribute reduction(&&:res) defaultmap(tofrom:scalar)") \
  for (int k = 0; k < N; k++) \
    res = res && input[k]; \
  \
  /* At least one element is clear, so the AND must be zero.  */ \
  return res; \
}

GENERATE_TEST(char)
GENERATE_TEST(short)
GENERATE_TEST(int)
GENERATE_TEST(long)
#ifdef __SIZEOF_INT128__
GENERATE_TEST(__int128)
#endif
/* Run the generated test for every element type in turn and abort as
   soon as one of them reports failure (a non-zero return value).  */
int main(void)
{
  /* '||' short-circuits, so later tests are not run once one fails.  */
  int failed = test_char () || test_short () || test_int () || test_long ();
#ifdef __SIZEOF_INT128__
  failed = failed || test___int128 ();
#endif
  if (failed)
    abort ();
  return 0;
}