GCN libgomp port

2019-11-13  Andrew Stubbs  <ams@codesourcery.com>
	    Kwok Cheung Yeung  <kcy@codesourcery.com>
	    Julian Brown  <julian@codesourcery.com>
	    Tom de Vries  <tom@codesourcery.com>

	include/
	* gomp-constants.h (GOMP_DEVICE_GCN): Define.
	(GOMP_VERSION_GCN): Define.

	libgomp/
	* Makefile.am (libgomp_la_SOURCES): Add oacc-target.c.
	* Makefile.in: Regenerate.
	* config.h.in (PLUGIN_GCN): Add new undef.
	* config/accel/openacc.f90 (acc_device_gcn): New parameter.
	* config/gcn/affinity-fmt.c: New file.
	* config/gcn/bar.c: New file.
	* config/gcn/bar.h: New file.
	* config/gcn/doacross.h: New file.
	* config/gcn/icv-device.c: New file.
	* config/gcn/oacc-target.c: New file.
	* config/gcn/simple-bar.h: New file.
	* config/gcn/target.c: New file.
	* config/gcn/task.c: New file.
	* config/gcn/team.c: New file.
	* config/gcn/time.c: New file.
	* configure.ac: Add amdgcn*-*-*.
	* configure: Regenerate.
	* configure.tgt: Add amdgcn*-*-*.
	* libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN.
	* libgomp.h (gcn_thrs): Add amdgcn variant.
	(set_gcn_thrs): Likewise.
	(gomp_thread): Likewise.
	* oacc-int.h (goacc_thread): Likewise.
	* oacc-target.c: New file.
	* openacc.f90 (acc_device_gcn): New parameter.
	* openacc.h (acc_device_t): Add acc_device_gcn.
	* team.c (gomp_free_pool_helper): Add amdgcn support.

Co-Authored-By: Julian Brown <julian@codesourcery.com>
Co-Authored-By: Kwok Cheung Yeung <kcy@codesourcery.com>
Co-Authored-By: Tom de Vries <tom@codesourcery.com>

From-SVN: r278135
This commit is contained in:
Andrew Stubbs 2019-11-13 12:38:04 +00:00 committed by Andrew Stubbs
parent d2903ce05b
commit fa4999953d
28 changed files with 1125 additions and 7 deletions

View File

@ -1,3 +1,11 @@
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
Kwok Cheung Yeung <kcy@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
* gomp-constants.h (GOMP_DEVICE_GCN): Define.
(GOMP_VERSION_GCN): Define.
2019-08-08 Martin Liska <mliska@suse.cz>
PR bootstrap/91352

View File

@ -174,6 +174,7 @@ enum gomp_map_kind
#define GOMP_DEVICE_NVIDIA_PTX 5
#define GOMP_DEVICE_INTEL_MIC 6
#define GOMP_DEVICE_HSA 7
#define GOMP_DEVICE_GCN 8
#define GOMP_DEVICE_ICV -1
#define GOMP_DEVICE_HOST_FALLBACK -2
@ -215,6 +216,7 @@ enum gomp_map_kind
#define GOMP_VERSION_NVIDIA_PTX 1
#define GOMP_VERSION_INTEL_MIC 0
#define GOMP_VERSION_HSA 0
#define GOMP_VERSION_GCN 1
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)

View File

@ -1,3 +1,36 @@
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
Kwok Cheung Yeung <kcy@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
* Makefile.am (libgomp_la_SOURCES): Add oacc-target.c.
* Makefile.in: Regenerate.
* config.h.in (PLUGIN_GCN): Add new undef.
* config/accel/openacc.f90 (acc_device_gcn): New parameter.
* config/gcn/affinity-fmt.c: New file.
* config/gcn/bar.c: New file.
* config/gcn/bar.h: New file.
* config/gcn/doacross.h: New file.
* config/gcn/icv-device.c: New file.
* config/gcn/oacc-target.c: New file.
* config/gcn/simple-bar.h: New file.
* config/gcn/target.c: New file.
* config/gcn/task.c: New file.
* config/gcn/team.c: New file.
* config/gcn/time.c: New file.
* configure.ac: Add amdgcn*-*-*.
* configure: Regenerate.
* configure.tgt: Add amdgcn*-*-*.
* libgomp-plugin.h (offload_target_type): Add OFFLOAD_TARGET_TYPE_GCN.
* libgomp.h (gcn_thrs): Add amdgcn variant.
(set_gcn_thrs): Likewise.
(gomp_thread): Likewise.
* oacc-int.h (goacc_thread): Likewise.
* oacc-target.c: New file.
* openacc.f90 (acc_device_gcn): New parameter.
* openacc.h (acc_device_t): Add acc_device_gcn.
* team.c (gomp_free_pool_helper): Add amdgcn support.
2019-11-13 Andrew Stubbs <ams@codesourcery.com>
Julian Brown <julian@codesourcery.com>

View File

@ -65,7 +65,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c oacc-profiling.c
affinity-fmt.c teams.c oacc-profiling.c oacc-target.c
include $(top_srcdir)/plugin/Makefrag.am

View File

@ -217,7 +217,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
teams.lo oacc-profiling.lo $(am__objects_1)
teams.lo oacc-profiling.lo oacc-target.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@ -552,7 +552,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
affinity.c target.c splay-tree.c libgomp-plugin.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c oacc-profiling.c $(am__append_3)
affinity-fmt.c teams.c oacc-profiling.c oacc-target.c \
$(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@ -755,6 +756,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@

View File

@ -170,6 +170,9 @@
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if the GCN plugin is built, 0 if not. */
#undef PLUGIN_GCN
/* Define to 1 if the HSA plugin is built, 0 if not. */
#undef PLUGIN_HSA

View File

@ -51,6 +51,7 @@ module openacc_kinds
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
integer (acc_device_kind), parameter :: acc_device_gcn = 8
end module

View File

@ -0,0 +1,51 @@
/* Copyright (C) 2018-2019 Free Software Foundation, Inc.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgomp.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_INTTYPES_H
# include <inttypes.h> /* For PRIx64. */
#endif
#ifdef HAVE_UNAME
#include <sys/utsname.h>
#endif
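/* The HAVE_GETPID and HAVE_GETHOSTNAME configure tests are passing for nvptx,
while the nvptx newlib implementation does not support those functions.
NOTE(review): this wording is carried over from the nvptx port; presumably
the same holds for the GCN newlib -- confirm.
Override the configure test results here. */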
#undef HAVE_GETPID
#undef HAVE_GETHOSTNAME
/* The GCN newlib implementation does not support fwrite, but it does support
   write.  Map fwrite to write.

   The replacement ignores STREAM and always writes to file descriptor 1.
   It evaluates to the result of write, not to the item count that the
   standard fwrite returns.  Any earlier fwrite macro is dropped first so
   that the definition below is not a redefinition.  */
#undef fwrite
#define fwrite(ptr, size, nmemb, stream) write (1, (ptr), (nmemb) * (size))
#include "../../affinity-fmt.c"

232
libgomp/config/gcn/bar.c Normal file
View File

@ -0,0 +1,232 @@
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an AMD GCN specific implementation of a barrier synchronization
mechanism for libgomp. This type is private to the library. This
implementation uses atomic instructions and the s_barrier instruction. It
uses MEMMODEL_RELAXED here because barriers are within workgroups and
therefore don't need to flush caches. */
#include <limits.h>
#include "libgomp.h"
/* Second half of a plain barrier wait.  STATE is the value produced by
   gomp_barrier_wait_start for the calling thread.  The thread that arrived
   last re-arms the arrival counter and publishes the next generation;
   every thread then executes s_barrier.  */
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  bool arrived_last = (state & BAR_WAS_LAST) != 0;

  if (__builtin_expect (arrived_last, 0))
    {
      /* Re-arm the barrier: the next round waits for TOTAL threads again.  */
      bar->awaited = bar->total;

      unsigned int next_generation = bar->generation + BAR_INCR;
      __atomic_store_n (&bar->generation, next_generation, MEMMODEL_RELAXED);
    }

  asm ("s_barrier" ::: "memory");
}
/* Plain barrier wait: record the arrival, then complete the wait.  */
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  gomp_barrier_wait_end (bar, arrival_state);
}
/* Interface contract: like gomp_barrier_wait, except that a thread which
   is not the last one to reach the barrier may return immediately.  The
   thread that is going to gomp_barrier_destroy the barrier calls
   gomp_barrier_wait and all the others call gomp_barrier_wait_last; once
   gomp_barrier_wait has returned, the barrier can safely be destroyed.  */
void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* This port does not take advantage of the early-return permission for
     all-but-last participants: it performs exactly the same full wait as
     gomp_barrier_wait.  The implementation in config/linux/bar.c handles
     this better.  */
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  gomp_barrier_wait_end (bar, arrival_state);
}
/* Wake threads waiting on a team barrier.  Neither BAR nor COUNT is used
   in this implementation: the function only executes an s_barrier
   instruction, with a compiler-level memory clobber.  */
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  asm ("s_barrier" ::: "memory");
}
/* Second half of a team barrier wait.  STATE is the value produced by
   gomp_barrier_wait_start (or gomp_barrier_wait_final_start) for the
   calling thread.

   The thread that arrived last re-arms the arrival counter and then either
   runs outstanding tasks or publishes the next generation and returns.
   Any thread that gets past that block alternates between s_barrier and
   re-reading the generation, running tasks whenever BAR_TASK_PENDING is
   set, until the generation equals STATE (without BAR_CANCELLED) plus
   BAR_INCR.  */
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  /* NOTE(review): GENERATION is only ever written in this function;
     nothing visible here reads it back.  */
  unsigned int generation, gen;
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again. */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
        {
          /* Tasks are outstanding: run them, then join the wait loop below
             like any other thread.  */
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          /* Nothing left to do: publish the next generation with the
             BAR_CANCELLED and BAR_WAS_LAST bits cleared, then rendezvous
             once and return.  */
          state &= ~BAR_CANCELLED;
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
          asm ("s_barrier" ::: "memory");
          return;
        }
    }
  generation = state;
  state &= ~BAR_CANCELLED;
  /* Upper bound on the number of s_barrier rounds before giving up.  */
  int retry = 100;
  do
    {
      if (retry-- == 0)
        {
          /* It really shouldn't happen that barriers get out of sync, but
             if they do then this will loop until they realign, so we need
             to avoid an infinite loop where the thread just isn't there. */
          const char msg[] = ("Barrier sync failed (another thread died?);"
                              " aborting.");
          write (2, msg, sizeof (msg)-1);
          abort();
        }
      asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          /* Help with pending tasks, then look at the generation again.  */
          gomp_barrier_handle_tasks (state);
          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
        }
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  /* Done once the generation is exactly one BAR_INCR step past STATE.  */
  while (gen != state + BAR_INCR);
}
/* Team barrier wait: record the arrival, then complete the team wait.  */
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  gomp_team_barrier_wait_end (bar, arrival_state);
}
/* Team barrier wait that counts arrivals in bar->awaited_final instead of
   bar->awaited.  The last thread to arrive re-arms awaited_final before
   the common team wait is completed.  */
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t final_state = gomp_barrier_wait_final_start (bar);
  bool arrived_last = (final_state & BAR_WAS_LAST) != 0;

  if (__builtin_expect (arrived_last, 0))
    bar->awaited_final = bar->total;

  gomp_team_barrier_wait_end (bar, final_state);
}
/* Cancellable counterpart of gomp_team_barrier_wait_end.  STATE is the
   value produced by gomp_barrier_wait_start for the calling thread.

   Returns true when BAR_CANCELLED is found in STATE or in the barrier's
   generation while waiting, and false when the barrier completes
   normally.  */
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
                                   gomp_barrier_state_t state)
{
  /* NOTE(review): GENERATION is only ever written in this function;
     nothing visible here reads it back.  */
  unsigned int generation, gen;
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again. */
      /* BAR_CANCELLED should never be set in state here, because
         cancellation means that at least one of the threads has been
         cancelled, thus on a cancellable barrier we should never see
         all threads to arrive. */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
        {
          /* Tasks are outstanding: run them, then join the wait loop
             below like any other thread.  */
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          /* Nothing left to do: publish the next generation, rendezvous
             once and report normal completion.  */
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
          asm ("s_barrier" ::: "memory");
          return false;
        }
    }
  /* The barrier was already cancelled when this thread arrived.  */
  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;
  generation = state;
  /* Upper bound on the number of s_barrier rounds before giving up.  */
  int retry = 100;
  do
    {
      if (retry-- == 0)
        {
          /* It really shouldn't happen that barriers get out of sync, but
             if they do then this will loop until they realign, so we need
             to avoid an infinite loop where the thread just isn't there. */
          const char msg[] = ("Barrier sync failed (another thread died?);"
                              " aborting.");
          write (2, msg, sizeof (msg)-1);
          abort();
        }
      asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
      /* Cancellation was requested while this thread was waiting.  */
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
        return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          /* Help with pending tasks, then look at the generation again.  */
          gomp_barrier_handle_tasks (state);
          gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
        }
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  /* Done once the generation is exactly one BAR_INCR step past STATE.  */
  while (gen != state + BAR_INCR);
  return false;
}
/* Cancellable team barrier wait.  Returns whatever
   gomp_team_barrier_wait_cancel_end reports for this thread's arrival.  */
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  return gomp_team_barrier_wait_cancel_end (bar, arrival_state);
}
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
gomp_mutex_lock (&team->task_lock);
if (team->barrier.generation & BAR_CANCELLED)
{
gomp_mutex_unlock (&team->task_lock);
return;
}
team->barrier.generation |= BAR_CANCELLED;
gomp_mutex_unlock (&team->task_lock);
gomp_team_barrier_wake (&team->barrier, INT_MAX);
}

168
libgomp/config/gcn/bar.h Normal file
View File

@ -0,0 +1,168 @@
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is an AMD GCN specific implementation of a barrier synchronization
mechanism for libgomp. This type is private to the library. This
implementation uses atomic instructions and the s_barrier instruction. It
uses MEMMODEL_RELAXED here because barriers are within workgroups and
therefore don't need to flush caches. */
#ifndef GOMP_BARRIER_H
#define GOMP_BARRIER_H 1
#include "mutex.h"
/* State of one barrier, shared by all threads taking part in it.  */
typedef struct
{
  /* Number of threads expected at the barrier; written by
     gomp_barrier_init and gomp_barrier_reinit.  */
  unsigned total;
  /* Generation counter in the high bits, BAR_* flags in the low bits.  */
  unsigned generation;
  /* Arrivals still outstanding; decremented by gomp_barrier_wait_start.  */
  unsigned awaited;
  /* Like AWAITED, but decremented by gomp_barrier_wait_final_start.  */
  unsigned awaited_final;
} gomp_barrier_t;
/* Per-thread snapshot of a barrier's generation word, as returned by the
   gomp_barrier_wait*_start functions; it may additionally carry
   BAR_WAS_LAST.  */
typedef unsigned int gomp_barrier_state_t;
/* The generation field contains a counter in the high bits, with a few
   low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
   share space because WAS_LAST is never stored back to generation. */
/* Stored in generation: see gomp_team_barrier_set_task_pending.  */
#define BAR_TASK_PENDING 1
/* Only in a gomp_barrier_state_t: this thread's arrival was the last.  */
#define BAR_WAS_LAST 1
/* Stored in generation: see gomp_team_barrier_set_waiting_for_tasks.  */
#define BAR_WAITING_FOR_TASK 2
/* Stored in generation: see gomp_team_barrier_cancel.  */
#define BAR_CANCELLED 4
/* Step of the generation counter; the three bits below it are the flags.  */
#define BAR_INCR 8
/* Initialize BAR for COUNT participating threads: generation starts at
   zero and both arrival counters start at COUNT.  */
static inline void
gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
{
  bar->generation = 0;

  bar->total = count;
  bar->awaited_final = count;
  bar->awaited = count;
}
/* Change the number of participating threads of BAR to COUNT.  The
   outstanding-arrival counter is adjusted atomically by the difference
   between the new and the old total (unsigned arithmetic, so a smaller
   COUNT wraps to the equivalent subtraction).  */
static inline void
gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
{
  unsigned adjustment = count - bar->total;

  __atomic_add_fetch (&bar->awaited, adjustment, MEMMODEL_RELAXED);
  bar->total = count;
}
/* Destroy BAR.  This implementation keeps nothing that needs releasing,
   so there is nothing to do.  */
static inline void
gomp_barrier_destroy (gomp_barrier_t *bar)
{
  (void) bar;
}
/* Out-of-line barrier operations, defined in config/gcn/bar.c.  */
/* Plain barriers.  */
extern void gomp_barrier_wait (gomp_barrier_t *);
extern void gomp_barrier_wait_last (gomp_barrier_t *);
extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
/* Team barriers, which also run pending tasks while waiting.  */
extern void gomp_team_barrier_wait (gomp_barrier_t *);
extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
                                        gomp_barrier_state_t);
/* Cancellable team barriers; they return true when cancellation is seen.  */
extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
                                               gomp_barrier_state_t);
extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
/* Only a pointer to struct gomp_team is needed here, so a forward
   declaration is sufficient.  */
struct gomp_team;
extern void gomp_team_barrier_cancel (struct gomp_team *);
/* Record the calling thread's arrival at BAR.  Returns a snapshot of the
   generation word (counter bits plus BAR_CANCELLED), with BAR_WAS_LAST
   or'ed in when this arrival brought bar->awaited down to zero.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_start (gomp_barrier_t *bar)
{
  unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
  /* Keep the counter bits and BAR_CANCELLED; drop the other flag bits.  */
  ret &= -BAR_INCR | BAR_CANCELLED;
  /* A memory barrier is needed before exiting from the various forms
     of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
     2.8.6 flush Construct, which says there is an implicit flush during
     a barrier region.  NOTE(review): this comment used to say that
     MEMMODEL_ACQ_REL is used here, but the decrement below is
     MEMMODEL_RELAXED; the comment at the top of this file explains that
     relaxed ordering is used because barriers stay within a workgroup.  */
  if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_RELAXED) == 0)
    ret |= BAR_WAS_LAST;
  return ret;
}
/* Arrival bookkeeping for cancellable barriers.  This port does exactly
   what gomp_barrier_wait_start does.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
{
  gomp_barrier_state_t arrival_state = gomp_barrier_wait_start (bar);

  return arrival_state;
}
/* This is like gomp_barrier_wait_start, except it decrements
   bar->awaited_final rather than bar->awaited and should be used
   for the gomp_team_end barrier only.  Returns the generation snapshot
   (counter bits plus BAR_CANCELLED), with BAR_WAS_LAST or'ed in when this
   arrival brought bar->awaited_final down to zero.  */
static inline gomp_barrier_state_t
gomp_barrier_wait_final_start (gomp_barrier_t *bar)
{
  unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
  /* Keep the counter bits and BAR_CANCELLED; drop the other flag bits.  */
  ret &= -BAR_INCR | BAR_CANCELLED;
  /* See above gomp_barrier_wait_start comment. */
  if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_RELAXED) == 0)
    ret |= BAR_WAS_LAST;
  return ret;
}
/* Return true when STATE says the calling thread was the last one to
   arrive at the barrier.  */
static inline bool
gomp_barrier_last_thread (gomp_barrier_state_t state)
{
  return (state & BAR_WAS_LAST) != 0;
}
/* All the inlines below must be called with team->task_lock
   held.  */

/* Set the BAR_TASK_PENDING flag in BAR's generation word.  */
static inline void
gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
{
  bar->generation = bar->generation | BAR_TASK_PENDING;
}
/* Clear the BAR_TASK_PENDING flag in BAR's generation word.  Must be
   called with team->task_lock held.  */
static inline void
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
{
  bar->generation = bar->generation & ~BAR_TASK_PENDING;
}
/* Set the BAR_WAITING_FOR_TASK flag in BAR's generation word.  Must be
   called with team->task_lock held.  */
static inline void
gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
{
  bar->generation = bar->generation | BAR_WAITING_FOR_TASK;
}
/* Return true when BAR_WAITING_FOR_TASK is set in BAR's generation word.
   Must be called with team->task_lock held.  */
static inline bool
gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
{
  unsigned waiting_flag = bar->generation & BAR_WAITING_FOR_TASK;

  return waiting_flag != 0;
}
/* Return true when BAR_CANCELLED is set in BAR's generation word; the
   compiler is told that this is the unlikely outcome.  Must be called
   with team->task_lock held.  */
static inline bool
gomp_team_barrier_cancelled (gomp_barrier_t *bar)
{
  bool cancelled = (bar->generation & BAR_CANCELLED) != 0;

  return __builtin_expect (cancelled, 0);
}
/* Mark the barrier round described by STATE as finished: the generation
   becomes STATE's counter bits advanced by one step, with all flag bits
   clear.  Must be called with team->task_lock held.  */
static inline void
gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int counter_bits = state & -BAR_INCR;

  bar->generation = counter_bits + BAR_INCR;
}
#endif /* GOMP_BARRIER_H */

View File

@ -0,0 +1,58 @@
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is the AMD GCN implementation of doacross spinning. */
#ifndef GOMP_DOACROSS_H
#define GOMP_DOACROSS_H 1
#include "libgomp.h"
/* Pause briefly inside a spin loop.  Always returns 0; doacross_spin uses
   the result as an array index.  */
static inline int
cpu_relax (void)
{
  /* This can be implemented as just a memory barrier, but a sleep seems
     like it should allow the wavefront to yield (maybe?)
     Use the shortest possible sleep time of 1*64 cycles. */
  /* The "memory" clobber makes the compiler re-read memory afterwards.  */
  asm volatile ("s_sleep\t1" ::: "memory");
  return 0;
}
/* Spin until the value at ADDR is greater than EXPECTED.  The incoming
   value of CUR is not examined: the location is always re-read at least
   once, after a short pause.  */
static inline void
doacross_spin (unsigned long *addr, unsigned long expected,
               unsigned long cur)
{
  /* Prevent compiler from optimizing based on bounds of containing object.  */
  asm ("" : "+r" (addr));

  for (;;)
    {
      /* An alternative implementation might use s_setprio to lower the
         priority temporarily, and then restore it after.  */
      int idx = cpu_relax ();

      cur = addr[idx];
      if (cur > expected)
        break;
    }
}
#endif /* GOMP_DOACROSS_H */

View File

@ -0,0 +1,72 @@
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file defines OpenMP API entry points that accelerator targets are
expected to replace. */
#include "libgomp.h"
/* Setting the default device does nothing in this implementation:
   DEVICE_NUM is ignored.  */
void
omp_set_default_device (int device_num __attribute__((unused)))
{
}
/* The default device number reported by this implementation is always 0.  */
int
omp_get_default_device (void)
{
  const int default_device = 0;

  return default_device;
}
/* This implementation always reports that there are no devices.  */
int
omp_get_num_devices (void)
{
  const int device_count = 0;

  return device_count;
}
/* Return the number of teams.  gomp_num_teams_var holds the team count
   minus one (GOMP_teams stores num_teams - 1 in it), hence the
   adjustment.  */
int
omp_get_num_teams (void)
{
  return 1 + gomp_num_teams_var;
}
/* Return the calling thread's position in dimension 0, which GOMP_teams
   treats as the workgroup id.
   NOTE(review): the reason for forcing -O2 on this function is not visible
   here -- confirm before changing the attribute.  */
int __attribute__ ((__optimize__ ("O2")))
omp_get_team_num (void)
{
  return __builtin_gcn_dim_pos (0);
}
/* Code running here is never on the initial (host) device, because
   AMD GCN is an accelerator-only target; the answer is always 0.  */
int
omp_is_initial_device (void)
{
  const int on_initial_device = 0;

  return on_initial_device;
}
/* One ialias per entry point defined in this file.
   NOTE(review): ialias is not defined in this file; presumably it comes
   from libgomp.h and creates library-internal aliases -- confirm.  */
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_num_devices)
ialias (omp_get_num_teams)
ialias (omp_get_team_num)
ialias (omp_is_initial_device)

View File

@ -0,0 +1,31 @@
/* Oversized reductions lock variable
Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Graphics.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* We use a global lock variable for reductions on objects larger than
   64 bits. Until and unless proven that lock contention for
   different reductions is a problem, a single lock will suffice. */
/* Starts out as 0.  NOTE(review): nothing in this file uses the variable;
   presumably compiler-generated code refers to it by name and 0 means
   "unlocked" -- confirm.  */
unsigned volatile __reduction_lock = 0;

View File

@ -0,0 +1,61 @@
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is a simplified barrier that is suitable for thread pool
synchronization. Only a subset of the full barrier API (bar.h) is exposed.
Here in the AMD GCN-specific implementation, we expect that the thread pool
corresponds to the wavefronts within a work group. */
#ifndef GOMP_SIMPLE_BARRIER_H
#define GOMP_SIMPLE_BARRIER_H 1
/* AMD GCN has no use for this type. */
/* None of the inline functions in this header reads or writes the object;
   the type exists only so that the simple-barrier interface is complete.  */
typedef int gomp_simple_barrier_t;
/* GCN barriers block all wavefronts, so the count is not interesting. */
/* Consequently this does nothing: both BAR and COUNT are ignored.  */
static inline void
gomp_simple_barrier_init (gomp_simple_barrier_t *bar, unsigned count)
{
}
/* Nothing to release: BAR is ignored.  */
static inline void
gomp_simple_barrier_destroy (gomp_simple_barrier_t *bar)
{
}
/* Wait at the barrier by executing s_barrier.  BAR is ignored.  */
static inline void
gomp_simple_barrier_wait (gomp_simple_barrier_t *bar)
{
  asm volatile ("s_barrier" ::: "memory");
}
/* Same operation as gomp_simple_barrier_wait: a full s_barrier.  BAR is
   ignored.  */
static inline void
gomp_simple_barrier_wait_last (gomp_simple_barrier_t *bar)
{
  /* GCN has no way to signal a barrier without waiting. */
  asm volatile ("s_barrier" ::: "memory");
}
#endif /* GOMP_SIMPLE_BARRIER_H */

View File

@ -0,0 +1,67 @@
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgomp.h"
#include <limits.h>
/* Set up a teams region.

   THREAD_LIMIT, when non-zero, is stored in the thread-limit ICV; values
   above INT_MAX are stored as UINT_MAX.

   NUM_TEAMS is the requested number of teams.  Zero, or a request for at
   least as many teams as there are workgroups, means one team per
   workgroup.  Otherwise, workgroups whose id is not below NUM_TEAMS free
   their thread state and exit.  The resulting team count minus one is
   recorded in gomp_num_teams_var.  */
void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit != 0)
    {
      struct gomp_task_icv *icv = gomp_icv (true);

      if (thread_limit > INT_MAX)
        icv->thread_limit_var = UINT_MAX;
      else
        icv->thread_limit_var = thread_limit;
    }

  unsigned int num_workgroups = __builtin_gcn_dim_size (0);
  unsigned int workgroup_id = __builtin_gcn_dim_pos (0);

  if (num_teams == 0 || num_teams >= num_workgroups)
    num_teams = num_workgroups;
  else if (workgroup_id >= num_teams)
    {
      /* This workgroup is surplus to the requested number of teams.  */
      gomp_free_thread (gcn_thrs ());
      exit (0);
    }

  gomp_num_teams_var = num_teams - 1;
}
/* Pausing resources is not implemented here: KIND and DEVICE_NUM are
   ignored and the call always returns -1.  */
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  const int not_supported = -1;

  (void) device_num;
  (void) kind;
  return not_supported;
}
/* Pausing resources is not implemented here: KIND is ignored and the call
   always returns -1.  */
int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  const int not_supported = -1;

  (void) kind;
  return not_supported;
}
/* NOTE(review): ialias is not defined in this file; presumably it comes
   from libgomp.h and creates library-internal aliases -- confirm.  */
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

39
libgomp/config/gcn/task.c Normal file
View File

@ -0,0 +1,39 @@
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file handles the maintenance of tasks in response to task
creation and termination. */
#include "libgomp.h"
/* AMD GCN is an accelerator-only target, so this should never be called. */
/* DATA is ignored.  The body is only __builtin_unreachable, so actually
   reaching this function is undefined behavior; no value is returned.  */
bool
gomp_target_task_fn (void *data)
{
  __builtin_unreachable ();
}
#include "../../task.c"

202
libgomp/config/gcn/team.c Normal file
View File

@ -0,0 +1,202 @@
/* Copyright (C) 2017-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file handles maintenance of threads on AMD GCN. */
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
/* Defined further down; declared here because gomp_gcn_enter_kernel
   calls it.  */
static void gomp_thread_start (struct gomp_thread_pool *);
/* This externally visible function handles target region entry.  It
   sets up a per-team thread pool and transfers control by returning to
   the kernel in the master thread or gomp_thread_start in other threads.
   The name of this function is part of the interface with the compiler: for
   each OpenMP kernel the compiler configures the stack, then calls here.
   Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue.

   The thread at position 0 of dimension 1 is the master.  It does all the
   allocation and then meets the other threads at an s_barrier, so that the
   pool is published before any other thread looks at it.  */
void
gomp_gcn_enter_kernel (void)
{
  int threadid = __builtin_gcn_dim_pos (1);
  if (threadid == 0)
    {
      int numthreads = __builtin_gcn_dim_size (1);

      /* Set up the global state.
         Every team will do this, but that should be harmless.  */
      gomp_global_icv.nthreads_var = 16;
      gomp_global_icv.thread_limit_var = numthreads;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      /* Allocate and initialize the team-local-storage data.  */
      struct gomp_thread *thrs = gomp_malloc_cleared (sizeof (*thrs)
                                                      * numthreads);
      set_gcn_thrs (thrs);

      /* Allocate and initialize a pool of threads in the team.
         The threads are already running, of course, we just need to manage
         the communication between them.  */
      struct gomp_thread_pool *pool = gomp_malloc (sizeof (*pool));
      pool->threads = gomp_malloc (sizeof (void *) * numthreads);
      for (int tid = 0; tid < numthreads; tid++)
        pool->threads[tid] = &thrs[tid];
      pool->threads_size = numthreads;
      pool->threads_used = numthreads;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, numthreads);
      /* THRS points at element 0, i.e. the master's own entry.  */
      thrs->thread_pool = pool;

      /* Pairs with the s_barrier executed by the other threads below.  */
      asm ("s_barrier" ::: "memory");
      return;  /* Return to kernel.  */
    }
  else
    {
      /* Wait for the master to finish the set-up, then pick the pool up
         from the master's entry.  */
      asm ("s_barrier" ::: "memory");
      gomp_thread_start (gcn_thrs ()[0].thread_pool);
      /* gomp_thread_start does not return.  */
    }
}
/* Kernel epilogue: pass the team's gomp_thread array to gomp_free_thread,
   then release the array itself with free.  */

void
gomp_gcn_exit_kernel (void)
{
  struct gomp_thread *team_threads = gcn_thrs ();

  gomp_free_thread (team_threads);
  free (team_threads);
}
/* Idle loop for the non-master threads of a team.  Each thread parks on
   the pool's dock barrier until it is handed work, runs the work, joins
   the team's final barrier and then goes back to waiting.  POOL is the
   thread pool shared by the whole team.  */

static void
gomp_thread_start (struct gomp_thread_pool *pool)
{
  struct gomp_thread *self = gomp_thread ();

  gomp_sem_init (&self->release, 0);
  self->thread_pool = pool;

  /* There is no normal way out of this loop.  It ends either because "fn"
     was set to "gomp_free_pool_helper" (which executes "s_endpgm"), or
     because the thread woke up too many times without any work and
     concludes that the team master has crashed.  */
  int idle_wakeups_left = 99;

  for (;;)
    {
      gomp_simple_barrier_wait (&pool->threads_dock);

      if (self->fn == NULL)
        {
          /* Woken without work: tolerate a bounded number of these.  */
          if (idle_wakeups_left-- > 0)
            continue;

          const char msg[] = "team master not responding; slave thread aborting";
          write (2, msg, sizeof (msg) - 1);
          abort ();
        }

      /* Run the assigned function, then clear the slot so the next
         wake-up without work is recognised as idle.  */
      self->fn (self->data);
      self->fn = NULL;

      /* Remember the task before the final barrier, then finish it.  */
      struct gomp_task *finished = self->task;
      gomp_team_barrier_wait_final (&self->ts.team->barrier);
      gomp_finish_task (finished);
    }
}
/* Launch a team.

   FN and DATA are the function and argument every non-master thread is to
   run.  NTHREADS is the team size; when it is 1 only the calling thread's
   state is updated.  TEAM is the team being started and TASKGROUP is
   attached to each implicit task.  FLAGS is not referenced by this
   implementation.

   The calling thread becomes team member 0.  Members 1..NTHREADS-1 are
   taken from the calling thread's pool and released through the pool's
   dock barrier.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread *master = gomp_thread ();
  struct gomp_thread_pool *pool = master->thread_pool;
  struct gomp_task *parent = master->task;
  struct gomp_task_icv *icv = parent ? &parent->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = master->ts;

  /* Install the new team state on the calling thread.  */
  master->ts.team = team;
  master->ts.team_id = 0;
  master->ts.level += 1;
  if (nthreads > 1)
    master->ts.active_level += 1;
  master->ts.work_share = &team->work_shares[0];
  master->ts.last_work_share = NULL;
  master->ts.single_count = 0;
  master->ts.static_trip = 0;

  /* Switch the calling thread to implicit task 0.  nthreads_var is read
     before gomp_init_task and written back afterwards, exactly as for
     the other members below.  */
  struct gomp_task *own_task = &team->implicit_task[0];
  master->task = own_task;
  unsigned long nthreads_var = icv->nthreads_var;
  gomp_init_task (own_task, parent, icv);
  own_task->icv.nthreads_var = nthreads_var;
  own_task->taskgroup = taskgroup;

  if (nthreads == 1)
    return;

  /* Release existing idle threads: fill in each member's team state,
     implicit task and work before waking them all up.  */
  for (unsigned id = 1; id < nthreads; ++id)
    {
      struct gomp_thread *member = pool->threads[id];
      struct gomp_task *member_task = &team->implicit_task[id];

      member->ts.team = team;
      member->ts.work_share = &team->work_shares[0];
      member->ts.last_work_share = NULL;
      member->ts.team_id = id;
      member->ts.level = team->prev_ts.level + 1;
      member->ts.active_level = master->ts.active_level;
      member->ts.single_count = 0;
      member->ts.static_trip = 0;

      member->task = member_task;
      gomp_init_task (member_task, parent, icv);
      member_task->icv.nthreads_var = nthreads_var;
      member_task->taskgroup = taskgroup;

      member->fn = fn;
      member->data = data;
      team->ordered_release[id] = &member->release;
    }

  /* Join the dock barrier the idle threads are waiting on.  */
  gomp_simple_barrier_wait (&pool->threads_dock);
}
#include "../../team.c"

52
libgomp/config/gcn/time.c Normal file
View File

@ -0,0 +1,52 @@
/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file implements timer routines for AMD GCN. */
#include "libgomp.h"
/* According to AMD:
dGPU RTC is 27MHz
AGPU RTC is 100MHz
FIXME: DTRT on an APU. */
#define RTC_TICKS (1.0 / 27000000.0) /* 27MHz */
double
omp_get_wtime (void)
{
uint64_t clock;
asm ("s_memrealtime %0\n\t"
"s_waitcnt 0" : "=r" (clock));
return clock * RTC_TICKS;
}
/* Return the timer resolution: RTC_TICKS, the same per-tick scale factor
   that omp_get_wtime applies to the raw counter value.  */
double
omp_get_wtick (void)
{
return RTC_TICKS;
}
/* NOTE(review): ialias is not defined in this file; presumably it comes
   from libgomp.h and sets up library-internal aliases for these two entry
   points -- confirm against the macro definition.  */
ialias (omp_get_wtime)
ialias (omp_get_wtick)

2
libgomp/configure vendored
View File

@ -14921,7 +14921,7 @@ case "$host" in
*-*-rtems*)
# RTEMS supports Pthreads, but the library is not available at GCC build time.
;;
nvptx*-*-*)
nvptx*-*-* | amdgcn*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no
# NVPTX is an accelerator-only target

View File

@ -176,7 +176,7 @@ case "$host" in
*-*-rtems*)
# RTEMS supports Pthreads, but the library is not available at GCC build time.
;;
nvptx*-*-*)
nvptx*-*-* | amdgcn*-*-*)
# NVPTX does not support Pthreads, has its own code replacement.
libgomp_use_pthreads=no
# NVPTX is an accelerator-only target

View File

@ -164,6 +164,10 @@ case "${target}" in
fi
;;
amdgcn*-*-*)
config_path="gcn accel"
;;
*)
;;

View File

@ -50,7 +50,8 @@ enum offload_target_type
/* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
OFFLOAD_TARGET_TYPE_HSA = 7
OFFLOAD_TARGET_TYPE_HSA = 7,
OFFLOAD_TARGET_TYPE_GCN = 8
};
/* Opaque type to represent plugin-dependent implementation of an

View File

@ -692,6 +692,24 @@ static inline struct gomp_thread *gomp_thread (void)
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
return nvptx_thrs + tid;
}
#elif defined __AMDGCN__
static inline struct gomp_thread *gcn_thrs (void)
{
/* The value is at the bottom of LDS. */
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
return *thrs;
}
static inline void set_gcn_thrs (struct gomp_thread *val)
{
/* The value is at the bottom of LDS. */
struct gomp_thread * __lds *thrs = (struct gomp_thread * __lds *)4;
*thrs = val;
}
static inline struct gomp_thread *gomp_thread (void)
{
int tid = __builtin_gcn_dim_pos(1);
return gcn_thrs () + tid;
}
#elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct gomp_thread gomp_tls_data;
static inline struct gomp_thread *gomp_thread (void)

View File

@ -82,7 +82,14 @@ struct goacc_thread
void *target_tls;
};
#if defined HAVE_TLS || defined USE_EMUTLS
#ifdef __AMDGCN__
/* AMD GCN variant of goacc_thread.  It takes no arguments and always
   returns a null pointer (the literal 0), so the result must not be
   dereferenced.  */
static inline struct goacc_thread *
goacc_thread (void)
{
/* Unused in the offload libgomp for OpenACC: return a dummy value. */
return 0;
}
#elif defined HAVE_TLS || defined USE_EMUTLS
extern __thread struct goacc_thread *goacc_tls_data;
static inline struct goacc_thread *
goacc_thread (void)

1
libgomp/oacc-target.c Normal file
View File

@ -0,0 +1 @@
/* Nothing needed here. */

View File

@ -46,6 +46,7 @@ module openacc_kinds
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
integer (acc_device_kind), parameter :: acc_device_gcn = 8
public :: acc_handle_kind

View File

@ -55,6 +55,7 @@ typedef enum acc_device_t {
/* acc_device_host_nonshm = 3 removed. */
acc_device_not_host = 4,
acc_device_nvidia = 5,
acc_device_gcn = 8,
_ACC_device_hwm,
/* Ensure enumeration is layout compatible with int. */
_ACC_highest = __INT_MAX__,

View File

@ -239,6 +239,9 @@ gomp_free_pool_helper (void *thread_pool)
pthread_exit (NULL);
#elif defined(__nvptx__)
asm ("exit;");
#elif defined(__AMDGCN__)
asm ("s_dcache_wb\n\t"
"s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif