amdgcn: libgomp plugin USM implementation

Implement the Unified Shared Memory API calls in the GCN plugin.

The allocate and free are pretty straight-forward because all "target" memory
allocations are compatible with USM, on the right hardware.  However, there's
no known way to check what memory region was used, after the fact, so we use a
splay tree to record allocations so we can answer "is_usm_ptr" later.

libgomp/ChangeLog:

	* plugin/plugin-gcn.c (struct usm_splay_tree_key_s): New.
	(usm_splay_compare): New.
	(splay_tree_prefix): New.
	(GOMP_OFFLOAD_usm_alloc): New.
	(GOMP_OFFLOAD_usm_free): New.
	(GOMP_OFFLOAD_is_usm_ptr): New.
	(GOMP_OFFLOAD_supported_features): Move into the OpenMP API fold.
	Add GOMP_REQUIRES_UNIFIED_ADDRESS and
	GOMP_REQUIRES_UNIFIED_SHARED_MEMORY.
	(gomp_fatal): New.
	(splay_tree_c): New.
	* testsuite/lib/libgomp.exp (check_effective_target_omp_usm): New.
	* testsuite/libgomp.c++/usm-1.C: Use dg-require-effective-target.
	* testsuite/libgomp.c-c++-common/requires-1.c: Likewise.
	* testsuite/libgomp.c/usm-1.c: Likewise.
	* testsuite/libgomp.c/usm-2.c: Likewise.
	* testsuite/libgomp.c/usm-3.c: Likewise.
	* testsuite/libgomp.c/usm-4.c: Likewise.
	* testsuite/libgomp.c/usm-5.c: Likewise.
	* testsuite/libgomp.c/usm-6.c: Likewise.
This commit is contained in:
Andrew Stubbs 2022-06-20 15:51:15 +01:00
parent d1eb334f7b
commit cdddaf7fdf
10 changed files with 128 additions and 11 deletions

View File

@ -3825,6 +3825,89 @@ GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
return !isa || isa_code (isa) == agent->device_isa;
}
/* Use a splay tree to track USM allocations. */
typedef struct usm_splay_tree_node_s *usm_splay_tree_node;
typedef struct usm_splay_tree_s *usm_splay_tree;
typedef struct usm_splay_tree_key_s *usm_splay_tree_key;
struct usm_splay_tree_key_s {
void *addr;
size_t size;
};
static inline int
usm_splay_compare (usm_splay_tree_key x, usm_splay_tree_key y)
{
if ((x->addr <= y->addr && x->addr + x->size > y->addr)
|| (y->addr <= x->addr && y->addr + y->size > x->addr))
return 0;
return (x->addr > y->addr ? 1 : -1);
}
#define splay_tree_prefix usm
#include "../splay-tree.h"
static struct usm_splay_tree_s usm_map = { NULL };
/* Allocate memory suitable for Unified Shared Memory.
In fact, AMD memory need only be "coarse grained", which target
allocations already are. We do need to track allocations so that
GOMP_OFFLOAD_is_usm_ptr can look them up. */
void *
GOMP_OFFLOAD_usm_alloc (int device, size_t size)
{
void *ptr = GOMP_OFFLOAD_alloc (device, size);
usm_splay_tree_node node = malloc (sizeof (struct usm_splay_tree_node_s));
node->key.addr = ptr;
node->key.size = size;
node->left = NULL;
node->right = NULL;
usm_splay_tree_insert (&usm_map, node);
return ptr;
}
/* Free memory allocated via GOMP_OFFLOAD_usm_alloc. */
bool
GOMP_OFFLOAD_usm_free (int device, void *ptr)
{
struct usm_splay_tree_key_s key = { ptr, 1 };
usm_splay_tree_key node = usm_splay_tree_lookup (&usm_map, &key);
if (node)
{
usm_splay_tree_remove (&usm_map, &key);
free (node);
}
return GOMP_OFFLOAD_free (device, ptr);
}
/* True if the memory was allocated via GOMP_OFFLOAD_usm_alloc. */
bool
GOMP_OFFLOAD_is_usm_ptr (void *ptr)
{
struct usm_splay_tree_key_s key = { ptr, 1 };
return usm_splay_tree_lookup (&usm_map, &key);
}
/* Indicate which GOMP_REQUIRES_* features are supported. */
bool
GOMP_OFFLOAD_supported_features (unsigned int *mask)
{
*mask &= ~(GOMP_REQUIRES_UNIFIED_ADDRESS
| GOMP_REQUIRES_UNIFIED_SHARED_MEMORY);
return (*mask == 0);
}
/* }}} */
/* {{{ OpenACC Plugin API */
@ -4126,12 +4209,19 @@ GOMP_OFFLOAD_openacc_destroy_thread_data (void *data)
free (data);
}
/* Indicate which GOMP_REQUIRES_* features are supported, currently none. */
bool
GOMP_OFFLOAD_supported_features (unsigned int *mask)
{
return (*mask == 0);
}
/* }}} */
/* {{{ USM splay tree */
/* Include this now so that splay-tree.c doesn't include it later. This
avoids a conflict with splay_tree_prefix. */
#include "libgomp.h"
/* This allows splay-tree.c to call gomp_fatal in this context. The splay
tree code doesn't use the variadic arguments right now. */
#define gomp_fatal(MSG, ...) GOMP_PLUGIN_fatal (MSG)
/* Include the splay tree code inline, with the prefixes added. */
#define splay_tree_prefix usm
#define splay_tree_c
#include "../splay-tree.h"
/* }}} */

View File

@ -537,3 +537,25 @@ int main() {
return 0;
} } "-lcuda -lcudart" ]
}
# return 1 if OpenMP Unified Share Memory is supported
proc check_effective_target_omp_usm { } {
if { [libgomp_check_effective_target_offload_target "nvptx"] } {
return 1
}
if { [libgomp_check_effective_target_offload_target "amdgcn"] } {
return [check_no_compiler_messages omp_usm executable {
#pragma omp requires unified_shared_memory
int main () {
#pragma omp target
;
return 0;
}
}]
}
return 0
}

View File

@ -1,5 +1,5 @@
/* { dg-do run } */
/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
/* { dg-require-effective-target omp_usm } */
#include <stdint.h>
#pragma omp requires unified_shared_memory

View File

@ -1,4 +1,5 @@
/* { dg-additional-sources requires-1-aux.c } */
/* { dg-require-effective-target omp_usm } */
#pragma omp requires unified_shared_memory

View File

@ -1,4 +1,5 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
#include <omp.h>
#include <stdint.h>

View File

@ -1,4 +1,5 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
#include <omp.h>
#include <stdint.h>

View File

@ -1,4 +1,5 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
#include <omp.h>
#include <stdint.h>

View File

@ -1,4 +1,5 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
#include <omp.h>
#include <stdint.h>

View File

@ -1,5 +1,5 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
/* { dg-require-effective-target omp_usm } */
#include <omp.h>
#include <stdint.h>

View File

@ -1,5 +1,5 @@
/* { dg-do run } */
/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
/* { dg-require-effective-target omp_usm } */
#include <stdint.h>
#include <stdlib.h>