gcc/liboffloadmic/runtime/offload_omp_host.cpp
Ilya Verbin df26a50d0d backport: Makefile.am (myo_inc_dir): Remove.
Merge liboffloadmic from upstream

liboffloadmic/
	* Makefile.am (myo_inc_dir): Remove.
	(toolexeclib_LTLIBRARIES): Remove libmyo-client.la and
	libmyo-service.la.
	(liboffloadmic_cppflags): Remove -DMYO_SUPPORT.
	(liboffloadmic_host_la_SOURCES): Remove offload_myo_host.cpp.
	(liboffloadmic_target_la_SOURCES): Remove offload_myo_target.cpp.
	(liboffloadmic_target_la_LIBADD): Remove libmyo-service.la.
	(libmyo_client_la_SOURCES, libmyo_service_la_SOURCES): Remove.
	(libmyo_client_la_DEPENDENCIES, libmyo_service_la_DEPENDENCIES): Remove.
	(libmyo_client_la_CPPFLAGS, libmyo_service_la_CPPFLAGS): Remove.
	(libmyo_client_la_LDFLAGS, libmyo_service_la_LDFLAGS): Remove.
	* Makefile.in: Regenerate.
	* doc/doxygen/header.tex: Merge from upstream, version 20160715
	<https://openmprtl.org/sites/default/files/liboffload_oss_20160715.tgz>.
	* runtime/cean_util.cpp: Likewise.
	* runtime/cean_util.h: Likewise.
	* runtime/coi/coi_client.cpp: Likewise.
	* runtime/coi/coi_client.h: Likewise.
	* runtime/coi/coi_server.cpp: Likewise.
	* runtime/coi/coi_server.h: Likewise.
	* runtime/compiler_if_host.cpp: Likewise.
	* runtime/compiler_if_host.h: Likewise.
	* runtime/compiler_if_target.cpp: Likewise.
	* runtime/compiler_if_target.h: Likewise.
	* runtime/dv_util.cpp: Likewise.
	* runtime/dv_util.h: Likewise.
	* runtime/liboffload_error.c: Likewise.
	* runtime/liboffload_error_codes.h: Likewise.
	* runtime/liboffload_msg.c: Likewise.
	* runtime/liboffload_msg.h: Likewise.
	* runtime/mic_lib.f90: Likewise.
	* runtime/offload.h: Likewise.
	* runtime/offload_common.cpp: Likewise.
	* runtime/offload_common.h: Likewise.
	* runtime/offload_engine.cpp: Likewise.
	* runtime/offload_engine.h: Likewise.
	* runtime/offload_env.cpp: Likewise.
	* runtime/offload_env.h: Likewise.
	* runtime/offload_host.cpp: Likewise.
	* runtime/offload_host.h: Likewise.
	* runtime/offload_iterator.h: Likewise.
	* runtime/offload_myo_host.cpp: Likewise.
	* runtime/offload_myo_host.h: Likewise.
	* runtime/offload_myo_target.cpp: Likewise.
	* runtime/offload_myo_target.h: Likewise.
	* runtime/offload_omp_host.cpp: Likewise.
	* runtime/offload_omp_target.cpp: Likewise.
	* runtime/offload_orsl.cpp: Likewise.
	* runtime/offload_orsl.h: Likewise.
	* runtime/offload_table.cpp: Likewise.
	* runtime/offload_table.h: Likewise.
	* runtime/offload_target.cpp: Likewise.
	* runtime/offload_target.h: Likewise.
	* runtime/offload_target_main.cpp: Likewise.
	* runtime/offload_timer.h: Likewise.
	* runtime/offload_timer_host.cpp: Likewise.
	* runtime/offload_timer_target.cpp: Likewise.
	* runtime/offload_trace.cpp: Likewise.
	* runtime/offload_trace.h: Likewise.
	* runtime/offload_util.cpp: Likewise.
	* runtime/offload_util.h: Likewise.
	* runtime/ofldbegin.cpp: Likewise.
	* runtime/ofldend.cpp: Likewise.
	* runtime/orsl-lite/include/orsl-lite.h: Likewise.
	* runtime/orsl-lite/lib/orsl-lite.c: Likewise.
	* runtime/use_mpss2.txt: Remove.
	* include/coi/common/COIEngine_common.h: Merge from upstream, MPSS
	version 3.7.1
	<http://registrationcenter-download.intel.com/akdlm/irc_nas/9226/
	mpss-3.7.1-linux.tar>.
	* include/coi/common/COIEvent_common.h: Likewise.
	* include/coi/common/COIMacros_common.h: Likewise.
	* include/coi/common/COIPerf_common.h: Likewise.
	* include/coi/common/COIResult_common.h: Likewise.
	* include/coi/common/COISysInfo_common.h: Likewise.
	* include/coi/common/COITypes_common.h: Likewise.
	* include/coi/sink/COIBuffer_sink.h: Likewise.
	* include/coi/sink/COIPipeline_sink.h: Likewise.
	* include/coi/sink/COIProcess_sink.h: Likewise.
	* include/coi/source/COIBuffer_source.h: Likewise.
	* include/coi/source/COIEngine_source.h: Likewise.
	* include/coi/source/COIEvent_source.h: Likewise.
	* include/coi/source/COIPipeline_source.h: Likewise.
	* include/coi/source/COIProcess_source.h: Likewise.
	* include/myo/myo.h: Remove.
	* include/myo/myoimpl.h: Remove.
	* include/myo/myotypes.h: Remove.
	* plugin/Makefile.am (AM_LDFLAGS): Remove -lmyo-service.
	* plugin/Makefile.in: Regenerate.
	* plugin/libgomp-plugin-intelmic.cpp (LD_LIBRARY_PATH_ENV): Remove.
	(MIC_LD_LIBRARY_PATH_ENV): Remove.
	(init): Do not set MIC_LD_LIBRARY_PATH.  Now liboffloadmic uses only
	LD_LIBRARY_PATH.
	* plugin/offload_target_main.cpp: Update copyright years.
	* runtime/emulator/coi_common.h: Likewise.
	* runtime/emulator/coi_device.cpp: Likewise.
	* runtime/emulator/coi_device.h: Likewise.
	* runtime/emulator/coi_host.cpp: Likewise.
	(COIBufferCreate): Allow COI_BUFFER_OPENCL.
	(COIEngineGetInfo): Return COI_DEVICE_KNL instead of COI_ISA_x86_64.
	* runtime/emulator/coi_host.h: Update copyright years.
	* runtime/emulator/coi_version_asm.h: Likewise.
	* runtime/emulator/coi_version_linker_script.map: Likewise.
	* runtime/emulator/myo_client.cpp: Remove.
	* runtime/emulator/myo_service.cpp: Remove.
	* runtime/emulator/myo_service.h: Remove.
	* runtime/emulator/myo_version_asm.h: Remove.
	* runtime/emulator/myo_version_linker_script.map: Remove.

From-SVN: r238603
2016-07-21 18:41:27 +00:00

1268 lines
39 KiB
C++

/*
Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <omp.h>
//#include <stdlib.h>
//#include "offload.h"
#include "compiler_if_host.h"
// OpenMP API
// Stores `num` in __omp_device_num as the default offload device.
// Negative values are ignored and leave the current default untouched.
void omp_set_default_device(int num) __GOMP_NOTHROW
{
    if (num < 0) {
        return;
    }
    __omp_device_num = num;
}
// Reports the device number currently stored in __omp_device_num.
int omp_get_default_device(void) __GOMP_NOTHROW
{
    const int current_default = __omp_device_num;
    return current_default;
}
// Initializes the offload library and then reports mic_engines_total.
// The library is initialized first so the count is read only afterwards.
int omp_get_num_devices() __GOMP_NOTHROW
{
    __offload_init_library();

    const int device_count = mic_engines_total;
    return device_count;
}
// OpenMP 4.5 APIs
// COI supports 3-dim multiD transfers.
// MAX_ARRAY_RANK is the largest num_dims accepted by omp_target_memcpy_rect,
// and the value that function returns when both dst and src are null.
#define MAX_ARRAY_RANK 3
// Reports the device number that the routines in this file treat as the
// host (CPU): always -1.
int omp_get_initial_device(
    void
) __GOMP_NOTHROW
{
    const int host_device_num = -1;
    return host_device_num;
}
// Allocates `size` bytes for use on device `device_num`.
//
//   device_num == -1  : the memory comes from host malloc().
//   device_num >= 0   : the target entry "omp_target_alloc_target" is run
//                       with `size` as input and the resulting pointer as
//                       output.
//   device_num <  -1  : reported as an invalid device number; the process
//                       exits with status 1.
//
// Returns the allocated pointer, or null when malloc() fails or when the
// target could not be acquired (the result then keeps its initial value 0).
void* omp_target_alloc(
    size_t size,
    int device_num
) __GOMP_NOTHROW
{
    __offload_init_library();

    // "%lld" expects a long long argument; size_t is not guaranteed to have
    // that type or width, so convert explicitly instead of passing it raw.
    OFFLOAD_TRACE(2, "omp_target_alloc(%lld, %d)\n",
                  (long long)size, device_num);

    if (device_num < -1) {
        LIBOFFLOAD_ERROR(c_invalid_device_number);
        exit(1);
    }

    void* result = 0;

    // malloc on CPU
    if (device_num == -1) {
        // We do not check for malloc returning NULL because the
        // specification of this API includes the possibility of failure.
        // The user will check the returned result
        result = malloc(size);
        return result;
    }

    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
        TARGET_MIC, device_num, 0, NULL, __func__, 0);
    if (ofld != 0) {
        VarDesc vars[2] = {0};

        // Input: requested byte count.
        vars[0].type.src = c_data;
        vars[0].type.dst = c_data;
        vars[0].direction.bits = c_parameter_in;
        vars[0].size = sizeof(size);
        vars[0].count = 1;
        vars[0].ptr = &size;

        // Output: pointer produced on the target.
        vars[1].type.src = c_data;
        vars[1].type.dst = c_data;
        vars[1].direction.bits = c_parameter_out;
        vars[1].size = sizeof(result);
        vars[1].count = 1;
        vars[1].ptr = &result;

        OFFLOAD_OFFLOAD(ofld, "omp_target_alloc_target",
                        0, 2, vars, NULL, 0, 0, 0);
    }
    return result;
}
// Releases memory referred to by `device_ptr` on device `device_num`.
//
//   device_num == -1  : the pointer is handed to host free().
//   device_num >= 0   : the target entry "omp_target_free_target" is run
//                       with the pointer value as its single input.
//   device_num <  -1  : reported as an invalid device number; the process
//                       exits with status 1.
//
// Nothing is done when the target cannot be acquired.
void omp_target_free(
    void *device_ptr,
    int device_num
) __GOMP_NOTHROW
{
    __offload_init_library();

    OFFLOAD_TRACE(2, "omp_target_free(%p, %d)\n", device_ptr, device_num);

    if (device_num < -1) {
        LIBOFFLOAD_ERROR(c_invalid_device_number);
        exit(1);
    }

    // free on CPU
    const bool on_host = (device_num == -1);
    if (on_host) {
        free(device_ptr);
        return;
    }

    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
        TARGET_MIC, device_num, 0, NULL, __func__, 0);
    if (!ofld) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *ptr_arg = &vars[0];

    // The pointer value itself (not the pointed-to data) is the input.
    ptr_arg->type.src = c_data;
    ptr_arg->type.dst = c_data;
    ptr_arg->direction.bits = c_parameter_in;
    ptr_arg->size = sizeof(device_ptr);
    ptr_arg->count = 1;
    ptr_arg->ptr = &device_ptr;

    OFFLOAD_OFFLOAD(ofld, "omp_target_free_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Reports whether `ptr` has an entry in the pointer table of device
// `device_num`.
//
// Returns true (1) when find_ptr_data() yields an entry, false (0) when it
// does not or when device_num is -1 (the host).  A device number below -1
// is reported as invalid and the process exits with status 1.
//
// NOTE(review): device_num is used to index mic_engines without an upper
// bound check in this function -- confirm callers guarantee the range.
int omp_target_is_present(
    void *ptr,
    int device_num
) __GOMP_NOTHROW
{
    __offload_init_library();

    OFFLOAD_TRACE(2, "omp_target_is_present(%p, %d)\n", ptr, device_num);

    if (device_num < -1) {
        LIBOFFLOAD_ERROR(c_invalid_device_number);
        exit(1);
    }

    if (device_num == -1) {
        return false;
    }

    // If OpenMP allows wrap-around for device numbers, enable next line
    //device_num %= mic_engines_total;

    // lookup existing association in pointer table
    PtrData* entry = mic_engines[device_num].find_ptr_data(ptr);
    const bool is_mapped = (entry != 0);

    if (is_mapped) {
        OFFLOAD_TRACE(3, "Address %p found mapped on device %d\n",
                      ptr, device_num);
    } else {
        OFFLOAD_TRACE(3, "Address %p is not mapped on device %d\n",
                      ptr, device_num);
    }
    return is_mapped;
}
// Copies `length` bytes from src+src_offset on `src_device` to
// dst+dst_offset on `dst_device`.  A device number of -1 denotes the host.
//
//   CPU  -> CPU  : plain memcpy().
//   CPU  -> MIC  : a COI buffer is created over the sink memory and written.
//   MIC  -> CPU  : a COI buffer is created over the sink memory and read.
//   MIC  -> same MIC : the target entry "omp_target_memcpy_target" is run
//                      with the two addresses and the length as inputs.
//   MICx -> MICy : the data is staged through a temporary host buffer using
//                  two recursive calls.
//
// Returns 0 on success and 1 on failure.  A device number below -1 is
// reported as invalid and the process exits with status 1.
int omp_target_memcpy(
    void *dst,
    void *src,
    size_t length,
    size_t dst_offset,
    size_t src_offset,
    int dst_device,
    int src_device
) __GOMP_NOTHROW
{
    __offload_init_library();

    // "%lld" expects long long; size_t arguments are converted explicitly
    // so the variadic arguments match the format.
    OFFLOAD_TRACE(2, "omp_target_memcpy(%p, %p, %lld, %lld, %lld, %d, %d)\n",
                  dst, src, (long long)length, (long long)dst_offset,
                  (long long)src_offset, dst_device, src_device);

    if (dst_device < -1 || src_device < -1) {
        LIBOFFLOAD_ERROR(c_invalid_device_number);
        exit(1);
    }

    char* srcp = (char *)src + src_offset;
    char* dstp = (char *)dst + dst_offset;

    if (src_device == -1 && dst_device == -1) {
        // CPU -> CPU
        memcpy(dstp, srcp, length);
        return 0;
    }

    if (src_device == -1) {
        // CPU -> MIC
        // COIBufferWrite
        // If OpenMP allows wrap-around for device numbers, enable next line
        //dst_device %= mic_engines_total;

        // "%llx" expects unsigned long long, so the address is converted.
        OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
                      (unsigned long long)(uintptr_t)dstp);
        COIBUFFER mic_buf;
        COIRESULT res = COI::BufferCreateFromMemory(length,
                            COI_BUFFER_NORMAL, COI_SINK_MEMORY, dstp,
                            1, &mic_engines[dst_device].get_process(),
                            &mic_buf);
        if (res != COI_SUCCESS) {
            LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
            return 1;
        }
        res = COI::BufferWrite(mic_buf, 0, srcp, length,
                               COI_COPY_UNSPECIFIED, 0, 0, 0);
        if (res != COI_SUCCESS) {
            LIBOFFLOAD_ERROR(c_buf_write, res);
            // The buffer was created successfully; release it so a failed
            // transfer does not leak it.  The write error has already been
            // reported, so the destroy result is not examined.
            COI::BufferDestroy(mic_buf);
            return 1;
        }
        res = COI::BufferDestroy(mic_buf);
        if (res != COI_SUCCESS) {
            LIBOFFLOAD_ERROR(c_buf_destroy, res);
            return 1;
        }
        return 0;
    }

    if (dst_device == -1) {
        // MIC -> CPU
        // COIBufferRead
        // If OpenMP allows wrap-around for device numbers, enable next line
        //src_device %= mic_engines_total;

        OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
                      (unsigned long long)(uintptr_t)srcp);
        COIBUFFER mic_buf;
        COIRESULT res = COI::BufferCreateFromMemory(length,
                            COI_BUFFER_NORMAL, COI_SINK_MEMORY, srcp,
                            1, &mic_engines[src_device].get_process(),
                            &mic_buf);
        if (res != COI_SUCCESS) {
            LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
            return 1;
        }
        res = COI::BufferRead(mic_buf, 0, dstp, length,
                              COI_COPY_UNSPECIFIED, 0, 0, 0);
        if (res != COI_SUCCESS) {
            LIBOFFLOAD_ERROR(c_buf_read, res);
            // Release the buffer on the failure path as well (see above).
            COI::BufferDestroy(mic_buf);
            return 1;
        }
        res = COI::BufferDestroy(mic_buf);
        if (res != COI_SUCCESS) {
            LIBOFFLOAD_ERROR(c_buf_destroy, res);
            return 1;
        }
        return 0;
    }

    // some MIC -> some MIC
    if (src_device == dst_device) {
        // MIC local copy will be done as remote memcpy
        OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
                                              0, NULL, __func__, 0);
        if (!ofld) {
            return 1;
        }

        VarDesc vars[3] = {0};

        // Input: destination address on the device.
        vars[0].type.src = c_data;
        vars[0].type.dst = c_data;
        vars[0].direction.bits = c_parameter_in;
        vars[0].size = sizeof(dstp);
        vars[0].count = 1;
        vars[0].ptr = &dstp;

        // Input: source address on the device.
        vars[1].type.src = c_data;
        vars[1].type.dst = c_data;
        vars[1].direction.bits = c_parameter_in;
        vars[1].size = sizeof(srcp);
        vars[1].count = 1;
        vars[1].ptr = &srcp;

        // Input: byte count.
        vars[2].type.src = c_data;
        vars[2].type.dst = c_data;
        vars[2].direction.bits = c_parameter_in;
        vars[2].size = sizeof(length);
        vars[2].count = 1;
        vars[2].ptr = &length;

        // The outcome of the offload itself is not inspected here.
        OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_target",
                        0, 3, vars, NULL, 0, 0, 0);
        return 0;
    }

    // MICx -> MICy
    // Allocate CPU buffer
    char *cpu_mem = (char *)malloc(length);
    if (cpu_mem == 0) {
        LIBOFFLOAD_ERROR(c_malloc);
        return 1;
    }

    // Offsets are already folded into srcp/dstp, so both hops use offset 0.
    int retval = 1;
    if (omp_target_memcpy(
            cpu_mem, srcp, length, 0, 0, -1, src_device) == 0) {
        retval = omp_target_memcpy(
                     dstp, cpu_mem, length, 0, 0, dst_device, -1);
    }
    free(cpu_mem);
    return retval;
}
// Returns the number of bytes spanned by one index step along the first
// of `num_dims` dimensions: element_size multiplied by dimensions[1]
// through dimensions[num_dims-1].  dimensions[0] is never read, and for
// num_dims <= 1 the result is simply element_size.
static size_t bytesize_at_this_dimension(
    size_t element_size,
    int num_dims,
    const size_t* dimensions
)
{
    size_t bytes = element_size;
    for (int axis = 1; axis < num_dims; axis++) {
        bytes *= dimensions[axis];
    }
    return bytes;
}
// Host-side copy of a rectangular sub-volume between two arrays.
//
//   dst, src         base addresses of the destination and source arrays
//   element_size     size in bytes of one array element
//   num_dims         number of dimensions described by the arrays below
//   volume           extent of the copied region in each dimension
//   dst_offsets,
//   src_offsets      starting index of the region in each dimension
//   dst_dimensions,
//   src_dimensions   full extent of each array in each dimension
//
// For num_dims > 1 the function recurses once per index of the leading
// dimension; the innermost dimension is copied with a single memcpy().
static void memcpy_rect(
    char *dst,
    char *src,
    size_t element_size,
    int num_dims,
    const size_t *volume,
    const size_t *dst_offsets,
    const size_t *src_offsets,
    const size_t *dst_dimensions,
    const size_t *src_dimensions
)
{
    if (num_dims > 1) {
        // Bytes covered by one step along the leading dimension.
        const size_t dst_slice_bytes =
            bytesize_at_this_dimension(element_size, num_dims, dst_dimensions);
        const size_t src_slice_bytes =
            bytesize_at_this_dimension(element_size, num_dims, src_dimensions);

        // The extent and offsets are size_t values; keeping the counters in
        // size_t avoids truncating them the way an int would for large
        // arrays.
        size_t dst_index = dst_offsets[0];
        size_t src_index = src_offsets[0];
        for (size_t remaining = volume[0]; remaining > 0;
             remaining--, dst_index++, src_index++) {
            memcpy_rect(dst + dst_slice_bytes * dst_index,
                        src + src_slice_bytes * src_index,
                        element_size, num_dims - 1, volume + 1,
                        dst_offsets + 1, src_offsets + 1,
                        dst_dimensions + 1, src_dimensions + 1);
        }
    } else {
        // Innermost dimension: the selected elements are contiguous.
        memcpy(dst + dst_offsets[0] * element_size,
               src + src_offsets[0] * element_size,
               element_size * volume[0]);
    }
}
int omp_target_memcpy_rect(
void *dst_,
void *src_,
size_t element_size,
int num_dims,
const size_t *volume,
const size_t *dst_offsets,
const size_t *src_offsets,
const size_t *dst_dimensions,
const size_t *src_dimensions,
int dst_device,
int src_device
) __GOMP_NOTHROW
{
char *dst = (char *)dst_;
char *src = (char *)src_;
__offload_init_library();
OFFLOAD_TRACE(2, "omp_target_memcpy_rect(%p, %p, %lld, %d, "
"%p, %p, %p, %p, %p, %d, %d)\n",
dst, src, element_size, num_dims,
volume, dst_offsets, src_offsets,
dst_dimensions, src_dimensions, dst_device, src_device);
// MAX_ARRAY_RANK dimensions are supported
if (dst == 0 && src == 0) {
return MAX_ARRAY_RANK;
}
if (num_dims < 1 || num_dims > MAX_ARRAY_RANK ||
element_size < 1 ||
volume == 0 || dst_offsets == 0 || src_offsets == 0 ||
dst_dimensions == 0 || src_dimensions == 0) {
return 1;
}
if (dst_device < -1 || src_device < -1) {
LIBOFFLOAD_ERROR(c_invalid_device_number);
exit(1);
}
if (src_device == -1) {
// Source is CPU
if (dst_device == -1) {
// CPU -> CPU
memcpy_rect((char*)dst, (char*)src, element_size, num_dims, volume,
dst_offsets, src_offsets,
dst_dimensions, src_dimensions);
return 0;
} else {
// CPU -> MIC
// COIBufferWriteMultiD
struct arr_desc dst_desc;
struct arr_desc src_desc;
dst_desc.base = (int64_t)dst;
dst_desc.rank = num_dims;
src_desc.base = (int64_t)src;
src_desc.rank = num_dims;
for (int i=0; i<num_dims; i++)
{
dst_desc.dim[i].size = bytesize_at_this_dimension(
element_size,
num_dims - i,
dst_dimensions + i);
dst_desc.dim[i].lindex = 0;
dst_desc.dim[i].lower = dst_offsets[i];
dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
dst_desc.dim[i].stride = 1;
src_desc.dim[i].size = bytesize_at_this_dimension(
element_size,
num_dims - i,
src_dimensions + i);
src_desc.dim[i].lindex = 0;
src_desc.dim[i].lower = src_offsets[i];
src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
src_desc.dim[i].stride = 1;
}
__arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
__arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
// If OpenMP allows wrap-around for device numbers, enable next line
//dst_device %= mic_engines_total;
// Compute MIC buffer size
size_t dst_length = dst_dimensions[0] * bytesize_at_this_dimension(
element_size,
num_dims,
dst_dimensions);
OFFLOAD_TRACE(3,
"Creating buffer from sink memory %llx of size %lld\n",
dst, dst_length);
COIBUFFER mic_buf;
COIRESULT res = COI::BufferCreateFromMemory(dst_length,
COI_BUFFER_NORMAL, COI_SINK_MEMORY, dst,
1, &mic_engines[dst_device].get_process(),
&mic_buf);
if (res != COI_SUCCESS) {
LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
return 1;
}
res = COI::BufferWriteMultiD(mic_buf,
mic_engines[dst_device].get_process(),
0, &dst_desc, &src_desc,
COI_COPY_UNSPECIFIED, 0, 0, 0);
if (res != COI_SUCCESS) {
LIBOFFLOAD_ERROR(c_buf_write, res);
return 1;
}
res = COI::BufferDestroy(mic_buf);
if (res != COI_SUCCESS) {
LIBOFFLOAD_ERROR(c_buf_destroy, res);
return 1;
}
return 0;
}
} else {
// Source is device
if (dst_device == -1) {
// COIBufferReadMultiD
struct arr_desc dst_desc;
struct arr_desc src_desc;
dst_desc.base = (int64_t)dst;
dst_desc.rank = num_dims;
src_desc.base = (int64_t)src;
src_desc.rank = num_dims;
for (int i=0; i<num_dims; i++)
{
dst_desc.dim[i].size = bytesize_at_this_dimension(
element_size,
num_dims - i,
dst_dimensions + i);
dst_desc.dim[i].lindex = 0;
dst_desc.dim[i].lower = dst_offsets[i];
dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
dst_desc.dim[i].stride = 1;
src_desc.dim[i].size = bytesize_at_this_dimension(
element_size,
num_dims - i,
src_dimensions + i);
src_desc.dim[i].lindex = 0;
src_desc.dim[i].lower = src_offsets[i];
src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
src_desc.dim[i].stride = 1;
}
__arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
__arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
// If OpenMP allows wrap-around for device numbers, enable next line
//src_device %= mic_engines_total;
// Compute MIC buffer size
size_t src_length = src_dimensions[0] * bytesize_at_this_dimension(
element_size,
num_dims,
src_dimensions);
OFFLOAD_TRACE(3,
"Creating buffer from sink memory %llx of size %lld\n",
src, src_length);
COIBUFFER mic_buf;
COIRESULT res = COI::BufferCreateFromMemory(src_length,
COI_BUFFER_NORMAL, COI_SINK_MEMORY, src,
1, &mic_engines[src_device].get_process(),
&mic_buf);
if (res != COI_SUCCESS) {
LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
return 1;
}
res = COI::BufferReadMultiD(mic_buf, 0,
&dst_desc, &src_desc,
COI_COPY_UNSPECIFIED, 0, 0, 0);
if (res != COI_SUCCESS) {
LIBOFFLOAD_ERROR(c_buf_write, res);
return 1;
}
res = COI::BufferDestroy(mic_buf);
if (res != COI_SUCCESS) {
LIBOFFLOAD_ERROR(c_buf_destroy, res);
return 1;
}
return 0;
} else {
// some MIC -> some MIC
if (src_device == dst_device) {
// MIC local copy will be done as remote memcpy_rect
struct parameters {
void *dst;
void *src;
size_t element_size;
int num_dims;
size_t array_info[MAX_ARRAY_RANK*5];
} parameters = {dst, src, element_size, num_dims};
int result;
for (int i=0; i<num_dims; i++)
{
parameters.array_info[i] = volume[i];
parameters.array_info[i+num_dims] = dst_offsets[i];
parameters.array_info[i+num_dims*2] = src_offsets[i];
parameters.array_info[i+num_dims*3] = dst_dimensions[i];
parameters.array_info[i+num_dims*4] = src_dimensions[i];
}
OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
0, NULL, __func__, 0);
if (ofld) {
VarDesc vars[1] = {0};
vars[0].type.src = c_data;
vars[0].type.dst = c_data;
vars[0].direction.bits = c_parameter_in;
vars[0].size = sizeof(parameters) -
(MAX_ARRAY_RANK - num_dims) *
5 * sizeof(size_t);
vars[0].count = 1;
vars[0].ptr = &parameters;
OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_rect_target",
0, 1, vars, NULL, 0, 0, 0);
return 0;
} else {
return 1;
}
} else {
// MICx -> MICy
// Compute transfer byte-count
size_t dst_length = element_size;
for (int i=0; i<num_dims; i++) {
dst_length *= volume[i];
}
// Allocate CPU buffer
char *cpu_mem = (char *)malloc(dst_length);
if (cpu_mem == 0) {
LIBOFFLOAD_ERROR(c_malloc);
return 1;
}
// Create CPU offset and dimension arrays
// The CPU array collects the data in a contiguous block
size_t cpu_offsets[MAX_ARRAY_RANK];
size_t cpu_dimensions[MAX_ARRAY_RANK];
for (int i=0; i<num_dims; i++) {
cpu_offsets[i] = 0;
cpu_dimensions[i] = volume[i];
}
int retval = 1;
if (omp_target_memcpy_rect(
cpu_mem, src, element_size, num_dims, volume,
cpu_offsets, src_offsets,
cpu_dimensions, src_dimensions,
-1, src_device) == 0) {
retval = omp_target_memcpy_rect(
dst, cpu_mem, element_size, num_dims, volume,
dst_offsets, cpu_offsets,
dst_dimensions, cpu_dimensions,
dst_device, -1);
}
free(cpu_mem);
return retval;
}
}
}
}
// Associates `host_ptr` with device memory on `device_num`.
//
// host_ptr is the key in the device's pointer table that yields the
// association.  A COIBUFFER of `size` bytes is created from the memory at
// `device_ptr` on the device, and `device_offset` is recorded as the
// association's alloc_disp.
// NOTE(review): the sink buffer is created at device_ptr, while the trace
// message prints device_ptr + device_offset -- confirm which address is
// intended.
//
// Returns 0 when the association was created, or when an identical one
// (same device pointer and offset) already exists.  Returns 1 when
// device_num is -1 (the host), when host_ptr is already associated
// differently, or when creating/configuring the device buffer fails.
// A device number below -1 is reported as invalid and the process exits
// with status 1.
int omp_target_associate_ptr(
    void *host_ptr,
    void *device_ptr,
    size_t size,
    size_t device_offset,
    int device_num
) __GOMP_NOTHROW
{
    COIRESULT res;

    __offload_init_library();

    // "%lld" expects long long; size_t arguments are converted explicitly.
    OFFLOAD_TRACE(2, "omp_target_associate_ptr(%p, %p, %lld, %lld, %d)\n",
                  host_ptr, device_ptr, (long long)size,
                  (long long)device_offset, device_num);

    if (device_num < -1) {
        LIBOFFLOAD_ERROR(c_invalid_device_number);
        exit(1);
    }

    // Associating to CPU is treated as failure
    if (device_num == -1) {
        return 1;
    }

    // No negative-size check is made: `size` is an unsigned size_t, so a
    // `size < 0` test can never be true.

    // If OpenMP allows wrap-around for device numbers, enable next line
    //Engine& device = mic_engines[device_num % mic_engines_total];
    Engine& device = mic_engines[device_num];

    // Does host pointer have association already?
    // lookup existing association in pointer table
    PtrData* ptr_data = device.find_ptr_data(host_ptr);
    if (ptr_data != 0) {
        OFFLOAD_TRACE(3, "Address %p is already mapped on device %d\n",
                      host_ptr, device_num);
        // Is current device pointer and offset same as existing?
        if ((void*)ptr_data->mic_addr == device_ptr &&
            (size_t)ptr_data->alloc_disp == device_offset) {
            return 0;
        }
        return 1;
    }

    // Create association
    OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
                  host_ptr, (long long)size);

    bool is_new;
    ptr_data = device.insert_ptr_data(host_ptr, size, is_new);
    ptr_data->is_omp_associate = true;
    // NOTE(review): every exit below unlocks alloc_ptr_data_lock, so
    // insert_ptr_data presumably returns with that lock held -- confirm.

    // create CPU buffer
    OFFLOAD_TRACE(3,
              "Creating buffer from source memory %p, length %lld\n",
              host_ptr, (long long)size);

    // result is not checked because we can continue without cpu
    // buffer. In this case we will use COIBufferRead/Write
    // instead of COIBufferCopy.
    COI::BufferCreateFromMemory(size,
                                COI_BUFFER_OPENCL,
                                0,
                                host_ptr,
                                1,
                                &device.get_process(),
                                &ptr_data->cpu_buf);

    // create MIC buffer
    OFFLOAD_TRACE(3,
              "Creating buffer from sink memory: addr %p, size %lld\n",
              (char *)device_ptr + device_offset, (long long)size);
    res = COI::BufferCreateFromMemory(size,
                                      COI_BUFFER_NORMAL,
                                      COI_SINK_MEMORY,
                                      device_ptr,
                                      1,
                                      &device.get_process(),
                                      &ptr_data->mic_buf);

    // make buffer valid on the device.
    if (res == COI_SUCCESS) {
        res = COI::BufferSetState(ptr_data->mic_buf,
                                  device.get_process(),
                                  COI_BUFFER_VALID,
                                  COI_BUFFER_NO_MOVE,
                                  0, 0, 0);
    }

    // ... and invalid on the source (host) side.
    if (res == COI_SUCCESS) {
        res = COI::BufferSetState(ptr_data->mic_buf,
                                  COI_PROCESS_SOURCE,
                                  COI_BUFFER_INVALID,
                                  COI_BUFFER_NO_MOVE,
                                  0, 0, 0);
    }

    // The displacement is recorded only when every step succeeded.
    int status = 1;
    if (res == COI_SUCCESS) {
        ptr_data->alloc_disp = device_offset;
        status = 0;
    }

    // Single exit: the lock is released on success and on every failure.
    // NOTE(review): on failure the table entry created above is left in
    // place -- confirm whether it should be removed.
    ptr_data->alloc_ptr_data_lock.unlock();
    return status;
}
// Removes the association of `host_ptr` on device `device_num`.
//
// Any CPU and MIC buffers recorded for the association are destroyed (the
// results of those destroy calls are not examined) and the entry is removed
// from the device's pointer table.
//
// Returns 0 on success.  Returns 1 when device_num is -1 (the host) or
// when host_ptr has no association on the device.  A device number below
// -1 is reported as invalid and the process exits with status 1.
int omp_target_disassociate_ptr(
    void *host_ptr,
    int device_num
) __GOMP_NOTHROW
{
    COIRESULT res;

    __offload_init_library();

    OFFLOAD_TRACE(2, "omp_target_disassociate_ptr(%p, %d)\n",
                  host_ptr, device_num);

    if (device_num < -1) {
        LIBOFFLOAD_ERROR(c_invalid_device_number);
        exit(1);
    }

    // Dissociating from CPU is treated as failure
    if (device_num == -1) {
        return 1;
    }

    // If OpenMP allows wrap-around for device numbers, enable next line
    //Engine& device = mic_engines[device_num % mic_engines_total];
    Engine& device = mic_engines[device_num];

    // Lookup existing association in pointer table
    PtrData* association = device.find_ptr_data(host_ptr);

    // Attempt to disassociate unassociated pointer is a failure
    if (association == 0) {
        return 1;
    }

    // Destroy buffers
    if (association->cpu_buf != 0) {
        OFFLOAD_TRACE(3, "Destroying CPU buffer %p\n", association->cpu_buf);
        COI::BufferDestroy(association->cpu_buf);
    }
    if (association->mic_buf != 0) {
        OFFLOAD_TRACE(3, "Destroying MIC buffer %p\n", association->mic_buf);
        COI::BufferDestroy(association->mic_buf);
    }

    // Remove association from map
    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
                  association->cpu_addr.start());
    device.remove_ptr_data(association->cpu_addr.start());

    return 0;
}
// End of OpenMP 4.5 APIs
// OpenMP API wrappers
// Shared helper for the integer "set" wrappers: acquires the target
// identified by (target_type, target_number) and runs the target entry
// named `f_name`, passing `setting` as its single int input.
// Nothing is done when the target cannot be acquired.
static void omp_set_int_target(
    TARGET_TYPE target_type,
    int target_number,
    int setting,
    const char* f_name
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          f_name, 0);
    if (!ofld) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *value_arg = &vars[0];

    value_arg->type.src = c_data;
    value_arg->type.dst = c_data;
    value_arg->direction.bits = c_parameter_in;
    value_arg->size = sizeof(int);
    value_arg->count = 1;
    value_arg->ptr = &setting;

    OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
}
// Shared helper for the integer "get" wrappers: acquires the target
// identified by (target_type, target_number) and runs the target entry
// named `f_name` with a single int output descriptor.
// Returns the received value, or 0 when the target cannot be acquired.
static int omp_get_int_target(
    TARGET_TYPE target_type,
    int target_number,
    const char * f_name
)
{
    int setting = 0;

    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          f_name, 0);
    if (!ofld) {
        return setting;
    }

    VarDesc vars[1] = {0};
    VarDesc *value_arg = &vars[0];

    value_arg->type.src = c_data;
    value_arg->type.dst = c_data;
    value_arg->direction.bits = c_parameter_out;
    value_arg->size = sizeof(int);
    value_arg->count = 1;
    value_arg->ptr = &setting;

    OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);

    return setting;
}
// Sends `num_threads` to the target entry "omp_set_num_threads_target" on
// the target identified by (target_type, target_number).
void omp_set_num_threads_target(
    TARGET_TYPE target_type,
    int target_number,
    int num_threads
)
{
    const char *const entry_name = "omp_set_num_threads_target";
    omp_set_int_target(target_type, target_number, num_threads, entry_name);
}
// Returns the int produced by the target entry
// "omp_get_max_threads_target" on the target identified by
// (target_type, target_number); 0 when the target cannot be acquired.
int omp_get_max_threads_target(
    TARGET_TYPE target_type,
    int target_number
)
{
    const int max_threads = omp_get_int_target(
        target_type, target_number, "omp_get_max_threads_target");
    return max_threads;
}
// Returns the int produced by the target entry "omp_get_num_procs_target"
// on the target identified by (target_type, target_number); 0 when the
// target cannot be acquired.
int omp_get_num_procs_target(
    TARGET_TYPE target_type,
    int target_number
)
{
    const int num_procs = omp_get_int_target(
        target_type, target_number, "omp_get_num_procs_target");
    return num_procs;
}
// Sends the int `num_threads` to the target entry "omp_set_dynamic_target"
// on the target identified by (target_type, target_number).
// NOTE(review): despite its name the parameter presumably carries the
// dynamic-adjustment flag -- confirm against the target-side entry.
void omp_set_dynamic_target(
    TARGET_TYPE target_type,
    int target_number,
    int num_threads
)
{
    const char *const entry_name = "omp_set_dynamic_target";
    omp_set_int_target(target_type, target_number, num_threads, entry_name);
}
// Returns the int produced by the target entry "omp_get_dynamic_target"
// on the target identified by (target_type, target_number); 0 when the
// target cannot be acquired.
int omp_get_dynamic_target(
    TARGET_TYPE target_type,
    int target_number
)
{
    const int dynamic_setting = omp_get_int_target(
        target_type, target_number, "omp_get_dynamic_target");
    return dynamic_setting;
}
// Sends `nested` to the target entry "omp_set_nested_target" on the target
// identified by (target_type, target_number).
void omp_set_nested_target(
    TARGET_TYPE target_type,
    int target_number,
    int nested
)
{
    const char *const entry_name = "omp_set_nested_target";
    omp_set_int_target(target_type, target_number, nested, entry_name);
}
// Returns the int produced by the target entry "omp_get_nested_target" on
// the target identified by (target_type, target_number); 0 when the target
// cannot be acquired.
int omp_get_nested_target(
    TARGET_TYPE target_type,
    int target_number
)
{
    const int nested_setting = omp_get_int_target(
        target_type, target_number, "omp_get_nested_target");
    return nested_setting;
}
// Runs the target entry "omp_set_schedule_target" on the target identified
// by (target_type, target_number), passing `kind` and `modifier` as its two
// inputs.  Nothing is done when the target cannot be acquired.
void omp_set_schedule_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_sched_t kind,
    int modifier
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[2] = {0};
    VarDesc *kind_arg = &vars[0];
    VarDesc *modifier_arg = &vars[1];

    // Input: schedule kind.
    kind_arg->type.src = c_data;
    kind_arg->type.dst = c_data;
    kind_arg->direction.bits = c_parameter_in;
    kind_arg->size = sizeof(omp_sched_t);
    kind_arg->count = 1;
    kind_arg->ptr = &kind;

    // Input: schedule modifier.
    modifier_arg->type.src = c_data;
    modifier_arg->type.dst = c_data;
    modifier_arg->direction.bits = c_parameter_in;
    modifier_arg->size = sizeof(int);
    modifier_arg->count = 1;
    modifier_arg->ptr = &modifier;

    OFFLOAD_OFFLOAD(ofld, "omp_set_schedule_target",
                    0, 2, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_get_schedule_target" on the target identified
// by (target_type, target_number) with two output descriptors that point
// at the caller's `kind` and `modifier` storage.
// Nothing is done (and neither location is touched here) when the target
// cannot be acquired.
void omp_get_schedule_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_sched_t *kind,
    int *modifier
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[2] = {0};
    VarDesc *kind_arg = &vars[0];
    VarDesc *modifier_arg = &vars[1];

    // Output: schedule kind.
    kind_arg->type.src = c_data;
    kind_arg->type.dst = c_data;
    kind_arg->direction.bits = c_parameter_out;
    kind_arg->size = sizeof(omp_sched_t);
    kind_arg->count = 1;
    kind_arg->ptr = kind;

    // Output: schedule modifier.
    modifier_arg->type.src = c_data;
    modifier_arg->type.dst = c_data;
    modifier_arg->direction.bits = c_parameter_out;
    modifier_arg->size = sizeof(int);
    modifier_arg->count = 1;
    modifier_arg->ptr = modifier;

    OFFLOAD_OFFLOAD(ofld, "omp_get_schedule_target",
                    0, 2, vars, NULL, 0, 0, 0);
}
// lock API functions
// Runs the target entry "omp_init_lock_target" on the target identified by
// (target_type, target_number).  `*lock` is described as an output-only
// argument of sizeof(omp_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_init_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_out;
    lock_arg->size = sizeof(omp_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_init_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_destroy_lock_target" on the target identified
// by (target_type, target_number).  `*lock` is described as an input-only
// argument of sizeof(omp_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_destroy_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_in;
    lock_arg->size = sizeof(omp_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_destroy_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_set_lock_target" on the target identified by
// (target_type, target_number).  `*lock` is described as an in/out
// argument of sizeof(omp_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_set_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_inout;
    lock_arg->size = sizeof(omp_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_set_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_unset_lock_target" on the target identified
// by (target_type, target_number).  `*lock` is described as an in/out
// argument of sizeof(omp_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_unset_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_inout;
    lock_arg->size = sizeof(omp_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_unset_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_test_lock_target" on the target identified by
// (target_type, target_number).  `*lock` is described as an in/out
// argument and an int result as an output argument.
// Returns that result, or 0 when the target cannot be acquired.
int omp_test_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_lock_target_t *lock
)
{
    int result = 0;

    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return result;
    }

    VarDesc vars[2] = {0};
    VarDesc *lock_arg = &vars[0];
    VarDesc *result_arg = &vars[1];

    // In/out: the lock object.
    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_inout;
    lock_arg->size = sizeof(omp_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    // Output: outcome of the test.
    result_arg->type.src = c_data;
    result_arg->type.dst = c_data;
    result_arg->direction.bits = c_parameter_out;
    result_arg->size = sizeof(int);
    result_arg->count = 1;
    result_arg->ptr = &result;

    OFFLOAD_OFFLOAD(ofld, "omp_test_lock_target",
                    0, 2, vars, NULL, 0, 0, 0);

    return result;
}
// nested lock API functions
// Runs the target entry "omp_init_nest_lock_target" on the target
// identified by (target_type, target_number).  `*lock` is described as an
// output-only argument of sizeof(omp_nest_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_init_nest_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_nest_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_out;
    lock_arg->size = sizeof(omp_nest_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_init_nest_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_destroy_nest_lock_target" on the target
// identified by (target_type, target_number).  `*lock` is described as an
// input-only argument of sizeof(omp_nest_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_destroy_nest_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_nest_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_in;
    lock_arg->size = sizeof(omp_nest_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_destroy_nest_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_set_nest_lock_target" on the target
// identified by (target_type, target_number).  `*lock` is described as an
// in/out argument of sizeof(omp_nest_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_set_nest_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_nest_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_inout;
    lock_arg->size = sizeof(omp_nest_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_set_nest_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_unset_nest_lock_target" on the target
// identified by (target_type, target_number).  `*lock` is described as an
// in/out argument of sizeof(omp_nest_lock_target_t) bytes.
// Nothing is done when the target cannot be acquired.
void omp_unset_nest_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_nest_lock_target_t *lock
)
{
    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return;
    }

    VarDesc vars[1] = {0};
    VarDesc *lock_arg = &vars[0];

    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_inout;
    lock_arg->size = sizeof(omp_nest_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    OFFLOAD_OFFLOAD(ofld, "omp_unset_nest_lock_target",
                    0, 1, vars, NULL, 0, 0, 0);
}
// Runs the target entry "omp_test_nest_lock_target" on the target
// identified by (target_type, target_number).  `*lock` is described as an
// in/out argument and an int result as an output argument.
// Returns that result, or 0 when the target cannot be acquired.
int omp_test_nest_lock_target(
    TARGET_TYPE target_type,
    int target_number,
    omp_nest_lock_target_t *lock
)
{
    int result = 0;

    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
                                          __func__, 0);
    if (ofld == 0) {
        return result;
    }

    VarDesc vars[2] = {0};
    VarDesc *lock_arg = &vars[0];
    VarDesc *result_arg = &vars[1];

    // In/out: the nested lock object.
    lock_arg->type.src = c_data;
    lock_arg->type.dst = c_data;
    lock_arg->direction.bits = c_parameter_inout;
    lock_arg->size = sizeof(omp_nest_lock_target_t);
    lock_arg->count = 1;
    lock_arg->ptr = lock;

    // Output: outcome of the test.
    result_arg->type.src = c_data;
    result_arg->type.dst = c_data;
    result_arg->direction.bits = c_parameter_out;
    result_arg->size = sizeof(int);
    result_arg->count = 1;
    result_arg->ptr = &result;

    OFFLOAD_OFFLOAD(ofld, "omp_test_nest_lock_target",
                    0, 2, vars, NULL, 0, 0, 0);

    return result;
}