2eab96661b
Merge liboffloadmic from upstream, version 20150803. liboffloadmic/ * Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host and libmyo-client. liboffloadmic_host loads them dynamically. * Makefile.in: Regenerate. * doc/doxygen/header.tex: Merge from upstream, version 20150803 <https://openmprtl.org/sites/default/files/liboffload_oss_20150803.tgz>. * runtime/cean_util.cpp: Likewise. * runtime/cean_util.h: Likewise. * runtime/coi/coi_client.cpp: Likewise. * runtime/coi/coi_client.h: Likewise. * runtime/coi/coi_server.cpp: Likewise. * runtime/coi/coi_server.h: Likewise. * runtime/compiler_if_host.cpp: Likewise. * runtime/compiler_if_host.h: Likewise. * runtime/compiler_if_target.cpp: Likewise. * runtime/compiler_if_target.h: Likewise. * runtime/dv_util.cpp: Likewise. * runtime/dv_util.h: Likewise. * runtime/liboffload_error.c: Likewise. * runtime/liboffload_error_codes.h: Likewise. * runtime/liboffload_msg.c: Likewise. * runtime/liboffload_msg.h: Likewise. * runtime/mic_lib.f90: Likewise. * runtime/offload.h: Likewise. * runtime/offload_common.cpp: Likewise. * runtime/offload_common.h: Likewise. * runtime/offload_engine.cpp: Likewise. * runtime/offload_engine.h: Likewise. * runtime/offload_env.cpp: Likewise. * runtime/offload_env.h: Likewise. * runtime/offload_host.cpp: Likewise. * runtime/offload_host.h: Likewise. * runtime/offload_iterator.h: Likewise. * runtime/offload_myo_host.cpp: Likewise. * runtime/offload_myo_host.h: Likewise. * runtime/offload_myo_target.cpp: Likewise. * runtime/offload_myo_target.h: Likewise. * runtime/offload_omp_host.cpp: Likewise. * runtime/offload_omp_target.cpp: Likewise. * runtime/offload_orsl.cpp: Likewise. * runtime/offload_orsl.h: Likewise. * runtime/offload_table.cpp: Likewise. * runtime/offload_table.h: Likewise. * runtime/offload_target.cpp: Likewise. * runtime/offload_target.h: Likewise. * runtime/offload_target_main.cpp: Likewise. * runtime/offload_timer.h: Likewise. * runtime/offload_timer_host.cpp: Likewise. 
* runtime/offload_timer_target.cpp: Likewise. * runtime/offload_trace.cpp: Likewise. * runtime/offload_trace.h: Likewise. * runtime/offload_util.cpp: Likewise. * runtime/offload_util.h: Likewise. * runtime/ofldbegin.cpp: Likewise. * runtime/ofldend.cpp: Likewise. * runtime/orsl-lite/include/orsl-lite.h: Likewise. * runtime/orsl-lite/lib/orsl-lite.c: Likewise. * runtime/use_mpss2.txt: Likewise. * include/coi/common/COIEngine_common.h: Merge from upstream, MPSS version 3.5 <http://registrationcenter.intel.com/irc_nas/7445/mpss-src-3.5.tar>. * include/coi/common/COIEvent_common.h: Likewise. * include/coi/common/COIMacros_common.h: Likewise. * include/coi/common/COIPerf_common.h: Likewise. * include/coi/common/COIResult_common.h: Likewise. * include/coi/common/COISysInfo_common.h: Likewise. * include/coi/common/COITypes_common.h: Likewise. * include/coi/sink/COIBuffer_sink.h: Likewise. * include/coi/sink/COIPipeline_sink.h: Likewise. * include/coi/sink/COIProcess_sink.h: Likewise. * include/coi/source/COIBuffer_source.h: Likewise. * include/coi/source/COIEngine_source.h: Likewise. * include/coi/source/COIEvent_source.h: Likewise. * include/coi/source/COIPipeline_source.h: Likewise. * include/coi/source/COIProcess_source.h: Likewise. * include/myo/myo.h: Likewise. * include/myo/myoimpl.h: Likewise. * include/myo/myotypes.h: Likewise. * plugin/Makefile.am (myo_inc_dir): Remove. (libgomp_plugin_intelmic_la_CPPFLAGS): Do not define MYO_SUPPORT. (AM_CPPFLAGS): Likewise for offload_target_main. * plugin/Makefile.in: Regenerate. * runtime/emulator/coi_common.h: Update copyright years. (OFFLOAD_EMUL_KNC_NUM_ENV): Replace with ... (OFFLOAD_EMUL_NUM_ENV): ... this. (enum cmd_t): Add CMD_CLOSE_LIBRARY. * runtime/emulator/coi_device.cpp: Update copyright years. (COIProcessWaitForShutdown): Add space between string constants. Return handle to host in CMD_OPEN_LIBRARY. Support CMD_CLOSE_LIBRARY. * runtime/emulator/coi_device.h: Update copyright years. 
* runtime/emulator/coi_host.cpp: Update copyright years. (knc_engines_num): Replace with ... (num_engines): ... this. (init): Replace OFFLOAD_EMUL_KNC_NUM_ENV with OFFLOAD_EMUL_NUM_ENV. (COIEngineGetCount): Replace COI_ISA_KNC with COI_ISA_MIC, and knc_engines_num with num_engines. (COIEngineGetHandle): Likewise. (COIProcessCreateFromMemory): Add space between string constants. (COIProcessCreateFromFile): New function. (COIProcessLoadLibraryFromMemory): Rename arguments according to COIProcess_source.h. Return handle, received from target. (COIProcessUnloadLibrary): New function. (COIPipelineClearCPUMask): New function. (COIPipelineSetCPUMask): New function. (COIEngineGetInfo): New function. * runtime/emulator/coi_host.h: Update copyright years. * runtime/emulator/coi_version_asm.h: Regenerate. * runtime/emulator/coi_version_linker_script.map: Regenerate. * runtime/emulator/myo_client.cpp: Update copyright years. * runtime/emulator/myo_service.cpp: Update copyright years. (myoArenaRelease): New function. (myoArenaAcquire): New function. (myoArenaAlignedFree): New function. (myoArenaAlignedMalloc): New function. * runtime/emulator/myo_service.h: Update copyright years. * runtime/emulator/myo_version_asm.h: Regenerate. * runtime/emulator/myo_version_linker_script.map: Regenerate. From-SVN: r227532
399 lines
12 KiB
C++
399 lines
12 KiB
C++
/*
|
|
Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
* Neither the name of Intel Corporation nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
|
|
#include "cean_util.h"
|
|
#include "offload_common.h"
|
|
|
|
// 1. allocate element of CeanReadRanges type
|
|
// 2. initialized it for reading consequently contiguous ranges
|
|
// described by "ap" argument
|
|
CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap)
|
|
{
|
|
CeanReadRanges * res;
|
|
|
|
// find the max contiguous range
|
|
int64_t rank = ap->rank - 1;
|
|
int64_t length = ap->dim[rank].size;
|
|
for (; rank >= 0; rank--) {
|
|
if (ap->dim[rank].stride == 1) {
|
|
length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
|
|
if (rank > 0 && length != ap->dim[rank - 1].size) {
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
|
|
(ap->rank - rank) * sizeof(CeanReadDim));
|
|
if (res == NULL)
|
|
LIBOFFLOAD_ERROR(c_malloc);
|
|
|
|
res->arr_desc = const_cast<Arr_Desc*>(ap);
|
|
res->current_number = 0;
|
|
res->range_size = length;
|
|
res->last_noncont_ind = rank;
|
|
|
|
// calculate number of contiguous ranges inside noncontiguous dimensions
|
|
int count = 1;
|
|
bool prev_is_cont = true;
|
|
int64_t offset = 0;
|
|
|
|
for (; rank >= 0; rank--) {
|
|
res->Dim[rank].count = count;
|
|
res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
|
|
count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
|
|
(ap->dim[rank].upper - ap->dim[rank].lower +
|
|
ap->dim[rank].stride) / ap->dim[rank].stride);
|
|
prev_is_cont = false;
|
|
offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
|
|
ap->dim[rank].size;
|
|
}
|
|
res->range_max_number = count;
|
|
res -> ptr = (void*)ap->base;
|
|
res -> init_offset = offset;
|
|
return res;
|
|
}
|
|
|
|
// check if ranges described by 1 argument could be transferred into ranges
|
|
// described by 2-nd one
|
|
bool cean_ranges_match(
|
|
CeanReadRanges * read_rng1,
|
|
CeanReadRanges * read_rng2
|
|
)
|
|
{
|
|
return ( read_rng1 == NULL || read_rng2 == NULL ||
|
|
(read_rng1->range_size % read_rng2->range_size == 0 ||
|
|
read_rng2->range_size % read_rng1->range_size == 0));
|
|
}
|
|
|
|
// Set next offset and length and returns true for next range.
|
|
// Returns false if the ranges are over.
|
|
bool get_next_range(
|
|
CeanReadRanges * read_rng,
|
|
int64_t *offset
|
|
)
|
|
{
|
|
if (++read_rng->current_number > read_rng->range_max_number) {
|
|
read_rng->current_number = 0;
|
|
return false;
|
|
}
|
|
int rank = 0;
|
|
int num = read_rng->current_number - 1;
|
|
int64_t cur_offset = 0;
|
|
int num_loc;
|
|
for (; rank <= read_rng->last_noncont_ind; rank++) {
|
|
num_loc = num / read_rng->Dim[rank].count;
|
|
cur_offset += num_loc * read_rng->Dim[rank].size;
|
|
num = num % read_rng->Dim[rank].count;
|
|
}
|
|
*offset = cur_offset + read_rng->init_offset;
|
|
return true;
|
|
}
|
|
|
|
bool is_arr_desc_contiguous(const Arr_Desc *ap)
|
|
{
|
|
int64_t rank = ap->rank - 1;
|
|
int64_t length = ap->dim[rank].size;
|
|
for (; rank >= 0; rank--) {
|
|
if (ap->dim[rank].stride > 1 &&
|
|
ap->dim[rank].upper - ap->dim[rank].lower != 0) {
|
|
return false;
|
|
}
|
|
else if (length != ap->dim[rank].size) {
|
|
for (; rank >= 0; rank--) {
|
|
if (ap->dim[rank].upper - ap->dim[rank].lower != 0) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Total size covered by the reader: number of contiguous ranges multiplied
// by the size of one range.
int64_t cean_get_transf_size(CeanReadRanges * read_rng)
{
    const int64_t total = read_rng->range_max_number * read_rng->range_size;
    return total;
}
|
|
|
|
// Bounds of the range currently being accumulated by generate_one_range().
// generate_mem_ranges() resets them to -1 / -2 before each traversal.
static uint64_t last_left;
static uint64_t last_right;

// Callback invoked for every accumulated range [low, high].
typedef void (*fpp)(
    const char *spaces,
    uint64_t    low,
    uint64_t    high,
    int         esize,
    bool        print_values
);
|
|
|
|
static void generate_one_range(
|
|
const char *spaces,
|
|
uint64_t lrange,
|
|
uint64_t rrange,
|
|
fpp fp,
|
|
int esize,
|
|
bool print_values
|
|
)
|
|
{
|
|
OFFLOAD_TRACE(3,
|
|
"%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
|
|
spaces, (void*)lrange, (void*)rrange, esize);
|
|
if (last_left == -1) {
|
|
// First range
|
|
last_left = lrange;
|
|
}
|
|
else {
|
|
if (lrange == last_right+1) {
|
|
// Extend previous range, don't print
|
|
}
|
|
else {
|
|
(*fp)(spaces, last_left, last_right, esize, print_values);
|
|
last_left = lrange;
|
|
}
|
|
}
|
|
last_right = rrange;
|
|
}
|
|
|
|
static bool element_is_contiguous(
|
|
uint64_t rank,
|
|
const struct Dim_Desc *ddp
|
|
)
|
|
{
|
|
if (rank == 1) {
|
|
return (ddp[0].lower == ddp[0].upper || ddp[0].stride == 1);
|
|
}
|
|
else {
|
|
return ((ddp[0].size == (ddp[1].upper-ddp[1].lower+1)*ddp[1].size) &&
|
|
element_is_contiguous(rank-1, ddp++));
|
|
}
|
|
}
|
|
|
|
static void generate_mem_ranges_one_rank(
|
|
const char *spaces,
|
|
uint64_t base,
|
|
uint64_t rank,
|
|
const struct Dim_Desc *ddp,
|
|
fpp fp,
|
|
int esize,
|
|
bool print_values
|
|
)
|
|
{
|
|
uint64_t lindex = ddp->lindex;
|
|
uint64_t lower = ddp->lower;
|
|
uint64_t upper = ddp->upper;
|
|
uint64_t stride = ddp->stride;
|
|
uint64_t size = ddp->size;
|
|
OFFLOAD_TRACE(3,
|
|
"%s "
|
|
"generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
|
|
"lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
|
|
spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
|
|
|
|
if (element_is_contiguous(rank, ddp)) {
|
|
uint64_t lrange, rrange;
|
|
lrange = base + (lower-lindex)*size;
|
|
rrange = lrange + (upper-lower+1)*size - 1;
|
|
generate_one_range(spaces, lrange, rrange, fp, esize, print_values);
|
|
}
|
|
else {
|
|
if (rank == 1) {
|
|
for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
|
|
uint64_t lrange, rrange;
|
|
lrange = base + i*size;
|
|
rrange = lrange + size - 1;
|
|
generate_one_range(spaces, lrange, rrange,
|
|
fp, esize, print_values);
|
|
}
|
|
}
|
|
else {
|
|
for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
|
|
generate_mem_ranges_one_rank(
|
|
spaces, base+i*size, rank-1, ddp+1,
|
|
fp, esize, print_values);
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void generate_mem_ranges(
|
|
const char *spaces,
|
|
const Arr_Desc *adp,
|
|
bool deref,
|
|
fpp fp,
|
|
bool print_values
|
|
)
|
|
{
|
|
uint64_t esize;
|
|
|
|
OFFLOAD_TRACE(3,
|
|
"%s "
|
|
"generate_mem_ranges(adp=%p, deref=%d, fp)\n",
|
|
spaces, adp, deref);
|
|
last_left = -1;
|
|
last_right = -2;
|
|
|
|
// Element size is derived from last dimension
|
|
esize = adp->dim[adp->rank-1].size;
|
|
|
|
generate_mem_ranges_one_rank(
|
|
// For c_cean_var the base addr is the address of the data
|
|
// For c_cean_var_ptr the base addr is dereferenced to get to the data
|
|
spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
|
|
adp->rank, &adp->dim[0], fp, esize, print_values);
|
|
(*fp)(spaces, last_left, last_right, esize, print_values);
|
|
}
|
|
|
|
// returns offset and length of the data to be transferred
|
|
void __arr_data_offset_and_length(
|
|
const Arr_Desc *adp,
|
|
int64_t &offset,
|
|
int64_t &length
|
|
)
|
|
{
|
|
int64_t rank = adp->rank - 1;
|
|
int64_t size = adp->dim[rank].size;
|
|
int64_t r_off = 0; // offset from right boundary
|
|
|
|
// find the rightmost dimension which takes just part of its
|
|
// range. We define it if the size of left rank is not equal
|
|
// the range's length between upper and lower boungaries
|
|
while (rank > 0) {
|
|
size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
|
|
if (size != adp->dim[rank - 1].size) {
|
|
break;
|
|
}
|
|
rank--;
|
|
}
|
|
|
|
offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
|
|
adp->dim[rank].size;
|
|
|
|
// find gaps both from the left - offset and from the right - r_off
|
|
for (rank--; rank >= 0; rank--) {
|
|
offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
|
|
adp->dim[rank].size;
|
|
r_off += adp->dim[rank].size -
|
|
(adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
|
|
adp->dim[rank + 1].size;
|
|
}
|
|
length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
|
|
adp->dim[0].size - offset - r_off;
|
|
}
|
|
|
|
#if OFFLOAD_DEBUG > 0
|
|
|
|
static void print_range(
|
|
const char *spaces,
|
|
uint64_t low,
|
|
uint64_t high,
|
|
int esize,
|
|
bool print_values
|
|
)
|
|
{
|
|
char buffer[1024];
|
|
char number[32];
|
|
|
|
OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
|
|
spaces, (void*)low, (void*)high, esize);
|
|
|
|
if (console_enabled < 4 || !print_values) {
|
|
return;
|
|
}
|
|
OFFLOAD_TRACE(4, "%s values:\n", spaces);
|
|
int count = 0;
|
|
buffer[0] = '\0';
|
|
while (low <= high)
|
|
{
|
|
switch (esize)
|
|
{
|
|
case 1:
|
|
sprintf(number, "%d ", *((char *)low));
|
|
low += 1;
|
|
break;
|
|
case 2:
|
|
sprintf(number, "%d ", *((short *)low));
|
|
low += 2;
|
|
break;
|
|
case 4:
|
|
sprintf(number, "%d ", *((int *)low));
|
|
low += 4;
|
|
break;
|
|
default:
|
|
sprintf(number, "0x%016x ", *((uint64_t *)low));
|
|
low += 8;
|
|
break;
|
|
}
|
|
strcat(buffer, number);
|
|
count++;
|
|
if (count == 10) {
|
|
OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
|
|
count = 0;
|
|
buffer[0] = '\0';
|
|
}
|
|
}
|
|
if (count != 0) {
|
|
OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
|
|
}
|
|
}
|
|
|
|
void __arr_desc_dump(
|
|
const char *spaces,
|
|
const char *name,
|
|
const Arr_Desc *adp,
|
|
bool deref,
|
|
bool print_values
|
|
)
|
|
{
|
|
OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
|
|
|
|
if (adp != 0) {
|
|
OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n",
|
|
spaces, adp->base, adp->rank);
|
|
|
|
for (int i = 0; i < adp->rank; i++) {
|
|
OFFLOAD_TRACE(2,
|
|
"%s dimension %d: size=%lld, lindex=%lld, "
|
|
"lower=%lld, upper=%lld, stride=%lld\n",
|
|
spaces, i, adp->dim[i].size, adp->dim[i].lindex,
|
|
adp->dim[i].lower, adp->dim[i].upper,
|
|
adp->dim[i].stride);
|
|
}
|
|
// For c_cean_var the base addr is the address of the data
|
|
// For c_cean_var_ptr the base addr is dereferenced to get to the data
|
|
generate_mem_ranges(spaces, adp, deref, &print_range, print_values);
|
|
}
|
|
}
|
|
#endif // OFFLOAD_DEBUG
|