2014-11-13 15:03:17 +01:00
|
|
|
/*
|
2016-07-21 20:41:27 +02:00
|
|
|
Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions
|
|
|
|
are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
|
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of Intel Corporation nor the names of its
|
|
|
|
contributors may be used to endorse or promote products derived
|
|
|
|
from this software without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*! \file
|
|
|
|
\brief The parts of the runtime library used only on the host
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef OFFLOAD_HOST_H_INCLUDED
|
|
|
|
#define OFFLOAD_HOST_H_INCLUDED
|
|
|
|
|
|
|
|
#ifndef TARGET_WINNT
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif // TARGET_WINNT
|
|
|
|
#include "offload_common.h"
|
|
|
|
#include "offload_util.h"
|
|
|
|
#include "offload_engine.h"
|
|
|
|
#include "offload_env.h"
|
|
|
|
#include "offload_orsl.h"
|
|
|
|
#include "coi/coi_client.h"
|
|
|
|
|
|
|
|
// MIC engines.
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern Engine* mic_engines;
|
|
|
|
DLL_LOCAL extern uint32_t mic_engines_total;
|
|
|
|
|
|
|
|
// DMA channel count used by COI and set via
|
|
|
|
// OFFLOAD_DMA_CHANNEL_COUNT environment variable
|
|
|
|
DLL_LOCAL extern uint32_t mic_dma_channel_count;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
/*! \brief Packed target image.
 *
 *  The target image is packed as follows:
 *    1. 8 bytes containing the size of the target binary
 *    2. a null-terminated string which is the binary name
 *    3. <size> number of bytes that are the contents of the image
 *
 *  The address of symbol __offload_target_image is the address of this
 *  structure.
 */
struct Image {
    /*! Size in bytes of the target binary name and contents */
    int64_t size;
    /*! The name and contents of the target image */
    char data[];
};
|
|
|
|
|
|
|
|
// The offload descriptor.
|
|
|
|
class OffloadDescriptor
|
|
|
|
{
|
|
|
|
public:
|
2015-09-08 17:39:59 +02:00
|
|
|
enum OmpAsyncLastEventType {
|
|
|
|
c_last_not, // not last event
|
|
|
|
c_last_write, // the last event that is write
|
|
|
|
c_last_read, // the last event that is read
|
|
|
|
c_last_runfunc // the last event that is runfunction
|
|
|
|
};
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
OffloadDescriptor(
|
|
|
|
int index,
|
|
|
|
_Offload_status *status,
|
|
|
|
bool is_mandatory,
|
|
|
|
bool is_openmp,
|
|
|
|
OffloadHostTimerData * timer_data
|
|
|
|
) :
|
2015-09-08 17:39:59 +02:00
|
|
|
m_device(mic_engines[index == -1 ? 0 : index % mic_engines_total]),
|
2014-11-13 15:03:17 +01:00
|
|
|
m_is_mandatory(is_mandatory),
|
|
|
|
m_is_openmp(is_openmp),
|
|
|
|
m_inout_buf(0),
|
|
|
|
m_func_desc(0),
|
|
|
|
m_func_desc_size(0),
|
2016-07-21 20:41:27 +02:00
|
|
|
m_num_in_dependencies(0),
|
|
|
|
m_p_in_dependencies(0),
|
2014-11-13 15:03:17 +01:00
|
|
|
m_in_deps(0),
|
|
|
|
m_in_deps_total(0),
|
2015-09-08 17:39:59 +02:00
|
|
|
m_in_deps_allocated(0),
|
2014-11-13 15:03:17 +01:00
|
|
|
m_out_deps(0),
|
|
|
|
m_out_deps_total(0),
|
2015-09-08 17:39:59 +02:00
|
|
|
m_out_deps_allocated(0),
|
2014-11-13 15:03:17 +01:00
|
|
|
m_vars(0),
|
|
|
|
m_vars_extra(0),
|
|
|
|
m_status(status),
|
2015-09-08 17:39:59 +02:00
|
|
|
m_timer_data(timer_data),
|
|
|
|
m_out_with_preallocated(false),
|
|
|
|
m_preallocated_alloc(false),
|
|
|
|
m_traceback_called(false),
|
|
|
|
m_stream(-1),
|
2016-07-21 20:41:27 +02:00
|
|
|
m_signal(0),
|
|
|
|
m_has_signal(0),
|
2015-09-08 17:39:59 +02:00
|
|
|
m_omp_async_last_event_type(c_last_not)
|
|
|
|
{
|
|
|
|
m_wait_all_devices = index == -1;
|
|
|
|
}
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
~OffloadDescriptor()
|
|
|
|
{
|
|
|
|
if (m_in_deps != 0) {
|
|
|
|
free(m_in_deps);
|
|
|
|
}
|
|
|
|
if (m_out_deps != 0) {
|
|
|
|
free(m_out_deps);
|
|
|
|
}
|
|
|
|
if (m_func_desc != 0) {
|
|
|
|
free(m_func_desc);
|
|
|
|
}
|
|
|
|
if (m_vars != 0) {
|
|
|
|
free(m_vars);
|
|
|
|
free(m_vars_extra);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool offload(const char *name, bool is_empty,
|
|
|
|
VarDesc *vars, VarDesc2 *vars2, int vars_total,
|
|
|
|
const void **waits, int num_waits, const void **signal,
|
2015-09-08 17:39:59 +02:00
|
|
|
int entry_id, const void *stack_addr,
|
|
|
|
OffloadFlags offload_flags);
|
|
|
|
|
|
|
|
bool offload_finish(bool is_traceback);
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
bool is_signaled();
|
|
|
|
|
|
|
|
OffloadHostTimerData* get_timer_data() const {
|
|
|
|
return m_timer_data;
|
|
|
|
}
|
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
void set_stream(_Offload_stream stream) {
|
|
|
|
m_stream = stream;
|
|
|
|
}
|
|
|
|
|
|
|
|
_Offload_stream get_stream() {
|
|
|
|
return(m_stream);
|
|
|
|
}
|
|
|
|
|
2016-07-21 20:41:27 +02:00
|
|
|
Engine& get_device() {
|
|
|
|
return m_device;
|
|
|
|
}
|
|
|
|
|
|
|
|
void* get_signal() {
|
|
|
|
return(m_signal);
|
|
|
|
}
|
|
|
|
|
|
|
|
void set_signal(const void* signal) {
|
|
|
|
m_has_signal = 1;
|
|
|
|
m_signal = const_cast<void*>(signal);
|
|
|
|
}
|
|
|
|
|
|
|
|
void cleanup();
|
|
|
|
|
|
|
|
uint32_t m_event_count;
|
|
|
|
bool m_has_signal;
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
private:
|
2015-09-08 17:39:59 +02:00
|
|
|
bool offload_wrap(const char *name, bool is_empty,
|
|
|
|
VarDesc *vars, VarDesc2 *vars2, int vars_total,
|
|
|
|
const void **waits, int num_waits, const void **signal,
|
|
|
|
int entry_id, const void *stack_addr,
|
|
|
|
OffloadFlags offload_flags);
|
|
|
|
bool wait_dependencies(const void **waits, int num_waits,
|
|
|
|
_Offload_stream stream);
|
2014-11-13 15:03:17 +01:00
|
|
|
bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
|
|
|
|
int entry_id, const void *stack_addr);
|
|
|
|
bool setup_misc_data(const char *name);
|
2015-09-08 17:39:59 +02:00
|
|
|
bool send_pointer_data(bool is_async, void* info);
|
2014-11-13 15:03:17 +01:00
|
|
|
bool send_noncontiguous_pointer_data(
|
|
|
|
int i,
|
|
|
|
PtrData* src_buf,
|
|
|
|
PtrData* dst_buf,
|
2015-09-08 17:39:59 +02:00
|
|
|
COIEVENT *event,
|
|
|
|
uint64_t &sent_data,
|
|
|
|
uint32_t in_deps_amount,
|
|
|
|
COIEVENT *in_deps
|
|
|
|
);
|
|
|
|
bool receive_noncontiguous_pointer_data(
|
2014-11-13 15:03:17 +01:00
|
|
|
int i,
|
|
|
|
COIBUFFER dst_buf,
|
2015-09-08 17:39:59 +02:00
|
|
|
COIEVENT *event,
|
|
|
|
uint64_t &received_data,
|
|
|
|
uint32_t in_deps_amount,
|
|
|
|
COIEVENT *in_deps
|
|
|
|
);
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
bool gather_copyin_data();
|
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
bool compute(void *);
|
2014-11-13 15:03:17 +01:00
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
bool receive_pointer_data(bool is_async, bool first_run, void * info);
|
2014-11-13 15:03:17 +01:00
|
|
|
bool scatter_copyout_data();
|
|
|
|
|
|
|
|
bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
|
2015-09-08 17:39:59 +02:00
|
|
|
int64_t length, bool is_targptr,
|
|
|
|
bool error_does_not_exist = true);
|
2016-07-21 20:41:27 +02:00
|
|
|
|
|
|
|
void find_device_ptr( int64_t* &device_ptr,
|
|
|
|
void *host_ptr);
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
|
2015-09-08 17:39:59 +02:00
|
|
|
int64_t length, int64_t alloc_disp, int align,
|
|
|
|
bool is_targptr, bool is_prealloc, bool pin);
|
|
|
|
bool create_preallocated_buffer(PtrData* ptr_data, void *base);
|
2014-11-13 15:03:17 +01:00
|
|
|
bool init_static_ptr_data(PtrData *ptr_data);
|
|
|
|
bool init_mic_address(PtrData *ptr_data);
|
2016-07-21 20:41:27 +02:00
|
|
|
bool offload_stack_memory_manager(
|
|
|
|
const void * stack_begin,
|
|
|
|
int routine_id,
|
|
|
|
int buf_size,
|
|
|
|
int align,
|
|
|
|
bool thread_specific_function_locals,
|
|
|
|
bool *is_new);
|
|
|
|
char *get_this_threads_cpu_stack_addr(
|
|
|
|
const void * stack_begin,
|
|
|
|
int routine_id,
|
|
|
|
bool thread_specific_function_locals);
|
|
|
|
PtrData *get_this_threads_mic_stack_addr(
|
|
|
|
const void * stack_begin,
|
|
|
|
int routine_id,
|
|
|
|
bool thread_specific_function_locals);
|
2014-11-13 15:03:17 +01:00
|
|
|
bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size);
|
|
|
|
|
|
|
|
bool gen_var_descs_for_pointer_array(int i);
|
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
void get_stream_in_dependencies(uint32_t &in_deps_amount,
|
|
|
|
COIEVENT* &in_deps);
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
void report_coi_error(error_types msg, COIRESULT res);
|
|
|
|
_Offload_result translate_coi_error(COIRESULT res) const;
|
2016-07-21 20:41:27 +02:00
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
void setup_omp_async_info();
|
2016-07-21 20:41:27 +02:00
|
|
|
|
|
|
|
void setup_use_device_ptr(int i);
|
|
|
|
|
|
|
|
void register_event_call_back(void (*)(
|
|
|
|
COIEVENT,
|
|
|
|
const COIRESULT,
|
|
|
|
const void*),
|
|
|
|
const COIEVENT *event,
|
|
|
|
const void *info);
|
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
void register_omp_event_call_back(const COIEVENT *event, const void *info);
|
2016-07-21 20:41:27 +02:00
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
private:
|
|
|
|
typedef std::list<COIBUFFER> BufferList;
|
|
|
|
|
|
|
|
// extra data associated with each variable descriptor
|
|
|
|
struct VarExtra {
|
|
|
|
PtrData* src_data;
|
|
|
|
PtrData* dst_data;
|
|
|
|
AutoData* auto_data;
|
|
|
|
int64_t cpu_disp;
|
|
|
|
int64_t cpu_offset;
|
2015-09-08 17:39:59 +02:00
|
|
|
void *alloc;
|
2016-07-21 20:41:27 +02:00
|
|
|
union {
|
|
|
|
CeanReadRanges *read_rng_src;
|
|
|
|
NonContigDesc *noncont_desc;
|
|
|
|
};
|
2014-11-13 15:03:17 +01:00
|
|
|
CeanReadRanges *read_rng_dst;
|
|
|
|
int64_t ptr_arr_offset;
|
|
|
|
bool is_arr_ptr_el;
|
2015-09-08 17:39:59 +02:00
|
|
|
OmpAsyncLastEventType omp_last_event_type;
|
2016-07-21 20:41:27 +02:00
|
|
|
int64_t pointer_offset;
|
|
|
|
uint16_t type_src;
|
|
|
|
uint16_t type_dst;
|
2014-11-13 15:03:17 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
template<typename T> class ReadArrElements {
|
|
|
|
public:
|
|
|
|
ReadArrElements():
|
|
|
|
ranges(NULL),
|
|
|
|
el_size(sizeof(T)),
|
|
|
|
offset(0),
|
|
|
|
count(0),
|
|
|
|
is_empty(true),
|
|
|
|
base(NULL)
|
|
|
|
{}
|
|
|
|
|
|
|
|
bool read_next(bool flag)
|
|
|
|
{
|
|
|
|
if (flag != 0) {
|
|
|
|
if (is_empty) {
|
|
|
|
if (ranges) {
|
|
|
|
if (!get_next_range(ranges, &offset)) {
|
|
|
|
// ranges are over
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// all contiguous elements are over
|
|
|
|
else if (count != 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
length_cur = size;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
offset += el_size;
|
|
|
|
}
|
|
|
|
val = (T)get_el_value(base, offset, el_size);
|
|
|
|
length_cur -= el_size;
|
|
|
|
count++;
|
|
|
|
is_empty = length_cur == 0;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
public:
|
|
|
|
CeanReadRanges * ranges;
|
|
|
|
T val;
|
|
|
|
int el_size;
|
|
|
|
int64_t size,
|
|
|
|
offset,
|
|
|
|
length_cur;
|
|
|
|
bool is_empty;
|
|
|
|
int count;
|
|
|
|
char *base;
|
|
|
|
};
|
|
|
|
|
|
|
|
// ptr_data for persistent auto objects
|
|
|
|
PtrData* m_stack_ptr_data;
|
|
|
|
PtrDataList m_destroy_stack;
|
|
|
|
|
|
|
|
// Engine
|
|
|
|
Engine& m_device;
|
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
// true for offload_wait target(mic) stream(0)
|
|
|
|
bool m_wait_all_devices;
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
// if true offload is mandatory
|
|
|
|
bool m_is_mandatory;
|
|
|
|
|
|
|
|
// if true offload has openmp origin
|
|
|
|
const bool m_is_openmp;
|
|
|
|
|
|
|
|
// The Marshaller for the inputs of the offloaded region.
|
|
|
|
Marshaller m_in;
|
|
|
|
|
|
|
|
// The Marshaller for the outputs of the offloaded region.
|
|
|
|
Marshaller m_out;
|
|
|
|
|
|
|
|
// List of buffers that are passed to dispatch call
|
|
|
|
BufferList m_compute_buffers;
|
|
|
|
|
|
|
|
// List of buffers that need to be destroyed at the end of offload
|
|
|
|
BufferList m_destroy_buffers;
|
|
|
|
|
|
|
|
// Variable descriptors
|
|
|
|
VarDesc* m_vars;
|
|
|
|
VarExtra* m_vars_extra;
|
|
|
|
int m_vars_total;
|
|
|
|
|
|
|
|
// Pointer to a user-specified status variable
|
|
|
|
_Offload_status *m_status;
|
|
|
|
|
|
|
|
// Function descriptor
|
|
|
|
FunctionDescriptor* m_func_desc;
|
|
|
|
uint32_t m_func_desc_size;
|
|
|
|
|
|
|
|
// Buffer for transferring copyin/copyout data
|
|
|
|
COIBUFFER m_inout_buf;
|
|
|
|
|
2016-07-21 20:41:27 +02:00
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
// Dependencies
|
|
|
|
COIEVENT *m_in_deps;
|
|
|
|
uint32_t m_in_deps_total;
|
2015-09-08 17:39:59 +02:00
|
|
|
uint32_t m_in_deps_allocated;
|
2014-11-13 15:03:17 +01:00
|
|
|
COIEVENT *m_out_deps;
|
|
|
|
uint32_t m_out_deps_total;
|
2016-07-21 20:41:27 +02:00
|
|
|
uint32_t m_out_deps_allocated;
|
|
|
|
|
|
|
|
// 2 variables defines input dependencies for current COI API.
|
|
|
|
// The calls to routines as BufferWrite/PipelineRunFunction/BufferRead
|
|
|
|
// is supposed to have input dependencies.
|
|
|
|
// 2 variables below defines the number and vector of dependencies
|
|
|
|
// in every current moment of offload.
|
|
|
|
// So any phase of offload can use its values as input dependencies
|
|
|
|
// for the COI API that the phase calls.
|
|
|
|
// It means that all phases (of Write, RunFunction,Read) must keep
|
|
|
|
// the variables correct to be used by following phase.
|
|
|
|
// If some consequent offloads are connected (i.e. by the same stream)
|
|
|
|
// the final 2 variables of the offload is used as initial inputs
|
|
|
|
// for the next offload.
|
|
|
|
uint32_t m_num_in_dependencies;
|
|
|
|
COIEVENT *m_p_in_dependencies;
|
2015-09-08 17:39:59 +02:00
|
|
|
|
|
|
|
// Stream
|
|
|
|
_Offload_stream m_stream;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
2016-07-21 20:41:27 +02:00
|
|
|
// Signal
|
|
|
|
void* m_signal;
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
// Timer data
|
|
|
|
OffloadHostTimerData *m_timer_data;
|
|
|
|
|
|
|
|
// copyin/copyout data length
|
|
|
|
uint64_t m_in_datalen;
|
|
|
|
uint64_t m_out_datalen;
|
|
|
|
|
|
|
|
// a boolean value calculated in setup_descriptors. If true we need to do
|
|
|
|
// a run function on the target. Otherwise it may be optimized away.
|
|
|
|
bool m_need_runfunction;
|
2015-09-08 17:39:59 +02:00
|
|
|
|
|
|
|
// initialized value of m_need_runfunction;
|
|
|
|
// is used to recognize offload_transfer
|
|
|
|
bool m_initial_need_runfunction;
|
|
|
|
|
|
|
|
// a Boolean value set to true when OUT clauses with preallocated targetptr
|
|
|
|
// is encountered to indicate that call receive_pointer_data needs to be
|
|
|
|
// invoked again after call to scatter_copyout_data.
|
|
|
|
bool m_out_with_preallocated;
|
|
|
|
|
|
|
|
// a Boolean value set to true if an alloc_if(1) is used with preallocated
|
|
|
|
// targetptr to indicate the need to scatter_copyout_data even for
|
|
|
|
// async offload
|
|
|
|
bool m_preallocated_alloc;
|
|
|
|
|
|
|
|
// a Boolean value set to true if traceback routine is called
|
|
|
|
bool m_traceback_called;
|
|
|
|
|
|
|
|
OmpAsyncLastEventType m_omp_async_last_event_type;
|
2014-11-13 15:03:17 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
// Selects when the MIC devices are initialized.
enum OffloadInitType {
    // all devices before entering main
    c_init_on_start       = 0,
    // single device before starting the first offload
    c_init_on_offload     = 1,
    // all devices before starting the first offload
    c_init_on_offload_all = 2
};
|
|
|
|
|
2015-09-08 17:39:59 +02:00
|
|
|
extern "C" {

// Determines if MIC code is an executable or a shared library
bool __offload_target_image_is_executable(const void *target_image);

// Initializes library and registers specified offload image.
bool __offload_register_image(const void* image);

// Counterpart of __offload_register_image for the same image pointer.
void __offload_unregister_image(const void* image);

} // extern "C"
|
|
|
|
|
omp-low.c (lower_omp_ordered): Add argument to GOMP_SIMD_ORDERED_* internal calls - 0 if...
gcc/
2015-11-14 Jakub Jelinek <jakub@redhat.com>
* omp-low.c (lower_omp_ordered): Add argument to GOMP_SIMD_ORDERED_*
internal calls - 0 if ordered simd and 1 for ordered threads simd.
* tree-vectorizer.c (adjust_simduid_builtins): If GOMP_SIMD_ORDERED_*
argument is 1, replace it with GOMP_ordered_* call instead of removing
it.
gcc/c/
2015-11-14 Jakub Jelinek <jakub@redhat.com>
* c-typeck.c (c_finish_omp_clauses): Don't mark
GOMP_MAP_FIRSTPRIVATE_POINTER decls addressable.
gcc/cp/
2015-11-14 Jakub Jelinek <jakub@redhat.com>
* semantics.c (finish_omp_clauses): Don't mark
GOMP_MAP_FIRSTPRIVATE_POINTER decls addressable.
libgomp/
2015-11-14 Jakub Jelinek <jakub@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Ilya Verbin <ilya.verbin@intel.com>
* ordered.c (gomp_doacross_init, GOMP_doacross_post,
GOMP_doacross_wait, gomp_doacross_ull_init, GOMP_doacross_ull_post,
GOMP_doacross_ull_wait): For GFS_GUIDED don't divide number of
iterators or IV by chunk size.
* parallel.c (gomp_resolve_num_threads): Don't assume that
if thr->ts.team is non-NULL, then pool must be non-NULL.
* libgomp-plugin.h (GOMP_PLUGIN_target_task_completion): Declare.
* libgomp.map (GOMP_PLUGIN_1.1): New symbol version, export
GOMP_PLUGIN_target_task_completion.
* Makefile.am (libgomp_la_SOURCES): Add priority_queue.c.
* Makefile.in: Regenerate.
* libgomp.h: Shuffle prototypes and forward definitions around so
priority queues can be defined.
(enum gomp_task_kind): Add GOMP_TASK_ASYNC_RUNNING.
(enum gomp_target_task_state): New enum.
(struct gomp_target_task): Add state, tgt, task and team fields.
(gomp_create_target_task): Change return type to bool, add
state argument.
(gomp_target_task_fn): Change return type to bool.
(struct gomp_device_descr): Add async_run_func.
(struct gomp_task): Remove children, next_child, prev_child,
next_queue, prev_queue, next_taskgroup, prev_taskgroup.
Add pnode field.
(struct gomp_taskgroup): Remove children.
Add taskgroup_queue.
(struct gomp_team): Change task_queue type to a priority queue.
(splay_compare): Define inline.
(priority_queue_offset): New.
(priority_node_to_task): New.
(task_to_priority_node): New.
* oacc-mem.c: Do not include splay-tree.h.
* priority_queue.c: New file.
* priority_queue.h: New file.
* splay-tree.c: Do not include splay-tree.h.
(splay_tree_foreach_internal): New.
(splay_tree_foreach): New.
* splay-tree.h: Become re-entrant if splay_tree_prefix is defined.
(splay_tree_callback): Define typedef.
* target.c (splay_compare): Move to libgomp.h.
(GOMP_target): Don't adjust *thr in any way around running offloaded
task.
(GOMP_target_ext): Likewise. Handle target nowait.
(GOMP_target_update_ext, GOMP_target_enter_exit_data): Check
return value from gomp_create_target_task, if false, fallthrough
as if no dependencies exist.
(gomp_target_task_fn): Change return type to bool, return true
if the task should have another part scheduled later. Handle
target nowait.
(gomp_load_plugin_for_device): Initialize async_run.
* task.c (gomp_init_task): Initialize children_queue.
(gomp_clear_parent_in_list): New.
(gomp_clear_parent_in_tree): New.
(gomp_clear_parent): Handle priorities.
(GOMP_task): Likewise.
(priority_queue_move_task_first,
gomp_target_task_completion, GOMP_PLUGIN_target_task_completion):
New functions.
(gomp_create_target_task): Use priority queues. Change return type
to bool, add state argument, return false if for async
{{enter,exit} data,update} constructs no dependencies need to be
waited for, handle target nowait. Set task->fn to NULL instead of
gomp_target_task_fn.
(verify_children_queue): Remove.
(priority_list_upgrade_task): New.
(priority_queue_upgrade_task): New.
(verify_task_queue): Remove.
(priority_list_downgrade_task): New.
(priority_queue_downgrade_task): New.
(gomp_task_run_pre): Use priority queues.
Abstract code out to priority_queue_downgrade_task.
(gomp_task_run_post_handle_dependers): Use priority queues.
(gomp_task_run_post_remove_parent): Likewise.
(gomp_task_run_post_remove_taskgroup): Likewise.
(gomp_barrier_handle_tasks): Likewise. Handle target nowait target
tasks specially.
(GOMP_taskwait): Likewise.
(gomp_task_maybe_wait_for_dependencies): Likewise. Abstract code to
priority_queue_upgrade_task.
(GOMP_taskgroup_start): Use priority queues.
(GOMP_taskgroup_end): Likewise. Handle target nowait target tasks
specially. If taskgroup is NULL, and thr->ts.level is 0, act as a
barrier.
* taskloop.c (GOMP_taskloop): Handle priorities.
* team.c (gomp_new_team): Call priority_queue_init.
(free_team): Call priority_queue_free.
(gomp_free_thread): Call gomp_team_end if thr->ts.team is artificial
team created for target nowait in implicit parallel region.
(gomp_team_start): For nested check, test thr->ts.level instead of
thr->ts.team != NULL.
* testsuite/libgomp.c/doacross-3.c: New test.
* testsuite/libgomp.c/ordered-5.c: New test.
* testsuite/libgomp.c/priority.c: New test.
* testsuite/libgomp.c/target-31.c: New test.
* testsuite/libgomp.c/target-32.c: New test.
* testsuite/libgomp.c/target-33.c: New test.
* testsuite/libgomp.c/target-34.c: New test.
liboffloadmic/
2015-11-14 Ilya Verbin <ilya.verbin@intel.com>
* runtime/offload_host.cpp (task_completion_callback): New
variable.
(offload_proxy_task_completed_ooo): Call task_completion_callback.
(__offload_register_task_callback): New function.
* runtime/offload_host.h (__offload_register_task_callback): New
declaration.
* plugin/libgomp-plugin-intelmic.cpp (offload): Add async_data
argument, handle async offloading.
(register_main_image): Call register_main_image.
(GOMP_OFFLOAD_init_device, get_target_table, GOMP_OFFLOAD_alloc,
GOMP_OFFLOAD_free, GOMP_OFFLOAD_host2dev, GOMP_OFFLOAD_dev2host,
GOMP_OFFLOAD_dev2dev): Adjust offload callers.
(GOMP_OFFLOAD_async_run): New function.
(GOMP_OFFLOAD_run): Implement using GOMP_OFFLOAD_async_run.
From-SVN: r230381
2015-11-14 19:42:13 +01:00
|
|
|
// Registers the callback for asynchronous task completion.
// The callback takes a single void* argument.
extern "C" void __offload_register_task_callback(void (*cb)(void *));
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
// Initializes offload runtime library.
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern int __offload_init_library(void);
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// thread data for associating pipelines with threads
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern pthread_key_t mic_thread_key;
|
|
|
|
|
|
|
|
// location of offload_main executable
|
|
|
|
// To be used if the main application has no offload and is not built
|
|
|
|
// with -offload but dynamic library linked in has offload pragma
|
|
|
|
DLL_LOCAL extern char* mic_device_main;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// Environment variables for devices
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern MicEnvVar mic_env_vars;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// CPU frequency
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern uint64_t cpu_frequency;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
2016-07-21 20:41:27 +02:00
|
|
|
// LD_LIBRARY_PATH for KNC libraries
|
|
|
|
DLL_LOCAL extern char* knc_library_path;
|
|
|
|
|
|
|
|
// LD_LIBRARY_PATH for KNL libraries
|
|
|
|
DLL_LOCAL extern char* knl_library_path;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// stack size for target
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern uint32_t mic_stack_size;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// Preallocated memory size for buffers on MIC
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern uint64_t mic_buffer_size;
|
|
|
|
|
|
|
|
// Preallocated 4K page memory size for buffers on MIC
|
|
|
|
DLL_LOCAL extern uint64_t mic_4k_buffer_size;
|
|
|
|
|
|
|
|
// Preallocated 2M page memory size for buffers on MIC
|
|
|
|
DLL_LOCAL extern uint64_t mic_2m_buffer_size;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// Setting controlling inout proxy
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern bool mic_proxy_io;
|
|
|
|
DLL_LOCAL extern char* mic_proxy_fs_root;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// Threshold for creating buffers with large pages
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern uint64_t __offload_use_2mb_buffers;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// offload initialization type
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern OffloadInitType __offload_init_type;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// Device number to offload to when device is not explicitly specified.
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern int __omp_device_num;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
|
|
|
// target executable
|
2015-09-08 17:39:59 +02:00
|
|
|
DLL_LOCAL extern TargetImage* __target_exe;
|
2014-11-13 15:03:17 +01:00
|
|
|
|
2016-07-21 20:41:27 +02:00
|
|
|
// is true if last loaded image is dll
|
|
|
|
DLL_LOCAL extern bool __current_image_is_dll;
|
|
|
|
// is true if myo library is loaded when dll is loaded
|
|
|
|
DLL_LOCAL extern bool __myo_init_in_so;
|
|
|
|
|
2014-11-13 15:03:17 +01:00
|
|
|
// IDB support

// Called by the offload runtime after initialization of offload infrastructure
// has been completed.
extern "C" void __dbg_target_so_loaded();

// Called by the offload runtime when the offload infrastructure is about to be
// shut down, currently at application exit.
extern "C" void __dbg_target_so_unloaded();

// Size of the __dbg_target_exe_name buffer.
#define MAX_TARGET_NAME 512

// Null-terminated string containing path to the process image of the hosting
// application (offload_main)
extern "C" char __dbg_target_exe_name[MAX_TARGET_NAME];

// Integer specifying the process id
extern "C" pid_t __dbg_target_so_pid;

// Integer specifying the 0-based device number
extern "C" int __dbg_target_id;

// Set to non-zero by the host-side debugger to enable offload debugging
// support
extern "C" int __dbg_is_attached;

// Major version of the debugger support API
extern "C" const int __dbg_api_major_version;

// Minor version of the debugger support API
extern "C" const int __dbg_api_minor_version;
|
|
|
|
|
|
|
|
#endif // OFFLOAD_HOST_H_INCLUDED
|