734 lines
23 KiB
C
734 lines
23 KiB
C
|
/* Cilk_abi.c -*-C++-*-
|
||
|
*
|
||
|
*************************************************************************
|
||
|
*
|
||
|
* @copyright
|
||
|
* Copyright (C) 2010-2013, Intel Corporation
|
||
|
* All rights reserved.
|
||
|
*
|
||
|
* @copyright
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions
|
||
|
* are met:
|
||
|
*
|
||
|
* * Redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer.
|
||
|
* * Redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in
|
||
|
* the documentation and/or other materials provided with the
|
||
|
* distribution.
|
||
|
* * Neither the name of Intel Corporation nor the names of its
|
||
|
* contributors may be used to endorse or promote products derived
|
||
|
* from this software without specific prior written permission.
|
||
|
*
|
||
|
* @copyright
|
||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||
|
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||
|
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||
|
*
|
||
|
**************************************************************************/
|
||
|
|
||
|
/**
|
||
|
* @file cilk-abi.c
|
||
|
*
|
||
|
* @brief cilk-abi.c implements all of the entrypoints to the Intel Cilk
|
||
|
* Plus runtime.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Define this macro so that compilation of this file generates the
|
||
|
* non-inlined versions of certain functions in cilk_api.h.
|
||
|
*/
|
||
|
#include "internal/abi.h"
|
||
|
#include "cilk/cilk_api.h"
|
||
|
#include "cilk/cilk_undocumented.h"
|
||
|
#include "cilktools/cilkscreen.h"
|
||
|
|
||
|
#include "global_state.h"
|
||
|
#include "os.h"
|
||
|
#include "os_mutex.h"
|
||
|
#include "bug.h"
|
||
|
#include "local_state.h"
|
||
|
#include "full_frame.h"
|
||
|
#include "pedigrees.h"
|
||
|
#include "scheduler.h"
|
||
|
#include "sysdep.h"
|
||
|
#include "except.h"
|
||
|
#include "cilk_malloc.h"
|
||
|
#include "record-replay.h"
|
||
|
|
||
|
#include <errno.h>
|
||
|
#include <string.h>
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
#ifdef _MSC_VER
|
||
|
/* Some versions of icc don't support limits.h on Linux if
|
||
|
gcc 4.3 or newer is installed. */
|
||
|
#include <limits.h>
|
||
|
|
||
|
/* Declare _ReturnAddress compiler intrinsic */
|
||
|
void * _ReturnAddress(void);
|
||
|
#pragma intrinsic(_ReturnAddress)
|
||
|
|
||
|
#include "sysdep-win.h" // Needed for sysdep_init_module()
|
||
|
#endif /* _WIN32 */
|
||
|
|
||
|
#include "metacall_impl.h"
|
||
|
#include "reducer_impl.h"
|
||
|
#include "cilk-ittnotify.h"
|
||
|
#include "cilk-tbb-interop.h"
|
||
|
|
||
|
/*
 * Sentinel stored in a TBB unwatch thunk's data field when the stack watch
 * was requested on a thread that had no worker yet.  The whole expansion is
 * parenthesized so the cast cannot re-associate with a neighboring operator
 * (e.g. "sizeof X" or "X + n").
 */
#define TBB_INTEROP_DATA_DELAYED_UNTIL_BIND ((void *)-1)
|
||
|
|
||
|
/**
|
||
|
* __cilkrts_bind_thread is a versioned entrypoint. The runtime should be
|
||
|
* exporting copies of __cilkrts_bind_thread for the current and all previous
|
||
|
* versions of the ABI.
|
||
|
*
|
||
|
* This macro should always be set to generate a version to match the current
|
||
|
* version; __CILKRTS_ABI_VERSION.
|
||
|
*/
|
||
|
#define BIND_THREAD_RTN __cilkrts_bind_thread_1
|
||
|
|
||
|
static inline
|
||
|
void enter_frame_internal(__cilkrts_stack_frame *sf, uint32_t version)
|
||
|
{
|
||
|
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||
|
if (w == 0) { /* slow path */
|
||
|
w = BIND_THREAD_RTN();
|
||
|
|
||
|
sf->flags = CILK_FRAME_LAST | (version << 24);
|
||
|
CILK_ASSERT((sf->flags & CILK_FRAME_FLAGS_MASK) == CILK_FRAME_LAST);
|
||
|
} else {
|
||
|
sf->flags = (version << 24);
|
||
|
CILK_ASSERT((sf->flags & CILK_FRAME_FLAGS_MASK) == 0);
|
||
|
}
|
||
|
sf->call_parent = w->current_stack_frame;
|
||
|
sf->worker = w;
|
||
|
w->current_stack_frame = sf;
|
||
|
}
|
||
|
|
||
|
/*
 * ABI version 0 entrypoint for entering a frame.  Delegates to
 * enter_frame_internal(), which binds the thread if it has no worker.
 */
CILK_ABI_VOID __cilkrts_enter_frame(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 0;

    enter_frame_internal(sf, abi_version);
}
|
||
|
|
||
|
/*
 * ABI version 1 entrypoint for entering a frame.  Same as the version 0
 * entrypoint, and additionally zeroes the frame's reserved field.
 */
CILK_ABI_VOID __cilkrts_enter_frame_1(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 1;

    enter_frame_internal(sf, abi_version);
    sf->reserved = 0;
}
|
||
|
|
||
|
static inline
|
||
|
void enter_frame_fast_internal(__cilkrts_stack_frame *sf, uint32_t version)
|
||
|
{
|
||
|
__cilkrts_worker *w = __cilkrts_get_tls_worker_fast();
|
||
|
sf->flags = version << 24;
|
||
|
sf->call_parent = w->current_stack_frame;
|
||
|
sf->worker = w;
|
||
|
w->current_stack_frame = sf;
|
||
|
}
|
||
|
|
||
|
/*
 * ABI version 0 fast entrypoint for entering a frame; never attempts to
 * bind the thread (see enter_frame_fast_internal).
 */
CILK_ABI_VOID __cilkrts_enter_frame_fast(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 0;

    enter_frame_fast_internal(sf, abi_version);
}
|
||
|
|
||
|
/*
 * ABI version 1 fast entrypoint for entering a frame.  Same as the version 0
 * fast entrypoint, and additionally zeroes the frame's reserved field.
 */
CILK_ABI_VOID __cilkrts_enter_frame_fast_1(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 1;

    enter_frame_fast_internal(sf, abi_version);
    sf->reserved = 0;
}
|
||
|
|
||
|
/**
|
||
|
* A component of the THE protocol. __cilkrts_undo_detach checks whether
|
||
|
* this frame's parent has been stolen. If it hasn't, the frame can return
|
||
|
* normally. If the parent has been stolen, of if we suspect it might be,
|
||
|
* then __cilkrts_leave_frame() needs to call into the runtime.
|
||
|
*
|
||
|
* @note __cilkrts_undo_detach() is comparing the exception pointer against
|
||
|
* the tail pointer. The exception pointer is modified when another worker
|
||
|
* is considering whether it can steal a frame. The head pointer is updated
|
||
|
* to match when the worker lock is taken out and the thief is sure that
|
||
|
* it can complete the steal. If the steal cannot be completed, the thief
|
||
|
* will restore the exception pointer.
|
||
|
*
|
||
|
* @return true if undo-detach failed.
|
||
|
*/
|
||
|
static int __cilkrts_undo_detach(__cilkrts_stack_frame *sf)
|
||
|
{
|
||
|
__cilkrts_worker *w = sf->worker;
|
||
|
__cilkrts_stack_frame *volatile *t = w->tail;
|
||
|
|
||
|
/* DBGPRINTF("%d - __cilkrts_undo_detach - sf %p\n", w->self, sf); */
|
||
|
|
||
|
--t;
|
||
|
w->tail = t;
|
||
|
/* On x86 the __sync_fetch_and_<op> family includes a
|
||
|
full memory barrier. In theory the sequence in the
|
||
|
second branch of the #if should be faster, but on
|
||
|
most x86 it is not. */
|
||
|
#if defined __i386__ || defined __x86_64__
|
||
|
__sync_fetch_and_and(&sf->flags, ~CILK_FRAME_DETACHED);
|
||
|
#else
|
||
|
__cilkrts_fence(); /* membar #StoreLoad */
|
||
|
sf->flags &= ~CILK_FRAME_DETACHED;
|
||
|
#endif
|
||
|
|
||
|
return __builtin_expect(t < w->exc, 0);
|
||
|
}
|
||
|
|
||
|
/*
 * __cilkrts_leave_frame
 *
 * Called as a function's frame is being exited.  Depending on sf->flags:
 *
 *  - CILK_FRAME_UNWINDING (Windows only): do nothing beyond updating the
 *    pedigree for a detached frame.
 *  - CILK_FRAME_DETACHED: the frame belongs to a spawn helper.  Forward a
 *    pending exception (non-Windows), otherwise try to undo the detach and
 *    let the runtime sort things out if the parent may have been stolen.
 *  - CILK_FRAME_LAST / CILK_FRAME_STOLEN: hand control to the runtime's
 *    return paths.
 *
 * The exiting function is expected to have checked for zero flags itself,
 * so there is no check for flags == 0 here.
 */
CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf)
{
    __cilkrts_worker *w = sf->worker;

#ifdef _WIN32
    /* If we were called from our own unwind handler, go no further. */
    if (sf->flags & CILK_FRAME_UNWINDING) {
        // A spawn helper frame (CILK_FRAME_DETACHED) must still update the
        // pedigree: it points to nodes allocated on the stack, and leaving
        // it stale results in an accvio/segfault if the pedigree is walked.
        // This applies to every spawn helper frame, even while an exception
        // is being processed.
        if (sf->flags & CILK_FRAME_DETACHED) {
            update_pedigree_on_leave_frame(w, sf);
        }
        return;
    }
#endif

#if CILK_LIB_DEBUG
    /* ensure the caller popped itself */
    CILK_ASSERT(w->current_stack_frame != sf);

    if (__builtin_expect(sf->flags & (CILK_FRAME_EXITING|CILK_FRAME_UNSYNCHED), 0)) {
        __cilkrts_bug("W%u: function exiting with invalid flags %02x\n",
                      w->self, sf->flags);
    }
#endif

    /* Must return normally if (1) the active function was called
       and not spawned, or (2) the parent has never been stolen. */
    if (sf->flags & CILK_FRAME_DETACHED) {
#ifndef _WIN32
        if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0)) {
            // The pedigree is deliberately not updated here: record/replay
            // needs the pedigree as it was before the update.
            __cilkrts_return_exception(sf);

            /* If return_exception returns, the caller is attached.
               leave_frame is called from a cleanup (destructor) for the
               frame object, and the caller will reraise the exception. */
            return;
        }
#endif

        // During replay, check whether w was the last worker to continue
        replay_wait_for_steal_if_parent_was_stolen(w);

        // Attempt to undo the detach.  When the exception check does not
        // return, the pedigree update for leaving the frame happens inside
        // that call.
        if (__builtin_expect(__cilkrts_undo_detach(sf), 0)) {
            __cilkrts_c_THE_exception_check(w, sf);
        }

        update_pedigree_on_leave_frame(w, sf);

        /* We get here when undo-detach wins the race with stealing.
           Otherwise this strand terminates and the caller will be resumed
           via setjmp at sync. */
        if (__builtin_expect(sf->flags & CILK_FRAME_FLAGS_MASK, 0)) {
            __cilkrts_bug("W%u: frame won undo-detach race with flags %02x\n",
                          w->self, sf->flags);
        }

        return;
    }

#if CILK_LIB_DEBUG
    sf->flags |= CILK_FRAME_EXITING;
#endif

    if (__builtin_expect(sf->flags & CILK_FRAME_LAST, 0)) {
        __cilkrts_c_return_from_initial(w); /* does return */
    } else if (sf->flags & CILK_FRAME_STOLEN) {
        __cilkrts_return(w); /* does return */
    }
}
|
||
|
|
||
|
/* Caller must have called setjmp. */
|
||
|
CILK_ABI_VOID __cilkrts_sync(__cilkrts_stack_frame *sf)
|
||
|
{
|
||
|
__cilkrts_worker *w = sf->worker;
|
||
|
/* DBGPRINTF("%d-%p __cilkrts_sync - sf %p\n", w->self, GetWorkerFiber(w), sf); */
|
||
|
if (__builtin_expect(!(sf->flags & CILK_FRAME_UNSYNCHED), 0))
|
||
|
__cilkrts_bug("W%u: double sync %p\n", w->self, sf);
|
||
|
#ifndef _WIN32
|
||
|
if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0)) {
|
||
|
__cilkrts_c_sync_except(w, sf);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
__cilkrts_c_sync(w, sf);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __cilkrts_get_sf
|
||
|
*
|
||
|
* Debugging aid to provide access to the current __cilkrts_stack_frame.
|
||
|
*
|
||
|
* Not documented!
|
||
|
*/
|
||
|
|
||
|
CILK_API_VOID_PTR
|
||
|
__cilkrts_get_sf(void)
|
||
|
{
|
||
|
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||
|
if (0 == w)
|
||
|
return NULL;
|
||
|
|
||
|
return w->current_stack_frame;
|
||
|
}
|
||
|
|
||
|
/* Call with global lock held */
|
||
|
static __cilkrts_worker *find_free_worker(global_state_t *g)
|
||
|
{
|
||
|
__cilkrts_worker *w = 0;
|
||
|
int i;
|
||
|
|
||
|
// Scan the non-system workers looking for one which is free so we can
|
||
|
// use it.
|
||
|
for (i = g->P - 1; i < g->total_workers; ++i) {
|
||
|
w = g->workers[i];
|
||
|
CILK_ASSERT(WORKER_SYSTEM != w->l->type);
|
||
|
if (w->l->type == WORKER_FREE) {
|
||
|
w->l->type = WORKER_USER;
|
||
|
w->l->team = w;
|
||
|
return w;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// If we ran out of workers, create a new one. It doesn't actually belong
|
||
|
// to the Cilk global state so nobody will ever try to steal from it.
|
||
|
w = (__cilkrts_worker *)__cilkrts_malloc(sizeof(*w));
|
||
|
__cilkrts_cilkscreen_ignore_block(w, w+1);
|
||
|
make_worker(g, -1, w);
|
||
|
w->l->type = WORKER_USER;
|
||
|
w->l->team = w;
|
||
|
return w;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __cilkrts_bind_thread
|
||
|
*
|
||
|
* Exported function to bind a thread to the runtime.
|
||
|
*
|
||
|
* This function name should always have a trailing suffix for the latest ABI
|
||
|
* version. This means that code built with a new compiler will not load
|
||
|
* against an old copy of the runtime.
|
||
|
*
|
||
|
* Symbols for the function called by code compiled with old versions of the
|
||
|
* compiler are created in an OS-specific manner:
|
||
|
* - On Windows the old symbols are defined in the cilk-exports.def linker
|
||
|
* definitions file as aliases of BIND_THREAD_RTN
|
||
|
* - On Linux aliased symbols are created for BIND_THREAD_RTN in this file
|
||
|
* - On MacOS the alternate entrypoints are implemented and simply call
|
||
|
* BIND_THREAD_RTN.
|
||
|
*/
|
||
|
CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void)
|
||
|
{
|
||
|
__cilkrts_worker *w;
|
||
|
int start_cilkscreen = 0;
|
||
|
#ifdef USE_ITTNOTIFY
|
||
|
static int unique_obj;
|
||
|
#endif
|
||
|
|
||
|
// Cannot set this pointer until after __cilkrts_init_internal() call:
|
||
|
global_state_t* g;
|
||
|
|
||
|
ITT_SYNC_CREATE (&unique_obj, "Initialization");
|
||
|
ITT_SYNC_PREPARE(&unique_obj);
|
||
|
ITT_SYNC_ACQUIRED(&unique_obj);
|
||
|
|
||
|
|
||
|
/* 1: Initialize and start the Cilk runtime */
|
||
|
__cilkrts_init_internal(1);
|
||
|
|
||
|
/*
|
||
|
* 2: Choose a worker for this thread (fail if none left). The table of
|
||
|
* user workers is protected by the global OS mutex lock.
|
||
|
*/
|
||
|
g = cilkg_get_global_state();
|
||
|
global_os_mutex_lock();
|
||
|
if (__builtin_expect(g->work_done, 0))
|
||
|
__cilkrts_bug("Attempt to enter Cilk while Cilk is shutting down");
|
||
|
w = find_free_worker(g);
|
||
|
CILK_ASSERT(w);
|
||
|
|
||
|
__cilkrts_set_tls_worker(w);
|
||
|
__cilkrts_cilkscreen_establish_worker(w);
|
||
|
{
|
||
|
full_frame *ff = __cilkrts_make_full_frame(w, 0);
|
||
|
|
||
|
ff->fiber_self = cilk_fiber_allocate_from_thread();
|
||
|
CILK_ASSERT(ff->fiber_self);
|
||
|
|
||
|
cilk_fiber_set_owner(ff->fiber_self, w);
|
||
|
cilk_fiber_tbb_interop_use_saved_stack_op_info(ff->fiber_self);
|
||
|
|
||
|
CILK_ASSERT(ff->join_counter == 0);
|
||
|
ff->join_counter = 1;
|
||
|
w->l->frame_ff = ff;
|
||
|
w->reducer_map = __cilkrts_make_reducer_map(w);
|
||
|
__cilkrts_set_leftmost_reducer_map(w->reducer_map, 1);
|
||
|
load_pedigree_leaf_into_user_worker(w);
|
||
|
}
|
||
|
|
||
|
// Make sure that the head and tail are reset, and saved_protected_tail
|
||
|
// allows all frames to be stolen.
|
||
|
//
|
||
|
// Note that we must NOT check w->exc, since workers that are trying to
|
||
|
// steal from it will be updating w->exc and we don't own the worker lock.
|
||
|
// It's not worth taking out the lock just for an assertion.
|
||
|
CILK_ASSERT(w->head == w->l->ltq);
|
||
|
CILK_ASSERT(w->tail == w->l->ltq);
|
||
|
CILK_ASSERT(w->protected_tail == w->ltq_limit);
|
||
|
|
||
|
// There may have been an old pending exception which was freed when the
|
||
|
// exception was caught outside of Cilk
|
||
|
w->l->pending_exception = NULL;
|
||
|
|
||
|
w->reserved = NULL;
|
||
|
|
||
|
// If we've already created a scheduling fiber for this worker, we'll just
|
||
|
// reuse it. If w->self < 0, it means that this is an ad-hoc user worker
|
||
|
// not known to the global state. Thus, we need to create a scheduling
|
||
|
// stack only if we don't already have one and w->self >= 0.
|
||
|
if (NULL == w->l->scheduling_fiber && w->self >= 0)
|
||
|
{
|
||
|
START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) {
|
||
|
// Create a scheduling fiber for this worker.
|
||
|
w->l->scheduling_fiber =
|
||
|
cilk_fiber_allocate_from_heap(CILK_SCHEDULING_STACK_SIZE);
|
||
|
cilk_fiber_reset_state(w->l->scheduling_fiber,
|
||
|
scheduler_fiber_proc_for_user_worker);
|
||
|
cilk_fiber_set_owner(w->l->scheduling_fiber, w);
|
||
|
} STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE);
|
||
|
}
|
||
|
|
||
|
// If the scheduling fiber is NULL, we've either exceeded our quota for
|
||
|
// fibers or workers or we're out of memory, so we should lose parallelism
|
||
|
// by disallowing stealing.
|
||
|
if (NULL == w->l->scheduling_fiber)
|
||
|
__cilkrts_disallow_stealing(w, NULL);
|
||
|
|
||
|
start_cilkscreen = (0 == w->g->Q);
|
||
|
|
||
|
if (w->self != -1) {
|
||
|
// w->self != -1, means that w is a normal user worker and must be
|
||
|
// accounted for by the global state since other workers can steal from
|
||
|
// it.
|
||
|
|
||
|
// w->self == -1, means that w is an overflow worker and was created on
|
||
|
// demand. I.e., it does not need to be accounted for by the global
|
||
|
// state.
|
||
|
|
||
|
__cilkrts_enter_cilk(w->g);
|
||
|
}
|
||
|
|
||
|
global_os_mutex_unlock();
|
||
|
|
||
|
/* If there's only 1 worker, the counts will be started in
|
||
|
* __cilkrts_scheduler */
|
||
|
if (g->P > 1)
|
||
|
{
|
||
|
START_INTERVAL(w, INTERVAL_IN_SCHEDULER);
|
||
|
START_INTERVAL(w, INTERVAL_WORKING);
|
||
|
}
|
||
|
|
||
|
ITT_SYNC_RELEASING(&unique_obj);
|
||
|
|
||
|
/* Turn on Cilkscreen if this is the first worker. This needs to be done
|
||
|
* when we are NOT holding the os mutex. */
|
||
|
if (start_cilkscreen)
|
||
|
__cilkrts_cilkscreen_enable_instrumentation();
|
||
|
|
||
|
return w;
|
||
|
}
|
||
|
|
||
|
#ifndef _MSC_VER
|
||
|
/*
|
||
|
* Define old version-specific symbols for binding threads (since they exist in
|
||
|
* all Cilk code). These aliases prohibit newly compiled code from loading an
|
||
|
* old version of the runtime. We can handle old code with a new runtime, but
|
||
|
* new code with an old runtime is verboten!
|
||
|
*
|
||
|
* For Windows, the aliased symbol is exported in cilk-exports.def.
|
||
|
*/
|
||
|
#if defined(_DARWIN_C_SOURCE) || defined(__APPLE__)
|
||
|
/**
|
||
|
* Mac OS X: Unfortunately, Darwin doesn't allow aliasing, so we just make a
|
||
|
* call and hope the optimizer does the right thing.
|
||
|
*/
|
||
|
CILK_ABI_WORKER_PTR __cilkrts_bind_thread (void) {
|
||
|
return BIND_THREAD_RTN();
|
||
|
}
|
||
|
#else
|
||
|
|
||
|
/**
|
||
|
* Macro to convert a parameter to a string. Used on Linux or BSD.
|
||
|
*/
|
||
|
#define STRINGIFY(x) #x
|
||
|
|
||
|
/**
|
||
|
* Macro to generate an __attribute__ for an aliased name
|
||
|
*/
|
||
|
#define ALIASED_NAME(x) __attribute__ ((alias (STRINGIFY(x))))
|
||
|
|
||
|
/**
|
||
|
* Linux or BSD: Use the alias attribute to make the labels for the versioned
|
||
|
* functions point to the same place in the code as the original. Using
|
||
|
* the two macros is annoying but required.
|
||
|
*/
|
||
|
|
||
|
CILK_ABI_WORKER_PTR __cilkrts_bind_thread(void)
|
||
|
ALIASED_NAME(BIND_THREAD_RTN);
|
||
|
|
||
|
#endif // defined _DARWIN_C_SOURCE || defined __APPLE__
|
||
|
#endif // !defined _MSC_VER
|
||
|
|
||
|
CILK_API_SIZET
|
||
|
__cilkrts_get_stack_size(void) {
|
||
|
return cilkg_get_stack_size();
|
||
|
}
|
||
|
|
||
|
// Method for debugging.
|
||
|
CILK_API_VOID __cilkrts_dump_stats(void)
|
||
|
{
|
||
|
// While the stats aren't protected by the global OS mutex, the table
|
||
|
// of workers is, so take out the global OS mutex while we're doing this
|
||
|
global_os_mutex_lock();
|
||
|
if (cilkg_is_published()) {
|
||
|
global_state_t *g = cilkg_get_global_state();
|
||
|
__cilkrts_dump_stats_to_stderr(g);
|
||
|
}
|
||
|
else {
|
||
|
__cilkrts_bug("Attempting to report Cilk stats before the runtime has started\n");
|
||
|
}
|
||
|
global_os_mutex_unlock();
|
||
|
}
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
CILK_ABI_THROWS_VOID __cilkrts_rethrow(__cilkrts_stack_frame *sf)
|
||
|
{
|
||
|
__cilkrts_gcc_rethrow(sf);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* __cilkrts_unwatch_stack
|
||
|
*
|
||
|
* Callback for TBB to tell us they don't want to watch the stack anymore
|
||
|
*/
|
||
|
|
||
|
static __cilk_tbb_retcode __cilkrts_unwatch_stack(void *data)
|
||
|
{
|
||
|
__cilk_tbb_stack_op_thunk o;
|
||
|
|
||
|
// If the cilk_fiber wasn't available fetch it now
|
||
|
if (TBB_INTEROP_DATA_DELAYED_UNTIL_BIND == data)
|
||
|
{
|
||
|
full_frame *ff;
|
||
|
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||
|
if (NULL == w)
|
||
|
{
|
||
|
// Free any saved stack op information
|
||
|
cilk_fiber_tbb_interop_free_stack_op_info();
|
||
|
|
||
|
return 0; /* Success! */
|
||
|
}
|
||
|
|
||
|
__cilkrts_worker_lock(w);
|
||
|
ff = w->l->frame_ff;
|
||
|
__cilkrts_frame_lock(w,ff);
|
||
|
data = ff->fiber_self;
|
||
|
__cilkrts_frame_unlock(w,ff);
|
||
|
__cilkrts_worker_unlock(w);
|
||
|
}
|
||
|
|
||
|
#if CILK_LIB_DEBUG /* Debug code */
|
||
|
/* Get current stack */
|
||
|
full_frame *ff;
|
||
|
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||
|
__cilkrts_worker_lock(w);
|
||
|
ff = w->l->frame_ff;
|
||
|
__cilkrts_frame_lock(w,ff);
|
||
|
CILK_ASSERT (data == ff->fiber_self);
|
||
|
__cilkrts_frame_unlock(w,ff);
|
||
|
__cilkrts_worker_unlock(w);
|
||
|
#endif
|
||
|
|
||
|
/* Clear the callback information */
|
||
|
o.data = NULL;
|
||
|
o.routine = NULL;
|
||
|
cilk_fiber_set_stack_op((cilk_fiber*)data, o);
|
||
|
|
||
|
// Note. Do *NOT* free any saved stack information here. If they want to
|
||
|
// free the saved stack op information, they'll do it when the thread is
|
||
|
// unbound
|
||
|
|
||
|
return 0; /* Success! */
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __cilkrts_watch_stack
|
||
|
*
|
||
|
* Called by TBB, defined by Cilk.
|
||
|
*
|
||
|
* Requests that Cilk invoke the stack op routine when it orphans a stack.
|
||
|
* Cilk sets *u to a thunk that TBB should call when it is no longer interested
|
||
|
* in watching the stack.
|
||
|
*/
|
||
|
|
||
|
CILK_API_TBB_RETCODE
|
||
|
__cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
|
||
|
__cilk_tbb_stack_op_thunk o)
|
||
|
{
|
||
|
cilk_fiber* current_fiber;
|
||
|
__cilkrts_worker *w;
|
||
|
|
||
|
#ifdef _MSC_VER
|
||
|
// This may be called by TBB *before* the OS has given us our
|
||
|
// initialization call. Make sure the module is initialized.
|
||
|
sysdep_init_module();
|
||
|
#endif
|
||
|
|
||
|
// Fetch the __cilkrts_worker bound to this thread
|
||
|
w = __cilkrts_get_tls_worker();
|
||
|
if (NULL == w)
|
||
|
{
|
||
|
// Save data for later. We'll deal with it when/if this thread binds
|
||
|
// to the runtime
|
||
|
cilk_fiber_tbb_interop_save_stack_op_info(o);
|
||
|
|
||
|
u->routine = __cilkrts_unwatch_stack;
|
||
|
u->data = TBB_INTEROP_DATA_DELAYED_UNTIL_BIND;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* Get current stack */
|
||
|
__cilkrts_worker_lock(w);
|
||
|
current_fiber = w->l->frame_ff->fiber_self;
|
||
|
__cilkrts_worker_unlock(w);
|
||
|
|
||
|
/* CILK_ASSERT( !sd->stack_op_data ); */
|
||
|
/* CILK_ASSERT( !sd->stack_op_routine ); */
|
||
|
|
||
|
/* Give TBB our callback */
|
||
|
u->routine = __cilkrts_unwatch_stack;
|
||
|
u->data = current_fiber;
|
||
|
/* Save the callback information */
|
||
|
cilk_fiber_set_stack_op(current_fiber, o);
|
||
|
|
||
|
return 0; /* Success! */
|
||
|
}
|
||
|
|
||
|
|
||
|
// This function must be called only within a continuation, within the stack
|
||
|
// frame of the continuation itself.
|
||
|
CILK_API_INT __cilkrts_synched(void)
|
||
|
{
|
||
|
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||
|
|
||
|
// If we don't have a worker, then we're synched by definition :o)
|
||
|
if (NULL == w)
|
||
|
return 1;
|
||
|
|
||
|
// Check to see if we are in a stolen continuation. If not, then
|
||
|
// we are synched.
|
||
|
uint32_t flags = w->current_stack_frame->flags;
|
||
|
if (0 == (flags & CILK_FRAME_UNSYNCHED))
|
||
|
return 1;
|
||
|
|
||
|
// We are in a stolen continutation, but the join counter might have been
|
||
|
// decremented to one, making us synched again. Get the full frame so
|
||
|
// that we can check the join counter. ASSUME: frame_ff is stable (can be
|
||
|
// read without a lock) in a stolen continuation -- it can't be stolen
|
||
|
// while it's currently executing.
|
||
|
full_frame *ff = w->l->frame_ff;
|
||
|
|
||
|
// Make sure we have a full frame
|
||
|
// TBD: Don't think that we should ever not have a full frame here.
|
||
|
// CILK_ASSERT(NULL != ff); ?
|
||
|
if (NULL == ff)
|
||
|
return 1;
|
||
|
|
||
|
// We're synched if there are no outstanding children at this instant in
|
||
|
// time. Note that this is a known race, but it's ok since we're only
|
||
|
// reading. We can get false negatives, but not false positives. (I.e.,
|
||
|
// we can read a non-one join_counter just before it goes to one, but the
|
||
|
// join_counter cannot go from one to greater than one while we're
|
||
|
// reading.)
|
||
|
return 1 == ff->join_counter;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
CILK_API_INT
|
||
|
__cilkrts_bump_loop_rank_internal(__cilkrts_worker* w)
|
||
|
{
|
||
|
// If we don't have a worker, then the runtime is not bound to this
|
||
|
// thread and there is no rank to increment
|
||
|
if (NULL == w)
|
||
|
return -1;
|
||
|
|
||
|
// We're at the start of the loop body. Advance the cilk_for loop
|
||
|
// body pedigree by following the parent link and updating its
|
||
|
// rank.
|
||
|
|
||
|
// Normally, we'd just write "w->pedigree.parent->rank++"
|
||
|
// But we need to cast away the "const".
|
||
|
((__cilkrts_pedigree*) w->pedigree.parent)->rank++;
|
||
|
|
||
|
// Zero the worker's pedigree rank since this is the start of a new
|
||
|
// pedigree domain.
|
||
|
w->pedigree.rank = 0;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
CILK_ABI_VOID
|
||
|
__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
|
||
|
{
|
||
|
// Pass call onto OS/architecture dependent function
|
||
|
sysdep_save_fp_ctrl_state(sf);
|
||
|
}
|
||
|
|
||
|
/* end cilk-abi.c */
|