3038054c68
From-SVN: r204173
478 lines
18 KiB
C++
478 lines
18 KiB
C++
/* cilk_fake.h -*-C++-*-
|
|
*
|
|
*************************************************************************
|
|
*
|
|
* @copyright
|
|
* Copyright (C) 2011-2013, Intel Corporation
|
|
* All rights reserved.
|
|
*
|
|
* @copyright
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* @copyright
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
|
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
**************************************************************************/
|
|
|
|
/**
|
|
* @file cilk_fake.h
|
|
*
|
|
* @brief Macros to simulate a compiled Cilk program.
|
|
*
|
|
* Used carefully, these macros can be used to create a Cilk program with a
|
|
* non-Cilk compiler by manually inserting the code necessary for interacting
|
|
* with the Cilk runtime library. They are not intended to be pretty (you
|
|
* wouldn't want to write a whole program using these macros), but they are
|
|
* useful for experiments. They also work well as an illustration of what the
|
|
* compiler generates.
|
|
*
|
|
* Details of the mechanisms used in these macros are described in
|
|
* design-notes/CilkPlusABI.docx
|
|
*
|
|
* Example 1: fib in C++
|
|
* ---------------------
|
|
*
|
|
* #include <internal/cilk_fake.h>
|
|
*
|
|
* int fib(int n)
|
|
* {
|
|
* CILK_FAKE_PROLOG();
|
|
*
|
|
* if (n < 2)
|
|
* return n;
|
|
*
|
|
* int a, b;
|
|
* CILK_FAKE_SPAWN_R(a, fib(n - 1));
|
|
* b = fib(n - 2);
|
|
* CILK_FAKE_SYNC();
|
|
*
|
|
* return a + b;
|
|
* }
|
|
*
|
|
*
|
|
* Example 2: fib in C
|
|
* -------------------
|
|
*
|
|
* #include <internal/cilk_fake.h>
|
|
*
|
|
* int fib(int n);
|
|
*
|
|
* void fib_spawn_helper(__cilkrts_stack_frame* parent_sf, int* a, int n)
|
|
* {
|
|
* CILK_FAKE_SPAWN_HELPER_PROLOG(*parent_sf);
|
|
* *a = fib(n - 1);
|
|
* CILK_FAKE_SPAWN_HELPER_EPILOG();
|
|
* }
|
|
*
|
|
* int fib(int n)
|
|
* {
|
|
* CILK_FAKE_PROLOG();
|
|
*
|
|
* if (n < 2)
|
|
* return n;
|
|
*
|
|
* int a, b;
|
|
* CILK_FAKE_CALL_SPAWN_HELPER(fib_spawn_helper(&__cilk_sf, &a, n));
|
|
* b = fib(n - 2);
|
|
* CILK_FAKE_SYNC();
|
|
*
|
|
* CILK_FAKE_EPILOG();
|
|
* return a + b;
|
|
* }
|
|
*/
|
|
|
|
#ifndef INCLUDED_CILK_FAKE_DOT_H
|
|
#define INCLUDED_CILK_FAKE_DOT_H
|
|
|
|
// This header implements ABI version 1. If __CILKRTS_ABI_VERSION is already
|
|
// defined but is less than 1, then the data structures in <internal/abi.h>
|
|
// will not match the expectations of facilities in this header. Therefore,
|
|
// for successful compilation, __CILKRTS_ABI_VERSION must either be not
|
|
// defined, or defined to be 1 or greater.
|
|
#ifndef __CILKRTS_ABI_VERSION
    /* ABI version was not specified.  Default to 1, the version that this
     * header implements. */
#   define __CILKRTS_ABI_VERSION 1
#elif __CILKRTS_ABI_VERSION < 1
    /* ABI version was specified but is older than this header supports.
     * Fail compilation. */
#   error cilk_fake.h requires an ABI version of 1 or greater
#endif
|
|
|
|
#include <internal/abi.h>
|
|
|
|
// alloca is defined in malloc.h on Windows, alloca.h on Linux
|
|
#ifndef _MSC_VER
|
|
#include <alloca.h>
|
|
#else
|
|
#include <malloc.h>
|
|
// Define offsetof
|
|
#include <stddef.h>
|
|
#endif
|
|
|
|
/* ABI version shifted into bits 24 and up, the form in which it is stored in
 * a frame's flags field.  Derived from __CILKRTS_ABI_VERSION so that a
 * version other than the one defined in abi.h can be used. */
#define CILK_FAKE_VERSION_FLAG ((__CILKRTS_ABI_VERSION) << 24)
|
|
|
|
/* Initialize frame. To be called when worker is known */
|
|
__CILKRTS_INLINE void __cilk_fake_enter_frame_fast(__cilkrts_stack_frame *sf,
|
|
__cilkrts_worker *w)
|
|
{
|
|
sf->call_parent = w->current_stack_frame;
|
|
sf->worker = w;
|
|
sf->flags = CILK_FAKE_VERSION_FLAG;
|
|
w->current_stack_frame = sf;
|
|
}
|
|
|
|
/* Initialize frame. To be called when worker is not known */
|
|
__CILKRTS_INLINE void __cilk_fake_enter_frame(__cilkrts_stack_frame *sf)
|
|
{
|
|
__cilkrts_worker* w = __cilkrts_get_tls_worker();
|
|
uint32_t last_flag = 0;
|
|
if (! w) {
|
|
w = __cilkrts_bind_thread_1();
|
|
last_flag = CILK_FRAME_LAST;
|
|
}
|
|
__cilk_fake_enter_frame_fast(sf, w);
|
|
sf->flags |= last_flag;
|
|
}
|
|
|
|
/* Initialize frame. To be called within the spawn helper */
|
|
__CILKRTS_INLINE void __cilk_fake_helper_enter_frame(
|
|
__cilkrts_stack_frame *sf,
|
|
__cilkrts_stack_frame *parent_sf)
|
|
{
|
|
sf->worker = 0;
|
|
sf->call_parent = parent_sf;
|
|
}
|
|
|
|
/* Called from the spawn helper to push the parent continuation on the task
|
|
* deque so that it can be stolen.
|
|
*/
|
|
__CILKRTS_INLINE void __cilk_fake_detach(__cilkrts_stack_frame *sf)
|
|
{
|
|
/* Initialize spawn helper frame.
|
|
* call_parent was saved in __cilk_fake_helper_enter_frame */
|
|
__cilkrts_stack_frame *parent = sf->call_parent;
|
|
__cilkrts_worker *w = parent->worker;
|
|
__cilk_fake_enter_frame_fast(sf, w);
|
|
|
|
/* Append a node to the pedigree */
|
|
sf->spawn_helper_pedigree = w->pedigree;
|
|
parent->parent_pedigree = w->pedigree;
|
|
w->pedigree.rank = 0;
|
|
w->pedigree.parent = &sf->spawn_helper_pedigree;
|
|
|
|
/* Push parent onto the task deque */
|
|
__cilkrts_stack_frame *volatile *tail = w->tail;
|
|
*tail++ = sf->call_parent;
|
|
/* The stores must be separated by a store fence (noop on x86)
|
|
* or the second store is a release (st8.rel on Itanium) */
|
|
w->tail = tail;
|
|
sf->flags |= CILK_FRAME_DETACHED;
|
|
}
|
|
|
|
/* Tested by CILK_FAKE_FORCE_FRAME_PTR(), below.  Its value is never changed
 * and has bit 0 clear. */
static int __cilk_fake_dummy = 8;

/* Force the compiler into generating a frame pointer for the enclosing
 * function by making it contain a call to alloca().  Because bit 0 of
 * __cilk_fake_dummy is clear, the alloca() is never actually executed; the
 * test against a non-constant variable is there so that the compiler does not
 * optimize the call away.
 */
#define CILK_FAKE_FORCE_FRAME_PTR(sf) do {                                  \
    if (__builtin_expect(__cilk_fake_dummy & 1, 0)) {                       \
        (sf).worker = (__cilkrts_worker*) alloca(__cilk_fake_dummy);        \
    }                                                                       \
} while (0)
|
|
|
|
#ifdef CILK_FAKE_NO_SHRINKWRAP
    /* "shrink-wrap" optimization disabled.  The frame is fully initialized
     * on entry and nothing is left to do at the first spawn.
     */
#   define CILK_FAKE_INITIAL_ENTER_FRAME(sf)                                \
        __cilk_fake_enter_frame(&(sf))
#   define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) ((void) &(sf))
#else
    /* "shrink-wrap" optimization enabled.  On entry only the worker pointer
     * is cleared; the real initialization is deferred until the first spawn
     * and is recognized as pending by the worker pointer still being null.
     */
#   define CILK_FAKE_INITIAL_ENTER_FRAME(sf) ((void) ((sf).worker = 0))
#   define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) do {                          \
        if ((sf).worker == 0) __cilk_fake_enter_frame(&(sf));               \
    } while (0)
#endif
|
|
|
|
/* Prologue of a spawning function.  Declares the stack frame variable
 * __cilk_sf, forces a frame pointer for the function, and performs the
 * initial frame setup (see CILK_FAKE_INITIAL_ENTER_FRAME).
 */
#define CILK_FAKE_PROLOG()                                                  \
    __cilk_fake_stack_frame __cilk_sf;                                      \
    CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf);                                   \
    CILK_FAKE_INITIAL_ENTER_FRAME(__cilk_sf)
|
|
|
|
/* Prologue of a spawning function for callers that already hold the current
 * worker.  Declares the stack frame variable __cilk_sf, forces a frame
 * pointer, and initializes the frame immediately with worker w, skipping the
 * TLS lookup.
 */
#define CILK_FAKE_PROLOG_FAST(w)                                            \
    __cilk_fake_stack_frame __cilk_sf;                                      \
    CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf);                                   \
    __cilk_fake_enter_frame_fast(&__cilk_sf, (w))
|
|
|
|
/* Simulate a cilk_sync on the frame declared by the prologue (__cilk_sf). */
#define CILK_FAKE_SYNC()                                                    \
    CILK_FAKE_SYNC_IMP(__cilk_sf)
|
|
|
|
/* Epilog at the end of a spawning function.  In C it expands to the frame
 * cleanup (sync, then leave the frame).  In C++ the cleanup is run by the
 * __cilk_fake_stack_frame destructor, so the macro only references the frame.
 */
#ifndef __cplusplus
#   define CILK_FAKE_EPILOG() CILK_FAKE_CLEANUP_FRAME(__cilk_sf)
#else
#   define CILK_FAKE_EPILOG() ((void) __cilk_sf)
#endif /* C++ */
|
|
|
|
/* Implementation of the spawning function epilog.  Used by the C form of
 * CILK_FAKE_EPILOG and by the __cilk_fake_stack_frame destructor.
 *
 * Does nothing if the frame was never entered (worker pointer still null).
 * Otherwise syncs, pops the frame off the call chain and, if any flag other
 * than the bare version flag is set, calls the runtime to leave the frame.
 */
#define CILK_FAKE_CLEANUP_FRAME(sf) do {                                    \
    if ((sf).worker) {                                                      \
        CILK_FAKE_SYNC_IMP(sf);                                             \
        CILK_FAKE_POP_FRAME(sf);                                            \
        if ((sf).flags != CILK_FAKE_VERSION_FLAG)                           \
            __cilkrts_leave_frame(&(sf));                                   \
    }                                                                       \
} while (0)
|
|
|
|
/* Implementation of CILK_FAKE_SYNC with an explicit frame argument.
 *
 * If the frame is flagged as unsynched, the worker's pedigree is recorded in
 * the frame, the floating-point control state and the continuation are
 * saved, and the runtime is called to sync.  In every case the rank of the
 * worker's pedigree is then incremented.
 *
 * NOTE(review): (sf).flags is read and (sf).worker is dereferenced
 * unconditionally.  With shrink-wrap enabled the frame is only initialized
 * at the first spawn, so presumably a sync must not be reached before a
 * spawn - confirm against callers.
 */
#define CILK_FAKE_SYNC_IMP(sf) do {                                         \
    if (__builtin_expect(((sf).flags & CILK_FRAME_UNSYNCHED) != 0, 0)) {    \
        (sf).parent_pedigree = (sf).worker->pedigree;                       \
        CILK_FAKE_SAVE_FP(sf);                                              \
        if (CILK_SETJMP((sf).ctx) == 0)                                     \
            __cilkrts_sync(&(sf));                                          \
    }                                                                       \
    ++(sf).worker->pedigree.rank;                                           \
} while (0)
|
|
|
|
/* Save the floating-point control registers (MXCSR and the x87 control word)
 * into the mxcsr and fpcsr fields of frame sf.
 * The definition of CILK_FAKE_SAVE_FP is compiler specific (and
 * architecture specific on Windows).
 */
#ifdef _MSC_VER
#   define MXCSR_OFFSET offsetof(struct __cilkrts_stack_frame, mxcsr)
#   define FPCSR_OFFSET offsetof(struct __cilkrts_stack_frame, fpcsr)
#   if defined(_M_IX86)
        /* Windows x86
         * NOTE(review): "mov eax, sf" looks like it loads the value stored
         * at sf rather than its address; confirm against a Windows x86
         * build before relying on this branch. */
#       define CILK_FAKE_SAVE_FP(sf) do {                                   \
            __asm                                                           \
            {                                                               \
                mov eax, sf                                                 \
                stmxcsr [eax+MXCSR_OFFSET]                                  \
                fnstcw [eax+FPCSR_OFFSET]                                   \
            }                                                               \
        } while (0)
#   elif defined(_M_X64)
        /* Windows Intel64 - Not needed - saved by setjmp call */
#       define CILK_FAKE_SAVE_FP(sf) ((void) sf)
#   else
#       error "Unknown architecture"
#   endif /* Microsoft architecture specifics */
#else
    /* Non-Windows.  Both instructions store to memory, so the two fields are
     * declared as output operands; this tells the compiler that they are
     * modified.  The asm is volatile so that it is emitted at every
     * expansion. */
#   define CILK_FAKE_SAVE_FP(sf) do {                                       \
        __asm__ __volatile__ ( "stmxcsr %0\n\t"                             \
                               "fnstcw %1"                                  \
                               : "=m" ((sf).mxcsr), "=m" ((sf).fpcsr));     \
    } while (0)
#endif
|
|
|
|
/* Call the spawn helper as part of a fake spawn.  Makes sure the frame has
 * been entered (it may have been deferred by shrink-wrap), saves the
 * floating-point control state and the continuation in __cilk_sf, and then
 * evaluates the helper call on the direct return of CILK_SETJMP.
 */
#define CILK_FAKE_CALL_SPAWN_HELPER(helper) do {                            \
    CILK_FAKE_DEFERRED_ENTER_FRAME(__cilk_sf);                              \
    CILK_FAKE_SAVE_FP(__cilk_sf);                                           \
    if (__builtin_expect(CILK_SETJMP(__cilk_sf.ctx) == 0, 1)) {             \
        helper;                                                             \
    }                                                                       \
} while (0)
|
|
|
|
/* Body of a spawn helper function.  Takes the parent (spawning) frame, the
 * expression to spawn and, optionally, any number of statements to be
 * executed before detaching.  The order is: helper prolog, the extra
 * statements, detach, the spawned expression, helper epilog.
 */
#define CILK_FAKE_SPAWN_HELPER_BODY(parent_sf, expr, ...)                   \
    CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf);                               \
    __VA_ARGS__;                                                            \
    __cilk_fake_detach(&__cilk_sf);                                         \
    expr;                                                                   \
    CILK_FAKE_SPAWN_HELPER_EPILOG()
|
|
|
|
/* Prolog for a spawn helper function.  Declares the helper's own frame
 * __cilk_sf and records the address of parent_sf in it. */
#define CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf)                            \
    __cilk_fake_spawn_helper_stack_frame __cilk_sf;                         \
    __cilk_fake_helper_enter_frame(&__cilk_sf, &(parent_sf))
|
|
|
|
/* Implementation of the spawn helper epilog.  Used by the C form of
 * CILK_FAKE_SPAWN_HELPER_EPILOG and by the
 * __cilk_fake_spawn_helper_stack_frame destructor.
 *
 * Does nothing if the worker pointer of the frame is still null.  Otherwise
 * pops the frame off the call chain and calls the runtime to leave it.
 */
#define CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(sf) do {                       \
    if ((sf).worker) {                                                      \
        CILK_FAKE_POP_FRAME(sf);                                            \
        __cilkrts_leave_frame(&(sf));                                       \
    }                                                                       \
} while (0)
|
|
|
|
/* Epilog to execute at the end of a spawn helper.  In C it expands to the
 * helper frame cleanup.  In C++ the cleanup is handled by the
 * __cilk_fake_spawn_helper_stack_frame destructor, so the macro only
 * references the frame.
 */
#ifndef __cplusplus
#   define CILK_FAKE_SPAWN_HELPER_EPILOG()                                  \
        CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(__cilk_sf)
#else
#   define CILK_FAKE_SPAWN_HELPER_EPILOG() ((void) __cilk_sf)
#endif
|
|
|
|
/* Pop the current frame off of the call chain: the call parent of sf becomes
 * the worker's current frame again, and the call_parent link of sf is
 * cleared.
 */
#define CILK_FAKE_POP_FRAME(sf) do {                                        \
    (sf).worker->current_stack_frame = (sf).call_parent;                    \
    (sf).call_parent = 0;                                                   \
} while (0)
|
|
|
|
/* CILK_FAKE_EXCEPT_BEGIN / CILK_FAKE_EXCEPT_END bracket a region of a
 * spawning function.
 *
 * On Windows the region runs on the direct return of CILK_SETJMP on the
 * frame's except_ctx.  If control comes back through that context, the frame
 * is asserted to be excepting and no longer unsynched, and the exception is
 * rethrown through the runtime.
 *
 * Elsewhere the macros simply open and close a block.  Both spellings are
 * provided there: the CILK_FAKE_ names match the Windows definitions, and the
 * older CILK_EXCEPT_ names are kept for existing users.
 */
#ifdef _WIN32
#   define CILK_FAKE_EXCEPT_BEGIN                                           \
    if (0 == CILK_SETJMP(__cilk_sf.except_ctx)) {

#   define CILK_FAKE_EXCEPT_END                                             \
    } else {                                                                \
        assert((__cilk_sf.flags &                                           \
                (CILK_FRAME_UNSYNCHED | CILK_FRAME_EXCEPTING))              \
               == CILK_FRAME_EXCEPTING);                                    \
        __cilkrts_rethrow(&__cilk_sf);                                      \
        exit(0);                                                            \
    }
#else
#   define CILK_FAKE_EXCEPT_BEGIN {
#   define CILK_FAKE_EXCEPT_END }
    /* Backward-compatible spellings */
#   define CILK_EXCEPT_BEGIN CILK_FAKE_EXCEPT_BEGIN
#   define CILK_EXCEPT_END CILK_FAKE_EXCEPT_END
#endif
|
|
|
|
#ifdef __cplusplus
|
|
// The following definitions depend on C++ features.
|
|
|
|
// Wrap a functor (probably a lambda), so that a call to it cannot be
|
|
// inlined.
|
|
template <typename F>
|
|
class __cilk_fake_noinline_wrapper
|
|
{
|
|
F&& m_fn;
|
|
public:
|
|
__cilk_fake_noinline_wrapper(F&& fn) : m_fn(static_cast<F&&>(fn)) { }
|
|
|
|
#ifdef _WIN32
|
|
__declspec(noinline) void operator()(__cilkrts_stack_frame *sf);
|
|
#else
|
|
void operator()(__cilkrts_stack_frame *sf) __attribute__((noinline));
|
|
#endif
|
|
|
|
};
|
|
|
|
template <typename F>
|
|
void __cilk_fake_noinline_wrapper<F>::operator()(__cilkrts_stack_frame *sf)
|
|
{
|
|
m_fn(sf);
|
|
}
|
|
|
|
template <typename F>
|
|
inline
|
|
__cilk_fake_noinline_wrapper<F> __cilk_fake_make_noinline_wrapper(F&& fn)
|
|
{
|
|
return __cilk_fake_noinline_wrapper<F>(static_cast<F&&>(fn));
|
|
}
|
|
|
|
// Simulate "_Cilk_spawn expr", where expr must be a function call.
//
// Note: this macro does not construct function arguments correctly.
// According to the ABI specification, function arguments should be evaluated
// before the detach and destroyed after the detach.  This macro both
// evaluates and destroys them after the detach.  Consequently, if any part of
// the argument expression depends on a value that is modified in the
// continuation of the spawn, a race will occur between the continuation and
// the argument evaluation.
//
// To work around this problem, the macro accepts an arbitrary list of
// declarations and statements (separated by semicolons) that are evaluated
// before the detach.  Thus, to simulate:
//
//     _Cilk_spawn f(expr);
//
// one would write:
//
//     CILK_FAKE_SPAWN(f(arg), auto arg = expr);
//
// Although they appear in the reverse order, the 'arg' variable is created
// and initialized before the detach, and the call to f(arg) occurs after the
// detach.
#define CILK_FAKE_SPAWN(expr, ...)                                          \
    CILK_FAKE_CALL_SPAWN_HELPER(                                            \
        CILK_FAKE_SPAWN_HELPER(expr, __VA_ARGS__)(&__cilk_sf))
|
|
|
|
// Simulate "ret = cilk_spawn expr" by spawning the assignment itself.  The
// constraints described for CILK_FAKE_SPAWN apply here as well.
#define CILK_FAKE_SPAWN_R(ret, expr, ...)                                   \
    CILK_FAKE_SPAWN(((ret) = (expr)), __VA_ARGS__)
|
|
|
|
// Create a spawn helper as a C++11 lambda function that captures by reference
// and takes the parent frame as its argument.  The lambda is wrapped so that
// the call to it cannot be inlined.  In addition to the expression to spawn,
// this macro takes any number of statements to be executed before detaching.
#define CILK_FAKE_SPAWN_HELPER(expr, ...)                                   \
    __cilk_fake_make_noinline_wrapper(                                      \
        [&](__cilkrts_stack_frame *parent_sf) {                             \
            CILK_FAKE_SPAWN_HELPER_BODY(*parent_sf, expr, __VA_ARGS__);     \
        })
|
|
|
|
// C++ version of a __cilkrts_stack_frame for a spawning function.
|
|
// This struct is identical to __cilkrts_stack_frame except that the
|
|
// destructor automatically does frame cleanup.
|
|
struct __cilk_fake_stack_frame : __cilkrts_stack_frame
|
|
{
|
|
// Extension of __cilkrts_stack_frame with constructor and destructor
|
|
__cilk_fake_stack_frame() { }
|
|
__forceinline ~__cilk_fake_stack_frame() {
|
|
CILK_FAKE_CLEANUP_FRAME(*this);
|
|
}
|
|
};
|
|
|
|
// C++ version of a __cilkrts_stack_frame for a spawn helper.
|
|
// This struct is identical to __cilkrts_stack_frame except that the
|
|
// destructor automatically does frame cleanup.
|
|
struct __cilk_fake_spawn_helper_stack_frame : __cilkrts_stack_frame
|
|
{
|
|
// Extension of __cilkrts_stack_frame with constructor and destructor
|
|
__cilk_fake_spawn_helper_stack_frame() { worker = 0; }
|
|
__forceinline ~__cilk_fake_spawn_helper_stack_frame() {
|
|
CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(*this);
|
|
}
|
|
};
|
|
#else
|
|
// For C, __cilk_fake_stack_frame and __cilk_fake_spawn_helper_stack_frame are
|
|
// identical to __cilkrts_stack_frame. Frame cleanup must be performed
|
|
// excplicitly (in CILK_FAKE_EPILOG and CILK_FAKE_SPAWN_HELPER_EPILOG)
|
|
typedef __cilkrts_stack_frame __cilk_fake_stack_frame;
|
|
typedef __cilkrts_stack_frame __cilk_fake_spawn_helper_stack_frame;
|
|
#endif
|
|
|
|
#endif // ! defined(INCLUDED_CILK_FAKE_DOT_H)
|