418 lines
14 KiB
C
418 lines
14 KiB
C
|
/* global_state.h -*-C++-*-
|
||
|
*
|
||
|
*************************************************************************
|
||
|
*
|
||
|
* @copyright
|
||
|
* Copyright (C) 2009-2013, Intel Corporation
|
||
|
* All rights reserved.
|
||
|
*
|
||
|
* @copyright
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions
|
||
|
* are met:
|
||
|
*
|
||
|
* * Redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer.
|
||
|
* * Redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in
|
||
|
* the documentation and/or other materials provided with the
|
||
|
* distribution.
|
||
|
* * Neither the name of Intel Corporation nor the names of its
|
||
|
* contributors may be used to endorse or promote products derived
|
||
|
* from this software without specific prior written permission.
|
||
|
*
|
||
|
* @copyright
|
||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||
|
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||
|
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||
|
**************************************************************************/
|
||
|
|
||
|
/**
|
||
|
* @file global_state.h
|
||
|
*
|
||
|
* @brief The global_state_t structure contains most of the global context
|
||
|
* maintained by the Intel Cilk runtime.
|
||
|
*/
|
||
|
|
||
|
#ifndef INCLUDED_GLOBAL_STATE_DOT_H
|
||
|
#define INCLUDED_GLOBAL_STATE_DOT_H
|
||
|
|
||
|
#include <cilk/common.h>
|
||
|
|
||
|
#include "frame_malloc.h"
|
||
|
#include "stats.h"
|
||
|
#include "bug.h"
|
||
|
#include "cilk_fiber.h"
|
||
|
|
||
|
__CILKRTS_BEGIN_EXTERN_C
|
||
|
|
||
|
/**
|
||
|
* Non-null place-holder for a stack handle that has no meaningful value.
|
||
|
*/
|
||
|
#define PLACEHOLDER_FIBER ((cilk_fiber *) -2)
|
||
|
|
||
|
/**
|
||
|
* States for record_or_replay
|
||
|
*/
|
||
|
enum record_replay_t {
|
||
|
RECORD_REPLAY_NONE,
|
||
|
RECORD_LOG,
|
||
|
REPLAY_LOG
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* @brief The global state is a structure that is shared by all workers in
|
||
|
* Cilk.
|
||
|
*
|
||
|
* Make the structure ready for use by calling
|
||
|
* cilkg_init_global_state() and then cilkg_publish_global_state().
|
||
|
*
|
||
|
* The same global lock should be held while both of these methods are
|
||
|
* called. These methods are split because it is useful to execute
|
||
|
* other runtime initialization code in between.
|
||
|
*
|
||
|
* After cilkg_publish_global_state() has completed, Cilk runtime
|
||
|
* methods may call cilkg_get_global_state() to look at the published
|
||
|
* value without holding the global lock.
|
||
|
*
|
||
|
* Finally, clean up the global state by calling
|
||
|
* cilkg_deinit_global_state(). This method should be called only
|
||
|
* after all calls to cilkg_get_global_state() have completed, and
|
||
|
* while holding the global lock.
|
||
|
*
|
||
|
* Before initialization and after deinitialization, the fields in the
|
||
|
* global state have unspecified values, except for a few special
|
||
|
* fields labeled "USER SETTING", which can be read and written before
|
||
|
* initialization and after deinitialization.
|
||
|
*/
|
||
|
|
||
|
struct global_state_t { /* COMMON_PORTABLE */
|
||
|
|
||
|
/* Fields described as "(fixed)" should not be changed after
|
||
|
* initialization.
|
||
|
*/
|
||
|
|
||
|
/*************************************************************************
|
||
|
* Note that debugger integration must reach into the
|
||
|
* global state! The debugger integration is depending on the
|
||
|
* offsets of the addr_size, system_workers, total_workers,
|
||
|
* stealing_disabled, sysdep, and workers. If these offsets change, the
|
||
|
* debugger integration library will need to be changed to match!!!
|
||
|
*************************************************************************/
|
||
|
|
||
|
int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
|
||
|
|
||
|
int system_workers; ///< Number of system workers (fixed)
|
||
|
|
||
|
/**
|
||
|
* @brief USER SETTING: Maximum number of user workers that can be
|
||
|
* bound to cilk workers.
|
||
|
*
|
||
|
* 0 unless set by user. Call cilkg_calc_max_user_workers to get
|
||
|
* the value.
|
||
|
*/
|
||
|
int max_user_workers;
|
||
|
|
||
|
int total_workers; ///< Total number of worker threads allocated (fixed)
|
||
|
|
||
|
int workers_running; ///< True when system workers have beens started */
|
||
|
|
||
|
/// Set by debugger to disable stealing (fixed)
|
||
|
int stealing_disabled;
|
||
|
|
||
|
/// System-dependent part of the global state
|
||
|
struct global_sysdep_state *sysdep;
|
||
|
|
||
|
/// Array of worker structures.
|
||
|
__cilkrts_worker **workers;
|
||
|
|
||
|
/******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
|
||
|
|
||
|
/// Number of frames in each worker's lazy task queue
|
||
|
__STDNS size_t ltqsize;
|
||
|
|
||
|
/**
|
||
|
* @brief USER SETTING: Force all possible reductions.
|
||
|
*
|
||
|
* TRUE if running a p-tool that requires reducers to call the reduce()
|
||
|
* method even if no actual stealing occurs.
|
||
|
*
|
||
|
* When set to TRUE, runtime will simulate steals, forcing calls to the
|
||
|
* the reduce() methods of reducers.
|
||
|
*
|
||
|
*/
|
||
|
int force_reduce;
|
||
|
|
||
|
/// USER SETTING: Per-worker fiber pool size
|
||
|
int fiber_pool_size;
|
||
|
|
||
|
/// USER SETTING: Global fiber pool size
|
||
|
int global_fiber_pool_size;
|
||
|
|
||
|
/**
|
||
|
* @brief TRUE when workers should exit scheduling loop so we can
|
||
|
* shut down the runtime and free the global state.
|
||
|
*
|
||
|
* @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
|
||
|
* by idle workers. We need to ensure that it's not in a cache line which
|
||
|
* may be invalidated by other cores. The surrounding fields are either
|
||
|
* constant after initialization or not used until shutdown (stats) so we
|
||
|
* should be OK.
|
||
|
*/
|
||
|
volatile int work_done;
|
||
|
|
||
|
int under_ptool; ///< True when running under a serial PIN tool
|
||
|
|
||
|
statistics stats; ///< Statistics on use of runtime
|
||
|
|
||
|
/**
|
||
|
* @brief USER SETTING: Maximum number of stacks the runtime will
|
||
|
* allocate (apart from those created by the OS when worker
|
||
|
* threads are created).
|
||
|
*
|
||
|
* If max_stacks == 0,there is no pre-defined maximum.
|
||
|
*/
|
||
|
unsigned max_stacks;
|
||
|
|
||
|
/// Size of each stack
|
||
|
size_t stack_size;
|
||
|
|
||
|
/// Global cache for per-worker memory
|
||
|
struct __cilkrts_frame_cache frame_malloc;
|
||
|
|
||
|
/// Global fiber pool
|
||
|
cilk_fiber_pool fiber_pool;
|
||
|
|
||
|
|
||
|
/**
|
||
|
* @brief Track whether the runtime has failed to allocate a
|
||
|
* stack.
|
||
|
*
|
||
|
* Setting this flag prevents multiple warnings from being
|
||
|
* issued.
|
||
|
*/
|
||
|
int failure_to_allocate_stack;
|
||
|
|
||
|
/**
|
||
|
* @brief USER SETTING: indicate record or replay log.
|
||
|
* Set to NULL if not used in this run.
|
||
|
*/
|
||
|
char *record_replay_file_name;
|
||
|
|
||
|
/**
|
||
|
* @brief Record/replay state.
|
||
|
* Valid states are:
|
||
|
* RECORD_REPLAY_NONE - Not recording or replaying a log
|
||
|
* RECORD_LOG - Recording a log for replay later
|
||
|
* REPLAY_LOG - Replay a log recorded earlier
|
||
|
*/
|
||
|
enum record_replay_t record_or_replay;
|
||
|
|
||
|
/**
|
||
|
* @brief Buffer to force max_steal_failures to appear on a
|
||
|
* different cache line from the previous member variables.
|
||
|
*
|
||
|
* This padding is needed because max_steal_failures is read
|
||
|
* constantly and other modified values in the global state will
|
||
|
* cause thrashing.
|
||
|
*/
|
||
|
char cache_buf[64];
|
||
|
|
||
|
/**
|
||
|
* @brief Maximum number of times a thread should fail to steal
|
||
|
* before checking if Cilk is shutting down.
|
||
|
*/
|
||
|
unsigned int max_steal_failures;
|
||
|
|
||
|
/// Pointer to scheduler entry point
|
||
|
void (*scheduler)(__cilkrts_worker *w);
|
||
|
|
||
|
/**
|
||
|
* @brief Buffer to force P and Q to appear on a different cache
|
||
|
* line from the previous member variables.
|
||
|
*/
|
||
|
char cache_buf_2[64];
|
||
|
|
||
|
int P; ///< USER SETTING: number of system workers + 1 (fixed)
|
||
|
int Q; ///< Number of user threads currently bound to workers
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* @brief Initialize the global state object. This method must both
|
||
|
* complete before referencing any fields in the global state, except
|
||
|
* those specified as "user-settable values".
|
||
|
*/
|
||
|
global_state_t* cilkg_init_global_state();
|
||
|
|
||
|
/**
|
||
|
* @brief Publish the global state object, so that
|
||
|
* cilkg_is_published can return true.
|
||
|
*
|
||
|
* @param g - the global state created by cilkg_init_global_state() to
|
||
|
* publish.
|
||
|
*
|
||
|
* After the global state object has been published, a thread should
|
||
|
* not modify this state unless it has exclusive access (i.e., holds
|
||
|
* the global lock).
|
||
|
*/
|
||
|
void cilkg_publish_global_state(global_state_t* g);
|
||
|
|
||
|
/**
|
||
|
* @brief Return true if the global state has been fully initialized
|
||
|
* and published, and has not been deinitialized.
|
||
|
*/
|
||
|
int cilkg_is_published(void);
|
||
|
|
||
|
/**
|
||
|
* @brief De-initializes the global state object. Must be called to free
|
||
|
* resources when the global state is no longer needed.
|
||
|
*/
|
||
|
void cilkg_deinit_global_state(void);
|
||
|
|
||
|
/**
|
||
|
* @brief Returns the global state object. Result is valid only if the
|
||
|
* global state has been published (see cilkg_publish_global_state()).
|
||
|
*/
|
||
|
static inline
|
||
|
global_state_t* cilkg_get_global_state(void)
|
||
|
{
|
||
|
// "private" extern declaration:
|
||
|
extern global_state_t *cilkg_singleton_ptr;
|
||
|
|
||
|
__CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
|
||
|
return cilkg_singleton_ptr;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* @brief Implementation of __cilkrts_set_params.
|
||
|
*
|
||
|
* Set user controllable parameters
|
||
|
* @param param - string specifying parameter to be set
|
||
|
* @param value - string specifying new value
|
||
|
* @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
|
||
|
* CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
|
||
|
* CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
|
||
|
*
|
||
|
* @attention The wide character version __cilkrts_set_param_w() is available
|
||
|
* only on Windows.
|
||
|
*
|
||
|
* Allowable parameter names:
|
||
|
*
|
||
|
* - "nworkers" - number of processors that should run Cilk code.
|
||
|
* The value is a string of digits to be parsed by strtol.
|
||
|
*
|
||
|
* - "force reduce" - test reducer callbacks by allocating new views
|
||
|
* for every spawn within which a reducer is accessed. This can
|
||
|
* significantly reduce performance. The value is "1" or "true"
|
||
|
* to enable, "0" or "false" to disable.
|
||
|
* @warning Enabling "force reduce" when running with more than a single
|
||
|
* worker is currently broken.
|
||
|
*
|
||
|
* - "max user workers" - (Not publicly documented) Sets the number of slots
|
||
|
* allocated for user worker threads
|
||
|
*
|
||
|
* - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
|
||
|
* the per-worker stack cache. Range 1 .. 42. See
|
||
|
* cilkg_init_global_state for details.
|
||
|
*
|
||
|
* - "shared stacks" - (Not publicly documented) Maximum number of stacks
|
||
|
* we'll hold in the global stack cache. Maximum value is 42. See
|
||
|
* __cilkrts_make_global_state for details
|
||
|
*
|
||
|
* - "nstacks" - (Not publicly documented at this time, though it may be
|
||
|
* exposed in the future) Sets the maximum number of stacks permitted at one
|
||
|
* time. If the runtime reaches this maximum, it will cease to allocate
|
||
|
* stacks and the app will lose parallelism. 0 means unlimited. Default is
|
||
|
* unlimited. Minimum is twice the number of worker threads, though that
|
||
|
* cannot be tested at this time.
|
||
|
*/
|
||
|
int cilkg_set_param(const char* param, const char* value);
|
||
|
#ifdef _WIN32
|
||
|
/**
|
||
|
* @brief Implementation of __cilkrts_set_params for Unicode characters on
|
||
|
* Windows. See the documentation on @ref cilkg_set_param for more details.
|
||
|
*
|
||
|
* Set user controllable parameters
|
||
|
* @param param - string specifying parameter to be set
|
||
|
* @param value - string specifying new value
|
||
|
* @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
|
||
|
* CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
|
||
|
* CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
|
||
|
*/
|
||
|
int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
|
||
|
#endif
|
||
|
|
||
|
/**
|
||
|
* @brief implementation of __cilkrts_get_nworkers()
|
||
|
*/
|
||
|
static inline
|
||
|
int cilkg_get_nworkers(void)
|
||
|
{
|
||
|
// "private" extern declaration
|
||
|
extern global_state_t* cilkg_get_user_settable_values(void);
|
||
|
return cilkg_get_user_settable_values()->P;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @brief implementation of __cilkrts_get_total_workers()
|
||
|
*/
|
||
|
static inline
|
||
|
int cilkg_get_total_workers(void)
|
||
|
{
|
||
|
// "private" extern declaration
|
||
|
extern int cilkg_calc_total_workers(void);
|
||
|
|
||
|
// This number can fluctate until initialization so we
|
||
|
// compute it from scratch
|
||
|
return cilkg_calc_total_workers();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @brief implementation of __cilkrts_get_force_reduce()
|
||
|
*/
|
||
|
static inline
|
||
|
int cilkg_get_force_reduce(void)
|
||
|
{
|
||
|
// "private" extern declaration
|
||
|
extern global_state_t* cilkg_get_user_settable_values(void);
|
||
|
return cilkg_get_user_settable_values()->force_reduce;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @brief implementation of __cilkrts_get_stack_size()
|
||
|
*/
|
||
|
static inline
|
||
|
size_t cilkg_get_stack_size(void)
|
||
|
{
|
||
|
// "private" extern declaration
|
||
|
extern global_state_t* cilkg_get_user_settable_values(void);
|
||
|
return cilkg_get_user_settable_values()->stack_size;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @brief Run the scheduler function stored in the global_state
|
||
|
*
|
||
|
* Look up the scheduler function in global_state and run it. Report a fatal
|
||
|
* error if an exception escapes the scheduler function.
|
||
|
*
|
||
|
* @param w - Worker structure to associate with the current thread.
|
||
|
*
|
||
|
* @attention The scheduler field of the global state must be set before this
|
||
|
* function is called.
|
||
|
*/
|
||
|
void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
|
||
|
|
||
|
__CILKRTS_END_EXTERN_C
|
||
|
|
||
|
#endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)
|