gcc/libcilkrts/runtime/global_state.h

418 lines
14 KiB
C++

/* global_state.h -*-C++-*-
*
*************************************************************************
*
* @copyright
* Copyright (C) 2009-2013, Intel Corporation
* All rights reserved.
*
* @copyright
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* @copyright
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************/
/**
* @file global_state.h
*
* @brief The global_state_t structure contains most of the global context
* maintained by the Intel Cilk runtime.
*/
#ifndef INCLUDED_GLOBAL_STATE_DOT_H
#define INCLUDED_GLOBAL_STATE_DOT_H
#include <cilk/common.h>
#include "frame_malloc.h"
#include "stats.h"
#include "bug.h"
#include "cilk_fiber.h"
__CILKRTS_BEGIN_EXTERN_C
/**
* Non-null placeholder for a fiber handle that has no meaningful value.
*
* Defined as the integer -2 cast to a cilk_fiber pointer. It is a sentinel
* value, not the address of a real fiber object.
*/
#define PLACEHOLDER_FIBER ((cilk_fiber *) -2)
/**
 * Record/replay modes stored in the record_or_replay field of the
 * global state.
 */
enum record_replay_t {
    RECORD_REPLAY_NONE = 0, ///< Not recording or replaying a log
    RECORD_LOG         = 1, ///< Recording a log for replay later
    REPLAY_LOG         = 2  ///< Replaying a log recorded earlier
};
/**
* @brief The global state is a structure that is shared by all workers in
* Cilk.
*
* Make the structure ready for use by calling
* cilkg_init_global_state() and then cilkg_publish_global_state().
*
* The same global lock should be held while both of these methods are
* called. These methods are split because it is useful to execute
* other runtime initialization code in between.
*
* After cilkg_publish_global_state() has completed, Cilk runtime
* methods may call cilkg_get_global_state() to look at the published
* value without holding the global lock.
*
* Finally, clean up the global state by calling
* cilkg_deinit_global_state(). This method should be called only
* after all calls to cilkg_get_global_state() have completed, and
* while holding the global lock.
*
* Before initialization and after deinitialization, the fields in the
* global state have unspecified values, except for a few special
* fields labeled "USER SETTING", which can be read and written before
* initialization and after deinitialization.
*/
struct global_state_t { /* COMMON_PORTABLE */
/* Fields described as "(fixed)" should not be changed after
* initialization.
*/
/*************************************************************************
* Note that debugger integration must reach into the
* global state! The debugger integration depends on the
* offsets of the addr_size, system_workers, total_workers,
* stealing_disabled, sysdep, and workers fields. If these offsets change,
* the debugger integration library will need to be changed to match!!!
*
* Every field declared before the "END OF DEBUGGER-INTEGRATION FIELDS"
* marker contributes to those offsets, including the ones the debugger
* does not read itself (max_user_workers, workers_running), so do not
* add, remove, reorder, or retype anything in this section.
*************************************************************************/
int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
int system_workers; ///< Number of system workers (fixed)
/**
* @brief USER SETTING: Maximum number of user workers that can be
* bound to cilk workers.
*
* 0 unless set by user. Call cilkg_calc_max_user_workers to get
* the value.
*/
int max_user_workers;
int total_workers; ///< Total number of worker threads allocated (fixed)
int workers_running; ///< True when system workers have been started
/// Set by debugger to disable stealing (fixed)
int stealing_disabled;
/// System-dependent part of the global state
struct global_sysdep_state *sysdep;
/// Array of worker structures.
__cilkrts_worker **workers;
/******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
/// Number of frames in each worker's lazy task queue
__STDNS size_t ltqsize;
/**
* @brief USER SETTING: Force all possible reductions.
*
* TRUE if running a p-tool that requires reducers to call the reduce()
* method even if no actual stealing occurs.
*
* When set to TRUE, the runtime will simulate steals, forcing calls to
* the reduce() methods of reducers.
*/
int force_reduce;
/// USER SETTING: Per-worker fiber pool size
int fiber_pool_size;
/// USER SETTING: Global fiber pool size
int global_fiber_pool_size;
/**
* @brief TRUE when workers should exit scheduling loop so we can
* shut down the runtime and free the global state.
*
* @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
* by idle workers. We need to ensure that it's not in a cache line which
* may be invalidated by other cores. The surrounding fields are either
* constant after initialization or not used until shutdown (stats) so we
* should be OK.
*/
volatile int work_done;
int under_ptool; ///< True when running under a serial PIN tool
statistics stats; ///< Statistics on use of runtime
/**
* @brief USER SETTING: Maximum number of stacks the runtime will
* allocate (apart from those created by the OS when worker
* threads are created).
*
* If max_stacks == 0, there is no pre-defined maximum.
*/
unsigned max_stacks;
/// Size of each stack
size_t stack_size;
/// Global cache for per-worker memory
struct __cilkrts_frame_cache frame_malloc;
/// Global fiber pool
cilk_fiber_pool fiber_pool;
/**
* @brief Track whether the runtime has failed to allocate a
* stack.
*
* Setting this flag prevents multiple warnings from being
* issued.
*/
int failure_to_allocate_stack;
/**
* @brief USER SETTING: Name of the record or replay log file.
* Set to NULL if not used in this run.
*/
char *record_replay_file_name;
/**
* @brief Record/replay state.
* Valid states are:
* RECORD_REPLAY_NONE - Not recording or replaying a log
* RECORD_LOG - Recording a log for replay later
* REPLAY_LOG - Replay a log recorded earlier
*/
enum record_replay_t record_or_replay;
/**
* @brief Buffer to force max_steal_failures to appear on a
* different cache line from the previous member variables.
*
* This padding is needed because max_steal_failures is read
* constantly and other modified values in the global state will
* cause thrashing.
*
* NOTE(review): 64 is presumably the assumed cache-line size in
* bytes - confirm for the target platform.
*/
char cache_buf[64];
/**
* @brief Maximum number of times a thread should fail to steal
* before checking if Cilk is shutting down.
*/
unsigned int max_steal_failures;
/// Pointer to scheduler entry point
void (*scheduler)(__cilkrts_worker *w);
/**
* @brief Buffer to force P and Q to appear on a different cache
* line from the previous member variables.
*/
char cache_buf_2[64];
int P; ///< USER SETTING: number of system workers + 1 (fixed)
int Q; ///< Number of user threads currently bound to workers
};
/**
 * @brief Initialize the global state object.
 *
 * This function must complete before any field of the global state is
 * referenced, except for the user-settable values (the fields labeled
 * "USER SETTING" in global_state_t).
 *
 * The parameter list is spelled (void) so that C translation units see a
 * real prototype; an empty () in C declares a function with unspecified
 * parameters and disables argument checking.
 *
 * @return The global state object, to be passed to
 * cilkg_publish_global_state().
 */
global_state_t* cilkg_init_global_state(void);
/**
* @brief Publish the global state object, so that
* cilkg_is_published() can return true and cilkg_get_global_state()
* yields a valid result.
*
* @param g - the global state created by cilkg_init_global_state() to
* publish.
*
* The same global lock should be held for this call as for the preceding
* cilkg_init_global_state() call.
*
* After the global state object has been published, a thread should
* not modify this state unless it has exclusive access (i.e., holds
* the global lock).
*/
void cilkg_publish_global_state(global_state_t* g);
/**
* @brief Report whether the global state is currently published.
*
* @return True (non-zero) if the global state has been fully initialized
* and published, and has not been deinitialized.
*/
int cilkg_is_published(void);
/**
* @brief De-initializes the global state object. Must be called to free
* resources when the global state is no longer needed.
*
* This should be called only after all calls to cilkg_get_global_state()
* have completed, and while holding the global lock.
*/
void cilkg_deinit_global_state(void);
/**
* @brief Returns the global state object.
*
* The result is valid only if the global state has been published (see
* cilkg_publish_global_state()). Once it has been published, callers may
* read the value without holding the global lock.
*
* @return The value of the cilkg_singleton_ptr singleton pointer.
*/
static inline
global_state_t* cilkg_get_global_state(void)
{
// "private" extern declaration: the name is visible only inside this
// function rather than to every file that includes this header.
extern global_state_t *cilkg_singleton_ptr;
// Catch use of the global state while the singleton pointer is still null.
__CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
return cilkg_singleton_ptr;
}
/**
* @brief Implementation of __cilkrts_set_param.
*
* Set user controllable parameters
* @param param - string specifying parameter to be set
* @param value - string specifying new value
* @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
* CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
* CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
*
* @attention The wide character version __cilkrts_set_param_w() is available
* only on Windows.
*
* Allowable parameter names:
*
* - "nworkers" - number of processors that should run Cilk code.
* The value is a string of digits to be parsed by strtol.
*
* - "force reduce" - test reducer callbacks by allocating new views
* for every spawn within which a reducer is accessed. This can
* significantly reduce performance. The value is "1" or "true"
* to enable, "0" or "false" to disable.
* @warning Enabling "force reduce" when running with more than a single
* worker is currently broken.
*
* - "max user workers" - (Not publicly documented) Sets the number of slots
* allocated for user worker threads
*
* - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
* the per-worker stack cache. Range 1 .. 42. See
* cilkg_init_global_state for details.
*
* - "shared stacks" - (Not publicly documented) Maximum number of stacks
* we'll hold in the global stack cache. Maximum value is 42. See
* __cilkrts_make_global_state for details
*
* - "nstacks" - (Not publicly documented at this time, though it may be
* exposed in the future) Sets the maximum number of stacks permitted at one
* time. If the runtime reaches this maximum, it will cease to allocate
* stacks and the app will lose parallelism. 0 means unlimited. Default is
* unlimited. Minimum is twice the number of worker threads, though that
* cannot be tested at this time.
*/
int cilkg_set_param(const char* param, const char* value);
#ifdef _WIN32
/**
* @brief Implementation of __cilkrts_set_param_w, the wide-character
* (Unicode) version of __cilkrts_set_param, on Windows. See the
* documentation on @ref cilkg_set_param for more details.
*
* Set user controllable parameters
* @param param - wide-character string specifying parameter to be set
* @param value - wide-character string specifying new value
* @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
* CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
* CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
*/
int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
#endif
/**
 * @brief Implementation of __cilkrts_get_nworkers().
 *
 * @return The P field of the user-settable values.
 */
static inline
int cilkg_get_nworkers(void)
{
    // "private" extern declaration, scoped to this function only
    extern global_state_t* cilkg_get_user_settable_values(void);

    const global_state_t *settings = cilkg_get_user_settable_values();
    return settings->P;
}
/**
 * @brief Implementation of __cilkrts_get_total_workers().
 *
 * @return The worker total as computed by cilkg_calc_total_workers().
 */
static inline
int cilkg_get_total_workers(void)
{
    // "private" extern declaration, scoped to this function only
    extern int cilkg_calc_total_workers(void);

    // The total can fluctuate until initialization, so it is recomputed
    // from scratch on every call.
    int total = cilkg_calc_total_workers();
    return total;
}
/**
 * @brief Implementation of __cilkrts_get_force_reduce().
 *
 * @return The force_reduce field of the user-settable values.
 */
static inline
int cilkg_get_force_reduce(void)
{
    // "private" extern declaration, scoped to this function only
    extern global_state_t* cilkg_get_user_settable_values(void);

    const global_state_t *settings = cilkg_get_user_settable_values();
    return settings->force_reduce;
}
/**
 * @brief Implementation of __cilkrts_get_stack_size().
 *
 * @return The stack_size field of the user-settable values.
 */
static inline
size_t cilkg_get_stack_size(void)
{
    // "private" extern declaration, scoped to this function only
    extern global_state_t* cilkg_get_user_settable_values(void);

    const global_state_t *settings = cilkg_get_user_settable_values();
    return settings->stack_size;
}
/**
* @brief Run the scheduler function stored in the global state.
*
* Look up the scheduler function (the @c scheduler field of global_state_t)
* and run it. Report a fatal error if an exception escapes the scheduler
* function.
*
* @param w - Worker structure to associate with the current thread.
*
* @attention The scheduler field of the global state must be set before this
* function is called.
*/
void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
__CILKRTS_END_EXTERN_C
#endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)