bitmap_allocator.h: (_Bit_scan_forward) -> Made this function call __builtin_ctz instead of the while loop.

2004-03-24  Dhruv Matani  <dhruvbird@gmx.net>

	* include/ext/bitmap_allocator.h: (_Bit_scan_forward) -> Made this
	function call __builtin_ctz instead of the while loop.
	(allocate) -> If condition has __builtin_expect.
	(deallocate) -> Ditto.
	Renamed a few left-over variables and typedefs according to the
	C++STYLE mentioned in the documentation.
	Protected calls to __gthread* by __gthread_active_p(), whose value
	is cached in the local variable __threads_active.

From-SVN: r79924
This commit is contained in:
Dhruv Matani 2004-03-24 18:27:43 +00:00 committed by Benjamin Kosnik
parent 643d3bd23e
commit 71f9a9d15e
2 changed files with 148 additions and 101 deletions

View File

@ -1,3 +1,14 @@
2004-03-24 Dhruv Matani <dhruvbird@gmx.net>
* include/ext/bitmap_allocator.h: (_Bit_scan_forward) -> Made this
function call __builtin_ctz instead of the while loop.
(allocate) -> If condition has __builtin_expect.
(deallocate) -> Ditto.
Renamed a few left-over variables and typedefs according to the
C++STYLE mentioned in the documentation.
Protected calls to __gthread* by __gthread_active_p(), whose value
is cached in the local variable __threads_active.
2004-03-24 Felix Yen <fwy@alumni.brown.edu> 2004-03-24 Felix Yen <fwy@alumni.brown.edu>
* testsuite/performance/20_util/allocator/producer_consumer.cc: * testsuite/performance/20_util/allocator/producer_consumer.cc:

View File

@ -37,7 +37,7 @@
#include <utility> #include <utility>
//For std::pair. //For std::pair.
#include <algorithm> #include <algorithm>
//std::find_if. //std::find_if, and std::lower_bound.
#include <vector> #include <vector>
//For the free list of exponentially growing memory blocks. At max, //For the free list of exponentially growing memory blocks. At max,
//size of the vector should be not more than the number of bits in an //size of the vector should be not more than the number of bits in an
@ -55,10 +55,18 @@
#define NDEBUG #define NDEBUG
//#define CHECK_FOR_ERRORS //#define CHECK_FOR_ERRORS
//#define __CPU_HAS_BACKWARD_BRANCH_PREDICTION
namespace __gnu_cxx namespace __gnu_cxx
{ {
namespace {
#if defined __GTHREADS
bool const __threads_enabled = __gthread_active_p();
#endif
}
#if defined __GTHREADS
class _Mutex { class _Mutex {
__gthread_mutex_t _M_mut; __gthread_mutex_t _M_mut;
//Prevent Copying and assignment. //Prevent Copying and assignment.
@ -67,12 +75,15 @@ namespace __gnu_cxx
public: public:
_Mutex () _Mutex ()
{ {
if (__threads_enabled)
{
#if !defined __GTHREAD_MUTEX_INIT #if !defined __GTHREAD_MUTEX_INIT
__GTHREAD_MUTEX_INIT_FUNCTION(&_M_mut); __GTHREAD_MUTEX_INIT_FUNCTION(&_M_mut);
#else #else
__gthread_mutex_t __mtemp = __GTHREAD_MUTEX_INIT; __gthread_mutex_t __mtemp = __GTHREAD_MUTEX_INIT;
_M_mut = __mtemp; _M_mut = __mtemp;
#endif #endif
}
} }
~_Mutex () ~_Mutex ()
{ {
@ -81,22 +92,42 @@ namespace __gnu_cxx
__gthread_mutex_t *_M_get() { return &_M_mut; } __gthread_mutex_t *_M_get() { return &_M_mut; }
}; };
class _Lock { class _Lock {
_Mutex& _M_mt; _Mutex* _M_pmt;
bool _M_locked;
//Prevent Copying and assignment. //Prevent Copying and assignment.
_Lock (_Lock const&); _Lock (_Lock const&);
_Lock& operator= (_Lock const&); _Lock& operator= (_Lock const&);
public: public:
_Lock (_Mutex& __mref) : _M_mt(__mref) _Lock(_Mutex* __mptr)
: _M_pmt(__mptr), _M_locked(false)
{ this->_M_lock(); }
void _M_lock()
{ {
__gthread_mutex_lock(_M_mt._M_get()); if (__threads_enabled)
{
_M_locked = true;
__gthread_mutex_lock(_M_pmt->_M_get());
}
} }
~_Lock () { __gthread_mutex_unlock(_M_mt._M_get()); } void _M_unlock()
{
if (__threads_enabled)
{
if (__builtin_expect(_M_locked, true))
{
__gthread_mutex_unlock(_M_pmt->_M_get());
_M_locked = false;
}
}
}
~_Lock() { this->_M_unlock(); }
}; };
#endif
namespace __aux_balloc { namespace __aux_balloc {
static const unsigned int _Bits_Per_Byte = 8; static const unsigned int _Bits_Per_Byte = 8;
static const unsigned int _Bits_Per_Block = sizeof(unsigned int) * _Bits_Per_Byte; static const unsigned int _Bits_Per_Block = sizeof(unsigned int) * _Bits_Per_Byte;
@ -148,7 +179,8 @@ namespace __gnu_cxx
//T should be a pointer type, and A is the Allocator for the vector. //T should be a pointer type, and A is the Allocator for the vector.
template <typename _Tp, typename _Alloc> template <typename _Tp, typename _Alloc>
class _Ffit_finder : public std::unary_function<typename std::pair<_Tp, _Tp>, bool> { class _Ffit_finder
: public std::unary_function<typename std::pair<_Tp, _Tp>, bool> {
typedef typename std::vector<std::pair<_Tp, _Tp>, _Alloc> _BPVector; typedef typename std::vector<std::pair<_Tp, _Tp>, _Alloc> _BPVector;
typedef typename _BPVector::difference_type _Counter_type; typedef typename _BPVector::difference_type _Counter_type;
typedef typename std::pair<_Tp, _Tp> _Block_pair; typedef typename std::pair<_Tp, _Tp> _Block_pair;
@ -157,7 +189,9 @@ namespace __gnu_cxx
unsigned int _M_data_offset; unsigned int _M_data_offset;
public: public:
_Ffit_finder () : _M_pbitmap (0), _M_data_offset (0) { } _Ffit_finder ()
: _M_pbitmap (0), _M_data_offset (0)
{ }
bool operator() (_Block_pair __bp) throw() bool operator() (_Block_pair __bp) throw()
{ {
@ -214,7 +248,8 @@ namespace __gnu_cxx
//Use the 2nd parameter with care. Make sure that such an entry //Use the 2nd parameter with care. Make sure that such an entry
//exists in the vector before passing that particular index to //exists in the vector before passing that particular index to
//this ctor. //this ctor.
_Bit_map_counter (_BPVector& Rvbp, int __index = -1) : _M_vbp(Rvbp) _Bit_map_counter (_BPVector& Rvbp, int __index = -1)
: _M_vbp(Rvbp)
{ {
this->_M_reset(__index); this->_M_reset(__index);
} }
@ -238,7 +273,7 @@ namespace __gnu_cxx
} }
//Dangerous Function! Use with extreme care. Pass to this //Dangerous Function! Use with extreme care. Pass to this
//functions ONLY those values that are known to be correct, //function ONLY those values that are known to be correct,
//otherwise this will mess up big time. //otherwise this will mess up big time.
void _M_set_internal_bit_map (unsigned int *__new_internal_marker) throw() void _M_set_internal_bit_map (unsigned int *__new_internal_marker) throw()
{ {
@ -275,28 +310,22 @@ namespace __gnu_cxx
return _M_curr_bmap; return _M_curr_bmap;
} }
pointer base () { return _M_vbp[_M_curr_index].first; } pointer _M_base () { return _M_vbp[_M_curr_index].first; }
unsigned int _M_offset () unsigned int _M_offset ()
{ {
return _Bits_Per_Block * ((reinterpret_cast<unsigned int*>(this->base()) - _M_curr_bmap) - 1); return _Bits_Per_Block * ((reinterpret_cast<unsigned int*>(this->_M_base()) - _M_curr_bmap) - 1);
} }
unsigned int _M_where () { return _M_curr_index; } unsigned int _M_where () { return _M_curr_index; }
}; };
} }
//Generic Version of the bsf instruction. //Generic Version of the bsf instruction.
typedef unsigned int _Bit_map_type; typedef unsigned int _Bit_map_type;
static inline unsigned int _Bit_scan_forward (_Bit_map_type __num) static inline unsigned int _Bit_scan_forward (register _Bit_map_type __num)
{ {
unsigned int __ret_val = 0; return static_cast<unsigned int>(__builtin_ctz(__num));
while (__num % 2 == 0) }
{
++__ret_val;
__num >>= 1;
}
return __ret_val;
}
struct _OOM_handler { struct _OOM_handler {
static std::new_handler _S_old_handler; static std::new_handler _S_old_handler;
@ -347,8 +376,8 @@ namespace __gnu_cxx
static void _S_validate_free_list(unsigned int *__addr) throw() static void _S_validate_free_list(unsigned int *__addr) throw()
{ {
const unsigned int Max_Size = 64; const unsigned int __max_size = 64;
if (_S_free_list.size() >= Max_Size) if (_S_free_list.size() >= __max_size)
{ {
//Ok, the threshold value has been reached. //Ok, the threshold value has been reached.
//We determine which block to remove from the list of free //We determine which block to remove from the list of free
@ -380,10 +409,9 @@ namespace __gnu_cxx
static bool _S_should_i_give(unsigned int __block_size, unsigned int __required_size) throw() static bool _S_should_i_give(unsigned int __block_size, unsigned int __required_size) throw()
{ {
const unsigned int Max_Wastage_Percentage = 36; const unsigned int __max_wastage_percentage = 36;
if (__block_size >= __required_size && if (__block_size >= __required_size &&
(((__block_size - __required_size) * 100 / __block_size) < Max_Wastage_Percentage)) (((__block_size - __required_size) * 100 / __block_size) < __max_wastage_percentage))
return true; return true;
else else
return false; return false;
@ -395,7 +423,7 @@ namespace __gnu_cxx
static inline void _S_insert_free_list(unsigned int *__addr) throw() static inline void _S_insert_free_list(unsigned int *__addr) throw()
{ {
#if defined __GTHREADS #if defined __GTHREADS
_Lock __bfl_lock(*&_S_bfl_mutex); _Lock __bfl_lock(&_S_bfl_mutex);
#endif #endif
//Call _S_validate_free_list to decide what should be done with this //Call _S_validate_free_list to decide what should be done with this
//particular free list. //particular free list.
@ -405,12 +433,14 @@ namespace __gnu_cxx
static unsigned int *_S_get_free_list(unsigned int __sz) throw (std::bad_alloc) static unsigned int *_S_get_free_list(unsigned int __sz) throw (std::bad_alloc)
{ {
#if defined __GTHREADS #if defined __GTHREADS
_Lock __bfl_lock(*&_S_bfl_mutex); _Lock __bfl_lock(&_S_bfl_mutex);
#endif #endif
_FLIter __temp = std::lower_bound(_S_free_list.begin(), _S_free_list.end(), _FLIter __temp = std::lower_bound(_S_free_list.begin(), _S_free_list.end(),
__sz, _LT_pointer_compare()); __sz, _LT_pointer_compare());
if (__temp == _S_free_list.end() || !_S_should_i_give (**__temp, __sz)) if (__temp == _S_free_list.end() || !_S_should_i_give (**__temp, __sz))
{ {
//We hold the lock because the OOM_Handler is a stateless
//entity.
_OOM_handler __set_handler(_BFL_type::_S_clear); _OOM_handler __set_handler(_BFL_type::_S_clear);
unsigned int *__ret_val = reinterpret_cast<unsigned int*> unsigned int *__ret_val = reinterpret_cast<unsigned int*>
(operator new (__sz + sizeof(unsigned int))); (operator new (__sz + sizeof(unsigned int)));
@ -430,7 +460,7 @@ namespace __gnu_cxx
static void _S_clear() static void _S_clear()
{ {
#if defined __GTHREADS #if defined __GTHREADS
_Lock __bfl_lock(*&_S_bfl_mutex); _Lock __bfl_lock(&_S_bfl_mutex);
#endif #endif
_FLIter __iter = _S_free_list.begin(); _FLIter __iter = _S_free_list.begin();
while (__iter != _S_free_list.end()) while (__iter != _S_free_list.end())
@ -448,7 +478,7 @@ namespace __gnu_cxx
#endif #endif
std::vector<unsigned int*> _BA_free_list_store::_S_free_list; std::vector<unsigned int*> _BA_free_list_store::_S_free_list;
template <class _Tp> class bitmap_allocator; template <typename _Tp> class bitmap_allocator;
// specialize for void: // specialize for void:
template <> class bitmap_allocator<void> { template <> class bitmap_allocator<void> {
public: public:
@ -456,10 +486,10 @@ namespace __gnu_cxx
typedef const void* const_pointer; typedef const void* const_pointer;
// reference-to-void members are impossible. // reference-to-void members are impossible.
typedef void value_type; typedef void value_type;
template <class U> struct rebind { typedef bitmap_allocator<U> other; }; template <typename _Tp1> struct rebind { typedef bitmap_allocator<_Tp1> other; };
}; };
template <class _Tp> class bitmap_allocator : private _BA_free_list_store { template <typename _Tp> class bitmap_allocator : private _BA_free_list_store {
public: public:
typedef size_t size_type; typedef size_t size_type;
typedef ptrdiff_t difference_type; typedef ptrdiff_t difference_type;
@ -468,7 +498,7 @@ namespace __gnu_cxx
typedef _Tp& reference; typedef _Tp& reference;
typedef const _Tp& const_reference; typedef const _Tp& const_reference;
typedef _Tp value_type; typedef _Tp value_type;
template <class U> struct rebind { typedef bitmap_allocator<U> other; }; template <typename _Tp1> struct rebind { typedef bitmap_allocator<_Tp1> other; };
private: private:
static const unsigned int _Bits_Per_Byte = 8; static const unsigned int _Bits_Per_Byte = 8;
@ -481,9 +511,9 @@ namespace __gnu_cxx
*__pbmap &= __mask; *__pbmap &= __mask;
} }
static inline void _S_bit_free(unsigned int *__pbmap, unsigned int __Pos) throw() static inline void _S_bit_free(unsigned int *__pbmap, unsigned int __pos) throw()
{ {
unsigned int __mask = 1 << __Pos; unsigned int __mask = 1 << __pos;
*__pbmap |= __mask; *__pbmap |= __mask;
} }
@ -565,18 +595,6 @@ namespace __gnu_cxx
static _Mutex _S_mut; static _Mutex _S_mut;
#endif #endif
public:
bitmap_allocator() throw()
{ }
bitmap_allocator(const bitmap_allocator&) { }
template <typename _Tp1> bitmap_allocator(const bitmap_allocator<_Tp1>&) throw()
{ }
~bitmap_allocator() throw()
{ }
//Complexity: Worst case complexity is O(N), but that is hardly ever //Complexity: Worst case complexity is O(N), but that is hardly ever
//hit. if and when this particular case is encountered, the next few //hit. if and when this particular case is encountered, the next few
//cases are guaranteed to have a worst case complexity of O(1)! //cases are guaranteed to have a worst case complexity of O(1)!
@ -586,22 +604,27 @@ namespace __gnu_cxx
static pointer _S_allocate_single_object() static pointer _S_allocate_single_object()
{ {
#if defined __GTHREADS #if defined __GTHREADS
_Lock _bit_lock(*&_S_mut); _Lock __bit_lock(&_S_mut);
#endif #endif
//The algorithm is something like this: The last_requst variable //The algorithm is something like this: The last_requst variable
//points to the last accessed Bit Map. When such a condition //points to the last accessed Bit Map. When such a condition
//occurs, we try to find a free block in the current bitmap, or //occurs, we try to find a free block in the current bitmap, or
//succeeding bitmaps until the last bitmap is reached. If no free //succeeding bitmaps until the last bitmap is reached. If no free
//block turns up, we resort to First Fit method. But, again, the //block turns up, we resort to First Fit method.
//First Fit is used only upto the point where we started the
//previous linear search.
//WARNING: Do not re-order the condition in the while statement
//below, because it relies on C++'s short-circuit
//evaluation. The return from _S_last_request->_M_get() will NOT
//be dereferenceable if _S_last_request->_M_finished() returns
//true. This would inevitibly lead to a NULL pointer dereference
//if tinkered with.
while (_S_last_request._M_finished() == false && (*(_S_last_request._M_get()) == 0)) while (_S_last_request._M_finished() == false && (*(_S_last_request._M_get()) == 0))
{ {
_S_last_request.operator++(); _S_last_request.operator++();
} }
if (_S_last_request._M_finished()) if (__builtin_expect(_S_last_request._M_finished() == true, false))
{ {
//Fall Back to First Fit algorithm. //Fall Back to First Fit algorithm.
typedef typename __gnu_cxx::__aux_balloc::_Ffit_finder<pointer, _BPVec_allocator_type> _FFF; typedef typename __gnu_cxx::__aux_balloc::_Ffit_finder<pointer, _BPVec_allocator_type> _FFF;
@ -645,7 +668,7 @@ namespace __gnu_cxx
unsigned int __nz_bit = _Bit_scan_forward(*_S_last_request._M_get()); unsigned int __nz_bit = _Bit_scan_forward(*_S_last_request._M_get());
_S_bit_allocate(_S_last_request._M_get(), __nz_bit); _S_bit_allocate(_S_last_request._M_get(), __nz_bit);
pointer __ret_val = _S_last_request.base() + _S_last_request._M_offset() + __nz_bit; pointer __ret_val = _S_last_request._M_base() + _S_last_request._M_offset() + __nz_bit;
unsigned int *__puse_count = reinterpret_cast<unsigned int*> unsigned int *__puse_count = reinterpret_cast<unsigned int*>
(_S_mem_blocks[_S_last_request._M_where()].first) - (_S_mem_blocks[_S_last_request._M_where()].first) -
@ -654,49 +677,19 @@ namespace __gnu_cxx
return __ret_val; return __ret_val;
} }
//Complexity: O(1), but internally the complexity depends upon the
//complexity of the function(s) _S_allocate_single_object and
//_S_memory_get.
pointer allocate(size_type __n)
{
if (__n == 1)
return _S_allocate_single_object();
else
return reinterpret_cast<pointer>(_S_memory_get(__n * sizeof(value_type)));
}
//Complexity: Worst case complexity is O(N) where N is the number of
//blocks of size sizeof(value_type) within the free lists that the
//allocator holds. However, this worst case is hit only when the
//user supplies a bogus argument to hint. If the hint argument is
//sensible, then the complexity drops to O(lg(N)), and in extreme
//cases, even drops to as low as O(1). So, if the user supplied
//argument is good, then this function performs very well.
pointer allocate(size_type __n, typename bitmap_allocator<void>::const_pointer)
{
return allocate(__n);
}
void deallocate(pointer __p, size_type __n) throw()
{
if (__n == 1)
_S_deallocate_single_object(__p);
else
_S_memory_put(__p);
}
//Complexity: O(lg(N)), but the worst case is hit quite often! I //Complexity: O(lg(N)), but the worst case is hit quite often! I
//need to do something about this. I'll be able to work on it, only //need to do something about this. I'll be able to work on it, only
//when I have some solid figures from a few real apps. //when I have some solid figures from a few real apps.
static void _S_deallocate_single_object(pointer __p) throw() static void _S_deallocate_single_object(pointer __p) throw()
{ {
#if defined __GTHREADS #if defined __GTHREADS
_Lock _bit_lock(*&_S_mut); _Lock __bit_lock(&_S_mut);
#endif #endif
typedef typename _BPVector::iterator iterator;
typedef typename _BPVector::difference_type diff_type;
diff_type __diff; typedef typename _BPVector::iterator _Iterator;
typedef typename _BPVector::difference_type _Difference_type;
_Difference_type __diff;
int __displacement; int __displacement;
assert(_S_last_dealloc_index >= 0); assert(_S_last_dealloc_index >= 0);
@ -711,7 +704,7 @@ namespace __gnu_cxx
} }
else else
{ {
iterator _iter = (std::find_if(_S_mem_blocks.begin(), _S_mem_blocks.end(), _Iterator _iter = (std::find_if(_S_mem_blocks.begin(), _S_mem_blocks.end(),
__gnu_cxx::__aux_balloc::_Inclusive_between<pointer>(__p))); __gnu_cxx::__aux_balloc::_Inclusive_between<pointer>(__p)));
assert(_iter != _S_mem_blocks.end()); assert(_iter != _S_mem_blocks.end());
@ -734,7 +727,7 @@ namespace __gnu_cxx
--(*__puse_count); --(*__puse_count);
if (!*__puse_count) if (__builtin_expect(*__puse_count == 0, false))
{ {
_S_block_size /= 2; _S_block_size /= 2;
@ -744,12 +737,12 @@ namespace __gnu_cxx
_S_mem_blocks.erase(_S_mem_blocks.begin() + __diff); _S_mem_blocks.erase(_S_mem_blocks.begin() + __diff);
//We reset the _S_last_request variable to reflect the erased //We reset the _S_last_request variable to reflect the erased
//block. We do this to pretect future requests after the last //block. We do this to protect future requests after the last
//block has been removed from a particular memory Chunk, //block has been removed from a particular memory Chunk,
//which in turn has been returned to the free list, and //which in turn has been returned to the free list, and
//hence had been erased from the vector, so the size of the //hence had been erased from the vector, so the size of the
//vector gets reduced by 1. //vector gets reduced by 1.
if ((diff_type)_S_last_request._M_where() >= __diff--) if ((_Difference_type)_S_last_request._M_where() >= __diff--)
{ {
_S_last_request._M_reset(__diff); _S_last_request._M_reset(__diff);
// assert(__diff >= 0); // assert(__diff >= 0);
@ -768,14 +761,57 @@ namespace __gnu_cxx
} }
} }
public:
bitmap_allocator() throw()
{ }
bitmap_allocator(const bitmap_allocator&) { }
template <typename _Tp1> bitmap_allocator(const bitmap_allocator<_Tp1>&) throw()
{ }
~bitmap_allocator() throw()
{ }
//Complexity: O(1), but internally the complexity depends upon the
//complexity of the function(s) _S_allocate_single_object and
//_S_memory_get.
pointer allocate(size_type __n)
{
if (__builtin_expect(__n == 1, true))
return _S_allocate_single_object();
else
return reinterpret_cast<pointer>(_S_memory_get(__n * sizeof(value_type)));
}
//Complexity: Worst case complexity is O(N) where N is the number of
//blocks of size sizeof(value_type) within the free lists that the
//allocator holds. However, this worst case is hit only when the
//user supplies a bogus argument to hint. If the hint argument is
//sensible, then the complexity drops to O(lg(N)), and in extreme
//cases, even drops to as low as O(1). So, if the user supplied
//argument is good, then this function performs very well.
pointer allocate(size_type __n, typename bitmap_allocator<void>::const_pointer)
{
return allocate(__n);
}
void deallocate(pointer __p, size_type __n) throw()
{
if (__builtin_expect(__n == 1, true))
_S_deallocate_single_object(__p);
else
_S_memory_put(__p);
}
pointer address(reference r) const { return &r; } pointer address(reference r) const { return &r; }
const_pointer address(const_reference r) const { return &r; } const_pointer address(const_reference r) const { return &r; }
size_type max_size(void) const throw() { return (size_type()-1)/sizeof(value_type); } size_type max_size(void) const throw() { return (size_type()-1)/sizeof(value_type); }
void construct (pointer p, const_reference _data) void construct (pointer p, const_reference __data)
{ {
new (p) value_type (_data); ::new(p) value_type(__data);
} }
void destroy (pointer p) void destroy (pointer p)