From 71f9a9d15e0deaa0b6b80f4ad146f0840cb483b0 Mon Sep 17 00:00:00 2001
From: Dhruv Matani
Date: Wed, 24 Mar 2004 18:27:43 +0000
Subject: [PATCH] bitmap_allocator.h: (_Bit_scan_forward) -> Made this
 function call __builtin_ctz instead of the...

2004-03-24  Dhruv Matani

	* include/ext/bitmap_allocator.h: (_Bit_scan_forward) -> Made this
	function call __builtin_ctz instead of the while loop.
	(allocate) -> Wrapped the if condition in __builtin_expect.
	(deallocate) -> Ditto.
	Renamed a few left-over variables and typedefs according to the
	C++STYLE mentioned in the documentation.
	Protected calls to __gthread* by __gthread_active_p(), whose value
	is cached in the file-local variable __threads_enabled.

From-SVN: r79924
---
 libstdc++-v3/ChangeLog                      |  11 +
 libstdc++-v3/include/ext/bitmap_allocator.h | 238 +++++++++++---
 2 files changed, 148 insertions(+), 101 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 73096ffc3de..7328e0c1dcf 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,14 @@
+2004-03-24  Dhruv Matani
+
+	* include/ext/bitmap_allocator.h: (_Bit_scan_forward) -> Made this
+	function call __builtin_ctz instead of the while loop.
+	(allocate) -> Wrapped the if condition in __builtin_expect.
+	(deallocate) -> Ditto.
+	Renamed a few left-over variables and typedefs according to the
+	C++STYLE mentioned in the documentation.
+	Protected calls to __gthread* by __gthread_active_p(), whose value
+	is cached in the file-local variable __threads_enabled.
+
 2004-03-24  Felix Yen
 
 	* testsuite/performance/20_util/allocator/producer_consumer.cc:
diff --git a/libstdc++-v3/include/ext/bitmap_allocator.h b/libstdc++-v3/include/ext/bitmap_allocator.h
index 71b278b30dc..9a0d1620984 100644
--- a/libstdc++-v3/include/ext/bitmap_allocator.h
+++ b/libstdc++-v3/include/ext/bitmap_allocator.h
@@ -37,7 +37,7 @@
 #include
 //For std::pair.
 #include <utility>
-//std::find_if.
+//std::find_if, and std::lower_bound.
 #include <algorithm>
 //For the free list of exponentially growing memory blocks. At max,
 //size of the vector should be not more than the number of bits in an
@@ -55,10 +55,18 @@
 #define NDEBUG
 //#define CHECK_FOR_ERRORS
+//#define __CPU_HAS_BACKWARD_BRANCH_PREDICTION
 
 namespace __gnu_cxx
 {
+  namespace {
+#if defined __GTHREADS
+    bool const __threads_enabled = __gthread_active_p();
+#endif
+  }
+
+#if defined __GTHREADS
   class _Mutex {
     __gthread_mutex_t _M_mut;
     //Prevent Copying and assignment.
@@ -67,12 +75,15 @@ namespace __gnu_cxx
   public:
     _Mutex ()
    {
+      if (__threads_enabled)
+        {
 #if !defined __GTHREAD_MUTEX_INIT
-      __GTHREAD_MUTEX_INIT_FUNCTION(&_M_mut);
+          __GTHREAD_MUTEX_INIT_FUNCTION(&_M_mut);
 #else
-      __gthread_mutex_t __mtemp = __GTHREAD_MUTEX_INIT;
-      _M_mut = __mtemp;
+          __gthread_mutex_t __mtemp = __GTHREAD_MUTEX_INIT;
+          _M_mut = __mtemp;
 #endif
+        }
     }
     ~_Mutex ()
     {
@@ -81,22 +92,42 @@ namespace __gnu_cxx
     __gthread_mutex_t *_M_get() { return &_M_mut; }
   };
 
-
   class _Lock {
-    _Mutex& _M_mt;
+    _Mutex* _M_pmt;
+    bool _M_locked;
     //Prevent Copying and assignment.
     _Lock (_Lock const&);
     _Lock& operator= (_Lock const&);
 
   public:
-    _Lock (_Mutex& __mref) : _M_mt(__mref)
+    _Lock(_Mutex* __mptr)
+      : _M_pmt(__mptr), _M_locked(false)
+    { this->_M_lock(); }
+
+    void _M_lock()
     {
-      __gthread_mutex_lock(_M_mt._M_get());
+      if (__threads_enabled)
+        {
+          _M_locked = true;
+          __gthread_mutex_lock(_M_pmt->_M_get());
+        }
     }
-    ~_Lock () { __gthread_mutex_unlock(_M_mt._M_get()); }
+
+    void _M_unlock()
+    {
+      if (__threads_enabled)
+        {
+          if (__builtin_expect(_M_locked, true))
+            {
+              __gthread_mutex_unlock(_M_pmt->_M_get());
+              _M_locked = false;
+            }
+        }
+    }
+
+    ~_Lock() { this->_M_unlock(); }
   };
+#endif
+
 
   namespace __aux_balloc {
-
     static const unsigned int _Bits_Per_Byte = 8;
     static const unsigned int _Bits_Per_Block = sizeof(unsigned int) * _Bits_Per_Byte;
@@ -148,7 +179,8 @@
     //T should be a pointer type, and A is the Allocator for the vector.
     template <typename _Tp, typename _Alloc>
-    class _Ffit_finder : public std::unary_function<std::pair<_Tp, _Tp>, bool> {
+    class _Ffit_finder
+      : public std::unary_function<std::pair<_Tp, _Tp>, bool> {
       typedef typename std::vector<std::pair<_Tp, _Tp>, _Alloc> _BPVector;
       typedef typename _BPVector::difference_type _Counter_type;
      typedef typename std::pair<_Tp, _Tp> _Block_pair;
@@ -157,7 +189,9 @@
       unsigned int _M_data_offset;
 
     public:
-      _Ffit_finder () : _M_pbitmap (0), _M_data_offset (0) { }
+      _Ffit_finder ()
+        : _M_pbitmap (0), _M_data_offset (0)
+      { }
 
       bool operator() (_Block_pair __bp) throw()
       {
@@ -214,7 +248,8 @@
       //Use the 2nd parameter with care. Make sure that such an entry
       //exists in the vector before passing that particular index to
       //this ctor.
-      _Bit_map_counter (_BPVector& Rvbp, int __index = -1) : _M_vbp(Rvbp)
+      _Bit_map_counter (_BPVector& Rvbp, int __index = -1)
+        : _M_vbp(Rvbp)
       {
         this->_M_reset(__index);
       }
@@ -238,7 +273,7 @@
       }
 
       //Dangerous Function! Use with extreme care. Pass to this
-      //functions ONLY those values that are known to be correct,
+      //function ONLY those values that are known to be correct,
       //otherwise this will mess up big time.
       void _M_set_internal_bit_map (unsigned int *__new_internal_marker) throw()
       {
@@ -275,28 +310,22 @@
         return _M_curr_bmap;
       }
 
-      pointer base () { return _M_vbp[_M_curr_index].first; }
+      pointer _M_base () { return _M_vbp[_M_curr_index].first; }
       unsigned int _M_offset ()
       {
-        return _Bits_Per_Block * ((reinterpret_cast<unsigned int*>(this->base()) - _M_curr_bmap) - 1);
+        return _Bits_Per_Block * ((reinterpret_cast<unsigned int*>(this->_M_base()) - _M_curr_bmap) - 1);
       }
       unsigned int _M_where () { return _M_curr_index; }
     };
   }
 
-  //Generic Version of the bsf instruction.
-  typedef unsigned int _Bit_map_type;
-  static inline unsigned int _Bit_scan_forward (_Bit_map_type __num)
-  {
-    unsigned int __ret_val = 0;
-    while (__num % 2 == 0)
-      {
-        ++__ret_val;
-        __num >>= 1;
-      }
-    return __ret_val;
-  }
+  //Generic Version of the bsf instruction.
+  typedef unsigned int _Bit_map_type;
+  static inline unsigned int _Bit_scan_forward (register _Bit_map_type __num)
+  {
+    return static_cast<unsigned int>(__builtin_ctz(__num));
+  }
 
   struct _OOM_handler {
     static std::new_handler _S_old_handler;
@@ -347,8 +376,8 @@
 
     static void _S_validate_free_list(unsigned int *__addr) throw()
     {
-      const unsigned int Max_Size = 64;
-      if (_S_free_list.size() >= Max_Size)
+      const unsigned int __max_size = 64;
+      if (_S_free_list.size() >= __max_size)
         {
           //Ok, the threshold value has been reached.
           //We determine which block to remove from the list of free
@@ -380,10 +409,9 @@
 
     static bool _S_should_i_give(unsigned int __block_size, unsigned int __required_size) throw()
     {
-      const unsigned int Max_Wastage_Percentage = 36;
-
+      const unsigned int __max_wastage_percentage = 36;
       if (__block_size >= __required_size &&
-          (((__block_size - __required_size) * 100 / __block_size) < Max_Wastage_Percentage))
+          (((__block_size - __required_size) * 100 / __block_size) < __max_wastage_percentage))
         return true;
       else
         return false;
@@ -395,7 +423,7 @@
     static inline void _S_insert_free_list(unsigned int *__addr) throw()
     {
 #if defined __GTHREADS
-      _Lock __bfl_lock(*&_S_bfl_mutex);
+      _Lock __bfl_lock(&_S_bfl_mutex);
 #endif
       //Call _S_validate_free_list to decide what should be done with this
       //particular free list.
@@ -405,12 +433,14 @@
     static unsigned int *_S_get_free_list(unsigned int __sz) throw (std::bad_alloc)
     {
 #if defined __GTHREADS
-      _Lock __bfl_lock(*&_S_bfl_mutex);
+      _Lock __bfl_lock(&_S_bfl_mutex);
 #endif
       _FLIter __temp = std::lower_bound(_S_free_list.begin(), _S_free_list.end(),
                                         __sz, _LT_pointer_compare());
       if (__temp == _S_free_list.end() || !_S_should_i_give (**__temp, __sz))
         {
+          //We hold the lock because the OOM_Handler is a stateless
+          //entity.
           _OOM_handler __set_handler(_BFL_type::_S_clear);
           unsigned int *__ret_val = reinterpret_cast<unsigned int*>
             (operator new (__sz + sizeof(unsigned int)));
@@ -430,7 +460,7 @@
     static void _S_clear()
     {
 #if defined __GTHREADS
-      _Lock __bfl_lock(*&_S_bfl_mutex);
+      _Lock __bfl_lock(&_S_bfl_mutex);
 #endif
       _FLIter __iter = _S_free_list.begin();
       while (__iter != _S_free_list.end())
@@ -448,7 +478,7 @@
 #endif
   std::vector<unsigned int*> _BA_free_list_store::_S_free_list;
 
-  template class bitmap_allocator;
+  template <typename _Tp> class bitmap_allocator;
   // specialize for void:
   template <> class bitmap_allocator<void> {
   public:
     typedef void* pointer;
     typedef const void* const_pointer;
     // reference-to-void members are impossible.
     typedef void value_type;
-    template struct rebind { typedef bitmap_allocator other; };
+    template <typename _Tp1> struct rebind { typedef bitmap_allocator<_Tp1> other; };
   };
 
-  template class bitmap_allocator : private _BA_free_list_store {
+  template <typename _Tp> class bitmap_allocator : private _BA_free_list_store {
   public:
     typedef size_t size_type;
     typedef ptrdiff_t difference_type;
     typedef _Tp* pointer;
     typedef const _Tp* const_pointer;
     typedef _Tp& reference;
     typedef const _Tp& const_reference;
     typedef _Tp value_type;
-    template struct rebind { typedef bitmap_allocator other; };
+    template <typename _Tp1> struct rebind { typedef bitmap_allocator<_Tp1> other; };
 
   private:
     static const unsigned int _Bits_Per_Byte = 8;
@@ -481,9 +511,9 @@
       *__pbmap &= __mask;
     }
 
-    static inline void _S_bit_free(unsigned int *__pbmap, unsigned int __Pos) throw()
+    static inline void _S_bit_free(unsigned int *__pbmap, unsigned int __pos) throw()
     {
-      unsigned int __mask = 1 << __Pos;
+      unsigned int __mask = 1 << __pos;
       *__pbmap |= __mask;
     }
@@ -565,18 +595,6 @@
     static _Mutex _S_mut;
 #endif
 
-  public:
-    bitmap_allocator() throw()
-    { }
-
-    bitmap_allocator(const bitmap_allocator&) { }
-
-    template <typename _Tp1> bitmap_allocator(const bitmap_allocator<_Tp1>&) throw()
-    { }
-
-    ~bitmap_allocator() throw()
-    { }
-
     //Complexity: Worst case complexity is O(N), but that is hardly ever
     //hit. if and when this particular case is encountered, the next few
     //cases are guaranteed to have a worst case complexity of O(1)!
@@ -586,22 +604,27 @@
     static pointer _S_allocate_single_object()
     {
 #if defined __GTHREADS
-      _Lock _bit_lock(*&_S_mut);
+      _Lock __bit_lock(&_S_mut);
 #endif
+
       //The algorithm is something like this: The last_request variable
       //points to the last accessed Bit Map. When such a condition
       //occurs, we try to find a free block in the current bitmap, or
       //succeeding bitmaps until the last bitmap is reached. If no free
-      //block turns up, we resort to First Fit method. But, again, the
-      //First Fit is used only upto the point where we started the
-      //previous linear search.
+      //block turns up, we resort to First Fit method.
+      //WARNING: Do not re-order the condition in the while statement
+      //below, because it relies on C++'s short-circuit
+      //evaluation. The return from _S_last_request->_M_get() will NOT
+      //be dereferenceable if _S_last_request->_M_finished() returns
+      //true. This would inevitably lead to a NULL pointer dereference
+      //if tinkered with.
       while (_S_last_request._M_finished() == false &&
              (*(_S_last_request._M_get()) == 0))
         {
           _S_last_request.operator++();
         }
 
-      if (_S_last_request._M_finished())
+      if (__builtin_expect(_S_last_request._M_finished() == true, false))
         {
           //Fall Back to First Fit algorithm.
           typedef typename __gnu_cxx::__aux_balloc::_Ffit_finder _FFF;
@@ -645,7 +668,7 @@
       unsigned int __nz_bit = _Bit_scan_forward(*_S_last_request._M_get());
       _S_bit_allocate(_S_last_request._M_get(), __nz_bit);
 
-      pointer __ret_val = _S_last_request.base() + _S_last_request._M_offset() + __nz_bit;
+      pointer __ret_val = _S_last_request._M_base() + _S_last_request._M_offset() + __nz_bit;
 
       unsigned int *__puse_count = reinterpret_cast<unsigned int*>
         (_S_mem_blocks[_S_last_request._M_where()].first) - 1;
@@ -654,49 +677,19 @@
       ++(*__puse_count);
       return __ret_val;
     }
 
-    //Complexity: O(1), but internally the complexity depends upon the
-    //complexity of the function(s) _S_allocate_single_object and
-    //_S_memory_get.
-    pointer allocate(size_type __n)
-    {
-      if (__n == 1)
-        return _S_allocate_single_object();
-      else
-        return reinterpret_cast<pointer>(_S_memory_get(__n * sizeof(value_type)));
-    }
-
-    //Complexity: Worst case complexity is O(N) where N is the number of
-    //blocks of size sizeof(value_type) within the free lists that the
-    //allocator holds. However, this worst case is hit only when the
-    //user supplies a bogus argument to hint. If the hint argument is
-    //sensible, then the complexity drops to O(lg(N)), and in extreme
-    //cases, even drops to as low as O(1). So, if the user supplied
-    //argument is good, then this function performs very well.
-    pointer allocate(size_type __n, typename bitmap_allocator::const_pointer)
-    {
-      return allocate(__n);
-    }
-
-    void deallocate(pointer __p, size_type __n) throw()
-    {
-      if (__n == 1)
-        _S_deallocate_single_object(__p);
-      else
-        _S_memory_put(__p);
-    }
-
     //Complexity: O(lg(N)), but the worst case is hit quite often! I
     //need to do something about this. I'll be able to work on it, only
     //when I have some solid figures from a few real apps.
     static void _S_deallocate_single_object(pointer __p) throw()
     {
 #if defined __GTHREADS
-      _Lock _bit_lock(*&_S_mut);
+      _Lock __bit_lock(&_S_mut);
 #endif
-      typedef typename _BPVector::iterator iterator;
-      typedef typename _BPVector::difference_type diff_type;
-      diff_type __diff;
+      typedef typename _BPVector::iterator _Iterator;
+      typedef typename _BPVector::difference_type _Difference_type;
+
+      _Difference_type __diff;
       int __displacement;
 
       assert(_S_last_dealloc_index >= 0);
@@ -711,7 +704,7 @@
         }
       else
         {
-          iterator _iter = (std::find_if(_S_mem_blocks.begin(), _S_mem_blocks.end(),
+          _Iterator _iter = (std::find_if(_S_mem_blocks.begin(), _S_mem_blocks.end(),
                                          __gnu_cxx::__aux_balloc::_Inclusive_between<pointer>(__p)));
           assert(_iter != _S_mem_blocks.end());
 
@@ -734,7 +727,7 @@
 
       --(*__puse_count);
 
-      if (!*__puse_count)
+      if (__builtin_expect(*__puse_count == 0, false))
         {
           _S_block_size /= 2;
 
@@ -744,12 +737,12 @@
           _S_mem_blocks.erase(_S_mem_blocks.begin() + __diff);
 
           //We reset the _S_last_request variable to reflect the erased
-          //block. We do this to pretect future requests after the last
+          //block. We do this to protect future requests after the last
           //block has been removed from a particular memory Chunk,
           //which in turn has been returned to the free list, and
           //hence had been erased from the vector, so the size of the
           //vector gets reduced by 1.
-          if ((diff_type)_S_last_request._M_where() >= __diff--)
+          if ((_Difference_type)_S_last_request._M_where() >= __diff--)
             {
              _S_last_request._M_reset(__diff);
              // assert(__diff >= 0);
            }
@@ -768,14 +761,57 @@
         }
     }
 
+  public:
+    bitmap_allocator() throw()
+    { }
+
+    bitmap_allocator(const bitmap_allocator&) { }
+
+    template <typename _Tp1> bitmap_allocator(const bitmap_allocator<_Tp1>&) throw()
+    { }
+
+    ~bitmap_allocator() throw()
+    { }
+
+    //Complexity: O(1), but internally the complexity depends upon the
+    //complexity of the function(s) _S_allocate_single_object and
+    //_S_memory_get.
+    pointer allocate(size_type __n)
+    {
+      if (__builtin_expect(__n == 1, true))
+        return _S_allocate_single_object();
+      else
+        return reinterpret_cast<pointer>(_S_memory_get(__n * sizeof(value_type)));
+    }
+
+    //Complexity: Worst case complexity is O(N) where N is the number of
+    //blocks of size sizeof(value_type) within the free lists that the
+    //allocator holds. However, this worst case is hit only when the
+    //user supplies a bogus argument to hint. If the hint argument is
+    //sensible, then the complexity drops to O(lg(N)), and in extreme
+    //cases, even drops to as low as O(1). So, if the user supplied
+    //argument is good, then this function performs very well.
+    pointer allocate(size_type __n, typename bitmap_allocator::const_pointer)
+    {
+      return allocate(__n);
+    }
+
+    void deallocate(pointer __p, size_type __n) throw()
+    {
+      if (__builtin_expect(__n == 1, true))
+        _S_deallocate_single_object(__p);
+      else
+        _S_memory_put(__p);
+    }
+
     pointer address(reference r) const { return &r; }
     const_pointer address(const_reference r) const { return &r; }
 
     size_type max_size(void) const throw() { return (size_type()-1)/sizeof(value_type); }
 
-    void construct (pointer p, const_reference _data)
+    void construct (pointer p, const_reference __data)
     {
-      new (p) value_type (_data);
+      ::new(p) value_type(__data);
     }
 
     void destroy (pointer p)
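
A note on the _Bit_scan_forward change: the shift-and-count loop is replaced
by GCC's __builtin_ctz, which compiles down to a single bsf/ctz-style
instruction on targets that have one. Below is a standalone sketch (not part
of the patch; the _loop/_ctz suffixes are illustrative) contrasting the two
versions. Both assume a non-zero argument, since the old loop would never
terminate on 0 and __builtin_ctz(0) is undefined; that precondition holds in
the allocator because _Bit_scan_forward is only called on bitmaps known to
contain a set (free) bit:

    #include <cassert>

    typedef unsigned int _Bit_map_type;

    //Old generic version: count trailing zero bits by shifting.
    static inline unsigned int _Bit_scan_forward_loop (_Bit_map_type __num)
    {
      assert(__num != 0);
      unsigned int __ret_val = 0;
      while (__num % 2 == 0)
        {
          ++__ret_val;
          __num >>= 1;
        }
      return __ret_val;
    }

    //New version: delegate to the builtin.
    static inline unsigned int _Bit_scan_forward_ctz (_Bit_map_type __num)
    {
      assert(__num != 0);
      return static_cast<unsigned int>(__builtin_ctz(__num));
    }

    int main ()
    {
      //0x18 == binary 11000: three trailing zeros either way.
      assert(_Bit_scan_forward_loop(0x18) == 3);
      assert(_Bit_scan_forward_ctz(0x18) == 3);
      return 0;
    }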
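On the __builtin_expect annotations: allocate() and deallocate() mark the
single-object case as the likely one, so the compiler lays that path out as
the fall-through. A minimal sketch of the same pattern, where __alloc_one and
__alloc_many are hypothetical stand-ins for the allocator's real
_S_allocate_single_object and _S_memory_get paths:

    #include <cstddef>
    #include <cstdlib>

    //Hypothetical stand-ins for the fast (single-object) and slow paths.
    static void *__alloc_one () { return std::malloc(sizeof(int)); }
    static void *__alloc_many (std::size_t __sz) { return std::malloc(__sz); }

    //As in the patched allocate(): __n == 1 is the expected case.
    void *__allocate_sketch (std::size_t __n)
    {
      if (__builtin_expect(__n == 1, true))
        return __alloc_one();
      else
        return __alloc_many(__n * sizeof(int));
    }

Inside _S_allocate_single_object the sense is inverted: falling back to the
First Fit search is the rare case, hence
__builtin_expect(_S_last_request._M_finished() == true, false).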
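On the threading change: __gthread_active_p() reports whether the program is
actually running multi-threaded, and the patch evaluates it once into the
file-local constant __threads_enabled so that, in a single-threaded program,
every mutex operation reduces to one well-predicted branch and the gthreads
layer is never entered. A pared-down analogue using raw pthreads in place of
the gthreads layer; the probe function is a hypothetical stand-in for
__gthread_active_p():

    #include <pthread.h>

    namespace
    {
      //Hypothetical stand-in for __gthread_active_p().
      bool __threads_active_probe () { return true; }

      //Evaluated once at startup, like __threads_enabled in the patch.
      const bool __threads_enabled = __threads_active_probe();
    }

    //Every pthread call is guarded, mirroring the patched _Mutex/_Lock:
    //a single-threaded program never touches the mutex at all.
    class __guarded_mutex
    {
      pthread_mutex_t _M_mut;
    public:
      __guarded_mutex ()  { if (__threads_enabled) pthread_mutex_init(&_M_mut, 0); }
      ~__guarded_mutex () { if (__threads_enabled) pthread_mutex_destroy(&_M_mut); }
      void _M_lock ()     { if (__threads_enabled) pthread_mutex_lock(&_M_mut); }
      void _M_unlock ()   { if (__threads_enabled) pthread_mutex_unlock(&_M_mut); }
    };

The reworked _Lock additionally records _M_locked, so the unlock performed by
its destructor is idempotent and safe after an explicit _M_unlock().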
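Finally, the free-list policy that the renamed constants belong to:
_S_should_i_give hands out a cached block only when reusing it would waste
less than 36% of its size. A worked example (the should_give name is
illustrative, not the allocator's entry point):

    #include <cassert>

    //Mirrors _S_should_i_give: give the block away only if the wastage
    //stays below __max_wastage_percentage.
    static bool should_give (unsigned int __block_size, unsigned int __required_size)
    {
      const unsigned int __max_wastage_percentage = 36;
      return __block_size >= __required_size
             && ((__block_size - __required_size) * 100 / __block_size)
                < __max_wastage_percentage;
    }

    int main ()
    {
      assert(should_give(100, 70));    //wastes 30% -> reuse the cached block
      assert(!should_give(100, 50));   //wastes 50% -> allocate afresh
      assert(!should_give(64, 128));   //block too small -> cannot give
      return 0;
    }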