// Internal header for TR1 unordered_set and unordered_map -*- C++ -*-

// Copyright (C) 2005, 2006 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING.  If not, write to the Free
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.

// As a special exception, you may use this file as part of a free software
// library without restriction.  Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License.  This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.

/** @file
 *  This is a TR1 C++ Library header.
 */

// This header file defines std::tr1::hashtable, which is used to
// implement std::tr1::unordered_set, std::tr1::unordered_map,
// std::tr1::unordered_multiset, and std::tr1::unordered_multimap.

// hashtable has many template parameters, partly to accommodate
// the differences between those four classes and partly to
// accommodate policy choices that go beyond what TR1 calls for.

// Class template hashtable attempts to encapsulate all reasonable
// variation among hash tables that use chaining.  It does not handle
// open addressing.

// References:
// M. Austern, "A Proposal to Add Hash Tables to the Standard
// Library (revision 4)," WG21 Document N1456=03-0039, 2003.
// D. E. Knuth, The Art of Computer Programming, v. 3, Sorting and Searching.
// A. Tavori and V. Dreizin, "Policy-Based Data Structures", 2004.
// http://gcc.gnu.org/onlinedocs/libstdc++/ext/pb_ds/index.html

#ifndef _TR1_HASHTABLE
#define _TR1_HASHTABLE 1

#include <utility>              // For std::pair
#include <memory>
#include <iterator>
#include <cstddef>
#include <cstdlib>
#include <cmath>
#include <bits/functexcept.h>
#include <tr1/type_traits>      // For true_type and false_type
#include <tr1/hashtable_policy.h>

namespace std
{
_GLIBCXX_BEGIN_NAMESPACE(tr1)

  // Class template hashtable, class definition.

  // Meaning of class template hashtable's template parameters

  // Key and Value: arbitrary CopyConstructible types.

  // Allocator: an allocator type ([lib.allocator.requirements]) whose
  // value type is Value.

  // ExtractKey: function object that takes an object of type Value
  // and returns a value of type Key.

  // Equal: function object that takes two objects of type Key and returns
  // a bool-like value that is true if the two objects are considered equal.

  // H1: the hash function.  A unary function object with argument type
  // Key and result type size_t.  Return values should be distributed
  // over the entire range [0, numeric_limits<size_t>::max()].

  // H2: the range-hashing function (in the terminology of Tavori and
  // Dreizin).  A binary function object whose argument types and result
  // type are all size_t.  Given arguments r and N, the return value is
  // in the range [0, N).
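  // For instance, a minimal range-hashing function meeting the H2
  // requirements could be written as below.  This is only an illustrative
  // sketch (the name modulo_range_hash is hypothetical, not a policy
  // class provided by this header):
  //
  //   struct modulo_range_hash
  //   {
  //     std::size_t
  //     operator()(std::size_t r, std::size_t N) const
  //     { return r % N; }
  //   };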
  // H: the ranged hash function (Tavori and Dreizin).  A binary function
  // whose argument types are Key and size_t and whose result type is
  // size_t.  Given arguments k and N, the return value is in the range
  // [0, N).  Default: h(k, N) = h2(h1(k), N).  If H is anything other
  // than the default, H1 and H2 are ignored.

  // RehashPolicy: Policy class with three members, all of which govern
  // the bucket count.  next_bkt(n) returns a bucket count no smaller
  // than n.  bkt_for_elements(n) returns a bucket count appropriate
  // for an element count of n.  need_rehash(n_bkt, n_elt, n_ins)
  // determines whether, if the current bucket count is n_bkt and the
  // current element count is n_elt, we need to increase the bucket
  // count.  If so, returns make_pair(true, n), where n is the new
  // bucket count.  If not, returns make_pair(false, <anything>).

  // ??? Right now it is hard-wired that the number of buckets never
  // shrinks.  Should we allow RehashPolicy to change that?

  // cache_hash_code: bool.  true if we store the value of the hash
  // function along with the value.  This is a time-space tradeoff.
  // Storing it may improve lookup speed by reducing the number of times
  // we need to call the Equal function.

  // constant_iterators: bool.  true if iterator and const_iterator are
  // both constant iterator types.  This is true for unordered_set and
  // unordered_multiset, false for unordered_map and unordered_multimap.

  // unique_keys: bool.  true if the return value of hashtable::count(k)
  // is always at most one, false if it may be an arbitrary number.  This
  // is true for unordered_set and unordered_map, false for
  // unordered_multiset and unordered_multimap.

  template<typename Key, typename Value,
           typename Allocator,
           typename ExtractKey, typename Equal,
           typename H1, typename H2,
           typename H, typename RehashPolicy,
           bool cache_hash_code,
           bool constant_iterators,
           bool unique_keys>
    class hashtable
    : public detail::rehash_base<RehashPolicy,
                                 hashtable<Key, Value, Allocator, ExtractKey,
                                           Equal, H1, H2, H, RehashPolicy,
                                           cache_hash_code,
                                           constant_iterators, unique_keys> >,
      public detail::hash_code_base<Key, Value, ExtractKey, Equal,
                                    H1, H2, H, cache_hash_code>,
      public detail::map_base<Key, Value, ExtractKey, unique_keys,
                              hashtable<Key, Value, Allocator, ExtractKey,
                                        Equal, H1, H2, H, RehashPolicy,
                                        cache_hash_code,
                                        constant_iterators, unique_keys> >
    {
    public:
      typedef Allocator                           allocator_type;
      typedef Value                               value_type;
      typedef Key                                 key_type;
      typedef Equal                               key_equal;
      // mapped_type, if present, comes from map_base.
      // hasher, if present, comes from hash_code_base.
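      // As an illustration of how the boolean parameters are meant to be
      // combined (a sketch only; SomeH1, SomeH2, SomeH and SomeRehashPolicy
      // stand for user-chosen policies and are not names defined here, and
      // the real std::tr1::unordered_set wrapper supplies its own choices):
      //
      //   typedef hashtable<int, int, std::allocator<int>,
      //                     std::_Identity<int>, std::equal_to<int>,
      //                     SomeH1, SomeH2, SomeH, SomeRehashPolicy,
      //                     false,  // cache_hash_code
      //                     true,   // constant_iterators
      //                     true>   // unique_keys
      //     set_like_table;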
      typedef typename Allocator::difference_type difference_type;
      typedef typename Allocator::size_type       size_type;
      typedef typename Allocator::reference       reference;
      typedef typename Allocator::const_reference const_reference;

      typedef detail::node_iterator<Value, constant_iterators,
                                    cache_hash_code>
                                                  local_iterator;
      typedef detail::node_const_iterator<Value, constant_iterators,
                                          cache_hash_code>
                                                  const_local_iterator;

      typedef detail::hashtable_iterator<Value, constant_iterators,
                                         cache_hash_code>
                                                  iterator;
      typedef detail::hashtable_const_iterator<Value, constant_iterators,
                                               cache_hash_code>
                                                  const_iterator;

      template<typename Key2, typename Value2, typename Ex2, bool unique2,
               typename Hashtable2>
        friend struct detail::map_base;

    private:
      typedef detail::hash_node<Value, cache_hash_code> node;
      typedef typename Allocator::template rebind<node>::other
                                                  node_allocator_t;
      typedef typename Allocator::template rebind<node*>::other
                                                  bucket_allocator_t;

      node_allocator_t m_node_allocator;
      node**           m_buckets;
      size_type        m_bucket_count;
      size_type        m_element_count;
      RehashPolicy     m_rehash_policy;

      node*
      m_allocate_node(const value_type& v);

      void
      m_deallocate_node(node* n);

      void
      m_deallocate_nodes(node**, size_type);

      node**
      m_allocate_buckets(size_type n);

      void
      m_deallocate_buckets(node**, size_type n);

    public:
      // Constructor, destructor, assignment, swap
      hashtable(size_type bucket_hint,
                const H1&, const H2&, const H&,
                const Equal&, const ExtractKey&,
                const allocator_type&);

      template<typename InIter>
        hashtable(InIter first, InIter last,
                  size_type bucket_hint,
                  const H1&, const H2&, const H&,
                  const Equal&, const ExtractKey&,
                  const allocator_type&);

      hashtable(const hashtable&);

      hashtable&
      operator=(const hashtable&);

      ~hashtable();

      void swap(hashtable&);

      // Basic container operations
      iterator
      begin()
      {
        iterator i(m_buckets);
        if (!i.m_cur_node)
          i.m_incr_bucket();
        return i;
      }

      const_iterator
      begin() const
      {
        const_iterator i(m_buckets);
        if (!i.m_cur_node)
          i.m_incr_bucket();
        return i;
      }

      iterator
      end()
      { return iterator(m_buckets + m_bucket_count); }

      const_iterator
      end() const
      { return const_iterator(m_buckets + m_bucket_count); }

      size_type
      size() const
      { return m_element_count; }

      bool
      empty() const
      { return size() == 0; }

      allocator_type
      get_allocator() const
      { return m_node_allocator; }

      size_type
      max_size() const
      { return m_node_allocator.max_size(); }

      // Observers
      key_equal
      key_eq() const
      { return this->m_eq; }

      // hash_function, if present, comes from hash_code_base.

      // Bucket operations
      size_type
      bucket_count() const
      { return m_bucket_count; }

      size_type
      max_bucket_count() const
      { return max_size(); }

      size_type
      bucket_size(size_type n) const
      { return std::distance(begin(n), end(n)); }

      size_type
      bucket(const key_type& k) const
      {
        return this->bucket_index(k, this->m_hash_code(k),
                                  this->m_bucket_count);
      }

      local_iterator
      begin(size_type n)
      { return local_iterator(m_buckets[n]); }

      local_iterator
      end(size_type)
      { return local_iterator(0); }

      const_local_iterator
      begin(size_type n) const
      { return const_local_iterator(m_buckets[n]); }

      const_local_iterator
      end(size_type) const
      { return const_local_iterator(0); }

      float
      load_factor() const
      {
        return static_cast<float>(size()) / static_cast<float>(bucket_count());
      }

      // max_load_factor, if present, comes from rehash_base.

      // Generalization of max_load_factor.  Extension, not found in TR1.
      // Only useful if RehashPolicy is something other than the default.
      const RehashPolicy&
      rehash_policy() const
      { return m_rehash_policy; }

      void
      rehash_policy(const RehashPolicy&);

      // Lookup.
      iterator
      find(const key_type& k);

      const_iterator
      find(const key_type& k) const;

      size_type
      count(const key_type& k) const;

      std::pair<iterator, iterator>
      equal_range(const key_type& k);

      std::pair<const_iterator, const_iterator>
      equal_range(const key_type& k) const;
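      // Illustrative use of the lookup interface through
      // std::tr1::unordered_multiset, which is implemented in terms of
      // this class template (sketch only):
      //
      //   std::tr1::unordered_multiset<int> s;
      //   std::pair<std::tr1::unordered_multiset<int>::iterator,
      //             std::tr1::unordered_multiset<int>::iterator>
      //     r = s.equal_range(42);
      //   // [r.first, r.second) visits every element equivalent to 42,
      //   // and std::distance(r.first, r.second) == s.count(42).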
    private:
      // Find, insert and erase helper functions
      // ??? This dispatching is a workaround for the fact that we don't
      // have partial specialization of member templates; it would be
      // better to just specialize insert on unique_keys.  There may be a
      // cleaner workaround.
      typedef typename __gnu_cxx::__conditional_type<unique_keys,
                                                     std::pair<iterator, bool>,
                                                     iterator>::__type
        Insert_Return_Type;

      typedef typename __gnu_cxx::__conditional_type<unique_keys,
                                          std::_Select1st<Insert_Return_Type>,
                                          std::_Identity<Insert_Return_Type>
                                                    >::__type
        Insert_Conv_Type;

      node*
      m_find_node(node*, const key_type&,
                  typename hashtable::hash_code_t) const;

      iterator
      m_insert_bucket(const value_type&, size_type,
                      typename hashtable::hash_code_t);

      std::pair<iterator, bool>
      m_insert(const value_type&, std::tr1::true_type);

      iterator
      m_insert(const value_type&, std::tr1::false_type);

      void
      m_erase_node(node*, node**);

    public:
      // Insert and erase
      Insert_Return_Type
      insert(const value_type& v)
      { return m_insert(v, std::tr1::integral_constant<bool, unique_keys>()); }

      iterator
      insert(iterator, const value_type& v)
      { return iterator(Insert_Conv_Type()(this->insert(v))); }

      const_iterator
      insert(const_iterator, const value_type& v)
      { return const_iterator(Insert_Conv_Type()(this->insert(v))); }

      template<typename InIter>
        void
        insert(InIter first, InIter last);

      iterator
      erase(iterator);

      const_iterator
      erase(const_iterator);

      size_type
      erase(const key_type&);

      iterator
      erase(iterator, iterator);

      const_iterator
      erase(const_iterator, const_iterator);

      void
      clear();

      // Set number of buckets to be appropriate for container of n elements.
      void rehash(size_type n);

    private:
      // Unconditionally change size of bucket array to n.
      void m_rehash(size_type n);
    };

  // Definitions of class template hashtable's out-of-line member functions.

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::node*
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_allocate_node(const value_type& v)
    {
      node* n = m_node_allocator.allocate(1);
      try
        {
          get_allocator().construct(&n->m_v, v);
          n->m_next = 0;
          return n;
        }
      catch(...)
        {
          m_node_allocator.deallocate(n, 1);
          __throw_exception_again;
        }
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_deallocate_node(node* n)
    {
      get_allocator().destroy(&n->m_v);
      m_node_allocator.deallocate(n, 1);
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_deallocate_nodes(node** array, size_type n)
    {
      for (size_type i = 0; i < n; ++i)
        {
          node* p = array[i];
          while (p)
            {
              node* tmp = p;
              p = p->m_next;
              m_deallocate_node(tmp);
            }
          array[i] = 0;
        }
    }
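  // The nodes handled above form singly linked per-bucket chains.  As a
  // sketch of the representation (assuming the layout detail::hash_node
  // uses when cache_hash_code is false; the true specialization also
  // stores the cached hash code):
  //
  //   struct hash_node
  //   {
  //     Value      m_v;     // the stored element
  //     hash_node* m_next;  // next node in the same bucket chain
  //   };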
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::node**
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_allocate_buckets(size_type n)
    {
      bucket_allocator_t alloc(m_node_allocator);

      // We allocate one extra bucket to hold a sentinel, an arbitrary
      // non-null pointer.  Iterator increment relies on this.
      node** p = alloc.allocate(n + 1);
      std::fill(p, p + n, (node*) 0);
      p[n] = reinterpret_cast<node*>(0x1000);
      return p;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_deallocate_buckets(node** p, size_type n)
    {
      bucket_allocator_t alloc(m_node_allocator);
      alloc.deallocate(p, n + 1);
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    hashtable(size_type bucket_hint,
              const H1& h1, const H2& h2, const H& h,
              const Eq& eq, const Ex& exk,
              const allocator_type& a)
    : detail::rehash_base<RP, hashtable>(),
      detail::hash_code_base<K, V, Ex, Eq, H1, H2, H, c>(exk, eq, h1, h2, h),
      detail::map_base<K, V, Ex, u, hashtable>(),
      m_node_allocator(a),
      m_bucket_count(0),
      m_element_count(0),
      m_rehash_policy()
    {
      m_bucket_count = m_rehash_policy.next_bkt(bucket_hint);
      m_buckets = m_allocate_buckets(m_bucket_count);
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    template<typename InIter>
      hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
      hashtable(InIter f, InIter l,
                size_type bucket_hint,
                const H1& h1, const H2& h2, const H& h,
                const Eq& eq, const Ex& exk,
                const allocator_type& a)
      : detail::rehash_base<RP, hashtable>(),
        detail::hash_code_base<K, V, Ex, Eq, H1, H2, H, c>(exk, eq, h1, h2, h),
        detail::map_base<K, V, Ex, u, hashtable>(),
        m_node_allocator(a),
        m_bucket_count(0),
        m_element_count(0),
        m_rehash_policy()
      {
        m_bucket_count = std::max(m_rehash_policy.next_bkt(bucket_hint),
                                  m_rehash_policy.
                                  bkt_for_elements(detail::
                                                   distance_fw(f, l)));
        m_buckets = m_allocate_buckets(m_bucket_count);
        try
          {
            for (; f != l; ++f)
              this->insert(*f);
          }
        catch(...)
          {
            clear();
            m_deallocate_buckets(m_buckets, m_bucket_count);
            __throw_exception_again;
          }
      }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    hashtable(const hashtable& ht)
    : detail::rehash_base<RP, hashtable>(ht),
      detail::hash_code_base<K, V, Ex, Eq, H1, H2, H, c>(ht),
      detail::map_base<K, V, Ex, u, hashtable>(ht),
      m_node_allocator(ht.get_allocator()),
      m_bucket_count(ht.m_bucket_count),
      m_element_count(ht.m_element_count),
      m_rehash_policy(ht.m_rehash_policy)
    {
      m_buckets = m_allocate_buckets(m_bucket_count);
      try
        {
          for (size_type i = 0; i < ht.m_bucket_count; ++i)
            {
              node* n = ht.m_buckets[i];
              node** tail = m_buckets + i;
              while (n)
                {
                  *tail = m_allocate_node(n->m_v);
                  this->copy_code(*tail, n);
                  tail = &((*tail)->m_next);
                  n = n->m_next;
                }
            }
        }
      catch(...)
        {
          clear();
          m_deallocate_buckets(m_buckets, m_bucket_count);
          __throw_exception_again;
        }
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>&
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    operator=(const hashtable& ht)
    {
      hashtable tmp(ht);
      this->swap(tmp);
      return *this;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    ~hashtable()
    {
      clear();
      m_deallocate_buckets(m_buckets, m_bucket_count);
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    swap(hashtable& x)
    {
      // The only base class with member variables is hash_code_base.  We
      // define hash_code_base::m_swap because different specializations
      // have different members.
      detail::hash_code_base<K, V, Ex, Eq, H1, H2, H, c>::m_swap(x);

      // _GLIBCXX_RESOLVE_LIB_DEFECTS
      // 431. Swapping containers with unequal allocators.
      std::__alloc_swap<node_allocator_t>::_S_do_it(m_node_allocator,
                                                    x.m_node_allocator);

      std::swap(m_rehash_policy, x.m_rehash_policy);
      std::swap(m_buckets, x.m_buckets);
      std::swap(m_bucket_count, x.m_bucket_count);
      std::swap(m_element_count, x.m_element_count);
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    rehash_policy(const RP& pol)
    {
      m_rehash_policy = pol;
      size_type n_bkt = pol.bkt_for_elements(m_element_count);
      if (n_bkt > m_bucket_count)
        m_rehash(n_bkt);
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    find(const key_type& k)
    {
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      std::size_t n = this->bucket_index(k, code, this->bucket_count());
      node* p = m_find_node(m_buckets[n], k, code);
      return p ? iterator(p, m_buckets + n) : this->end();
    }
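  // Lookup decomposes into three steps: compute the hash code with H1,
  // map it to a bucket with H2 (via hash_code_base::bucket_index), then
  // walk that bucket's chain with m_find_node.  Roughly, and only as an
  // illustrative sketch rather than literal code from this file:
  //
  //   std::size_t code = h1(k);                    // hash code
  //   std::size_t n    = h2(code, m_bucket_count); // bucket index
  //   node*       p    = m_find_node(m_buckets[n], k, code);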
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::const_iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    find(const key_type& k) const
    {
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      std::size_t n = this->bucket_index(k, code, this->bucket_count());
      node* p = m_find_node(m_buckets[n], k, code);
      return p ? const_iterator(p, m_buckets + n) : this->end();
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::size_type
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    count(const key_type& k) const
    {
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      std::size_t n = this->bucket_index(k, code, this->bucket_count());
      std::size_t result = 0;
      for (node* p = m_buckets[n]; p; p = p->m_next)
        if (this->compare(k, code, p))
          ++result;
      return result;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    std::pair<typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP,
                                 c, ci, u>::iterator,
              typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP,
                                 c, ci, u>::iterator>
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    equal_range(const key_type& k)
    {
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      std::size_t n = this->bucket_index(k, code, this->bucket_count());
      node** head = m_buckets + n;
      node* p = m_find_node(*head, k, code);

      if (p)
        {
          node* p1 = p->m_next;
          for (; p1; p1 = p1->m_next)
            if (!this->compare(k, code, p1))
              break;

          iterator first(p, head);
          iterator last(p1, head);
          if (!p1)
            last.m_incr_bucket();
          return std::make_pair(first, last);
        }
      else
        return std::make_pair(this->end(), this->end());
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    std::pair<typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP,
                                 c, ci, u>::const_iterator,
              typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP,
                                 c, ci, u>::const_iterator>
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    equal_range(const key_type& k) const
    {
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      std::size_t n = this->bucket_index(k, code, this->bucket_count());
      node** head = m_buckets + n;
      node* p = m_find_node(*head, k, code);

      if (p)
        {
          node* p1 = p->m_next;
          for (; p1; p1 = p1->m_next)
            if (!this->compare(k, code, p1))
              break;

          const_iterator first(p, head);
          const_iterator last(p1, head);
          if (!p1)
            last.m_incr_bucket();
          return std::make_pair(first, last);
        }
      else
        return std::make_pair(this->end(), this->end());
    }

  // Find the node whose key compares equal to k, beginning the search
  // at p (usually the head of a bucket).  Return nil if no node is found.
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::node*
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_find_node(node* p, const key_type& k,
                typename hashtable::hash_code_t code) const
    {
      for (; p; p = p->m_next)
        if (this->compare(k, code, p))
          return p;
      return 0;
    }

  // Insert v in bucket n (assumes no element with its key already present).
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_insert_bucket(const value_type& v, size_type n,
                    typename hashtable::hash_code_t code)
    {
      std::pair<bool, std::size_t> do_rehash
        = m_rehash_policy.need_rehash(m_bucket_count, m_element_count, 1);

      // Allocate the new node before doing the rehash so that we don't
      // do a rehash if the allocation throws.
      node* new_node = m_allocate_node(v);

      try
        {
          if (do_rehash.first)
            {
              const key_type& k = this->m_extract(v);
              n = this->bucket_index(k, code, do_rehash.second);
              m_rehash(do_rehash.second);
            }

          new_node->m_next = m_buckets[n];
          this->store_code(new_node, code);
          m_buckets[n] = new_node;
          ++m_element_count;
          return iterator(new_node, m_buckets + n);
        }
      catch(...)
        {
          m_deallocate_node(new_node);
          __throw_exception_again;
        }
    }

  // Insert v if no element with its key is already present.
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    std::pair<typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP,
                                 c, ci, u>::iterator, bool>
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_insert(const value_type& v, std::tr1::true_type)
    {
      const key_type& k = this->m_extract(v);
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      size_type n = this->bucket_index(k, code, m_bucket_count);

      if (node* p = m_find_node(m_buckets[n], k, code))
        return std::make_pair(iterator(p, m_buckets + n), false);
      return std::make_pair(m_insert_bucket(v, n, code), true);
    }
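  // Illustrative use of the unique-keys form above through one of the
  // public wrappers (std::tr1::unordered_map here, which is implemented
  // in terms of this class template); sketch only:
  //
  //   std::tr1::unordered_map<std::string, int> m;
  //   std::pair<std::tr1::unordered_map<std::string, int>::iterator, bool>
  //     r = m.insert(std::make_pair(std::string("key"), 1));
  //   // r.second is false if an equivalent key was already present, and
  //   // r.first then refers to the existing element; no copy is inserted.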
  // Insert v unconditionally.
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_insert(const value_type& v, std::tr1::false_type)
    {
      std::pair<bool, std::size_t> do_rehash
        = m_rehash_policy.need_rehash(m_bucket_count, m_element_count, 1);
      if (do_rehash.first)
        m_rehash(do_rehash.second);

      const key_type& k = this->m_extract(v);
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      size_type n = this->bucket_index(k, code, m_bucket_count);

      // First find the node, avoid leaking new_node if compare throws.
      node* prev = m_find_node(m_buckets[n], k, code);
      node* new_node = m_allocate_node(v);

      if (prev)
        {
          new_node->m_next = prev->m_next;
          prev->m_next = new_node;
        }
      else
        {
          new_node->m_next = m_buckets[n];
          m_buckets[n] = new_node;
        }
      this->store_code(new_node, code);

      ++m_element_count;
      return iterator(new_node, m_buckets + n);
    }

  // For erase(iterator) and erase(const_iterator).
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_erase_node(node* p, node** b)
    {
      node* cur = *b;
      if (cur == p)
        *b = cur->m_next;
      else
        {
          node* next = cur->m_next;
          while (next != p)
            {
              cur = next;
              next = cur->m_next;
            }
          cur->m_next = next->m_next;
        }

      m_deallocate_node(p);
      --m_element_count;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    template<typename InIter>
      void
      hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
      insert(InIter first, InIter last)
      {
        size_type n_elt = detail::distance_fw(first, last);
        std::pair<bool, std::size_t> do_rehash
          = m_rehash_policy.need_rehash(m_bucket_count, m_element_count,
                                        n_elt);
        if (do_rehash.first)
          m_rehash(do_rehash.second);

        for (; first != last; ++first)
          this->insert(*first);
      }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    erase(iterator it)
    {
      iterator result = it;
      ++result;
      m_erase_node(it.m_cur_node, it.m_cur_bucket);
      return result;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::const_iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    erase(const_iterator it)
    {
      const_iterator result = it;
      ++result;
      m_erase_node(it.m_cur_node, it.m_cur_bucket);
      return result;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::size_type
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    erase(const key_type& k)
    {
      typename hashtable::hash_code_t code = this->m_hash_code(k);
      std::size_t n = this->bucket_index(k, code, m_bucket_count);
      size_type result = 0;

      node** slot = m_buckets + n;
      while (*slot && !this->compare(k, code, *slot))
        slot = &((*slot)->m_next);

      while (*slot && this->compare(k, code, *slot))
        {
          node* p = *slot;
          *slot = p->m_next;
          m_deallocate_node(p);
          --m_element_count;
          ++result;
        }

      return result;
    }

  // ??? This could be optimized by taking advantage of the bucket
  // structure, but it's not clear that it's worth doing.  It probably
  // wouldn't even be an optimization unless the load factor is large.
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    erase(iterator first, iterator last)
    {
      while (first != last)
        first = this->erase(first);
      return last;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    typename hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::const_iterator
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    erase(const_iterator first, const_iterator last)
    {
      while (first != last)
        first = this->erase(first);
      return last;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    clear()
    {
      m_deallocate_nodes(m_buckets, m_bucket_count);
      m_element_count = 0;
    }

  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    rehash(size_type n)
    {
      m_rehash(std::max(m_rehash_policy.next_bkt(n),
                        m_rehash_policy.bkt_for_elements(m_element_count
                                                         + 1)));
    }
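  // For example (assuming the default prime-based rehash policy that the
  // TR1 containers built on this class template use), a request such as
  //
  //   h.rehash(100);
  //
  // is rounded up to the next bucket count the policy approves of, and is
  // never smaller than what bkt_for_elements(size() + 1) asks for, so the
  // resulting load factor stays within the policy's bounds.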
  template<typename K, typename V, typename A, typename Ex, typename Eq,
           typename H1, typename H2, typename H, typename RP,
           bool c, bool ci, bool u>
    void
    hashtable<K, V, A, Ex, Eq, H1, H2, H, RP, c, ci, u>::
    m_rehash(size_type n)
    {
      node** new_array = m_allocate_buckets(n);
      try
        {
          for (size_type i = 0; i < m_bucket_count; ++i)
            while (node* p = m_buckets[i])
              {
                std::size_t new_index = this->bucket_index(p, n);
                m_buckets[i] = p->m_next;
                p->m_next = new_array[new_index];
                new_array[new_index] = p;
              }
          m_deallocate_buckets(m_buckets, m_bucket_count);
          m_bucket_count = n;
          m_buckets = new_array;
        }
      catch(...)
        {
          // A failure here means that a hash function threw an exception.
          // We can't restore the previous state without calling the hash
          // function again, so the only sensible recovery is to delete
          // everything.
          m_deallocate_nodes(new_array, n);
          m_deallocate_buckets(new_array, n);
          m_deallocate_nodes(m_buckets, m_bucket_count);
          m_element_count = 0;
          __throw_exception_again;
        }
    }

_GLIBCXX_END_NAMESPACE
} // namespace std::tr1

#endif // _TR1_HASHTABLE