/*
 * Copyright (c) 1996
 * Silicon Graphics Computer Systems, Inc.
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Silicon Graphics makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
 */

#ifndef __PTHREAD_ALLOC_H
#define __PTHREAD_ALLOC_H

// Pthread-specific node allocator.
// This is similar to the default allocator, except that free-list
// information is kept separately for each thread, avoiding locking.
// This should be reasonably fast even in the presence of threads.
// The down side is that storage may not be well-utilized.
// It is not an error to allocate memory in thread A and deallocate
// it in thread B.  But this effectively transfers ownership of the memory,
// so that it can only be reallocated by thread B.  Thus this can effectively
// result in a storage leak if it's done on a regular basis.
// It can also result in frequent sharing of
// cache lines among processors, with potentially serious performance
// consequences.

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <alloc.h>

#ifndef __RESTRICT
#  define __RESTRICT
#endif

// Note that this class has nonstatic members.  We instantiate it once
// per thread.
template <bool dummy>
class __pthread_alloc_template {

private:
  enum {ALIGN = 8};
  enum {MAX_BYTES = 128};   // power of 2
  enum {NFREELISTS = MAX_BYTES/ALIGN};

  union obj {
    union obj * free_list_link;
    char client_data[ALIGN];    /* The client sees this. */
  };

  // Per instance state
  obj* volatile free_list[NFREELISTS];
  __pthread_alloc_template<dummy>* next;    // Free list link

  static size_t ROUND_UP(size_t bytes) {
    return (((bytes) + ALIGN-1) & ~(ALIGN - 1));
  }
  static size_t FREELIST_INDEX(size_t bytes) {
    return (((bytes) + ALIGN-1)/ALIGN - 1);
  }

  // Returns an object of size n, and optionally adds to size n free list.
  void *refill(size_t n);
  // Allocates a chunk for nobjs of size size.  nobjs may be reduced
  // if it is inconvenient to allocate the requested number.
  static char *chunk_alloc(size_t size, int &nobjs);

  // Chunk allocation state.  And other shared state.
  // Protected by chunk_allocator_lock.
  static pthread_mutex_t chunk_allocator_lock;
  static char *start_free;
  static char *end_free;
  static size_t heap_size;
  static __pthread_alloc_template<dummy>* free_allocators;
                        // Allocator instances that are currently unclaimed
                        // by any thread.
  static pthread_key_t key;
                        // Pthread key under which allocator is stored.
  static bool key_initialized;

  static void destructor(void *instance);
                        // Function to be called on thread exit to reclaim
                        // allocator instance.
  static __pthread_alloc_template<dummy> *new_allocator();
                        // Return a recycled or new allocator instance.
  static __pthread_alloc_template<dummy> *get_allocator_instance();
                        // Ensure that the current thread has an associated
                        // allocator instance.
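  // The nested class below is a small RAII guard: constructing a lock
  // acquires chunk_allocator_lock and its destructor releases it, so the
  // mutex is released on every path out of the scope that declares it.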
  class lock {
  public:
    lock ()  { pthread_mutex_lock(&chunk_allocator_lock);   }
    ~lock () { pthread_mutex_unlock(&chunk_allocator_lock); }
  };
  friend class lock;

public:

  __pthread_alloc_template() : next(0)
  {
    memset((void *)free_list, 0, NFREELISTS * sizeof(obj *));
  }

  /* n must be > 0 */
  static void * allocate(size_t n)
  {
    obj * volatile * my_free_list;
    obj * __RESTRICT result;
    __pthread_alloc_template<dummy>* a;

    if (n > MAX_BYTES) {
      return(malloc(n));
    }
    if (!key_initialized ||
        !(a = (__pthread_alloc_template<dummy>*)pthread_getspecific(key))) {
      a = get_allocator_instance();
    }
    my_free_list = a -> free_list + FREELIST_INDEX(n);
    result = *my_free_list;
    if (result == 0) {
      void *r = a -> refill(ROUND_UP(n));
      return r;
    }
    *my_free_list = result -> free_list_link;
    return (result);
  }

  /* p may not be 0 */
  static void deallocate(void *p, size_t n)
  {
    obj *q = (obj *)p;
    obj * volatile * my_free_list;
    __pthread_alloc_template<dummy>* a;

    if (n > MAX_BYTES) {
      free(p);
      return;
    }
    if (!key_initialized ||
        !(a = (__pthread_alloc_template<dummy>*)pthread_getspecific(key))) {
      a = get_allocator_instance();
    }
    my_free_list = a->free_list + FREELIST_INDEX(n);
    q -> free_list_link = *my_free_list;
    *my_free_list = q;
  }

  static void * reallocate(void *p, size_t old_sz, size_t new_sz);

};

typedef __pthread_alloc_template<false> pthread_alloc;


template <bool dummy>
void __pthread_alloc_template<dummy>::destructor(void * instance)
{
  __pthread_alloc_template<dummy>* a =
    (__pthread_alloc_template<dummy>*)instance;
  a -> next = free_allocators;
  free_allocators = a;
}

template <bool dummy>
__pthread_alloc_template<dummy>*
__pthread_alloc_template<dummy>::new_allocator()
{
  if (0 != free_allocators) {
    __pthread_alloc_template<dummy>* result = free_allocators;
    free_allocators = free_allocators -> next;
    return result;
  } else {
    return new __pthread_alloc_template<dummy>;
  }
}

template <bool dummy>
__pthread_alloc_template<dummy>*
__pthread_alloc_template<dummy>::get_allocator_instance()
{
  __pthread_alloc_template<dummy>* result;

  if (!key_initialized) {
    /*REFERENCED*/
    lock lock_instance;
    if (!key_initialized) {
      if (pthread_key_create(&key, destructor)) {
        abort();    // failed
      }
      key_initialized = true;
    }
  }
  result = new_allocator();
  if (pthread_setspecific(key, result)) abort();
  return result;
}

/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much.                                        */
/* We assume that size is properly aligned.                         */
template <bool dummy>
char *__pthread_alloc_template<dummy>
::chunk_alloc(size_t size, int &nobjs)
{
  {
    char * result;
    size_t total_bytes;
    size_t bytes_left;
    /*REFERENCED*/
    lock lock_instance;         // Acquire lock for this routine

    total_bytes = size * nobjs;
    bytes_left = end_free - start_free;
    if (bytes_left >= total_bytes) {
      result = start_free;
      start_free += total_bytes;
      return(result);
    } else if (bytes_left >= size) {
      nobjs = bytes_left/size;
      total_bytes = size * nobjs;
      result = start_free;
      start_free += total_bytes;
      return(result);
    } else {
      size_t bytes_to_get = 2 * total_bytes + ROUND_UP(heap_size >> 4);
      // Try to make use of the left-over piece.
      if (bytes_left > 0) {
        __pthread_alloc_template<dummy>* a =
          (__pthread_alloc_template<dummy>*)pthread_getspecific(key);
        obj * volatile * my_free_list =
          a->free_list + FREELIST_INDEX(bytes_left);

        ((obj *)start_free) -> free_list_link = *my_free_list;
        *my_free_list = (obj *)start_free;
      }
#     ifdef _SGI_SOURCE
      // Try to get memory that's aligned on something like a
      // cache line boundary, so as to avoid parceling out
      // parts of the same line to different threads and thus
      // possibly different processors.
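      // (128 bytes below is a conservative guess at the largest cache
      // line size on the machines of interest; any power-of-two bound
      // that is at least the real line size preserves the alignment.)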
      {
        const int cache_line_size = 128;    // probable upper bound
        bytes_to_get &= ~(cache_line_size-1);
        start_free = (char *)memalign(cache_line_size, bytes_to_get);
        if (0 == start_free) {
          start_free = (char *)malloc_alloc::allocate(bytes_to_get);
        }
      }
#     else  /* !_SGI_SOURCE */
      start_free = (char *)malloc_alloc::allocate(bytes_to_get);
#     endif
      heap_size += bytes_to_get;
      end_free = start_free + bytes_to_get;
    }
  }
  // lock is released here
  return(chunk_alloc(size, nobjs));
}


/* Returns an object of size n, and optionally adds to size n free list.*/
/* We assume that n is properly aligned.                                */
/* We hold the allocation lock.                                         */
template <bool dummy>
void *__pthread_alloc_template<dummy>
::refill(size_t n)
{
  int nobjs = 128;
  char * chunk = chunk_alloc(n, nobjs);
  obj * volatile * my_free_list;
  obj * result;
  obj * current_obj, * next_obj;
  int i;

  if (1 == nobjs) {
    return(chunk);
  }
  my_free_list = free_list + FREELIST_INDEX(n);

  /* Build free list in chunk */
  result = (obj *)chunk;
  *my_free_list = next_obj = (obj *)(chunk + n);
  for (i = 1; ; i++) {
    current_obj = next_obj;
    next_obj = (obj *)((char *)next_obj + n);
    if (nobjs - 1 == i) {
      current_obj -> free_list_link = 0;
      break;
    } else {
      current_obj -> free_list_link = next_obj;
    }
  }
  return(result);
}

template <bool dummy>
void *__pthread_alloc_template<dummy>
::reallocate(void *p, size_t old_sz, size_t new_sz)
{
  void * result;
  size_t copy_sz;

  if (old_sz > MAX_BYTES && new_sz > MAX_BYTES) {
    return(realloc(p, new_sz));
  }
  if (ROUND_UP(old_sz) == ROUND_UP(new_sz)) return(p);
  result = allocate(new_sz);
  copy_sz = new_sz > old_sz ? old_sz : new_sz;
  memcpy(result, p, copy_sz);
  deallocate(p, old_sz);
  return(result);
}

template <bool dummy>
__pthread_alloc_template<dummy> *
__pthread_alloc_template<dummy>::free_allocators = 0;

template <bool dummy>
pthread_key_t __pthread_alloc_template<dummy>::key;

template <bool dummy>
bool __pthread_alloc_template<dummy>::key_initialized = false;

template <bool dummy>
pthread_mutex_t __pthread_alloc_template<dummy>::chunk_allocator_lock
  = PTHREAD_MUTEX_INITIALIZER;

template <bool dummy>
char *__pthread_alloc_template<dummy>::start_free = 0;

template <bool dummy>
char *__pthread_alloc_template<dummy>::end_free = 0;

template <bool dummy>
size_t __pthread_alloc_template<dummy>::heap_size = 0;

#endif /* __PTHREAD_ALLOC_H */
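
// Usage sketch (illustrative only, not part of the header proper):
// pthread_alloc follows the untyped SGI allocator convention, in which
// allocate and deallocate take an explicit size in bytes:
//
//     void* p = pthread_alloc::allocate(40);    // small: served from this
//                                               // thread's free list
//     pthread_alloc::deallocate(p, 40);         // returned to this thread's list
//
//     void* q = pthread_alloc::allocate(4096);  // > MAX_BYTES: plain malloc
//     pthread_alloc::deallocate(q, 4096);       // > MAX_BYTES: plain free
//
// Locking occurs only when a thread first needs an allocator instance or
// when a free list must be refilled from the shared chunk pool.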