auto merge of #6895 : cmr/rust/jemalloc, r=brson

Merged by bors on 2013-06-06 18:43:37 -07:00 (commit 5d2cadbfea).
104 changed files with 44444 additions and 13 deletions

.gitattributes (vendored, 1 change)

@@ -7,3 +7,4 @@
 src/rt/msvc/* -whitespace
 src/rt/vg/* -whitespace
 src/rt/linenoise/* -whitespace
+src/rt/jemalloc/**/* -whitespace

configure (vendored, 3 changes)

@@ -683,7 +683,8 @@ do
 for i in \
 isaac linenoise sync test \
 arch/i386 arch/x86_64 arch/arm arch/mips \
-libuv libuv/src/ares libuv/src/eio libuv/src/ev
+libuv libuv/src/ares libuv/src/eio libuv/src/ev \
+jemalloc
 do
 make_dir rt/$t/stage$s/$i
 done


@@ -146,7 +146,7 @@ CFG_LIB_GLOB_x86_64-unknown-linux-gnu=lib$(1)-*.so
 CFG_LIB_DSYM_GLOB_x86_64-unknown-linux-gnu=lib$(1)-*.dylib.dSYM
 CFG_GCCISH_CFLAGS_x86_64-unknown-linux-gnu := -Wall -Werror -g -fPIC -m64
 CFG_GCCISH_CXXFLAGS_x86_64-unknown-linux-gnu := -fno-rtti
-CFG_GCCISH_LINK_FLAGS_x86_64-unknown-linux-gnu := -shared -fPIC -ldl -lpthread -lrt -g -m64
+CFG_GCCISH_LINK_FLAGS_x86_64-unknown-linux-gnu := -shared -fPIC -ldl -pthread -lrt -g -m64
 CFG_GCCISH_DEF_FLAG_x86_64-unknown-linux-gnu := -Wl,--export-dynamic,--dynamic-list=
 CFG_GCCISH_PRE_LIB_FLAGS_x86_64-unknown-linux-gnu := -Wl,-whole-archive
 CFG_GCCISH_POST_LIB_FLAGS_x86_64-unknown-linux-gnu := -Wl,-no-whole-archive
@@ -172,7 +172,7 @@ CFG_LIB_GLOB_i686-unknown-linux-gnu=lib$(1)-*.so
 CFG_LIB_DSYM_GLOB_i686-unknown-linux-gnu=lib$(1)-*.dylib.dSYM
 CFG_GCCISH_CFLAGS_i686-unknown-linux-gnu := -Wall -Werror -g -fPIC -m32
 CFG_GCCISH_CXXFLAGS_i686-unknown-linux-gnu := -fno-rtti
-CFG_GCCISH_LINK_FLAGS_i686-unknown-linux-gnu := -shared -fPIC -ldl -lpthread -lrt -g -m32
+CFG_GCCISH_LINK_FLAGS_i686-unknown-linux-gnu := -shared -fPIC -ldl -pthread -lrt -g -m32
 CFG_GCCISH_DEF_FLAG_i686-unknown-linux-gnu := -Wl,--export-dynamic,--dynamic-list=
 CFG_GCCISH_PRE_LIB_FLAGS_i686-unknown-linux-gnu := -Wl,-whole-archive
 CFG_GCCISH_POST_LIB_FLAGS_i686-unknown-linux-gnu := -Wl,-no-whole-archive
@@ -198,7 +198,7 @@ CFG_LIB_GLOB_x86_64-apple-darwin=lib$(1)-*.dylib
 CFG_LIB_DSYM_GLOB_x86_64-apple-darwin=lib$(1)-*.dylib.dSYM
 CFG_GCCISH_CFLAGS_x86_64-apple-darwin := -Wall -Werror -g -fPIC -m64 -arch x86_64
 CFG_GCCISH_CXXFLAGS_x86_64-apple-darwin := -fno-rtti
-CFG_GCCISH_LINK_FLAGS_x86_64-apple-darwin := -dynamiclib -lpthread -framework CoreServices -Wl,-no_compact_unwind -m64
+CFG_GCCISH_LINK_FLAGS_x86_64-apple-darwin := -dynamiclib -pthread -framework CoreServices -Wl,-no_compact_unwind -m64
 CFG_GCCISH_DEF_FLAG_x86_64-apple-darwin := -Wl,-exported_symbols_list,
 CFG_GCCISH_PRE_LIB_FLAGS_x86_64-apple-darwin :=
 CFG_GCCISH_POST_LIB_FLAGS_x86_64-apple-darwin :=
@@ -223,7 +223,7 @@ CFG_LIB_GLOB_i686-apple-darwin=lib$(1)-*.dylib
 CFG_LIB_DSYM_GLOB_i686-apple-darwin=lib$(1)-*.dylib.dSYM
 CFG_GCCISH_CFLAGS_i686-apple-darwin := -Wall -Werror -g -fPIC -m32 -arch i386
 CFG_GCCISH_CXXFLAGS_i686-apple-darwin := -fno-rtti
-CFG_GCCISH_LINK_FLAGS_i686-apple-darwin := -dynamiclib -lpthread -framework CoreServices -Wl,-no_compact_unwind -m32
+CFG_GCCISH_LINK_FLAGS_i686-apple-darwin := -dynamiclib -pthread -framework CoreServices -Wl,-no_compact_unwind -m32
 CFG_GCCISH_DEF_FLAG_i686-apple-darwin := -Wl,-exported_symbols_list,
 CFG_GCCISH_PRE_LIB_FLAGS_i686-apple-darwin :=
 CFG_GCCISH_POST_LIB_FLAGS_i686-apple-darwin :=
@@ -376,13 +376,13 @@ CFG_LIB_NAME_x86_64-unknown-freebsd=lib$(1).so
 CFG_LIB_GLOB_x86_64-unknown-freebsd=lib$(1)-*.so
 CFG_LIB_DSYM_GLOB_x86_64-unknown-freebsd=$(1)-*.dylib.dSYM
 CFG_GCCISH_CFLAGS_x86_64-unknown-freebsd := -Wall -Werror -g -fPIC -I/usr/local/include
-CFG_GCCISH_LINK_FLAGS_x86_64-unknown-freebsd := -shared -fPIC -g -lpthread -lrt
+CFG_GCCISH_LINK_FLAGS_x86_64-unknown-freebsd := -shared -fPIC -g -pthread -lrt
 CFG_GCCISH_DEF_FLAG_x86_64-unknown-freebsd := -Wl,--export-dynamic,--dynamic-list=
 CFG_GCCISH_PRE_LIB_FLAGS_x86_64-unknown-freebsd := -Wl,-whole-archive
 CFG_GCCISH_POST_LIB_FLAGS_x86_64-unknown-freebsd := -Wl,-no-whole-archive
 CFG_DEF_SUFFIX_x86_64-unknown-freebsd := .bsd.def
 CFG_INSTALL_NAME_x86_64-unknown-freebsd =
-CFG_LIBUV_LINK_FLAGS_x86_64-unknown-freebsd := -lpthread -lkvm
+CFG_LIBUV_LINK_FLAGS_x86_64-unknown-freebsd := -pthread -lkvm
 CFG_EXE_SUFFIX_x86_64-unknown-freebsd :=
 CFG_WINDOWSY_x86_64-unknown-freebsd :=
 CFG_UNIXY_x86_64-unknown-freebsd := 1


@@ -106,18 +106,23 @@ RUNTIME_S_$(1)_$(2) := rt/arch/$$(HOST_$(1))/_context.S \
 ifeq ($$(CFG_WINDOWSY_$(1)), 1)
 LIBUV_OSTYPE_$(1)_$(2) := win
 LIBUV_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/libuv/libuv.a
+JEMALLOC_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/jemalloc/lib/jemalloc.lib
 else ifeq ($(OSTYPE_$(1)), apple-darwin)
 LIBUV_OSTYPE_$(1)_$(2) := mac
 LIBUV_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/libuv/libuv.a
+JEMALLOC_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/jemalloc/lib/libjemalloc_pic.a
 else ifeq ($(OSTYPE_$(1)), unknown-freebsd)
 LIBUV_OSTYPE_$(1)_$(2) := unix/freebsd
 LIBUV_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/libuv/libuv.a
+JEMALLOC_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/jemalloc/lib/libjemalloc_pic.a
 else ifeq ($(OSTYPE_$(1)), linux-androideabi)
 LIBUV_OSTYPE_$(1)_$(2) := unix/android
 LIBUV_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/libuv/libuv.a
+JEMALLOC_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/jemalloc/lib/libjemalloc_pic.a
 else
 LIBUV_OSTYPE_$(1)_$(2) := unix/linux
 LIBUV_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/libuv/libuv.a
+JEMALLOC_LIB_$(1)_$(2) := rt/$(1)/stage$(2)/jemalloc/lib/libjemalloc_pic.a
 endif
 RUNTIME_DEF_$(1)_$(2) := rt/rustrt$(CFG_DEF_SUFFIX_$(1))
@@ -133,8 +138,6 @@ ALL_OBJ_FILES += $$(RUNTIME_OBJS_$(1)_$(2))
 MORESTACK_OBJ_$(1)_$(2) := rt/$(1)/stage$(2)/arch/$$(HOST_$(1))/morestack.o
 ALL_OBJ_FILES += $$(MORESTACK_OBJS_$(1)_$(2))
-RUNTIME_LIBS_$(1)_$(2) := $$(LIBUV_LIB_$(1)_$(2))
 rt/$(1)/stage$(2)/%.o: rt/%.cpp $$(MKFILE_DEPS)
 @$$(call E, compile: $$@)
 $$(Q)$$(call CFG_COMPILE_CXX_$(1), $$@, $$(RUNTIME_INCS_$(1)_$(2)) \
@@ -155,11 +158,10 @@ rt/$(1)/stage$(2)/arch/$$(HOST_$(1))/libmorestack.a: $$(MORESTACK_OBJ_$(1)_$(2))
 $$(Q)$(AR_$(1)) rcs $$@ $$<
 rt/$(1)/stage$(2)/$(CFG_RUNTIME_$(1)): $$(RUNTIME_OBJS_$(1)_$(2)) $$(MKFILE_DEPS) \
-$$(RUNTIME_DEF_$(1)_$(2)) \
-$$(RUNTIME_LIBS_$(1)_$(2))
+$$(RUNTIME_DEF_$(1)_$(2)) $$(LIBUV_LIB_$(1)_$(2)) $$(JEMALLOC_LIB_$(1)_$(2))
 @$$(call E, link: $$@)
 $$(Q)$$(call CFG_LINK_CXX_$(1),$$@, $$(RUNTIME_OBJS_$(1)_$(2)) \
-$$(CFG_GCCISH_POST_LIB_FLAGS_$(1)) $$(RUNTIME_LIBS_$(1)_$(2)) \
+$$(JEMALLOC_LIB_$(1)_$(2)) $$(CFG_GCCISH_POST_LIB_FLAGS_$(1)) $$(LIBUV_LIB_$(1)_$(2)) \
 $$(CFG_LIBUV_LINK_FLAGS_$(1)),$$(RUNTIME_DEF_$(1)_$(2)),$$(CFG_RUNTIME_$(1)))
# FIXME: For some reason libuv's makefiles can't figure out the
@@ -208,6 +210,9 @@ $$(LIBUV_LIB_$(1)_$(2)): $$(LIBUV_DEPS)
 V=$$(VERBOSE)
 endif
+$$(JEMALLOC_LIB_$(1)_$(2)):
+	cd $$(CFG_BUILD_DIR)/rt/$(1)/stage$(2)/jemalloc; $(S)src/rt/jemalloc/configure --disable-experimental
+	$$(Q)$$(MAKE) -C $$(CFG_BUILD_DIR)/rt/$(1)/stage$(2)/jemalloc
# These could go in rt.mk or rustllvm.mk, they're needed for both.


@@ -17,6 +17,7 @@ use core::os;
 use core::run;
 use core::str;
 use core::task;
+use core::vec;
 #[cfg(target_os = "win32")]
 fn target_env(lib_path: &str, prog: &str) -> ~[(~str,~str)] {
@@ -28,7 +29,7 @@ fn target_env(lib_path: &str, prog: &str) -> ~[(~str,~str)] {
 let aux_path = prog.slice(0u, prog.len() - 4u).to_owned() + ".libaux";
 env = do vec::map(env) |pair| {
-let (k,v) = *pair;
+let (k,v) = copy *pair;
 if k == ~"PATH" { (~"PATH", v + ";" + lib_path + ";" + aux_path) }
 else { (k,v) }
 };

src/rt/jemalloc/COPYING (new file, 27 lines)

@@ -0,0 +1,27 @@
Unless otherwise specified, files in the jemalloc source distribution are
subject to the following license:
--------------------------------------------------------------------------------
Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
All rights reserved.
Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice(s),
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice(s),
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------

src/rt/jemalloc/ChangeLog (new file, 425 lines)

@@ -0,0 +1,425 @@
Following are change highlights associated with official releases. Important
bug fixes are all mentioned, but internal enhancements are omitted here for
brevity (even though they are more fun to write about). Much more detail can be
found in the git revision history:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
git://canonware.com/jemalloc.git
* 3.3.1 (March 6, 2013)
This version fixes bugs that are typically encountered only when utilizing
custom run-time options.
Bug fixes:
- Fix a locking order bug that could cause deadlock during fork if heap
profiling were enabled.
- Fix a chunk recycling bug that could cause the allocator to lose track of
whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause
corruption if allocating via sbrk(2) (unlikely unless running with the
"dss:primary" option specified). This was completely harmless on Linux
unless using mlockall(2) (and unlikely even then, unless the
--disable-munmap configure option or the "dss:primary" option was
specified). This regression was introduced in 3.1.0 by the
mlockall(2)/madvise(2) interaction fix.
- Fix TLS-related memory corruption that could occur during thread exit if the
thread never allocated memory. Only the quarantine and prof facilities were
susceptible.
- Fix two quarantine bugs:
+ Internal reallocation of the quarantined object array leaked the old
array.
+ Reallocation failure for internal reallocation of the quarantined object
array (very unlikely) resulted in memory corruption.
- Fix Valgrind integration to annotate all internally allocated memory in a
way that keeps Valgrind happy about internal data structure access.
- Fix building for s390 systems.
* 3.3.0 (January 23, 2013)
This version includes a few minor performance improvements in addition to the
listed new features and bug fixes.
New features:
- Add clipping support to lg_chunk option processing.
- Add the --enable-ivsalloc option.
- Add the --without-export option.
- Add the --disable-zone-allocator option.
Bug fixes:
- Fix "arenas.extend" mallctl to output the number of arenas.
- Fix chunk_recycle() to unconditionally inform Valgrind that returned memory
is undefined.
- Fix build break on FreeBSD related to alloca.h.
* 3.2.0 (November 9, 2012)
In addition to a couple of bug fixes, this version modifies page run
allocation and dirty page purging algorithms in order to better control
page-level virtual memory fragmentation.
Incompatible changes:
- Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1).
Bug fixes:
- Fix dss/mmap allocation precedence code to use recyclable mmap memory only
after primary dss allocation fails.
- Fix deadlock in the "arenas.purge" mallctl. This regression was introduced
in 3.1.0 by the addition of the "arena.<i>.purge" mallctl.
* 3.1.0 (October 16, 2012)
New features:
- Auto-detect whether running inside Valgrind, thus removing the need to
manually specify MALLOC_CONF=valgrind:true.
- Add the "arenas.extend" mallctl, which allows applications to create
manually managed arenas.
- Add the ALLOCM_ARENA() flag for {,r,d}allocm().
- Add the "opt.dss", "arena.<i>.dss", and "stats.arenas.<i>.dss" mallctls,
which provide control over dss/mmap precedence.
- Add the "arena.<i>.purge" mallctl, which obsoletes "arenas.purge".
- Define LG_QUANTUM for hppa.
Incompatible changes:
- Disable tcache by default if running inside Valgrind, in order to avoid
making unallocated objects appear reachable to Valgrind.
- Drop const from malloc_usable_size() argument on Linux.
Bug fixes:
- Fix heap profiling crash if sampled object is freed via realloc(p, 0).
- Remove const from __*_hook variable declarations, so that glibc can modify
them during process forking.
- Fix mlockall(2)/madvise(2) interaction.
- Fix fork(2)-related deadlocks.
- Fix error return value for "thread.tcache.enabled" mallctl.
* 3.0.0 (May 11, 2012)
Although this version adds some major new features, the primary focus is on
internal code cleanup that facilitates maintainability and portability, most
of which is not reflected in the ChangeLog. This is the first release to
incorporate substantial contributions from numerous other developers, and the
result is a more broadly useful allocator (see the git revision history for
contribution details). Note that the license has been unified, thanks to
Facebook granting a license under the same terms as the other copyright
holders (see COPYING).
New features:
- Implement Valgrind support, redzones, and quarantine.
- Add support for additional platforms:
+ FreeBSD
+ Mac OS X Lion
+ MinGW
+ Windows (no support yet for replacing the system malloc)
- Add support for additional architectures:
+ MIPS
+ SH4
+ Tilera
- Add support for cross compiling.
- Add nallocm(), which rounds a request size up to the nearest size class
without actually allocating.
- Implement aligned_alloc() (blame C11).
- Add the "thread.tcache.enabled" mallctl.
- Add the "opt.prof_final" mallctl.
- Update pprof (from gperftools 2.0).
- Add the --with-mangling option.
- Add the --disable-experimental option.
- Add the --disable-munmap option, and make it the default on Linux.
- Add the --enable-mremap option, which disables use of mremap(2) by default.
Incompatible changes:
- Enable stats by default.
- Enable fill by default.
- Disable lazy locking by default.
- Rename the "tcache.flush" mallctl to "thread.tcache.flush".
- Rename the "arenas.pagesize" mallctl to "arenas.page".
- Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB).
- Change the "opt.prof_accum" default from true to false.
Removed features:
- Remove the swap feature, including the "config.swap", "swap.avail",
"swap.prezeroed", "swap.nfds", and "swap.fds" mallctls.
- Remove highruns statistics, including the
"stats.arenas.<i>.bins.<j>.highruns" and
"stats.arenas.<i>.lruns.<j>.highruns" mallctls.
- As part of small size class refactoring, remove the "opt.lg_[qc]space_max",
"arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and
"arenas.[tqcs]bins" mallctls.
- Remove the "arenas.chunksize" mallctl.
- Remove the "opt.lg_prof_tcmax" option.
- Remove the "opt.lg_prof_bt_max" option.
- Remove the "opt.lg_tcache_gc_sweep" option.
- Remove the --disable-tiny option, including the "config.tiny" mallctl.
- Remove the --enable-dynamic-page-shift configure option.
- Remove the --enable-sysv configure option.
Bug fixes:
- Fix a statistics-related bug in the "thread.arena" mallctl that could cause
invalid statistics and crashes.
- Work around TLS deallocation via free() on Linux. This bug could cause
write-after-free memory corruption.
- Fix a potential deadlock that could occur during interval- and
growth-triggered heap profile dumps.
- Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags.
- Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could
cause memory corruption and crashes with --enable-dss specified.
- Fix fork-related bugs that could cause deadlock in children between fork
and exec.
- Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter.
- Fix realloc(p, 0) to act like free(p).
- Do not enforce minimum alignment in memalign().
- Check for NULL pointer in malloc_usable_size().
- Fix an off-by-one heap profile statistics bug that could be observed in
interval- and growth-triggered heap profiles.
- Fix the "epoch" mallctl to update cached stats even if the passed in epoch
is 0.
- Fix bin->runcur management to fix a layout policy bug. This bug did not
affect correctness.
- Fix a bug in choose_arena_hard() that potentially caused more arenas to be
initialized than necessary.
- Add missing "opt.lg_tcache_max" mallctl implementation.
- Use glibc allocator hooks to make mixed allocator usage less likely.
- Fix build issues for --disable-tcache.
- Don't mangle pthread_create() when --with-private-namespace is specified.
* 2.2.5 (November 14, 2011)
Bug fixes:
- Fix huge_ralloc() race when using mremap(2). This is a serious bug that
could cause memory corruption and/or crashes.
- Fix huge_ralloc() to maintain chunk statistics.
- Fix malloc_stats_print(..., "a") output.
* 2.2.4 (November 5, 2011)
Bug fixes:
- Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as
well as for --disable-tls builds in earlier releases.
- Do not assume a 4 KiB page size in test/rallocm.c.
* 2.2.3 (August 31, 2011)
This version fixes numerous bugs related to heap profiling.
Bug fixes:
- Fix a prof-related race condition. This bug could cause memory corruption,
but only occurred in non-default configurations (prof_accum:false).
- Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is
excluded from backtraces).
- Fix a prof-related bug in realloc() (only triggered by OOM errors).
- Fix prof-related bugs in allocm() and rallocm().
- Fix prof_tdata_cleanup() for --disable-tls builds.
- Fix a relative include path, to fix objdir builds.
* 2.2.2 (July 30, 2011)
Bug fixes:
- Fix a build error for --disable-tcache.
- Fix assertions in arena_purge() (for real this time).
- Add the --with-private-namespace option. This is a workaround for symbol
conflicts that can inadvertently arise when using static libraries.
* 2.2.1 (March 30, 2011)
Bug fixes:
- Implement atomic operations for x86/x64. This fixes compilation failures
for versions of gcc that are still in wide use.
- Fix an assertion in arena_purge().
* 2.2.0 (March 22, 2011)
This version incorporates several improvements to algorithms and data
structures that tend to reduce fragmentation and increase speed.
New features:
- Add the "stats.cactive" mallctl.
- Update pprof (from google-perftools 1.7).
- Improve backtracing-related configuration logic, and add the
--disable-prof-libgcc option.
Bug fixes:
- Change default symbol visibility from "internal", to "hidden", which
decreases the overhead of library-internal function calls.
- Fix symbol visibility so that it is also set on OS X.
- Fix a build dependency regression caused by the introduction of the .pic.o
suffix for PIC object files.
- Add missing checks for mutex initialization failures.
- Don't use libgcc-based backtracing except on x64, where it is known to work.
- Fix deadlocks on OS X that were due to memory allocation in
pthread_mutex_lock().
- Heap profiling-specific fixes:
+ Fix memory corruption due to integer overflow in small region index
computation, when using a small enough sample interval that profiling
context pointers are stored in small run headers.
+ Fix a bootstrap ordering bug that only occurred with TLS disabled.
+ Fix a rallocm() rsize bug.
+ Fix error detection bugs for aligned memory allocation.
* 2.1.3 (March 14, 2011)
Bug fixes:
- Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix
for OS X in 2.1.2).
- Fix a "thread.arena" mallctl bug.
- Fix a thread cache stats merging bug.
* 2.1.2 (March 2, 2011)
Bug fixes:
- Fix "thread.{de,}allocatedp" mallctl for OS X.
- Add missing jemalloc.a to build system.
* 2.1.1 (January 31, 2011)
Bug fixes:
- Fix aligned huge reallocation (affected allocm()).
- Fix the ALLOCM_LG_ALIGN macro definition.
- Fix a heap dumping deadlock.
- Fix a "thread.arena" mallctl bug.
* 2.1.0 (December 3, 2010)
This version incorporates some optimizations that can't quite be considered
bug fixes.
New features:
- Use Linux's mremap(2) for huge object reallocation when possible.
- Avoid locking in mallctl*() when possible.
- Add the "thread.[de]allocatedp" mallctl's.
- Convert the manual page source from roff to DocBook, and generate both roff
and HTML manuals.
Bug fixes:
- Fix a crash due to incorrect bootstrap ordering. This only impacted
--enable-debug --enable-dss configurations.
- Fix a minor statistics bug for mallctl("swap.avail", ...).
* 2.0.1 (October 29, 2010)
Bug fixes:
- Fix a race condition in heap profiling that could cause undefined behavior
if "opt.prof_accum" were disabled.
- Add missing mutex unlocks for some OOM error paths in the heap profiling
code.
- Fix a compilation error for non-C99 builds.
* 2.0.0 (October 24, 2010)
This version focuses on the experimental *allocm() API, and on improved
run-time configuration/introspection. Nonetheless, numerous performance
improvements are also included.
New features:
- Implement the experimental {,r,s,d}allocm() API, which provides a superset
of the functionality available via malloc(), calloc(), posix_memalign(),
realloc(), malloc_usable_size(), and free(). These functions can be used to
allocate/reallocate aligned zeroed memory, ask for optional extra memory
during reallocation, prevent object movement during reallocation, etc.
- Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
more human-readable, and more flexible. For example:
JEMALLOC_OPTIONS=AJP
is now:
MALLOC_CONF=abort:true,fill:true,stats_print:true
- Port to Apple OS X. Sponsored by Mozilla.
- Make it possible for the application to control thread-->arena mappings via
the "thread.arena" mallctl.
- Add compile-time support for all TLS-related functionality via pthreads TSD.
This is mainly of interest for OS X, which does not support TLS, but has a
TSD implementation with similar performance.
- Override memalign() and valloc() if they are provided by the system.
- Add the "arenas.purge" mallctl, which can be used to synchronously purge all
dirty unused pages.
- Make cumulative heap profiling data optional, so that it is possible to
limit the amount of memory consumed by heap profiling data structures.
- Add per thread allocation counters that can be accessed via the
"thread.allocated" and "thread.deallocated" mallctls.
Incompatible changes:
- Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
- Increase default backtrace depth from 4 to 128 for heap profiling.
- Disable interval-based profile dumps by default.
Bug fixes:
- Remove bad assertions in fork handler functions. These assertions could
cause aborts for some combinations of configure settings.
- Fix strerror_r() usage to deal with non-standard semantics in GNU libc.
- Fix leak context reporting. This bug tended to cause the number of contexts
to be underreported (though the reported number of objects and bytes were
correct).
- Fix a realloc() bug for large in-place growing reallocation. This bug could
cause memory corruption, but it was hard to trigger.
- Fix an allocation bug for small allocations that could be triggered if
multiple threads raced to create a new run of backing pages.
- Enhance the heap profiler to trigger samples based on usable size, rather
than request size.
- Fix a heap profiling bug due to sometimes losing track of requested object
size for sampled objects.
* 1.0.3 (August 12, 2010)
Bug fixes:
- Fix the libunwind-based implementation of stack backtracing (used for heap
profiling). This bug could cause zero-length backtraces to be reported.
- Add a missing mutex unlock in library initialization code. If multiple
threads raced to initialize malloc, some of them could end up permanently
blocked.
* 1.0.2 (May 11, 2010)
Bug fixes:
- Fix junk filling of large objects, which could cause memory corruption.
- Add MAP_NORESERVE support for chunk mapping, because otherwise virtual
memory limits could cause swap file configuration to fail. Contributed by
Jordan DeLong.
* 1.0.1 (April 14, 2010)
Bug fixes:
- Fix compilation when --enable-fill is specified.
- Fix threads-related profiling bugs that affected accuracy and caused memory
to be leaked during thread exit.
- Fix dirty page purging race conditions that could cause crashes.
- Fix crash in tcache flushing code during thread destruction.
* 1.0.0 (April 11, 2010)
This release focuses on speed and run-time introspection. Numerous
algorithmic improvements make this release substantially faster than its
predecessors.
New features:
- Implement autoconf-based configuration system.
- Add mallctl*(), for the purposes of introspection and run-time
configuration.
- Make it possible for the application to manually flush a thread's cache, via
the "tcache.flush" mallctl.
- Base maximum dirty page count on proportion of active memory.
- Compute various additional run-time statistics, including per size class
statistics for large objects.
- Expose malloc_stats_print(), which can be called repeatedly by the
application.
- Simplify the malloc_message() signature to only take one string argument,
and incorporate an opaque data pointer argument for use by the application
in combination with malloc_stats_print().
- Add support for allocation backed by one or more swap files, and allow the
application to disable over-commit if swap files are in use.
- Implement allocation profiling and leak checking.
Removed features:
- Remove the dynamic arena rebalancing code, since thread-specific caching
reduces its utility.
Bug fixes:
- Modify chunk allocation to work when address space layout randomization
(ASLR) is in use.
- Fix thread cleanup bugs related to TLS destruction.
- Handle 0-size allocation requests in posix_memalign().
- Fix a chunk leak. The leaked chunks were never touched, so this impacted
virtual memory usage, but not physical memory usage.
* linux_2008082[78]a (August 27/28, 2008)
These snapshot releases are the simple result of incorporating Linux-specific
support into the FreeBSD malloc sources.
--------------------------------------------------------------------------------
vim:filetype=text:textwidth=80

src/rt/jemalloc/INSTALL (new file, 293 lines)

@@ -0,0 +1,293 @@
Building and installing jemalloc can be as simple as typing the following while
in the root directory of the source tree:
./configure
make
make install
=== Advanced configuration =====================================================
The 'configure' script supports numerous options that allow control of which
functionality is enabled, where jemalloc is installed, etc. Optionally, pass
any of the following arguments (not a definitive list) to 'configure':
--help
Print a definitive list of options.
--prefix=<install-root-dir>
Set the base directory in which to install. For example:
./configure --prefix=/usr/local
will cause files to be installed into /usr/local/include, /usr/local/lib,
and /usr/local/man.
--with-rpath=<colon-separated-rpath>
Embed one or more library paths, so that libjemalloc can find the libraries
it is linked to. This works only on ELF-based systems.
--with-mangling=<map>
Mangle public symbols specified in <map> which is a comma-separated list of
name:mangled pairs.
For example, to use ld's --wrap option as an alternative method for
overriding libc's malloc implementation, specify something like:
--with-mangling=malloc:__wrap_malloc,free:__wrap_free[...]
Note that mangling happens prior to application of the prefix specified by
--with-jemalloc-prefix, and mangled symbols are then ignored when applying
the prefix.
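    As a minimal sketch, assuming jemalloc was configured with
    --with-mangling=malloc:__wrap_malloc,free:__wrap_free and the final link
    passes -Wl,--wrap,malloc -Wl,--wrap,free, plain allocator calls in
    application code then resolve to jemalloc:

        #include <stdlib.h>

        int
        main(void)
        {
                char *p = malloc(32);   /* the linker routes this to __wrap_malloc */
                free(p);                /* ...and this to __wrap_free */
                return (0);
        }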
--with-jemalloc-prefix=<prefix>
Prefix all public APIs with <prefix>. For example, if <prefix> is
"prefix_", API changes like the following occur:
malloc() --> prefix_malloc()
malloc_conf --> prefix_malloc_conf
/etc/malloc.conf --> /etc/prefix_malloc.conf
MALLOC_CONF --> PREFIX_MALLOC_CONF
This makes it possible to use jemalloc at the same time as the system
allocator, or even to use multiple copies of jemalloc simultaneously.
By default, the prefix is "", except on OS X, where it is "je_". On OS X,
jemalloc overlays the default malloc zone, but makes no attempt to actually
replace the "malloc", "calloc", etc. symbols.
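    A minimal sketch, assuming a build configured with
    --with-jemalloc-prefix=je_, in which both allocators coexist in one
    process:

        #include <stdlib.h>
        #include <jemalloc/jemalloc.h>  /* declares je_malloc(), je_free(), ... */

        int
        main(void)
        {
                void *a = je_malloc(100);       /* served by jemalloc */
                void *b = malloc(100);          /* served by the system allocator */
                je_free(a);
                free(b);
                return (0);
        }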
--without-export
Don't export public APIs. This can be useful when building jemalloc as a
static library, or to avoid exporting public APIs when using the zone
allocator on OSX.
--with-private-namespace=<prefix>
Prefix all library-private APIs with <prefix>. For shared libraries,
symbol visibility mechanisms prevent these symbols from being exported, but
for static libraries, naming collisions are a real possibility. By
default, the prefix is "" (empty string).
--with-install-suffix=<suffix>
Append <suffix> to the base name of all installed files, such that multiple
versions of jemalloc can coexist in the same installation directory. For
example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
--enable-cc-silence
Enable code that silences non-useful compiler warnings. This is helpful
when trying to tell serious warnings from those due to compiler
limitations, but it potentially incurs a performance penalty.
--enable-debug
Enable assertions and validation code. This incurs a substantial
performance hit, but is very useful during application development.
Implies --enable-ivsalloc.
--enable-ivsalloc
Enable validation code, which verifies that pointers reside within
jemalloc-owned chunks before dereferencing them. This incurs a substantial
performance hit.
--disable-stats
Disable statistics gathering functionality. See the "opt.stats_print"
option documentation for usage details.
--enable-prof
Enable heap profiling and leak detection functionality. See the "opt.prof"
option documentation for usage details. When enabled, there are several
approaches to backtracing, and the configure script chooses the first one
in the following list that appears to function correctly:
+ libunwind (requires --enable-prof-libunwind)
+ libgcc (unless --disable-prof-libgcc)
+ gcc intrinsics (unless --disable-prof-gcc)
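    A minimal sketch of triggering a heap profile dump from application code,
    assuming a build configured with --enable-prof and run with
    MALLOC_CONF=prof:true:

        #include <jemalloc/jemalloc.h>

        int
        main(void)
        {
                /* Dump a heap profile to an automatically chosen filename. */
                mallctl("prof.dump", NULL, NULL, NULL, 0);
                return (0);
        }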
--enable-prof-libunwind
Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
backtracing.
--disable-prof-libgcc
Disable the use of libgcc's backtracing functionality.
--disable-prof-gcc
Disable the use of gcc intrinsics for backtracing.
--with-static-libunwind=<libunwind.a>
Statically link against the specified libunwind.a rather than dynamically
linking with -lunwind.
--disable-tcache
Disable thread-specific caches for small objects. Objects are cached and
released in bulk, thus reducing the total number of mutex operations. See
the "opt.tcache" option for usage details.
--enable-mremap
Enable huge realloc() via mremap(2). mremap() is disabled by default
because the flavor used is specific to Linux, which has a quirk in its
virtual memory allocation algorithm that causes semi-permanent VM map holes
under normal jemalloc operation.
--disable-munmap
Disable virtual memory deallocation via munmap(2); instead keep track of
the virtual memory for later use. munmap() is disabled by default (i.e.
--disable-munmap is implied) on Linux, which has a quirk in its virtual
memory allocation algorithm that causes semi-permanent VM map holes under
normal jemalloc operation.
--enable-dss
Enable support for page allocation/deallocation via sbrk(2), in addition to
mmap(2).
--disable-fill
Disable support for junk/zero filling of memory, quarantine, and redzones.
See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option
documentation for usage details.
--disable-valgrind
Disable support for Valgrind.
--disable-experimental
Disable support for the experimental API (*allocm()).
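    When the experimental API is enabled, a minimal sketch of
    allocm()/dallocm() usage (assuming <jemalloc/jemalloc.h> declares the
    experimental functions for this build) looks like:

        #include <stdio.h>
        #include <jemalloc/jemalloc.h>

        int
        main(void)
        {
                void *p;
                size_t rsize;

                /* 4 KiB, 64-byte-aligned, zeroed; rsize gets the usable size. */
                if (allocm(&p, &rsize, 4096, ALLOCM_ALIGN(64) | ALLOCM_ZERO)
                    != ALLOCM_SUCCESS)
                        return (1);
                printf("usable size: %zu\n", rsize);
                dallocm(p, 0);
                return (0);
        }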
--disable-zone-allocator
Disable zone allocator for Darwin. This means jemalloc won't be hooked as
the default allocator on OSX/iOS.
--enable-utrace
Enable utrace(2)-based allocation tracing. This feature is not broadly
portable (FreeBSD has it, but Linux and OS X do not).
--enable-xmalloc
Enable support for optional immediate termination due to out-of-memory
errors, as is commonly implemented by "xmalloc" wrapper functions for malloc.
See the "opt.xmalloc" option documentation for usage details.
--enable-lazy-lock
Enable code that wraps pthread_create() to detect when an application
switches from single-threaded to multi-threaded mode, so that it can avoid
mutex locking/unlocking operations while in single-threaded mode. In
practice, this feature usually has little impact on performance unless
thread-specific caching is disabled.
--disable-tls
Disable thread-local storage (TLS), which allows for fast access to
thread-local variables via the __thread keyword. If TLS is available,
jemalloc uses it for several purposes.
--with-xslroot=<path>
Specify where to find DocBook XSL stylesheets when building the
documentation.
The following environment variables (not a definitive list) impact configure's
behavior:
CFLAGS="?"
Pass these flags to the compiler. You probably shouldn't define this unless
you know what you are doing. (Use EXTRA_CFLAGS instead.)
EXTRA_CFLAGS="?"
Append these flags to CFLAGS. This makes it possible to add flags such as
-Werror, while allowing the configure script to determine what other flags
are appropriate for the specified configuration.
The configure script specifically checks whether an optimization flag (-O*)
is specified in EXTRA_CFLAGS, and refrains from specifying an optimization
level if it finds that one has already been specified.
CPPFLAGS="?"
Pass these flags to the C preprocessor. Note that CFLAGS is not passed to
'cpp' when 'configure' is looking for include files, so you must use
CPPFLAGS instead if you need to help 'configure' find header files.
LD_LIBRARY_PATH="?"
'ld' uses this colon-separated list to find libraries.
LDFLAGS="?"
Pass these flags when linking.
PATH="?"
'configure' uses this to find programs.
=== Advanced compilation =======================================================
To build only parts of jemalloc, use the following targets:
build_lib_shared
build_lib_static
build_lib
build_doc_html
build_doc_man
build_doc
To install only parts of jemalloc, use the following targets:
install_bin
install_include
install_lib_shared
install_lib_static
install_lib
install_doc_html
install_doc_man
install_doc
To clean up build results to varying degrees, use the following make targets:
clean
distclean
relclean
=== Advanced installation ======================================================
Optionally, define make variables when invoking make, including (not
exclusively):
INCLUDEDIR="?"
Use this as the installation prefix for header files.
LIBDIR="?"
Use this as the installation prefix for libraries.
MANDIR="?"
Use this as the installation prefix for man pages.
DESTDIR="?"
Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful
when installing to a different path than was specified via --prefix.
CC="?"
Use this to invoke the C compiler.
CFLAGS="?"
Pass these flags to the compiler.
CPPFLAGS="?"
Pass these flags to the C preprocessor.
LDFLAGS="?"
Pass these flags when linking.
PATH="?"
Use this to search for programs used during configuration and building.
=== Development ================================================================
If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh'
script rather than 'configure'. This re-generates 'configure', enables
configuration dependency rules, and enables re-generation of automatically
generated source files.
The build system supports using an object directory separate from the source
tree. For example, you can create an 'obj' directory, and from within that
directory, issue configuration and build commands:
autoconf
mkdir obj
cd obj
../configure --enable-autogen
make
=== Documentation ==============================================================
The manual page is generated in both html and roff formats. Any web browser
can be used to view the html manual. The roff manual page can be formatted
prior to installation via the following command:
nroff -man -t doc/jemalloc.3

src/rt/jemalloc/Makefile.in (new file, 324 lines)

@@ -0,0 +1,324 @@
# Clear out all vpaths, then set just one (default vpath) for the main build
# directory.
vpath
vpath % .
# Clear the default suffixes, so that built-in rules are not used.
.SUFFIXES :
SHELL := /bin/sh
CC := @CC@
# Configuration parameters.
DESTDIR =
BINDIR := $(DESTDIR)@BINDIR@
INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@
LIBDIR := $(DESTDIR)@LIBDIR@
DATADIR := $(DESTDIR)@DATADIR@
MANDIR := $(DESTDIR)@MANDIR@
srcroot := @srcroot@
objroot := @objroot@
abs_srcroot := @abs_srcroot@
abs_objroot := @abs_objroot@
# Build parameters.
CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include
CFLAGS := @CFLAGS@
LDFLAGS := @LDFLAGS@
EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
LIBS := @LIBS@
RPATH_EXTRA := @RPATH_EXTRA@
SO := @so@
IMPORTLIB := @importlib@
O := @o@
A := @a@
EXE := @exe@
LIBPREFIX := @libprefix@
REV := @rev@
install_suffix := @install_suffix@
ABI := @abi@
XSLTPROC := @XSLTPROC@
AUTOCONF := @AUTOCONF@
_RPATH = @RPATH@
RPATH = $(if $(1),$(call _RPATH,$(1)))
cfghdrs_in := @cfghdrs_in@
cfghdrs_out := @cfghdrs_out@
cfgoutputs_in := @cfgoutputs_in@
cfgoutputs_out := @cfgoutputs_out@
enable_autogen := @enable_autogen@
enable_experimental := @enable_experimental@
enable_zone_allocator := @enable_zone_allocator@
DSO_LDFLAGS = @DSO_LDFLAGS@
SOREV = @SOREV@
PIC_CFLAGS = @PIC_CFLAGS@
CTARGET = @CTARGET@
LDTARGET = @LDTARGET@
MKLIB = @MKLIB@
CC_MM = @CC_MM@
ifeq (macho, $(ABI))
TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib"
else
ifeq (pecoff, $(ABI))
TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib"
else
TEST_LIBRARY_PATH :=
endif
endif
LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix)
# Lists of files.
BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh
CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \
$(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h
CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \
$(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \
$(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \
$(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \
$(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \
$(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \
$(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \
$(srcroot)src/util.c $(srcroot)src/tsd.c
ifeq ($(enable_zone_allocator), 1)
CSRCS += $(srcroot)src/zone.c
endif
ifeq ($(IMPORTLIB),$(SO))
STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
endif
ifdef PIC_CFLAGS
STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A)
else
STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A)
endif
DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
ifneq ($(SOREV),$(SO))
DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
endif
MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html)
DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3)
CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \
$(srcroot)test/ALLOCM_ARENA.c $(srcroot)test/bitmap.c \
$(srcroot)test/mremap.c $(srcroot)test/posix_memalign.c \
$(srcroot)test/thread_arena.c $(srcroot)test/thread_tcache_enabled.c
ifeq ($(enable_experimental), 1)
CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c
endif
COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O))
CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O))
CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O))
.PHONY: all dist build_doc_html build_doc_man build_doc
.PHONY: install_bin install_include install_lib
.PHONY: install_doc_html install_doc_man install_doc install
.PHONY: tests check clean distclean relclean
.SECONDARY : $(CTESTOBJS)
# Default target.
all: build
dist: build_doc
$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
$(XSLTPROC) -o $@ $(objroot)doc/html.xsl $<
$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
$(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $<
build_doc_html: $(DOCS_HTML)
build_doc_man: $(DOCS_MAN3)
build_doc: $(DOCS)
#
# Include generated dependency files.
#
ifdef CC_MM
-include $(COBJS:%.$(O)=%.d)
-include $(CPICOBJS:%.$(O)=%.d)
-include $(CTESTOBJS:%.$(O)=%.d)
endif
$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c
$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c
$(CPICOBJS): CFLAGS += $(PIC_CFLAGS)
$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
$(CTESTOBJS): CPPFLAGS += -I$(objroot)test
ifneq ($(IMPORTLIB),$(SO))
$(COBJS): CPPFLAGS += -DDLLEXPORT
endif
ifndef CC_MM
# Dependencies
HEADER_DIRS = $(srcroot)include/jemalloc/internal \
$(objroot)include/jemalloc $(objroot)include/jemalloc/internal
HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h))
$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS)
$(CTESTOBJS): $(objroot)test/jemalloc_test.h
endif
$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O):
@mkdir -p $(@D)
$(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $<
ifdef CC_MM
@$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
endif
ifneq ($(SOREV),$(SO))
%.$(SO) : %.$(SOREV)
@mkdir -p $(@D)
ln -sf $(<F) $@
endif
$(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(CPICOBJS),$(COBJS))
@mkdir -p $(@D)
$(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS)
$(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(CPICOBJS)
$(objroot)lib/$(LIBJEMALLOC).$(A) : $(COBJS)
$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(COBJS)
$(STATIC_LIBS):
@mkdir -p $(@D)
$(MKLIB) $+
$(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O)
$(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) $(DSOS)
@mkdir -p $(@D)
$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS)
build_lib_shared: $(DSOS)
build_lib_static: $(STATIC_LIBS)
build: build_lib_shared build_lib_static
install_bin:
install -d $(BINDIR)
@for b in $(BINS); do \
echo "install -m 755 $$b $(BINDIR)"; \
install -m 755 $$b $(BINDIR); \
done
install_include:
install -d $(INCLUDEDIR)/jemalloc
@for h in $(CHDRS); do \
echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
done
install_lib_shared: $(DSOS)
install -d $(LIBDIR)
install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
ifneq ($(SOREV),$(SO))
ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
endif
install_lib_static: $(STATIC_LIBS)
install -d $(LIBDIR)
@for l in $(STATIC_LIBS); do \
echo "install -m 755 $$l $(LIBDIR)"; \
install -m 755 $$l $(LIBDIR); \
done
install_lib: install_lib_shared install_lib_static
install_doc_html:
install -d $(DATADIR)/doc/jemalloc$(install_suffix)
@for d in $(DOCS_HTML); do \
echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
done
install_doc_man:
install -d $(MANDIR)/man3
@for d in $(DOCS_MAN3); do \
echo "install -m 644 $$d $(MANDIR)/man3"; \
install -m 644 $$d $(MANDIR)/man3; \
done
install_doc: install_doc_html install_doc_man
install: install_bin install_include install_lib install_doc
tests: $(CTESTS:$(srcroot)%.c=$(objroot)%$(EXE))
check: tests
@mkdir -p $(objroot)test
@$(SHELL) -c 'total=0; \
failures=0; \
echo "========================================="; \
for t in $(CTESTS:$(srcroot)%.c=$(objroot)%); do \
total=`expr $$total + 1`; \
/bin/echo -n "$${t} ... "; \
$(TEST_LIBRARY_PATH) $${t}$(EXE) $(abs_srcroot) \
$(abs_objroot) > $(objroot)$${t}.out 2>&1; \
if test -e "$(srcroot)$${t}.exp"; then \
diff -w -u $(srcroot)$${t}.exp \
$(objroot)$${t}.out >/dev/null 2>&1; \
fail=$$?; \
if test "$${fail}" -eq "1" ; then \
failures=`expr $${failures} + 1`; \
echo "*** FAIL ***"; \
else \
echo "pass"; \
fi; \
else \
echo "*** FAIL *** (.exp file is missing)"; \
failures=`expr $${failures} + 1`; \
fi; \
done; \
echo "========================================="; \
echo "Failures: $${failures}/$${total}"'
clean:
rm -f $(COBJS)
rm -f $(CPICOBJS)
rm -f $(COBJS:%.$(O)=%.d)
rm -f $(CPICOBJS:%.$(O)=%.d)
rm -f $(CTESTOBJS:%.$(O)=%$(EXE))
rm -f $(CTESTOBJS)
rm -f $(CTESTOBJS:%.$(O)=%.d)
rm -f $(CTESTOBJS:%.$(O)=%.out)
rm -f $(DSOS) $(STATIC_LIBS)
distclean: clean
rm -rf $(objroot)autom4te.cache
rm -f $(objroot)config.log
rm -f $(objroot)config.status
rm -f $(objroot)config.stamp
rm -f $(cfghdrs_out)
rm -f $(cfgoutputs_out)
relclean: distclean
rm -f $(objroot)configure
rm -f $(srcroot)VERSION
rm -f $(DOCS_HTML)
rm -f $(DOCS_MAN3)
#===============================================================================
# Re-configuration rules.
ifeq ($(enable_autogen), 1)
$(srcroot)configure : $(srcroot)configure.ac
cd ./$(srcroot) && $(AUTOCONF)
$(objroot)config.status : $(srcroot)configure
./$(objroot)config.status --recheck
$(srcroot)config.stamp.in : $(srcroot)configure.ac
echo stamp > $(srcroot)config.stamp.in
$(objroot)config.stamp : $(cfgoutputs_in) $(cfghdrs_in) $(srcroot)configure
./$(objroot)config.status
@touch $@
# There must be some action in order for make to re-read Makefile when it is
# out of date.
$(cfgoutputs_out) $(cfghdrs_out) : $(objroot)config.stamp
@true
endif

src/rt/jemalloc/README (new file, 16 lines)

@@ -0,0 +1,16 @@
jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
This distribution is a "portable" implementation that currently targets
FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the default
allocator in the FreeBSD and NetBSD operating systems, and it is used by the
Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending
on your needs, one of the other divergent versions may suit you better than
this distribution.
The COPYING file contains copyright and licensing information.
The INSTALL file contains information on how to configure, build, and install
jemalloc.
The ChangeLog file contains a brief summary of changes for each release.
URL: http://www.canonware.com/jemalloc/

src/rt/jemalloc/VERSION (new file, 1 line)

@@ -0,0 +1 @@
3.3.1-0-g9ef9d9e8c271cdf14f664b871a8f98c827714784

src/rt/jemalloc/autogen.sh (new executable file, 17 lines)

@@ -0,0 +1,17 @@
#!/bin/sh
for i in autoconf; do
echo "$i"
$i
if [ $? -ne 0 ]; then
echo "Error $? in $i"
exit 1
fi
done
echo "./configure --enable-autogen $@"
./configure --enable-autogen $@
if [ $? -ne 0 ]; then
echo "Error $? in ./configure"
exit 1
fi


@@ -0,0 +1,9 @@
#!/bin/sh
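# Preload libjemalloc so that the wrapped command uses it in place of the
# system malloc.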
prefix=/usr/local
exec_prefix=/usr/local
libdir=${exec_prefix}/lib
LD_PRELOAD=${libdir}/libjemalloc.so.1
export LD_PRELOAD
exec "$@"


@@ -0,0 +1,9 @@
#!/bin/sh
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@
export @LD_PRELOAD_VAR@
exec "$@"

src/rt/jemalloc/bin/pprof (new executable file, 5348 lines; diff suppressed because it is too large)

src/rt/jemalloc/config.guess (vendored executable file, 1530 lines; diff suppressed because it is too large)


src/rt/jemalloc/config.sub (vendored executable file, 1773 lines; diff suppressed because it is too large)

src/rt/jemalloc/configure (vendored executable file, 8339 lines; diff suppressed because it is too large)

src/rt/jemalloc/configure.ac (new file, 1333 lines; diff suppressed because it is too large)


@@ -0,0 +1,4 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:import href="@XSLROOT@/html/docbook.xsl"/>
<xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
</xsl:stylesheet>

(file diff suppressed because it is too large)

(file diff suppressed because it is too large)


@@ -0,0 +1,4 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:import href="@XSLROOT@/manpages/docbook.xsl"/>
<xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
</xsl:stylesheet>


@@ -0,0 +1,7 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:param name="funcsynopsis.style">ansi</xsl:param>
<xsl:param name="function.parens" select="1"/>
<xsl:template match="mallctl">
"<xsl:call-template name="inline.monoseq"/>"
</xsl:template>
</xsl:stylesheet>

(file diff suppressed because it is too large)


@@ -0,0 +1,304 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
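/*
 * Reads are expressed as add-zero, so a read goes through the same atomic
 * primitives as a modification.
 */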
#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
#define atomic_read_z(p) atomic_add_z(p, 0)
#define atomic_read_u(p) atomic_add_u(p, 0)
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
size_t atomic_add_z(size_t *p, size_t x);
size_t atomic_sub_z(size_t *p, size_t x);
unsigned atomic_add_u(unsigned *p, unsigned x);
unsigned atomic_sub_u(unsigned *p, unsigned x);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (InterlockedExchangeAdd64(p, x));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (InterlockedExchangeAdd64(p, -((int64_t)x)));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
}
# elif (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
x = (uint64_t)(-(int64_t)x);
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
# elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
/*
* atomic_fetchadd_64() doesn't exist, but we only ever use this
* function on LP64 systems, so atomic_fetchadd_long() will do.
*/
assert(sizeof(uint64_t) == sizeof(unsigned long));
return (atomic_fetchadd_long(p, (unsigned long)x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
assert(sizeof(uint64_t) == sizeof(unsigned long));
return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
}
# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
return (__sync_sub_and_fetch(p, x));
}
# else
# error "Missing implementation for 64-bit atomic operations"
# endif
#endif
/******************************************************************************/
/* 32-bit operations. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (InterlockedExchangeAdd(p, x));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (InterlockedExchangeAdd(p, -((int32_t)x)));
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
}
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
x = (uint32_t)(-(int32_t)x);
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (x);
}
#elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (atomic_fetchadd_32(p, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
}
#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
return (__sync_add_and_fetch(p, x));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
return (__sync_sub_and_fetch(p, x));
}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
/******************************************************************************/
/* size_t operations. */
JEMALLOC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 3)
return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
#elif (LG_SIZEOF_PTR == 2)
return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
#endif
}
JEMALLOC_INLINE size_t
atomic_sub_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 3)
return ((size_t)atomic_add_uint64((uint64_t *)p,
(uint64_t)-((int64_t)x)));
#elif (LG_SIZEOF_PTR == 2)
return ((size_t)atomic_add_uint32((uint32_t *)p,
(uint32_t)-((int32_t)x)));
#endif
}
/******************************************************************************/
/* unsigned operations. */
JEMALLOC_INLINE unsigned
atomic_add_u(unsigned *p, unsigned x)
{
#if (LG_SIZEOF_INT == 3)
return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
#elif (LG_SIZEOF_INT == 2)
return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
#endif
}
JEMALLOC_INLINE unsigned
atomic_sub_u(unsigned *p, unsigned x)
{
#if (LG_SIZEOF_INT == 3)
return ((unsigned)atomic_add_uint64((uint64_t *)p,
(uint64_t)-((int64_t)x)));
#elif (LG_SIZEOF_INT == 2)
return ((unsigned)atomic_add_uint32((uint32_t *)p,
(uint32_t)-((int32_t)x)));
#endif
}
/******************************************************************************/
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
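For illustration, a minimal sketch (assuming a configured jemalloc tree in
which this header is includable) of how these primitives compose into a
shared counter:

	/* Shared statistics counter built on the atomic_*_z primitives above. */
	static size_t nallocated;

	static void
	note_alloc(size_t size)
	{
		atomic_add_z(&nallocated, size);
	}

	static void
	note_dalloc(size_t size)
	{
		atomic_sub_z(&nallocated, size);
	}

	static size_t
	nallocated_snapshot(void)
	{
		/* atomic_read_z(p) expands to atomic_add_z(p, 0). */
		return (atomic_read_z(&nallocated));
	}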


@@ -0,0 +1,26 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void *base_alloc(size_t size);
void *base_calloc(size_t number, size_t size);
extent_node_t *base_node_alloc(void);
void base_node_dealloc(extent_node_t *node);
bool base_boot(void);
void base_prefork(void);
void base_postfork_parent(void);
void base_postfork_child(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@@ -0,0 +1,184 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS
typedef struct bitmap_level_s bitmap_level_t;
typedef struct bitmap_info_s bitmap_info_t;
typedef unsigned long bitmap_t;
#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
/* Number of bits per group. */
#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
/* Maximum number of levels possible. */
#define BITMAP_MAX_LEVELS \
(LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
+ !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct bitmap_level_s {
/* Offset of this level's groups within the array of groups. */
size_t group_offset;
};
struct bitmap_info_s {
/* Logical number of bits in bitmap (stored at bottom level). */
size_t nbits;
/* Number of levels necessary for nbits. */
unsigned nlevels;
/*
* Only the first (nlevels+1) elements are used, and levels are ordered
* bottom to top (e.g. the bottom level is stored in levels[0]).
*/
bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
size_t bitmap_info_ngroups(const bitmap_info_t *binfo);
size_t bitmap_size(size_t nbits);
void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
JEMALLOC_INLINE bool
bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
bitmap_t rg = bitmap[rgoff];
/* The bitmap is full iff the root group is 0. */
return (rg == 0);
}
JEMALLOC_INLINE bool
bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t g;
assert(bit < binfo->nbits);
goff = bit >> LG_BITMAP_GROUP_NBITS;
g = bitmap[goff];
return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))));
}
JEMALLOC_INLINE void
bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t *gp;
bitmap_t g;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit) == false);
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit));
/* Propagate group state transitions up the tree. */
if (g == 0) {
unsigned i;
for (i = 1; i < binfo->nlevels; i++) {
bit = goff;
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
if (g != 0)
break;
}
}
}
/* sfu: set first unset. */
JEMALLOC_INLINE size_t
bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
size_t bit;
bitmap_t g;
unsigned i;
assert(bitmap_full(bitmap, binfo) == false);
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
bit = ffsl(g) - 1;
while (i > 0) {
i--;
g = bitmap[binfo->levels[i].group_offset + bit];
bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
}
bitmap_set(bitmap, binfo, bit);
return (bit);
}
JEMALLOC_INLINE void
bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
{
size_t goff;
bitmap_t *gp;
bitmap_t g;
bool propagate;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit));
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
propagate = (g == 0);
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit) == false);
/* Propagate group state transitions up the tree. */
if (propagate) {
unsigned i;
for (i = 1; i < binfo->nlevels; i++) {
bit = goff;
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
propagate = (g == 0);
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
== 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
if (propagate == false)
break;
}
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
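Note the inverted encoding throughout: a 1 bit means unset (free), so a full bitmap has an all-zero root group, and bitmap_sfu() finds the first unset bit by chasing ffsl() results from the root level downward. A hedged usage sketch for run-style region bookkeeping; the region_* names, and whether base_alloc() is the right backing allocator here, are assumptions:

#define NREGS 512
static bitmap_info_t binfo;
static bitmap_t *bmap;
static void
regions_boot(void)
{
	bitmap_info_init(&binfo, NREGS);
	/* bitmap_size() reports the bytes needed for NREGS bits. */
	bmap = (bitmap_t *)base_alloc(bitmap_size(NREGS));
	bitmap_init(bmap, &binfo);	/* All bits start unset (free). */
}
static size_t
region_alloc(void)
{
	assert(bitmap_full(bmap, &binfo) == false);
	return (bitmap_sfu(bmap, &binfo));	/* Claim the first free region. */
}
static void
region_dalloc(size_t regind)
{
	bitmap_unset(bmap, &binfo, regind);
}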


@ -0,0 +1,63 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/*
* Size and alignment of memory chunks that are allocated by the OS's virtual
* memory system.
*/
#define LG_CHUNK_DEFAULT 22
/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a) \
((void *)((uintptr_t)(a) & ~chunksize_mask))
/* Return the chunk offset of address a. */
#define CHUNK_ADDR2OFFSET(a) \
((size_t)((uintptr_t)(a) & chunksize_mask))
/* Return the smallest chunk multiple that is >= s. */
#define CHUNK_CEILING(s) \
(((s) + chunksize_mask) & ~chunksize_mask)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern size_t opt_lg_chunk;
extern const char *opt_dss;
/* Protects stats_chunks; currently not used for any other purpose. */
extern malloc_mutex_t chunks_mtx;
/* Chunk statistics. */
extern chunk_stats_t stats_chunks;
extern rtree_t *chunks_rtree;
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
extern size_t chunk_npages;
extern size_t map_bias; /* Number of arena chunk header pages. */
extern size_t arena_maxclass; /* Max size class for arenas. */
void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
dss_prec_t dss_prec);
void chunk_unmap(void *chunk, size_t size);
void chunk_dealloc(void *chunk, size_t size, bool unmap);
bool chunk_boot(void);
void chunk_prefork(void);
void chunk_postfork_parent(void);
void chunk_postfork_child(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
#include "jemalloc/internal/chunk_dss.h"
#include "jemalloc/internal/chunk_mmap.h"


@ -0,0 +1,38 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef enum {
dss_prec_disabled = 0,
dss_prec_primary = 1,
dss_prec_secondary = 2,
dss_prec_limit = 3
} dss_prec_t;
#define DSS_PREC_DEFAULT dss_prec_secondary
#define DSS_DEFAULT "secondary"
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
extern const char *dss_prec_names[];
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
dss_prec_t chunk_dss_prec_get(void);
bool chunk_dss_prec_set(dss_prec_t dss_prec);
void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero);
bool chunk_in_dss(void *chunk);
bool chunk_dss_boot(void);
void chunk_dss_prefork(void);
void chunk_dss_postfork_parent(void);
void chunk_dss_postfork_child(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,22 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
bool pages_purge(void *addr, size_t length);
void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero);
bool chunk_dealloc_mmap(void *chunk, size_t size);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,88 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct ckh_s ckh_t;
typedef struct ckhc_s ckhc_t;
/* Typedefs to allow easy function pointer passing. */
typedef void ckh_hash_t (const void *, size_t[2]);
typedef bool ckh_keycomp_t (const void *, const void *);
/* Maintain counters used to get an idea of performance. */
/* #define CKH_COUNT */
/* Print counter values in ckh_delete() (requires CKH_COUNT). */
/* #define CKH_VERBOSE */
/*
* There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit
* one bucket per L1 cache line.
*/
#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Hash table cell. */
struct ckhc_s {
const void *key;
const void *data;
};
struct ckh_s {
#ifdef CKH_COUNT
/* Counters used to get an idea of performance. */
uint64_t ngrows;
uint64_t nshrinks;
uint64_t nshrinkfails;
uint64_t ninserts;
uint64_t nrelocs;
#endif
/* Used for pseudo-random number generation. */
#define CKH_A 1103515241
#define CKH_C 12347
uint32_t prng_state;
/* Total number of items. */
size_t count;
/*
* Minimum and current number of hash table buckets. There are
* 2^LG_CKH_BUCKET_CELLS cells per bucket.
*/
unsigned lg_minbuckets;
unsigned lg_curbuckets;
/* Hash and comparison functions. */
ckh_hash_t *hash;
ckh_keycomp_t *keycomp;
/* Hash table with 2^lg_curbuckets buckets. */
ckhc_t *tab;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp);
void ckh_delete(ckh_t *ckh);
size_t ckh_count(ckh_t *ckh);
bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
bool ckh_insert(ckh_t *ckh, const void *key, const void *data);
bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
void **data);
bool	ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
void ckh_string_hash(const void *key, size_t r_hash[2]);
bool ckh_string_keycomp(const void *k1, const void *k2);
void ckh_pointer_hash(const void *key, size_t r_hash[2]);
bool ckh_pointer_keycomp(const void *k1, const void *k2);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
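A hedged usage sketch of the cuckoo-hash API with the string helpers declared above. Per jemalloc convention these functions return true on failure; the minitems argument of 16 and the abort()-on-error handling are arbitrary choices for the sketch:

ckh_t ckh;
void *k, *v;
if (ckh_new(&ckh, 16, ckh_string_hash, ckh_string_keycomp))
	abort();	/* A true return indicates failure. */
if (ckh_insert(&ckh, "key", "value"))
	abort();
if (ckh_search(&ckh, "key", &k, &v) == false) {
	/* Found: k and v now refer to the stored key and data. */
}
ckh_delete(&ckh);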


@ -0,0 +1,116 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct ctl_node_s ctl_node_t;
typedef struct ctl_named_node_s ctl_named_node_t;
typedef struct ctl_indexed_node_s ctl_indexed_node_t;
typedef struct ctl_arena_stats_s ctl_arena_stats_t;
typedef struct ctl_stats_s ctl_stats_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct ctl_node_s {
bool named;
};
struct ctl_named_node_s {
struct ctl_node_s node;
const char *name;
/* If (nchildren == 0), this is a terminal node. */
unsigned nchildren;
const ctl_node_t *children;
int (*ctl)(const size_t *, size_t, void *, size_t *,
void *, size_t);
};
struct ctl_indexed_node_s {
struct ctl_node_s node;
const ctl_named_node_t *(*index)(const size_t *, size_t, size_t);
};
struct ctl_arena_stats_s {
bool initialized;
unsigned nthreads;
const char *dss;
size_t pactive;
size_t pdirty;
arena_stats_t astats;
/* Aggregate stats for small size classes, based on bin stats. */
size_t allocated_small;
uint64_t nmalloc_small;
uint64_t ndalloc_small;
uint64_t nrequests_small;
malloc_bin_stats_t bstats[NBINS];
malloc_large_stats_t *lstats; /* nlclasses elements. */
};
struct ctl_stats_s {
size_t allocated;
size_t active;
size_t mapped;
struct {
size_t current; /* stats_chunks.curchunks */
uint64_t total; /* stats_chunks.nchunks */
size_t high; /* stats_chunks.highchunks */
} chunks;
struct {
size_t allocated; /* huge_allocated */
uint64_t nmalloc; /* huge_nmalloc */
uint64_t ndalloc; /* huge_ndalloc */
} huge;
unsigned narenas;
ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
size_t newlen);
int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen);
bool ctl_boot(void);
void ctl_prefork(void);
void ctl_postfork_parent(void);
void ctl_postfork_child(void);
#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \
if (je_mallctl(name, oldp, oldlenp, newp, newlen) \
!= 0) { \
malloc_printf( \
"<jemalloc>: Failure in xmallctl(\"%s\", ...)\n", \
name); \
abort(); \
} \
} while (0)
#define xmallctlnametomib(name, mibp, miblenp) do { \
if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \
malloc_printf("<jemalloc>: Failure in " \
"xmallctlnametomib(\"%s\", ...)\n", name); \
abort(); \
} \
} while (0)
#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \
if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \
newlen) != 0) { \
malloc_write( \
"<jemalloc>: Failure in xmallctlbymib()\n"); \
abort(); \
} \
} while (0)
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
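The x-prefixed wrappers abort on any mallctl failure, keeping internal call sites terse where failure would be a programming error. A sketch of reading one statistic ("stats.allocated" is a standard name in jemalloc's mallctl namespace; epoch-refresh details are omitted):

size_t allocated, sz = sizeof(allocated);
xmallctl("stats.allocated", &allocated, &sz, NULL, 0);
malloc_printf("allocated: %zu\n", allocated);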


@ -0,0 +1,45 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct extent_node_s extent_node_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
/* Tree of extents. */
struct extent_node_s {
/* Linkage for the size/address-ordered tree. */
rb_node(extent_node_t) link_szad;
/* Linkage for the address-ordered tree. */
rb_node(extent_node_t) link_ad;
/* Profile counters, used for huge objects. */
prof_ctx_t *prof_ctx;
/* Pointer to the extent that this tree node is responsible for. */
void *addr;
/* Total region size. */
size_t size;
/* True if zero-filled; used by chunk recycling code. */
bool zeroed;
};
typedef rb_tree(extent_node_t) extent_tree_t;
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t)
rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,331 @@
/*
* The following hash function is based on MurmurHash3, placed into the public
* domain by Austin Appleby. See http://code.google.com/p/smhasher/ for
* details.
*/
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void hash(const void *key, size_t len, const uint32_t seed,
size_t r_hash[2]);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_))
/******************************************************************************/
/* Internal implementation. */
JEMALLOC_INLINE uint32_t
hash_rotl_32(uint32_t x, int8_t r)
{
return (x << r) | (x >> (32 - r));
}
JEMALLOC_INLINE uint64_t
hash_rotl_64(uint64_t x, int8_t r)
{
return (x << r) | (x >> (64 - r));
}
JEMALLOC_INLINE uint32_t
hash_get_block_32(const uint32_t *p, int i)
{
return p[i];
}
JEMALLOC_INLINE uint64_t
hash_get_block_64(const uint64_t *p, int i)
{
return p[i];
}
JEMALLOC_INLINE uint32_t
hash_fmix_32(uint32_t h)
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
JEMALLOC_INLINE uint64_t
hash_fmix_64(uint64_t k)
{
k ^= k >> 33;
k *= QU(0xff51afd7ed558ccdLLU);
k ^= k >> 33;
k *= QU(0xc4ceb9fe1a85ec53LLU);
k ^= k >> 33;
return k;
}
JEMALLOC_INLINE uint32_t
hash_x86_32(const void *key, int len, uint32_t seed)
{
const uint8_t *data = (const uint8_t *) key;
const int nblocks = len / 4;
uint32_t h1 = seed;
const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593;
/* body */
{
const uint32_t *blocks = (const uint32_t *) (data + nblocks*4);
int i;
for (i = -nblocks; i; i++) {
uint32_t k1 = hash_get_block_32(blocks, i);
k1 *= c1;
k1 = hash_rotl_32(k1, 15);
k1 *= c2;
h1 ^= k1;
h1 = hash_rotl_32(h1, 13);
h1 = h1*5 + 0xe6546b64;
}
}
/* tail */
{
const uint8_t *tail = (const uint8_t *) (data + nblocks*4);
uint32_t k1 = 0;
switch (len & 3) {
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15);
k1 *= c2; h1 ^= k1;
}
}
/* finalization */
h1 ^= len;
h1 = hash_fmix_32(h1);
return h1;
}
UNUSED JEMALLOC_INLINE void
hash_x86_128(const void *key, const int len, uint32_t seed,
uint64_t r_out[2])
{
	const uint8_t *data = (const uint8_t *) key;
const int nblocks = len / 16;
uint32_t h1 = seed;
uint32_t h2 = seed;
uint32_t h3 = seed;
uint32_t h4 = seed;
const uint32_t c1 = 0x239b961b;
const uint32_t c2 = 0xab0e9789;
const uint32_t c3 = 0x38b34ae5;
const uint32_t c4 = 0xa1e38b93;
/* body */
{
const uint32_t *blocks = (const uint32_t *) (data + nblocks*16);
int i;
for (i = -nblocks; i; i++) {
uint32_t k1 = hash_get_block_32(blocks, i*4 + 0);
uint32_t k2 = hash_get_block_32(blocks, i*4 + 1);
uint32_t k3 = hash_get_block_32(blocks, i*4 + 2);
uint32_t k4 = hash_get_block_32(blocks, i*4 + 3);
k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
h1 = hash_rotl_32(h1, 19); h1 += h2;
h1 = h1*5 + 0x561ccd1b;
k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
h2 = hash_rotl_32(h2, 17); h2 += h3;
h2 = h2*5 + 0x0bcaa747;
k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
h3 = hash_rotl_32(h3, 15); h3 += h4;
h3 = h3*5 + 0x96cd1c35;
k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
h4 = hash_rotl_32(h4, 13); h4 += h1;
h4 = h4*5 + 0x32ac3b17;
}
}
/* tail */
{
const uint8_t *tail = (const uint8_t *) (data + nblocks*16);
uint32_t k1 = 0;
uint32_t k2 = 0;
uint32_t k3 = 0;
uint32_t k4 = 0;
switch (len & 15) {
case 15: k4 ^= tail[14] << 16;
case 14: k4 ^= tail[13] << 8;
case 13: k4 ^= tail[12] << 0;
k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
case 12: k3 ^= tail[11] << 24;
case 11: k3 ^= tail[10] << 16;
case 10: k3 ^= tail[ 9] << 8;
case 9: k3 ^= tail[ 8] << 0;
k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
case 8: k2 ^= tail[ 7] << 24;
case 7: k2 ^= tail[ 6] << 16;
case 6: k2 ^= tail[ 5] << 8;
case 5: k2 ^= tail[ 4] << 0;
k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
case 4: k1 ^= tail[ 3] << 24;
case 3: k1 ^= tail[ 2] << 16;
case 2: k1 ^= tail[ 1] << 8;
case 1: k1 ^= tail[ 0] << 0;
k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
}
}
/* finalization */
h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
h1 = hash_fmix_32(h1);
h2 = hash_fmix_32(h2);
h3 = hash_fmix_32(h3);
h4 = hash_fmix_32(h4);
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
r_out[0] = (((uint64_t) h2) << 32) | h1;
r_out[1] = (((uint64_t) h4) << 32) | h3;
}
UNUSED JEMALLOC_INLINE void
hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t r_out[2])
{
const uint8_t *data = (const uint8_t *) key;
const int nblocks = len / 16;
uint64_t h1 = seed;
uint64_t h2 = seed;
const uint64_t c1 = QU(0x87c37b91114253d5LLU);
const uint64_t c2 = QU(0x4cf5ad432745937fLLU);
/* body */
{
const uint64_t *blocks = (const uint64_t *) (data);
int i;
for (i = 0; i < nblocks; i++) {
uint64_t k1 = hash_get_block_64(blocks, i*2 + 0);
uint64_t k2 = hash_get_block_64(blocks, i*2 + 1);
k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
h1 = hash_rotl_64(h1, 27); h1 += h2;
h1 = h1*5 + 0x52dce729;
k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
h2 = hash_rotl_64(h2, 31); h2 += h1;
h2 = h2*5 + 0x38495ab5;
}
}
/* tail */
{
const uint8_t *tail = (const uint8_t*)(data + nblocks*16);
uint64_t k1 = 0;
uint64_t k2 = 0;
switch (len & 15) {
case 15: k2 ^= ((uint64_t)(tail[14])) << 48;
case 14: k2 ^= ((uint64_t)(tail[13])) << 40;
case 13: k2 ^= ((uint64_t)(tail[12])) << 32;
case 12: k2 ^= ((uint64_t)(tail[11])) << 24;
case 11: k2 ^= ((uint64_t)(tail[10])) << 16;
case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;
case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56;
case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48;
case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40;
case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32;
case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24;
case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16;
case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8;
case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
}
}
/* finalization */
h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
h1 = hash_fmix_64(h1);
h2 = hash_fmix_64(h2);
h1 += h2;
h2 += h1;
r_out[0] = h1;
r_out[1] = h2;
}
/******************************************************************************/
/* API. */
JEMALLOC_INLINE void
hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2])
{
#if (LG_SIZEOF_PTR == 3)
hash_x64_128(key, len, seed, (uint64_t *)r_hash);
#else
uint64_t hashes[2];
hash_x86_128(key, len, seed, hashes);
r_hash[0] = (size_t)hashes[0];
r_hash[1] = (size_t)hashes[1];
#endif
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
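The top-level hash() always yields two size_t words, selecting the 128-bit variant that matches the pointer width. A usage sketch (assumes <string.h> for strlen(); the seed value is arbitrary):

const char *key = "example";
size_t r_hash[2];
hash(key, strlen(key), 0x94122f33U, r_hash);
/* r_hash[0] is the primary hash; r_hash[1] can drive e.g. secondary
 * bucket selection. */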


@ -0,0 +1,40 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
/* Huge allocation statistics. */
extern uint64_t huge_nmalloc;
extern uint64_t huge_ndalloc;
extern size_t huge_allocated;
/* Protects chunk-related data structures. */
extern malloc_mutex_t huge_mtx;
void *huge_malloc(size_t size, bool zero);
void *huge_palloc(size_t size, size_t alignment, bool zero);
void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
size_t extra);
void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_dalloc);
void huge_dalloc(void *ptr, bool unmap);
size_t huge_salloc(const void *ptr);
prof_ctx_t *huge_prof_ctx_get(const void *ptr);
void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool huge_boot(void);
void huge_prefork(void);
void huge_postfork_parent(void);
void huge_postfork_child(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

File diff suppressed because it is too large.


@ -0,0 +1,115 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void mb_write(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_))
#ifdef __i386__
/*
* According to the Intel Architecture Software Developer's Manual, current
* processors execute instructions in order from the perspective of other
* processors in a multiprocessor system, but 1) Intel reserves the right to
* change that, and 2) the compiler's optimizer could re-order instructions if
* there weren't some form of barrier. Therefore, even if running on an
* architecture that does not need memory barriers (everything through at least
* i686), an "optimizer barrier" is necessary.
*/
JEMALLOC_INLINE void
mb_write(void)
{
# if 0
/* This is a true memory barrier. */
asm volatile ("pusha;"
"xor %%eax,%%eax;"
"cpuid;"
"popa;"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
#else
/*
* This is hopefully enough to keep the compiler from reordering
* instructions around this one.
*/
asm volatile ("nop;"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
#endif
}
#elif (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("sfence"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#elif defined(__powerpc__)
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("eieio"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#elif defined(__sparc64__)
JEMALLOC_INLINE void
mb_write(void)
{
asm volatile ("membar #StoreStore"
: /* Outputs. */
: /* Inputs. */
: "memory" /* Clobbers. */
);
}
#elif defined(__tile__)
JEMALLOC_INLINE void
mb_write(void)
{
__sync_synchronize();
}
#else
/*
* This is much slower than a simple memory barrier, but the semantics of mutex
* unlock make this work.
*/
JEMALLOC_INLINE void
mb_write(void)
{
malloc_mutex_t mtx;
malloc_mutex_init(&mtx);
malloc_mutex_lock(&mtx);
malloc_mutex_unlock(&mtx);
}
#endif
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
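Every variant above is a store barrier: mb_write() keeps a data store from being reordered past a subsequent flag store, whether by the CPU or by the optimizer. The intended pattern, sketched with hypothetical variables:

/* Writer side of a publish handoff. */
shared_data = 42;
mb_write();	/* The data store becomes visible before the flag store. */
shared_ready = true;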


@ -0,0 +1,99 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct malloc_mutex_s malloc_mutex_t;
#ifdef _WIN32
# define MALLOC_MUTEX_INITIALIZER
#elif (defined(JEMALLOC_OSSPIN))
# define MALLOC_MUTEX_INITIALIZER {0}
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
#else
# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \
defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
# else
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER}
# endif
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct malloc_mutex_s {
#ifdef _WIN32
CRITICAL_SECTION lock;
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLock lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
pthread_mutex_t lock;
malloc_mutex_t *postponed_next;
#else
pthread_mutex_t lock;
#endif
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#ifdef JEMALLOC_LAZY_LOCK
extern bool isthreaded;
#else
# undef isthreaded /* Undo private_namespace.h definition. */
# define isthreaded true
#endif
bool malloc_mutex_init(malloc_mutex_t *mutex);
void malloc_mutex_prefork(malloc_mutex_t *mutex);
void malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
void malloc_mutex_postfork_child(malloc_mutex_t *mutex);
bool mutex_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void malloc_mutex_lock(malloc_mutex_t *mutex);
void malloc_mutex_unlock(malloc_mutex_t *mutex);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
JEMALLOC_INLINE void
malloc_mutex_lock(malloc_mutex_t *mutex)
{
if (isthreaded) {
#ifdef _WIN32
EnterCriticalSection(&mutex->lock);
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockLock(&mutex->lock);
#else
pthread_mutex_lock(&mutex->lock);
#endif
}
}
JEMALLOC_INLINE void
malloc_mutex_unlock(malloc_mutex_t *mutex)
{
if (isthreaded) {
#ifdef _WIN32
LeaveCriticalSection(&mutex->lock);
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockUnlock(&mutex->lock);
#else
pthread_mutex_unlock(&mutex->lock);
#endif
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
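A usage sketch of the wrapper API; the lock/unlock inlines are no-ops while isthreaded is false, and the stats_* names here are hypothetical:

static malloc_mutex_t stats_mtx;
static bool
stats_boot(void)
{
	return (malloc_mutex_init(&stats_mtx));	/* True on error. */
}
static void
stats_update(void)
{
	malloc_mutex_lock(&stats_mtx);
	/* ... mutate shared statistics ... */
	malloc_mutex_unlock(&stats_mtx);
}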


@ -0,0 +1,390 @@
#define a0calloc JEMALLOC_N(a0calloc)
#define a0free JEMALLOC_N(a0free)
#define a0malloc JEMALLOC_N(a0malloc)
#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small)
#define arena_bin_index JEMALLOC_N(arena_bin_index)
#define arena_bin_info JEMALLOC_N(arena_bin_info)
#define arena_boot JEMALLOC_N(arena_boot)
#define arena_dalloc JEMALLOC_N(arena_dalloc)
#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin)
#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked)
#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small)
#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large)
#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked)
#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small)
#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get)
#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set)
#define arena_malloc JEMALLOC_N(arena_malloc)
#define arena_malloc_large JEMALLOC_N(arena_malloc_large)
#define arena_malloc_small JEMALLOC_N(arena_malloc_small)
#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get)
#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get)
#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get)
#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get)
#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set)
#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get)
#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set)
#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get)
#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get)
#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set)
#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set)
#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get)
#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set)
#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get)
#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set)
#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get)
#define arena_mapp_get JEMALLOC_N(arena_mapp_get)
#define arena_maxclass JEMALLOC_N(arena_maxclass)
#define arena_new JEMALLOC_N(arena_new)
#define arena_palloc JEMALLOC_N(arena_palloc)
#define arena_postfork_child JEMALLOC_N(arena_postfork_child)
#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent)
#define arena_prefork JEMALLOC_N(arena_prefork)
#define arena_prof_accum JEMALLOC_N(arena_prof_accum)
#define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl)
#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked)
#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get)
#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set)
#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted)
#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get)
#define arena_purge_all JEMALLOC_N(arena_purge_all)
#define arena_ralloc JEMALLOC_N(arena_ralloc)
#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move)
#define arena_run_regind JEMALLOC_N(arena_run_regind)
#define arena_salloc JEMALLOC_N(arena_salloc)
#define arena_stats_merge JEMALLOC_N(arena_stats_merge)
#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small)
#define arenas JEMALLOC_N(arenas)
#define arenas_booted JEMALLOC_N(arenas_booted)
#define arenas_cleanup JEMALLOC_N(arenas_cleanup)
#define arenas_extend JEMALLOC_N(arenas_extend)
#define arenas_initialized JEMALLOC_N(arenas_initialized)
#define arenas_lock JEMALLOC_N(arenas_lock)
#define arenas_tls JEMALLOC_N(arenas_tls)
#define arenas_tsd JEMALLOC_N(arenas_tsd)
#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot)
#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper)
#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get)
#define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper)
#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set)
#define atomic_add_u JEMALLOC_N(atomic_add_u)
#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64)
#define atomic_add_z JEMALLOC_N(atomic_add_z)
#define atomic_sub_u JEMALLOC_N(atomic_sub_u)
#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32)
#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64)
#define atomic_sub_z JEMALLOC_N(atomic_sub_z)
#define base_alloc JEMALLOC_N(base_alloc)
#define base_boot JEMALLOC_N(base_boot)
#define base_calloc JEMALLOC_N(base_calloc)
#define base_node_alloc JEMALLOC_N(base_node_alloc)
#define base_node_dealloc JEMALLOC_N(base_node_dealloc)
#define base_postfork_child JEMALLOC_N(base_postfork_child)
#define base_postfork_parent JEMALLOC_N(base_postfork_parent)
#define base_prefork JEMALLOC_N(base_prefork)
#define bitmap_full JEMALLOC_N(bitmap_full)
#define bitmap_get JEMALLOC_N(bitmap_get)
#define bitmap_info_init JEMALLOC_N(bitmap_info_init)
#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups)
#define bitmap_init JEMALLOC_N(bitmap_init)
#define bitmap_set JEMALLOC_N(bitmap_set)
#define bitmap_sfu JEMALLOC_N(bitmap_sfu)
#define bitmap_size JEMALLOC_N(bitmap_size)
#define bitmap_unset JEMALLOC_N(bitmap_unset)
#define bt_init JEMALLOC_N(bt_init)
#define buferror JEMALLOC_N(buferror)
#define choose_arena JEMALLOC_N(choose_arena)
#define choose_arena_hard JEMALLOC_N(choose_arena_hard)
#define chunk_alloc JEMALLOC_N(chunk_alloc)
#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss)
#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap)
#define chunk_boot JEMALLOC_N(chunk_boot)
#define chunk_dealloc JEMALLOC_N(chunk_dealloc)
#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap)
#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child)
#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent)
#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get)
#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set)
#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork)
#define chunk_in_dss JEMALLOC_N(chunk_in_dss)
#define chunk_npages JEMALLOC_N(chunk_npages)
#define chunk_postfork_child JEMALLOC_N(chunk_postfork_child)
#define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent)
#define chunk_prefork JEMALLOC_N(chunk_prefork)
#define chunk_unmap JEMALLOC_N(chunk_unmap)
#define chunks_mtx JEMALLOC_N(chunks_mtx)
#define chunks_rtree JEMALLOC_N(chunks_rtree)
#define chunksize JEMALLOC_N(chunksize)
#define chunksize_mask JEMALLOC_N(chunksize_mask)
#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search)
#define ckh_count JEMALLOC_N(ckh_count)
#define ckh_delete JEMALLOC_N(ckh_delete)
#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert)
#define ckh_insert JEMALLOC_N(ckh_insert)
#define ckh_isearch JEMALLOC_N(ckh_isearch)
#define ckh_iter JEMALLOC_N(ckh_iter)
#define ckh_new JEMALLOC_N(ckh_new)
#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash)
#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp)
#define ckh_rebuild JEMALLOC_N(ckh_rebuild)
#define ckh_remove JEMALLOC_N(ckh_remove)
#define ckh_search JEMALLOC_N(ckh_search)
#define ckh_string_hash JEMALLOC_N(ckh_string_hash)
#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp)
#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert)
#define ckh_try_insert JEMALLOC_N(ckh_try_insert)
#define ctl_boot JEMALLOC_N(ctl_boot)
#define ctl_bymib JEMALLOC_N(ctl_bymib)
#define ctl_byname JEMALLOC_N(ctl_byname)
#define ctl_nametomib JEMALLOC_N(ctl_nametomib)
#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child)
#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent)
#define ctl_prefork JEMALLOC_N(ctl_prefork)
#define dss_prec_names JEMALLOC_N(dss_prec_names)
#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first)
#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert)
#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter)
#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse)
#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start)
#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last)
#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new)
#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next)
#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch)
#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev)
#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch)
#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove)
#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter)
#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse)
#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start)
#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search)
#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first)
#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert)
#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter)
#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse)
#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start)
#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last)
#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new)
#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next)
#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch)
#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev)
#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch)
#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove)
#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter)
#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse)
#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start)
#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search)
#define get_errno JEMALLOC_N(get_errno)
#define hash JEMALLOC_N(hash)
#define hash_fmix_32 JEMALLOC_N(hash_fmix_32)
#define hash_fmix_64 JEMALLOC_N(hash_fmix_64)
#define hash_get_block_32 JEMALLOC_N(hash_get_block_32)
#define hash_get_block_64 JEMALLOC_N(hash_get_block_64)
#define hash_rotl_32 JEMALLOC_N(hash_rotl_32)
#define hash_rotl_64 JEMALLOC_N(hash_rotl_64)
#define hash_x64_128 JEMALLOC_N(hash_x64_128)
#define hash_x86_128 JEMALLOC_N(hash_x86_128)
#define hash_x86_32 JEMALLOC_N(hash_x86_32)
#define huge_allocated JEMALLOC_N(huge_allocated)
#define huge_boot JEMALLOC_N(huge_boot)
#define huge_dalloc JEMALLOC_N(huge_dalloc)
#define huge_malloc JEMALLOC_N(huge_malloc)
#define huge_mtx JEMALLOC_N(huge_mtx)
#define huge_ndalloc JEMALLOC_N(huge_ndalloc)
#define huge_nmalloc JEMALLOC_N(huge_nmalloc)
#define huge_palloc JEMALLOC_N(huge_palloc)
#define huge_postfork_child JEMALLOC_N(huge_postfork_child)
#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent)
#define huge_prefork JEMALLOC_N(huge_prefork)
#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get)
#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set)
#define huge_ralloc JEMALLOC_N(huge_ralloc)
#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move)
#define huge_salloc JEMALLOC_N(huge_salloc)
#define iallocm JEMALLOC_N(iallocm)
#define icalloc JEMALLOC_N(icalloc)
#define icallocx JEMALLOC_N(icallocx)
#define idalloc JEMALLOC_N(idalloc)
#define idallocx JEMALLOC_N(idallocx)
#define imalloc JEMALLOC_N(imalloc)
#define imallocx JEMALLOC_N(imallocx)
#define ipalloc JEMALLOC_N(ipalloc)
#define ipallocx JEMALLOC_N(ipallocx)
#define iqalloc JEMALLOC_N(iqalloc)
#define iqallocx JEMALLOC_N(iqallocx)
#define iralloc JEMALLOC_N(iralloc)
#define irallocx JEMALLOC_N(irallocx)
#define isalloc JEMALLOC_N(isalloc)
#define isthreaded JEMALLOC_N(isthreaded)
#define ivsalloc JEMALLOC_N(ivsalloc)
#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child)
#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent)
#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork)
#define malloc_cprintf JEMALLOC_N(malloc_cprintf)
#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init)
#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock)
#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child)
#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent)
#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork)
#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock)
#define malloc_printf JEMALLOC_N(malloc_printf)
#define malloc_snprintf JEMALLOC_N(malloc_snprintf)
#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax)
#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot)
#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register)
#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc)
#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc)
#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup)
#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf)
#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf)
#define malloc_write JEMALLOC_N(malloc_write)
#define map_bias JEMALLOC_N(map_bias)
#define mb_write JEMALLOC_N(mb_write)
#define mutex_boot JEMALLOC_N(mutex_boot)
#define narenas_auto JEMALLOC_N(narenas_auto)
#define narenas_total JEMALLOC_N(narenas_total)
#define narenas_total_get JEMALLOC_N(narenas_total_get)
#define ncpus JEMALLOC_N(ncpus)
#define nhbins JEMALLOC_N(nhbins)
#define opt_abort JEMALLOC_N(opt_abort)
#define opt_junk JEMALLOC_N(opt_junk)
#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk)
#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult)
#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval)
#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample)
#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max)
#define opt_narenas JEMALLOC_N(opt_narenas)
#define opt_prof JEMALLOC_N(opt_prof)
#define opt_prof_accum JEMALLOC_N(opt_prof_accum)
#define opt_prof_active JEMALLOC_N(opt_prof_active)
#define opt_prof_final JEMALLOC_N(opt_prof_final)
#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump)
#define opt_prof_leak JEMALLOC_N(opt_prof_leak)
#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix)
#define opt_quarantine JEMALLOC_N(opt_quarantine)
#define opt_redzone JEMALLOC_N(opt_redzone)
#define opt_stats_print JEMALLOC_N(opt_stats_print)
#define opt_tcache JEMALLOC_N(opt_tcache)
#define opt_utrace JEMALLOC_N(opt_utrace)
#define opt_valgrind JEMALLOC_N(opt_valgrind)
#define opt_xmalloc JEMALLOC_N(opt_xmalloc)
#define opt_zero JEMALLOC_N(opt_zero)
#define p2rz JEMALLOC_N(p2rz)
#define pages_purge JEMALLOC_N(pages_purge)
#define pow2_ceil JEMALLOC_N(pow2_ceil)
#define prof_backtrace JEMALLOC_N(prof_backtrace)
#define prof_boot0 JEMALLOC_N(prof_boot0)
#define prof_boot1 JEMALLOC_N(prof_boot1)
#define prof_boot2 JEMALLOC_N(prof_boot2)
#define prof_ctx_get JEMALLOC_N(prof_ctx_get)
#define prof_ctx_set JEMALLOC_N(prof_ctx_set)
#define prof_free JEMALLOC_N(prof_free)
#define prof_gdump JEMALLOC_N(prof_gdump)
#define prof_idump JEMALLOC_N(prof_idump)
#define prof_interval JEMALLOC_N(prof_interval)
#define prof_lookup JEMALLOC_N(prof_lookup)
#define prof_malloc JEMALLOC_N(prof_malloc)
#define prof_mdump JEMALLOC_N(prof_mdump)
#define prof_postfork_child JEMALLOC_N(prof_postfork_child)
#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent)
#define prof_prefork JEMALLOC_N(prof_prefork)
#define prof_promote JEMALLOC_N(prof_promote)
#define prof_realloc JEMALLOC_N(prof_realloc)
#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update)
#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted)
#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup)
#define prof_tdata_get JEMALLOC_N(prof_tdata_get)
#define prof_tdata_init JEMALLOC_N(prof_tdata_init)
#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized)
#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls)
#define prof_tdata_tsd JEMALLOC_N(prof_tdata_tsd)
#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot)
#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper)
#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get)
#define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper)
#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set)
#define quarantine JEMALLOC_N(quarantine)
#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook)
#define quarantine_boot JEMALLOC_N(quarantine_boot)
#define quarantine_booted JEMALLOC_N(quarantine_booted)
#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup)
#define quarantine_init JEMALLOC_N(quarantine_init)
#define quarantine_tls JEMALLOC_N(quarantine_tls)
#define quarantine_tsd JEMALLOC_N(quarantine_tsd)
#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot)
#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper)
#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get)
#define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper)
#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set)
#define register_zone JEMALLOC_N(register_zone)
#define rtree_get JEMALLOC_N(rtree_get)
#define rtree_get_locked JEMALLOC_N(rtree_get_locked)
#define rtree_new JEMALLOC_N(rtree_new)
#define rtree_postfork_child JEMALLOC_N(rtree_postfork_child)
#define rtree_postfork_parent JEMALLOC_N(rtree_postfork_parent)
#define rtree_prefork JEMALLOC_N(rtree_prefork)
#define rtree_set JEMALLOC_N(rtree_set)
#define s2u JEMALLOC_N(s2u)
#define sa2u JEMALLOC_N(sa2u)
#define set_errno JEMALLOC_N(set_errno)
#define stats_cactive JEMALLOC_N(stats_cactive)
#define stats_cactive_add JEMALLOC_N(stats_cactive_add)
#define stats_cactive_get JEMALLOC_N(stats_cactive_get)
#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub)
#define stats_chunks JEMALLOC_N(stats_chunks)
#define stats_print JEMALLOC_N(stats_print)
#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy)
#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large)
#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small)
#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard)
#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate)
#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate)
#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large)
#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small)
#define tcache_bin_info JEMALLOC_N(tcache_bin_info)
#define tcache_boot0 JEMALLOC_N(tcache_boot0)
#define tcache_boot1 JEMALLOC_N(tcache_boot1)
#define tcache_booted JEMALLOC_N(tcache_booted)
#define tcache_create JEMALLOC_N(tcache_create)
#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large)
#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small)
#define tcache_destroy JEMALLOC_N(tcache_destroy)
#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted)
#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get)
#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized)
#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set)
#define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls)
#define tcache_enabled_tsd JEMALLOC_N(tcache_enabled_tsd)
#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot)
#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper)
#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get)
#define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper)
#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set)
#define tcache_event JEMALLOC_N(tcache_event)
#define tcache_event_hard JEMALLOC_N(tcache_event_hard)
#define tcache_flush JEMALLOC_N(tcache_flush)
#define tcache_get JEMALLOC_N(tcache_get)
#define tcache_initialized JEMALLOC_N(tcache_initialized)
#define tcache_maxclass JEMALLOC_N(tcache_maxclass)
#define tcache_salloc JEMALLOC_N(tcache_salloc)
#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup)
#define tcache_tls JEMALLOC_N(tcache_tls)
#define tcache_tsd JEMALLOC_N(tcache_tsd)
#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot)
#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper)
#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get)
#define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper)
#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set)
#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted)
#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized)
#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls)
#define thread_allocated_tsd JEMALLOC_N(thread_allocated_tsd)
#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot)
#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper)
#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get)
#define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper)
#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set)
#define u2rz JEMALLOC_N(u2rz)
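Each internal symbol is routed through JEMALLOC_N() so that a configure-time prefix keeps the library's non-public symbols out of the global namespace. Conceptually the macro pastes a prefix onto the name, along these lines (illustrative only; the real definition and prefix come from configure):

/* Illustrative; "je_" stands in for the configured private prefix. */
#define JEMALLOC_N(n) je_##n
/* With that, "#define arena_boot JEMALLOC_N(arena_boot)" makes every
 * internal reference to arena_boot link against je_arena_boot. */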


@ -0,0 +1,60 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/*
* Simple linear congruential pseudo-random number generator:
*
 * prng(x) = (a*x + c) % m
*
* where the following constants ensure maximal period:
*
* a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
* c == Odd number (relatively prime to 2^n).
* m == 2^32
*
* See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
*
* This choice of m has the disadvantage that the quality of the bits is
 * proportional to bit position. For example, the lowest bit has a cycle of 2,
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
*
* Macro parameters:
* uint32_t r : Result.
* unsigned lg_range : (0..32], number of least significant bits to return.
* uint32_t state : Seed value.
* const uint32_t a, c : See above discussion.
*/
#define prng32(r, lg_range, state, a, c) do { \
assert(lg_range > 0); \
assert(lg_range <= 32); \
\
r = (state * (a)) + (c); \
state = r; \
r >>= (32 - lg_range); \
} while (false)
/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
#define prng64(r, lg_range, state, a, c) do { \
assert(lg_range > 0); \
assert(lg_range <= 64); \
\
r = (state * (a)) + (c); \
state = r; \
r >>= (64 - lg_range); \
} while (false)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
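As a concrete instance, the CKH_A (1103515241) and CKH_C (12347) constants defined for the cuckoo hash satisfy these constraints: both are odd, and CKH_A - 1 is a multiple of 4. A usage sketch (the seed is arbitrary):

uint32_t r, state = 42;	/* Arbitrary seed. */
prng32(r, 8, state, 1103515241U, 12347U);	/* r is now in [0, 256). */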


@ -0,0 +1,579 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
#define PROF_PREFIX_DEFAULT "jeprof"
#define LG_PROF_SAMPLE_DEFAULT 19
#define LG_PROF_INTERVAL_DEFAULT -1
/*
* Hard limit on stack backtrace depth. The version of prof_backtrace() that
* is based on __builtin_return_address() necessarily has a hard-coded number
* of backtrace frame handlers, and should be kept in sync with this setting.
*/
#define PROF_BT_MAX 128
/* Maximum number of backtraces to store in each per thread LRU cache. */
#define PROF_TCMAX 1024
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUFSIZE 65536
/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE 128
/*
* Number of mutexes shared among all ctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NCTX_LOCKS 1024
/*
 * prof_tdata pointers close to NULL encode state information used for
 * cleanup during thread shutdown.
*/
#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1)
#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2)
#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct prof_bt_s {
/* Backtrace, stored as len program counters. */
void **vec;
unsigned len;
};
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
prof_bt_t *bt;
unsigned nignore;
unsigned max;
} prof_unwind_data_t;
#endif
struct prof_cnt_s {
/*
* Profiling counters. An allocation/deallocation pair can operate on
* different prof_thr_cnt_t objects that are linked into the same
* prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
* negative. In principle it is possible for the *bytes counters to
* overflow/underflow, but a general solution would require something
* like 128-bit counters; this implementation doesn't bother to solve
* that problem.
*/
int64_t curobjs;
int64_t curbytes;
uint64_t accumobjs;
uint64_t accumbytes;
};
struct prof_thr_cnt_s {
/* Linkage into prof_ctx_t's cnts_ql. */
ql_elm(prof_thr_cnt_t) cnts_link;
/* Linkage into thread's LRU. */
ql_elm(prof_thr_cnt_t) lru_link;
/*
* Associated context. If a thread frees an object that it did not
* allocate, it is possible that the context is not cached in the
* thread's hash table, in which case it must be able to look up the
* context, insert a new prof_thr_cnt_t into the thread's hash table,
* and link it into the prof_ctx_t's cnts_ql.
*/
prof_ctx_t *ctx;
/*
* Threads use memory barriers to update the counters. Since there is
* only ever one writer, the only challenge is for the reader to get a
* consistent read of the counters.
*
* The writer uses this series of operations:
*
* 1) Increment epoch to an odd number.
* 2) Update counters.
* 3) Increment epoch to an even number.
*
* The reader must assure 1) that the epoch is even while it reads the
* counters, and 2) that the epoch doesn't change between the time it
* starts and finishes reading the counters.
*/
unsigned epoch;
/* Profiling counters. */
prof_cnt_t cnts;
};
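/*
 * The epoch scheme above is a writer-side seqlock: a reader retries until it
 * observes the same even epoch before and after copying the counters.  A
 * hedged sketch of the reader loop (cnt is a hypothetical prof_thr_cnt_t
 * pointer; the memory barriers a real reader also needs are elided):
 *
 *	prof_cnt_t snap;
 *	unsigned e0, e1;
 *	do {
 *		e0 = cnt->epoch;
 *		snap = cnt->cnts;
 *		e1 = cnt->epoch;
 *	} while (e0 != e1 || (e0 & 1) != 0);
 */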
struct prof_ctx_s {
/* Associated backtrace. */
prof_bt_t *bt;
/* Protects nlimbo, cnt_merged, and cnts_ql. */
malloc_mutex_t *lock;
/*
* Number of threads that currently cause this ctx to be in a state of
* limbo due to one of:
* - Initializing per thread counters associated with this ctx.
* - Preparing to destroy this ctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
* ctx.
*/
unsigned nlimbo;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* When threads exit, they merge their stats into cnt_merged. */
prof_cnt_t cnt_merged;
/*
* List of profile counters, one for each thread that has allocated in
* this context.
*/
ql_head(prof_thr_cnt_t) cnts_ql;
};
struct prof_tdata_s {
/*
* Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
* cache of backtraces, with associated thread-specific prof_thr_cnt_t
 * objects. Other threads may read the prof_thr_cnt_t contents, but no
 * thread other than the owner will ever write them.
*
* Upon thread exit, the thread must merge all the prof_thr_cnt_t
* counter data into the associated prof_ctx_t objects, and unlink/free
* the prof_thr_cnt_t objects.
*/
ckh_t bt2cnt;
/* LRU for contents of bt2cnt. */
ql_head(prof_thr_cnt_t) lru_ql;
/* Backtrace vector, used for calls to prof_backtrace(). */
void **vec;
/* Sampling state. */
uint64_t prng_state;
uint64_t threshold;
uint64_t accum;
/* State used to avoid dumping while operating on prof internals. */
bool enq;
bool enq_idump;
bool enq_gdump;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_prof;
/*
* Even if opt_prof is true, sampling can be temporarily disabled by setting
* opt_prof_active to false. No locking is used when updating opt_prof_active,
* so there are no guarantees regarding how long it will take for all threads
* to notice state changes.
*/
extern bool opt_prof_active;
extern size_t opt_lg_prof_sample; /* lg(mean bytes between samples). */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
extern bool opt_prof_final; /* Final profile dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_accum; /* Report cumulative bytes. */
extern char opt_prof_prefix[PATH_MAX + 1];
/*
* Profile dump interval, measured in bytes allocated. Each arena triggers a
* profile dump when it reaches this threshold. The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps tends to be sporadic, bounded above by
 * approximately (prof_interval * narenas).
*/
extern uint64_t prof_interval;
/*
* If true, promote small sampled objects to large objects, since small run
* headers do not have embedded profile context pointers.
*/
extern bool prof_promote;
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
prof_tdata_t *prof_tdata_init(void);
void prof_tdata_cleanup(void *arg);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
void prof_prefork(void);
void prof_postfork_parent(void);
void prof_postfork_child(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#define PROF_ALLOC_PREP(nignore, size, ret) do { \
prof_tdata_t *prof_tdata; \
prof_bt_t bt; \
\
assert(size == s2u(size)); \
\
prof_tdata = prof_tdata_get(true); \
if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \
if (prof_tdata != NULL) \
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
else \
ret = NULL; \
break; \
} \
\
if (opt_prof_active == false) { \
/* Sampling is currently inactive, so avoid sampling. */\
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
} else if (opt_lg_prof_sample == 0) { \
/* Don't bother with sampling logic, since sampling */\
/* interval is 1. */\
bt_init(&bt, prof_tdata->vec); \
prof_backtrace(&bt, nignore); \
ret = prof_lookup(&bt); \
} else { \
if (prof_tdata->threshold == 0) { \
/* Initialize. Seed the prng differently for */\
/* each thread. */\
prof_tdata->prng_state = \
(uint64_t)(uintptr_t)&size; \
prof_sample_threshold_update(prof_tdata); \
} \
\
/* Determine whether to capture a backtrace based on */\
/* whether size is enough for prof_accum to reach */\
/* prof_tdata->threshold. However, delay updating */\
/* these variables until prof_{m,re}alloc(), because */\
/* we don't know for sure that the allocation will */\
/* succeed. */\
/* */\
/* Use subtraction rather than addition to avoid */\
/* potential integer overflow. */\
if (size >= prof_tdata->threshold - \
prof_tdata->accum) { \
bt_init(&bt, prof_tdata->vec); \
prof_backtrace(&bt, nignore); \
ret = prof_lookup(&bt); \
} else \
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
} \
} while (0)
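/*
 * Illustrative caller, not part of jemalloc, showing how the three
 * outcomes of PROF_ALLOC_PREP() are consumed: NULL propagates failure,
 * (uintptr_t)1 means "do not sample", and any other value is a
 * prof_thr_cnt_t to report to prof_malloc(). imalloc() stands in for the
 * internal allocation path.
 */
static void *
malloc_prof_sketch(size_t usize)
{
	void *p;
	prof_thr_cnt_t *cnt;

	/* usize must already be rounded up via s2u(). */
	PROF_ALLOC_PREP(1, usize, cnt);
	if (cnt == NULL)
		return (NULL);
	p = imalloc(usize);
	if (p == NULL)
		return (NULL);
	prof_malloc(p, usize, cnt);
	return (p);
}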
#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
prof_tdata_t *prof_tdata_get(bool create);
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr, size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
prof_tdata_cleanup)
JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create)
{
prof_tdata_t *prof_tdata;
cassert(config_prof);
prof_tdata = *prof_tdata_tsd_get();
if (create && prof_tdata == NULL)
prof_tdata = prof_tdata_init();
return (prof_tdata);
}
JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
uint64_t r;
double u;
cassert(config_prof);
/*
* Compute sample threshold as a geometrically distributed random
* variable with mean (2^opt_lg_prof_sample).
*
* __ __
* | log(u) | 1
* prof_tdata->threshold = | -------- |, where p = -------------------
* | log(1-p) | opt_lg_prof_sample
* 2
*
* For more information on the math, see:
*
* Non-Uniform Random Variate Generation
* Luc Devroye
* Springer-Verlag, New York, 1986
* pp 500
* (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
*/
prng64(r, 53, prof_tdata->prng_state,
UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
u = (double)r * (1.0/9007199254740992.0L);
prof_tdata->threshold = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+ (uint64_t)1U;
}
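/*
 * Standalone sketch of the same inverse-CDF draw, runnable outside
 * jemalloc (geometric_sample() is a hypothetical name): a geometric
 * variate with mean 2^lg_sample is obtained from a uniform u in (0, 1)
 * as ceil(log(u) / log(1 - p)).
 */
#include <math.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t
geometric_sample(unsigned lg_sample)
{
	double p = 1.0 / (double)((uint64_t)1U << lg_sample);
	/* Map rand() into (0, 1), excluding the endpoints. */
	double u = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);

	return ((uint64_t)(log(u) / log(1.0 - p)) + 1);
}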
JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
prof_ctx_t *ret;
arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
ret = arena_prof_ctx_get(ptr);
} else
ret = huge_prof_ctx_get(ptr);
return (ret);
}
JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
arena_prof_ctx_set(ptr, ctx);
} else
huge_prof_ctx_set(ptr, ctx);
}
JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
prof_tdata_t *prof_tdata;
cassert(config_prof);
/* Sampling logic is unnecessary if the interval is 1. */
assert(opt_lg_prof_sample != 0);
prof_tdata = prof_tdata_get(false);
if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
return (true);
/* Take care to avoid integer overflow. */
if (size >= prof_tdata->threshold - prof_tdata->accum) {
prof_tdata->accum -= (prof_tdata->threshold - size);
/* Compute new sample threshold. */
prof_sample_threshold_update(prof_tdata);
while (prof_tdata->accum >= prof_tdata->threshold) {
prof_tdata->accum -= prof_tdata->threshold;
prof_sample_threshold_update(prof_tdata);
}
return (false);
} else {
prof_tdata->accum += size;
return (true);
}
}
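/*
 * Worked example with hypothetical numbers: suppose threshold == 100 and
 * accum == 90. An allocation of size 15 satisfies 15 >= 100 - 90, so it
 * is sampled, and accum becomes 90 - (100 - 15) == 5: the 5 bytes by
 * which the trigger point was overshot carry over toward the next
 * threshold. A size-5 allocation instead merely advances accum to 95.
 */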
JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{
cassert(config_prof);
assert(ptr != NULL);
assert(size == isalloc(ptr, true));
if (opt_lg_prof_sample != 0) {
if (prof_sample_accum_update(size)) {
/*
* Don't sample. For malloc()-like allocation, it is
* always possible to tell in advance how large an
* object's usable size will be, so there should never
* be a difference between the size passed to
* PROF_ALLOC_PREP() and prof_malloc().
*/
assert((uintptr_t)cnt == (uintptr_t)1U);
}
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, cnt->ctx);
cnt->epoch++;
/*********/
mb_write();
/*********/
cnt->cnts.curobjs++;
cnt->cnts.curbytes += size;
if (opt_prof_accum) {
cnt->cnts.accumobjs++;
cnt->cnts.accumbytes += size;
}
/*********/
mb_write();
/*********/
cnt->epoch++;
/*********/
mb_write();
/*********/
} else
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}
JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
size_t old_size, prof_ctx_t *old_ctx)
{
prof_thr_cnt_t *told_cnt;
cassert(config_prof);
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
if (ptr != NULL) {
assert(size == isalloc(ptr, true));
if (opt_lg_prof_sample != 0) {
if (prof_sample_accum_update(size)) {
/*
* Don't sample. The size passed to
* PROF_ALLOC_PREP() was larger than what
* actually got allocated, so a backtrace was
* captured for this allocation, even though
* its actual size was insufficient to cross
* the sample threshold.
*/
cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
}
}
if ((uintptr_t)old_ctx > (uintptr_t)1U) {
told_cnt = prof_lookup(old_ctx->bt);
if (told_cnt == NULL) {
/*
* It's too late to propagate OOM for this realloc(),
* so operate directly on old_cnt->ctx->cnt_merged.
*/
malloc_mutex_lock(old_ctx->lock);
old_ctx->cnt_merged.curobjs--;
old_ctx->cnt_merged.curbytes -= old_size;
malloc_mutex_unlock(old_ctx->lock);
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
} else
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, cnt->ctx);
cnt->epoch++;
} else if (ptr != NULL)
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
/*********/
mb_write();
/*********/
if ((uintptr_t)told_cnt > (uintptr_t)1U) {
told_cnt->cnts.curobjs--;
told_cnt->cnts.curbytes -= old_size;
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
cnt->cnts.curobjs++;
cnt->cnts.curbytes += size;
if (opt_prof_accum) {
cnt->cnts.accumobjs++;
cnt->cnts.accumbytes += size;
}
}
/*********/
mb_write();
/*********/
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
mb_write(); /* Not strictly necessary. */
}
JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
prof_ctx_t *ctx = prof_ctx_get(ptr);
cassert(config_prof);
if ((uintptr_t)ctx > (uintptr_t)1) {
prof_thr_cnt_t *tcnt;
assert(size == isalloc(ptr, true));
tcnt = prof_lookup(ctx->bt);
if (tcnt != NULL) {
tcnt->epoch++;
/*********/
mb_write();
/*********/
tcnt->cnts.curobjs--;
tcnt->cnts.curbytes -= size;
/*********/
mb_write();
/*********/
tcnt->epoch++;
/*********/
mb_write();
/*********/
} else {
/*
* OOM during free() cannot be propagated, so operate
* directly on cnt->ctx->cnt_merged.
*/
malloc_mutex_lock(ctx->lock);
ctx->cnt_merged.curobjs--;
ctx->cnt_merged.curbytes -= size;
malloc_mutex_unlock(ctx->lock);
}
}
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

View File

@ -0,0 +1,83 @@
/*
* List definitions.
*/
#define ql_head(a_type) \
struct { \
a_type *qlh_first; \
}
#define ql_head_initializer(a_head) {NULL}
#define ql_elm(a_type) qr(a_type)
/* List functions. */
#define ql_new(a_head) do { \
(a_head)->qlh_first = NULL; \
} while (0)
#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
#define ql_first(a_head) ((a_head)->qlh_first)
#define ql_last(a_head, a_field) \
((ql_first(a_head) != NULL) \
? qr_prev(ql_first(a_head), a_field) : NULL)
#define ql_next(a_head, a_elm, a_field) \
((ql_last(a_head, a_field) != (a_elm)) \
? qr_next((a_elm), a_field) : NULL)
#define ql_prev(a_head, a_elm, a_field) \
((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \
: NULL)
#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \
qr_before_insert((a_qlelm), (a_elm), a_field); \
if (ql_first(a_head) == (a_qlelm)) { \
ql_first(a_head) = (a_elm); \
} \
} while (0)
#define ql_after_insert(a_qlelm, a_elm, a_field) \
qr_after_insert((a_qlelm), (a_elm), a_field)
#define ql_head_insert(a_head, a_elm, a_field) do { \
if (ql_first(a_head) != NULL) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = (a_elm); \
} while (0)
#define ql_tail_insert(a_head, a_elm, a_field) do { \
if (ql_first(a_head) != NULL) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = qr_next((a_elm), a_field); \
} while (0)
#define ql_remove(a_head, a_elm, a_field) do { \
if (ql_first(a_head) == (a_elm)) { \
ql_first(a_head) = qr_next(ql_first(a_head), a_field); \
} \
if (ql_first(a_head) != (a_elm)) { \
qr_remove((a_elm), a_field); \
} else { \
ql_first(a_head) = NULL; \
} \
} while (0)
#define ql_head_remove(a_head, a_type, a_field) do { \
a_type *t = ql_first(a_head); \
ql_remove((a_head), t, a_field); \
} while (0)
#define ql_tail_remove(a_head, a_type, a_field) do { \
a_type *t = ql_last(a_head, a_field); \
ql_remove((a_head), t, a_field); \
} while (0)
#define ql_foreach(a_var, a_head, a_field) \
qr_foreach((a_var), ql_first(a_head), a_field)
#define ql_reverse_foreach(a_var, a_head, a_field) \
qr_reverse_foreach((a_var), ql_first(a_head), a_field)
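/*
 * Minimal usage sketch for the list macros above. widget_t and the field
 * name "link" are hypothetical, and the ring (qr) macros from the
 * companion header must also be in scope.
 */
typedef struct widget_s widget_t;
struct widget_s {
	int		id;
	ql_elm(widget_t) link;
};

static int
widget_sum_sketch(widget_t *a, widget_t *b)
{
	ql_head(widget_t) head;
	widget_t *w;
	int sum = 0;

	ql_new(&head);
	ql_elm_new(a, link);
	ql_elm_new(b, link);
	ql_tail_insert(&head, a, link);
	ql_tail_insert(&head, b, link);
	ql_foreach(w, &head, link)
		sum += w->id;
	return (sum);
}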

View File

@ -0,0 +1,67 @@
/* Ring definitions. */
#define qr(a_type) \
struct { \
a_type *qre_next; \
a_type *qre_prev; \
}
/* Ring functions. */
#define qr_new(a_qr, a_field) do { \
(a_qr)->a_field.qre_next = (a_qr); \
(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)
#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
(a_qr)->a_field.qre_next = (a_qrelm); \
(a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
(a_qrelm)->a_field.qre_prev = (a_qr); \
} while (0)
#define qr_after_insert(a_qrelm, a_qr, a_field) do { \
	(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
	(a_qr)->a_field.qre_prev = (a_qrelm); \
	(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
	(a_qrelm)->a_field.qre_next = (a_qr); \
} while (0)
#define qr_meld(a_qr_a, a_qr_b, a_field) do { \
void *t; \
(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
t = (a_qr_a)->a_field.qre_prev; \
(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \
(a_qr_b)->a_field.qre_prev = t; \
} while (0)
/*
 * qr_meld() and qr_split() are functionally equivalent, so there's no need
 * to have two copies of the code.
 */
#define qr_split(a_qr_a, a_qr_b, a_field) \
qr_meld((a_qr_a), (a_qr_b), a_field)
#define qr_remove(a_qr, a_field) do { \
(a_qr)->a_field.qre_prev->a_field.qre_next \
= (a_qr)->a_field.qre_next; \
(a_qr)->a_field.qre_next->a_field.qre_prev \
= (a_qr)->a_field.qre_prev; \
(a_qr)->a_field.qre_next = (a_qr); \
(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)
#define qr_foreach(var, a_qr, a_field) \
for ((var) = (a_qr); \
(var) != NULL; \
(var) = (((var)->a_field.qre_next != (a_qr)) \
? (var)->a_field.qre_next : NULL))
#define qr_reverse_foreach(var, a_qr, a_field) \
for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \
(var) != NULL; \
(var) = (((var) != (a_qr)) \
? (var)->a_field.qre_prev : NULL))
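/*
 * Sketch (rnode_t is a hypothetical node type) of the meld/split duality
 * noted above: the same pointer swaps join two rings into one, and split
 * one ring back into two.
 */
typedef struct rnode_s rnode_t;
struct rnode_s {
	qr(rnode_t) link;
};

static void
ring_demo_sketch(rnode_t *a, rnode_t *b)
{

	qr_new(a, link);
	qr_new(b, link);
	qr_meld(a, b, link);	/* One ring: a <-> b. */
	qr_split(a, b, link);	/* Two singleton rings again. */
}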

View File

@ -0,0 +1,66 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct quarantine_obj_s quarantine_obj_t;
typedef struct quarantine_s quarantine_t;
/* Default per thread quarantine size if valgrind is enabled. */
#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct quarantine_obj_s {
void *ptr;
size_t usize;
};
struct quarantine_s {
size_t curbytes;
size_t curobjs;
size_t first;
#define LG_MAXOBJS_INIT 10
size_t lg_maxobjs;
quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
quarantine_t *quarantine_init(size_t lg_maxobjs);
void quarantine(void *ptr);
void quarantine_cleanup(void *arg);
bool quarantine_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *)
void quarantine_alloc_hook(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_))
malloc_tsd_externs(quarantine, quarantine_t *)
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL,
quarantine_cleanup)
JEMALLOC_ALWAYS_INLINE void
quarantine_alloc_hook(void)
{
quarantine_t *quarantine;
assert(config_fill && opt_quarantine);
quarantine = *quarantine_tsd_get();
if (quarantine == NULL)
quarantine_init(LG_MAXOBJS_INIT);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
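/*
 * The objs[1] member above is the pre-C99 flexible array idiom: the struct
 * header and the ring buffer are carved out of a single allocation. A
 * sketch of the sizing arithmetic follows; the real logic lives in
 * quarantine.c and uses jemalloc's internal allocator rather than malloc().
 */
#include <stddef.h>
#include <stdlib.h>

static quarantine_t *
quarantine_alloc_sketch(size_t lg_maxobjs)
{
	size_t size = offsetof(quarantine_t, objs) +
	    (((size_t)1 << lg_maxobjs) * sizeof(quarantine_obj_t));

	return ((quarantine_t *)malloc(size));
}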

View File

@ -0,0 +1,973 @@
/*-
*******************************************************************************
*
* cpp macro implementation of left-leaning 2-3 red-black trees. Parent
* pointers are not used, and color bits are stored in the least significant
* bit of right-child pointers (if RB_COMPACT is defined), thus making node
* linkage as compact as is possible for red-black trees.
*
* Usage:
*
* #include <stdint.h>
* #include <stdbool.h>
* #define NDEBUG // (Optional, see assert(3).)
* #include <assert.h>
* #define RB_COMPACT // (Optional, embed color bits in right-child pointers.)
* #include <rb.h>
* ...
*
*******************************************************************************
*/
#ifndef RB_H_
#define RB_H_
#if 0
__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $");
#endif
#ifdef RB_COMPACT
/* Node structure. */
#define rb_node(a_type) \
struct { \
a_type *rbn_left; \
a_type *rbn_right_red; \
}
#else
#define rb_node(a_type) \
struct { \
a_type *rbn_left; \
a_type *rbn_right; \
bool rbn_red; \
}
#endif
/* Root structure. */
#define rb_tree(a_type) \
struct { \
a_type *rbt_root; \
a_type rbt_nil; \
}
/* Left accessors. */
#define rbtn_left_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_left)
#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \
(a_node)->a_field.rbn_left = a_left; \
} while (0)
#ifdef RB_COMPACT
/* Right accessors. */
#define rbtn_right_get(a_type, a_field, a_node) \
((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \
& ((ssize_t)-2)))
#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \
| (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \
} while (0)
/* Color accessors. */
#define rbtn_red_get(a_type, a_field, a_node) \
((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \
& ((size_t)1)))
#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
(a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \
(a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \
| ((ssize_t)a_red)); \
} while (0)
#define rbtn_red_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \
(a_node)->a_field.rbn_right_red) | ((size_t)1)); \
} while (0)
#define rbtn_black_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \
(a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \
} while (0)
#else
/* Right accessors. */
#define rbtn_right_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_right)
#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
(a_node)->a_field.rbn_right = a_right; \
} while (0)
/* Color accessors. */
#define rbtn_red_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_red)
#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
(a_node)->a_field.rbn_red = (a_red); \
} while (0)
#define rbtn_red_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_red = true; \
} while (0)
#define rbtn_black_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_red = false; \
} while (0)
#endif
/* Node initializer. */
#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \
rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
rbtn_red_set(a_type, a_field, (a_node)); \
} while (0)
/* Tree initializer. */
#define rb_new(a_type, a_field, a_rbt) do { \
(a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \
rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \
rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \
} while (0)
/* Internal utility macros. */
#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \
(r_node) = (a_root); \
if ((r_node) != &(a_rbt)->rbt_nil) { \
for (; \
rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\
(r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \
} \
} \
} while (0)
#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \
(r_node) = (a_root); \
if ((r_node) != &(a_rbt)->rbt_nil) { \
for (; rbtn_right_get(a_type, a_field, (r_node)) != \
&(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \
(r_node))) { \
} \
} \
} while (0)
#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \
(r_node) = rbtn_right_get(a_type, a_field, (a_node)); \
rbtn_right_set(a_type, a_field, (a_node), \
rbtn_left_get(a_type, a_field, (r_node))); \
rbtn_left_set(a_type, a_field, (r_node), (a_node)); \
} while (0)
#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \
(r_node) = rbtn_left_get(a_type, a_field, (a_node)); \
rbtn_left_set(a_type, a_field, (a_node), \
rbtn_right_get(a_type, a_field, (r_node))); \
rbtn_right_set(a_type, a_field, (r_node), (a_node)); \
} while (0)
/*
* The rb_proto() macro generates function prototypes that correspond to the
* functions generated by an equivalently parameterized call to rb_gen().
*/
#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##last(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##next(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
a_prefix##prev(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
a_prefix##search(a_rbt_type *rbtree, a_type *key); \
a_attr a_type * \
a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \
a_attr a_type * \
a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \
a_attr void \
a_prefix##insert(a_rbt_type *rbtree, a_type *node); \
a_attr void \
a_prefix##remove(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
a_rbt_type *, a_type *, void *), void *arg); \
a_attr a_type * \
a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg);
/*
* The rb_gen() macro generates a type-specific red-black tree implementation,
* based on the above cpp macros.
*
* Arguments:
*
* a_attr : Function attribute for generated functions (ex: static).
* a_prefix : Prefix for generated functions (ex: ex_).
* a_rb_type : Type for red-black tree data structure (ex: ex_t).
* a_type : Type for red-black tree node data structure (ex: ex_node_t).
* a_field : Name of red-black tree node linkage (ex: ex_link).
* a_cmp : Node comparison function name, with the following prototype:
* int (a_cmp *)(a_type *a_node, a_type *a_other);
* ^^^^^^
* or a_key
 * Interpretation of comparison function return values:
* -1 : a_node < a_other
* 0 : a_node == a_other
* 1 : a_node > a_other
* In all cases, the a_node or a_key macro argument is the first
* argument to the comparison function, which makes it possible
* to write comparison functions that treat the first argument
* specially.
*
* Assuming the following setup:
*
* typedef struct ex_node_s ex_node_t;
* struct ex_node_s {
* rb_node(ex_node_t) ex_link;
* };
* typedef rb_tree(ex_node_t) ex_t;
* rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp)
*
* The following API is generated:
*
* static void
* ex_new(ex_t *tree);
* Description: Initialize a red-black tree structure.
* Args:
* tree: Pointer to an uninitialized red-black tree object.
*
* static ex_node_t *
* ex_first(ex_t *tree);
* static ex_node_t *
* ex_last(ex_t *tree);
* Description: Get the first/last node in tree.
* Args:
* tree: Pointer to an initialized red-black tree object.
* Ret: First/last node in tree, or NULL if tree is empty.
*
* static ex_node_t *
* ex_next(ex_t *tree, ex_node_t *node);
* static ex_node_t *
* ex_prev(ex_t *tree, ex_node_t *node);
* Description: Get node's successor/predecessor.
* Args:
* tree: Pointer to an initialized red-black tree object.
* node: A node in tree.
* Ret: node's successor/predecessor in tree, or NULL if node is
* last/first.
*
* static ex_node_t *
* ex_search(ex_t *tree, ex_node_t *key);
* Description: Search for node that matches key.
* Args:
* tree: Pointer to an initialized red-black tree object.
* key : Search key.
* Ret: Node in tree that matches key, or NULL if no match.
*
* static ex_node_t *
* ex_nsearch(ex_t *tree, ex_node_t *key);
* static ex_node_t *
* ex_psearch(ex_t *tree, ex_node_t *key);
* Description: Search for node that matches key. If no match is found,
* return what would be key's successor/predecessor, were
* key in tree.
* Args:
* tree: Pointer to an initialized red-black tree object.
* key : Search key.
* Ret: Node in tree that matches key, or if no match, hypothetical node's
* successor/predecessor (NULL if no successor/predecessor).
*
* static void
* ex_insert(ex_t *tree, ex_node_t *node);
* Description: Insert node into tree.
* Args:
* tree: Pointer to an initialized red-black tree object.
* node: Node to be inserted into tree.
*
* static void
* ex_remove(ex_t *tree, ex_node_t *node);
* Description: Remove node from tree.
* Args:
* tree: Pointer to an initialized red-black tree object.
* node: Node in tree to be removed.
*
* static ex_node_t *
* ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
* ex_node_t *, void *), void *arg);
* static ex_node_t *
 * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
* ex_node_t *, void *), void *arg);
* Description: Iterate forward/backward over tree, starting at node. If
* tree is modified, iteration must be immediately
* terminated by the callback function that causes the
* modification.
* Args:
* tree : Pointer to an initialized red-black tree object.
* start: Node at which to start iteration, or NULL to start at
* first/last node.
* cb : Callback function, which is called for each node during
* iteration. Under normal circumstances the callback function
* should return NULL, which causes iteration to continue. If a
* callback function returns non-NULL, iteration is immediately
* terminated and the non-NULL return value is returned by the
* iterator. This is useful for re-starting iteration after
* modifying tree.
* arg : Opaque pointer passed to cb().
* Ret: NULL if iteration completed, or the non-NULL callback return value
* that caused termination of the iteration.
*/
#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree) { \
rb_new(a_type, a_field, rbtree); \
} \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree) { \
a_type *ret; \
rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##last(a_rbt_type *rbtree) { \
a_type *ret; \
rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##next(a_rbt_type *rbtree, a_type *node) { \
a_type *ret; \
if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \
a_field, node), ret); \
} else { \
a_type *tnode = rbtree->rbt_root; \
assert(tnode != &rbtree->rbt_nil); \
ret = &rbtree->rbt_nil; \
while (true) { \
int cmp = (a_cmp)(node, tnode); \
if (cmp < 0) { \
ret = tnode; \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
break; \
} \
assert(tnode != &rbtree->rbt_nil); \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \
a_type *ret; \
if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \
a_field, node), ret); \
} else { \
a_type *tnode = rbtree->rbt_root; \
assert(tnode != &rbtree->rbt_nil); \
ret = &rbtree->rbt_nil; \
while (true) { \
int cmp = (a_cmp)(node, tnode); \
if (cmp < 0) { \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
ret = tnode; \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
break; \
} \
assert(tnode != &rbtree->rbt_nil); \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##search(a_rbt_type *rbtree, a_type *key) { \
a_type *ret; \
int cmp; \
ret = rbtree->rbt_root; \
while (ret != &rbtree->rbt_nil \
&& (cmp = (a_cmp)(key, ret)) != 0) { \
if (cmp < 0) { \
ret = rbtn_left_get(a_type, a_field, ret); \
} else { \
ret = rbtn_right_get(a_type, a_field, ret); \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \
a_type *ret; \
a_type *tnode = rbtree->rbt_root; \
ret = &rbtree->rbt_nil; \
while (tnode != &rbtree->rbt_nil) { \
int cmp = (a_cmp)(key, tnode); \
if (cmp < 0) { \
ret = tnode; \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
ret = tnode; \
break; \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \
a_type *ret; \
a_type *tnode = rbtree->rbt_root; \
ret = &rbtree->rbt_nil; \
while (tnode != &rbtree->rbt_nil) { \
int cmp = (a_cmp)(key, tnode); \
if (cmp < 0) { \
tnode = rbtn_left_get(a_type, a_field, tnode); \
} else if (cmp > 0) { \
ret = tnode; \
tnode = rbtn_right_get(a_type, a_field, tnode); \
} else { \
ret = tnode; \
break; \
} \
} \
if (ret == &rbtree->rbt_nil) { \
ret = (NULL); \
} \
return (ret); \
} \
a_attr void \
a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
struct { \
a_type *node; \
int cmp; \
} path[sizeof(void *) << 4], *pathp; \
rbt_node_new(a_type, a_field, rbtree, node); \
/* Wind. */ \
path->node = rbtree->rbt_root; \
for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
int cmp = pathp->cmp = a_cmp(node, pathp->node); \
assert(cmp != 0); \
if (cmp < 0) { \
pathp[1].node = rbtn_left_get(a_type, a_field, \
pathp->node); \
} else { \
pathp[1].node = rbtn_right_get(a_type, a_field, \
pathp->node); \
} \
} \
pathp->node = node; \
/* Unwind. */ \
for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
a_type *cnode = pathp->node; \
if (pathp->cmp < 0) { \
a_type *left = pathp[1].node; \
rbtn_left_set(a_type, a_field, cnode, left); \
if (rbtn_red_get(a_type, a_field, left)) { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
if (rbtn_red_get(a_type, a_field, leftleft)) { \
/* Fix up 4-node. */ \
a_type *tnode; \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, cnode, tnode); \
cnode = tnode; \
} \
} else { \
return; \
} \
} else { \
a_type *right = pathp[1].node; \
rbtn_right_set(a_type, a_field, cnode, right); \
if (rbtn_red_get(a_type, a_field, right)) { \
a_type *left = rbtn_left_get(a_type, a_field, cnode); \
if (rbtn_red_get(a_type, a_field, left)) { \
/* Split 4-node. */ \
rbtn_black_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, right); \
rbtn_red_set(a_type, a_field, cnode); \
} else { \
/* Lean left. */ \
a_type *tnode; \
bool tred = rbtn_red_get(a_type, a_field, cnode); \
rbtn_rotate_left(a_type, a_field, cnode, tnode); \
rbtn_color_set(a_type, a_field, tnode, tred); \
rbtn_red_set(a_type, a_field, cnode); \
cnode = tnode; \
} \
} else { \
return; \
} \
} \
pathp->node = cnode; \
} \
/* Set root, and make it black. */ \
rbtree->rbt_root = path->node; \
rbtn_black_set(a_type, a_field, rbtree->rbt_root); \
} \
a_attr void \
a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
struct { \
a_type *node; \
int cmp; \
} *pathp, *nodep, path[sizeof(void *) << 4]; \
/* Wind. */ \
nodep = NULL; /* Silence compiler warning. */ \
path->node = rbtree->rbt_root; \
for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
int cmp = pathp->cmp = a_cmp(node, pathp->node); \
if (cmp < 0) { \
pathp[1].node = rbtn_left_get(a_type, a_field, \
pathp->node); \
} else { \
pathp[1].node = rbtn_right_get(a_type, a_field, \
pathp->node); \
if (cmp == 0) { \
/* Find node's successor, in preparation for swap. */ \
pathp->cmp = 1; \
nodep = pathp; \
for (pathp++; pathp->node != &rbtree->rbt_nil; \
pathp++) { \
pathp->cmp = -1; \
pathp[1].node = rbtn_left_get(a_type, a_field, \
pathp->node); \
} \
break; \
} \
} \
} \
assert(nodep->node == node); \
pathp--; \
if (pathp->node != node) { \
/* Swap node with its successor. */ \
bool tred = rbtn_red_get(a_type, a_field, pathp->node); \
rbtn_color_set(a_type, a_field, pathp->node, \
rbtn_red_get(a_type, a_field, node)); \
rbtn_left_set(a_type, a_field, pathp->node, \
rbtn_left_get(a_type, a_field, node)); \
/* If node's successor is its right child, the following code */\
/* will do the wrong thing for the right child pointer. */\
/* However, it doesn't matter, because the pointer will be */\
/* properly set when the successor is pruned. */\
rbtn_right_set(a_type, a_field, pathp->node, \
rbtn_right_get(a_type, a_field, node)); \
rbtn_color_set(a_type, a_field, node, tred); \
/* The pruned leaf node's child pointers are never accessed */\
/* again, so don't bother setting them to nil. */\
nodep->node = pathp->node; \
pathp->node = node; \
if (nodep == path) { \
rbtree->rbt_root = nodep->node; \
} else { \
if (nodep[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, nodep[-1].node, \
nodep->node); \
} else { \
rbtn_right_set(a_type, a_field, nodep[-1].node, \
nodep->node); \
} \
} \
} else { \
a_type *left = rbtn_left_get(a_type, a_field, node); \
if (left != &rbtree->rbt_nil) { \
/* node has no successor, but it has a left child. */\
/* Splice node out, without losing the left child. */\
assert(rbtn_red_get(a_type, a_field, node) == false); \
assert(rbtn_red_get(a_type, a_field, left)); \
rbtn_black_set(a_type, a_field, left); \
if (pathp == path) { \
rbtree->rbt_root = left; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
left); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
left); \
} \
} \
return; \
} else if (pathp == path) { \
/* The tree only contained one node. */ \
rbtree->rbt_root = &rbtree->rbt_nil; \
return; \
} \
} \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
/* Prune red node, which requires no fixup. */ \
assert(pathp[-1].cmp < 0); \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
&rbtree->rbt_nil); \
return; \
} \
/* The node to be pruned is black, so unwind until balance is */\
/* restored. */\
pathp->node = &rbtree->rbt_nil; \
for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
assert(pathp->cmp != 0); \
if (pathp->cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp->node, \
pathp[1].node); \
assert(rbtn_red_get(a_type, a_field, pathp[1].node) \
== false); \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *right = rbtn_right_get(a_type, a_field, \
pathp->node); \
a_type *rightleft = rbtn_left_get(a_type, a_field, \
right); \
a_type *tnode; \
if (rbtn_red_get(a_type, a_field, rightleft)) { \
/* In the following diagrams, ||, //, and \\ */\
/* indicate the path to the removed node. */\
/* */\
/* || */\
/* pathp(r) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (r) */\
/* */\
rbtn_black_set(a_type, a_field, pathp->node); \
rbtn_rotate_right(a_type, a_field, right, tnode); \
rbtn_right_set(a_type, a_field, pathp->node, tnode);\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
} else { \
/* || */\
/* pathp(r) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (b) */\
/* */\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
} \
/* Balance restored, but rotation modified subtree */\
/* root. */\
assert((uintptr_t)pathp > (uintptr_t)path); \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
tnode); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
return; \
} else { \
a_type *right = rbtn_right_get(a_type, a_field, \
pathp->node); \
a_type *rightleft = rbtn_left_get(a_type, a_field, \
right); \
if (rbtn_red_get(a_type, a_field, rightleft)) { \
/* || */\
/* pathp(b) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (r) */\
a_type *tnode; \
rbtn_black_set(a_type, a_field, rightleft); \
rbtn_rotate_right(a_type, a_field, right, tnode); \
rbtn_right_set(a_type, a_field, pathp->node, tnode);\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
/* subtree root, which may actually be the tree */\
/* root. */\
if (pathp == path) { \
/* Set root. */ \
rbtree->rbt_root = tnode; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, \
pathp[-1].node, tnode); \
} else { \
rbtn_right_set(a_type, a_field, \
pathp[-1].node, tnode); \
} \
} \
return; \
} else { \
/* || */\
/* pathp(b) */\
/* // \ */\
/* (b) (b) */\
/* / */\
/* (b) */\
a_type *tnode; \
rbtn_red_set(a_type, a_field, pathp->node); \
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
pathp->node = tnode; \
} \
} \
} else { \
a_type *left; \
rbtn_right_set(a_type, a_field, pathp->node, \
pathp[1].node); \
left = rbtn_left_get(a_type, a_field, pathp->node); \
if (rbtn_red_get(a_type, a_field, left)) { \
a_type *tnode; \
a_type *leftright = rbtn_right_get(a_type, a_field, \
left); \
a_type *leftrightleft = rbtn_left_get(a_type, a_field, \
leftright); \
if (rbtn_red_get(a_type, a_field, leftrightleft)) { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (r) (b) */\
/* \ */\
/* (b) */\
/* / */\
/* (r) */\
a_type *unode; \
rbtn_black_set(a_type, a_field, leftrightleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
unode); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
rbtn_right_set(a_type, a_field, unode, tnode); \
rbtn_rotate_left(a_type, a_field, unode, tnode); \
} else { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (r) (b) */\
/* \ */\
/* (b) */\
/* / */\
/* (b) */\
assert(leftright != &rbtree->rbt_nil); \
rbtn_red_set(a_type, a_field, leftright); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
rbtn_black_set(a_type, a_field, tnode); \
} \
/* Balance restored, but rotation modified subtree */\
/* root, which may actually be the tree root. */\
if (pathp == path) { \
/* Set root. */ \
rbtree->rbt_root = tnode; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
tnode); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
} \
return; \
} else if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
if (rbtn_red_get(a_type, a_field, leftleft)) { \
/* || */\
/* pathp(r) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (r) */\
a_type *tnode; \
rbtn_black_set(a_type, a_field, pathp->node); \
rbtn_red_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
/* subtree root. */\
assert((uintptr_t)pathp > (uintptr_t)path); \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
tnode); \
} else { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
return; \
} else { \
/* || */\
/* pathp(r) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (b) */\
rbtn_red_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, pathp->node); \
/* Balance restored. */ \
return; \
} \
} else { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
if (rbtn_red_get(a_type, a_field, leftleft)) { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (r) */\
a_type *tnode; \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
/* subtree root, which may actually be the tree */\
/* root. */\
if (pathp == path) { \
/* Set root. */ \
rbtree->rbt_root = tnode; \
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, \
pathp[-1].node, tnode); \
} else { \
rbtn_right_set(a_type, a_field, \
pathp[-1].node, tnode); \
} \
} \
return; \
} else { \
/* || */\
/* pathp(b) */\
/* / \\ */\
/* (b) (b) */\
/* / */\
/* (b) */\
rbtn_red_set(a_type, a_field, left); \
} \
} \
} \
} \
/* Set root. */ \
rbtree->rbt_root = path->node; \
assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \
} \
a_attr a_type * \
a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
if (node == &rbtree->rbt_nil) { \
return (&rbtree->rbt_nil); \
} else { \
a_type *ret; \
if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \
a_field, node), cb, arg)) != &rbtree->rbt_nil \
|| (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
int cmp = a_cmp(start, node); \
if (cmp < 0) { \
a_type *ret; \
if ((ret = a_prefix##iter_start(rbtree, start, \
rbtn_left_get(a_type, a_field, node), cb, arg)) != \
&rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
a_field, node), cb, arg)); \
} else if (cmp > 0) { \
return (a_prefix##iter_start(rbtree, start, \
rbtn_right_get(a_type, a_field, node), cb, arg)); \
} else { \
a_type *ret; \
if ((ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
a_rbt_type *, a_type *, void *), void *arg) { \
a_type *ret; \
if (start != NULL) { \
ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \
cb, arg); \
} else { \
ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\
} \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
} \
a_attr a_type * \
a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
if (node == &rbtree->rbt_nil) { \
return (&rbtree->rbt_nil); \
} else { \
a_type *ret; \
if ((ret = a_prefix##reverse_iter_recurse(rbtree, \
rbtn_right_get(a_type, a_field, node), cb, arg)) != \
&rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##reverse_iter_recurse(rbtree, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \
a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \
void *arg) { \
int cmp = a_cmp(start, node); \
if (cmp > 0) { \
a_type *ret; \
if ((ret = a_prefix##reverse_iter_start(rbtree, start, \
rbtn_right_get(a_type, a_field, node), cb, arg)) != \
&rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##reverse_iter_recurse(rbtree, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} else if (cmp < 0) { \
return (a_prefix##reverse_iter_start(rbtree, start, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} else { \
a_type *ret; \
if ((ret = cb(rbtree, node, arg)) != NULL) { \
return (ret); \
} \
return (a_prefix##reverse_iter_recurse(rbtree, \
rbtn_left_get(a_type, a_field, node), cb, arg)); \
} \
} \
a_attr a_type * \
a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
a_type *ret; \
if (start != NULL) { \
ret = a_prefix##reverse_iter_start(rbtree, start, \
rbtree->rbt_root, cb, arg); \
} else { \
ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \
cb, arg); \
} \
if (ret == &rbtree->rbt_nil) { \
ret = NULL; \
} \
return (ret); \
}
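/*
 * Concrete usage sketch following the ex_* example in the comment above
 * (the key field and the ex_find() helper are hypothetical). Note that
 * the generated insert() asserts that no two nodes compare equal, so real
 * comparators typically break ties, e.g. by node address.
 */
typedef struct ex_node_s ex_node_t;
struct ex_node_s {
	int	key;
	rb_node(ex_node_t) ex_link;
};
typedef rb_tree(ex_node_t) ex_t;

static int
ex_cmp(ex_node_t *a, ex_node_t *b)
{

	return ((a->key > b->key) - (a->key < b->key));
}

rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp)

static ex_node_t *
ex_find(ex_t *tree, int key)
{
	ex_node_t query;

	query.key = key;
	return (ex_search(tree, &query));
}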
#endif /* RB_H_ */

View File

@ -0,0 +1,164 @@
/*
* This radix tree implementation is tailored to the singular purpose of
* tracking which chunks are currently owned by jemalloc. This functionality
* is mandatory for OS X, where jemalloc must be able to respond to object
* ownership queries.
*
*******************************************************************************
*/
#ifdef JEMALLOC_H_TYPES
typedef struct rtree_s rtree_t;
/*
* Size of each radix tree node (must be a power of 2). This impacts tree
* depth.
*/
#if (LG_SIZEOF_PTR == 2)
# define RTREE_NODESIZE (1U << 14)
#else
# define RTREE_NODESIZE CACHELINE
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct rtree_s {
malloc_mutex_t mutex;
void **root;
unsigned height;
unsigned level2bits[1]; /* Dynamically sized. */
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
rtree_t *rtree_new(unsigned bits);
void rtree_prefork(rtree_t *rtree);
void rtree_postfork_parent(rtree_t *rtree);
void rtree_postfork_child(rtree_t *rtree);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
#ifndef JEMALLOC_DEBUG
void *rtree_get_locked(rtree_t *rtree, uintptr_t key);
#endif
void *rtree_get(rtree_t *rtree, uintptr_t key);
bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
#define RTREE_GET_GENERATE(f) \
/* The least significant bits of the key are ignored. */ \
JEMALLOC_INLINE void * \
f(rtree_t *rtree, uintptr_t key) \
{ \
void *ret; \
uintptr_t subkey; \
unsigned i, lshift, height, bits; \
void **node, **child; \
\
RTREE_LOCK(&rtree->mutex); \
for (i = lshift = 0, height = rtree->height, node = rtree->root;\
i < height - 1; \
i++, lshift += bits, node = child) { \
bits = rtree->level2bits[i]; \
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
3)) - bits); \
child = (void**)node[subkey]; \
if (child == NULL) { \
RTREE_UNLOCK(&rtree->mutex); \
return (NULL); \
} \
} \
\
/* \
* node is a leaf, so it contains values rather than node \
* pointers. \
*/ \
bits = rtree->level2bits[i]; \
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
bits); \
ret = node[subkey]; \
RTREE_UNLOCK(&rtree->mutex); \
\
RTREE_GET_VALIDATE \
return (ret); \
}
#ifdef JEMALLOC_DEBUG
# define RTREE_LOCK(l) malloc_mutex_lock(l)
# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
# define RTREE_GET_VALIDATE
RTREE_GET_GENERATE(rtree_get_locked)
# undef RTREE_LOCK
# undef RTREE_UNLOCK
# undef RTREE_GET_VALIDATE
#endif
#define RTREE_LOCK(l)
#define RTREE_UNLOCK(l)
#ifdef JEMALLOC_DEBUG
/*
* Suppose that it were possible for a jemalloc-allocated chunk to be
* munmap()ped, followed by a different allocator in another thread re-using
* overlapping virtual memory, all without invalidating the cached rtree
* value. The result would be a false positive (the rtree would claim that
* jemalloc owns memory that it had actually discarded). This scenario
* seems impossible, but the following assertion is a prudent sanity check.
*/
# define RTREE_GET_VALIDATE \
assert(rtree_get_locked(rtree, key) == ret);
#else
# define RTREE_GET_VALIDATE
#endif
RTREE_GET_GENERATE(rtree_get)
#undef RTREE_LOCK
#undef RTREE_UNLOCK
#undef RTREE_GET_VALIDATE
JEMALLOC_INLINE bool
rtree_set(rtree_t *rtree, uintptr_t key, void *val)
{
uintptr_t subkey;
unsigned i, lshift, height, bits;
void **node, **child;
malloc_mutex_lock(&rtree->mutex);
for (i = lshift = 0, height = rtree->height, node = rtree->root;
i < height - 1;
i++, lshift += bits, node = child) {
bits = rtree->level2bits[i];
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
bits);
child = (void**)node[subkey];
if (child == NULL) {
child = (void**)base_alloc(sizeof(void *) <<
rtree->level2bits[i+1]);
if (child == NULL) {
malloc_mutex_unlock(&rtree->mutex);
return (true);
}
memset(child, 0, sizeof(void *) <<
rtree->level2bits[i+1]);
node[subkey] = child;
}
}
/* node is a leaf, so it contains values rather than node pointers. */
bits = rtree->level2bits[i];
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
node[subkey] = val;
malloc_mutex_unlock(&rtree->mutex);
return (false);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
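/*
 * Usage sketch with hypothetical values: on a 64-bit platform with 4 MiB
 * chunks, only the upper 64 - 22 = 42 address bits distinguish chunks, so
 * the tree is keyed on that many bits. This mirrors how chunk.c sizes its
 * chunk-tracking rtree; ZU() and LG_SIZEOF_PTR are jemalloc macros defined
 * elsewhere.
 */
static rtree_t *
chunks_rtree_new_sketch(void)
{
	unsigned lg_chunk = 22;

	return (rtree_new((unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) -
	    lg_chunk)));
}

static bool
chunk_register_sketch(rtree_t *chunks_rtree, void *chunk)
{

	/* rtree_set() returns true on allocation failure. */
	return (rtree_set(chunks_rtree, (uintptr_t)chunk, chunk));
}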

View File

@ -0,0 +1,122 @@
#!/bin/sh
# The following limits are chosen such that they cover all supported platforms.
# Range of quanta.
lg_qmin=3
lg_qmax=4
# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)].
lg_tmin=3
# Range of page sizes.
lg_pmin=12
lg_pmax=16
pow2() {
e=$1
pow2_result=1
while [ ${e} -gt 0 ] ; do
pow2_result=$((${pow2_result} + ${pow2_result}))
e=$((${e} - 1))
done
}
cat <<EOF
/* This file was automatically generated by size_classes.sh. */
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
EOF
lg_q=${lg_qmin}
while [ ${lg_q} -le ${lg_qmax} ] ; do
lg_t=${lg_tmin}
while [ ${lg_t} -le ${lg_q} ] ; do
lg_p=${lg_pmin}
while [ ${lg_p} -le ${lg_pmax} ] ; do
echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
echo "#define SIZE_CLASSES_DEFINED"
pow2 ${lg_q}; q=${pow2_result}
pow2 ${lg_t}; t=${pow2_result}
pow2 ${lg_p}; p=${pow2_result}
bin=0
psz=0
sz=${t}
delta=$((${sz} - ${psz}))
echo "/* SIZE_CLASS(bin, delta, sz) */"
echo "#define SIZE_CLASSES \\"
# Tiny size classes.
while [ ${sz} -lt ${q} ] ; do
echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
bin=$((${bin} + 1))
psz=${sz}
sz=$((${sz} + ${sz}))
delta=$((${sz} - ${psz}))
done
# Quantum-multiple size classes. For each doubling of sz, as many as 4
# size classes exist. Their spacing is the greater of:
# - q
# - sz/4, where sz is a power of 2
while [ ${sz} -lt ${p} ] ; do
if [ ${sz} -ge $((${q} * 4)) ] ; then
i=$((${sz} / 4))
else
i=${q}
fi
next_2pow=$((${sz} * 2))
while [ ${sz} -lt $next_2pow ] ; do
echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
bin=$((${bin} + 1))
psz=${sz}
sz=$((${sz} + ${i}))
delta=$((${sz} - ${psz}))
done
done
echo
echo "#define NBINS ${bin}"
echo "#define SMALL_MAXCLASS ${psz}"
echo "#endif"
echo
lg_p=$((${lg_p} + 1))
done
lg_t=$((${lg_t} + 1))
done
lg_q=$((${lg_q} + 1))
done
cat <<EOF
#ifndef SIZE_CLASSES_DEFINED
# error "No size class definitions match configuration"
#endif
#undef SIZE_CLASSES_DEFINED
/*
* The small_size2bin lookup table uses uint8_t to encode each bin index, so we
* cannot support more than 256 small size classes. Further constrain NBINS to
 * 255 to support prof_promote, since all small size classes, plus a "not
 * small" size class, must be stored in 8 bits of arena_chunk_map_t's bits
* field.
*/
#if (NBINS > 255)
# error "Too many small size classes"
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
EOF
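# For example, with LG_TINY_MIN == 3, LG_QUANTUM == 4, and LG_PAGE == 12,
# the loops above emit one tiny class followed by quantum-spaced classes
# (first entries shown; the list continues up to the 4096-byte page):
#
#   SIZE_CLASS(0,  8,   8) \
#   SIZE_CLASS(1,  8,  16) \
#   SIZE_CLASS(2, 16,  32) \
#   SIZE_CLASS(3, 16,  48) \
#   SIZE_CLASS(4, 16,  64) \
#   SIZE_CLASS(5, 16,  80) \
#   ...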

View File

@ -0,0 +1,173 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct tcache_bin_stats_s tcache_bin_stats_t;
typedef struct malloc_bin_stats_s malloc_bin_stats_t;
typedef struct malloc_large_stats_s malloc_large_stats_t;
typedef struct arena_stats_s arena_stats_t;
typedef struct chunk_stats_s chunk_stats_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct tcache_bin_stats_s {
/*
* Number of allocation requests that corresponded to the size of this
* bin.
*/
uint64_t nrequests;
};
struct malloc_bin_stats_s {
/*
* Current number of bytes allocated, including objects currently
* cached by tcache.
*/
size_t allocated;
/*
* Total number of allocation/deallocation requests served directly by
* the bin. Note that tcache may allocate an object, then recycle it
 * many times, resulting in many increments to nrequests, but only one
* each to nmalloc and ndalloc.
*/
uint64_t nmalloc;
uint64_t ndalloc;
/*
* Number of allocation requests that correspond to the size of this
* bin. This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
uint64_t nrequests;
/* Number of tcache fills from this bin. */
uint64_t nfills;
/* Number of tcache flushes to this bin. */
uint64_t nflushes;
/* Total number of runs created for this bin's size class. */
uint64_t nruns;
/*
* Total number of runs reused by extracting them from the runs tree for
* this bin's size class.
*/
uint64_t reruns;
/* Current number of runs in this bin. */
size_t curruns;
};
struct malloc_large_stats_s {
/*
* Total number of allocation/deallocation requests served directly by
* the arena. Note that tcache may allocate an object, then recycle it
 * many times, resulting in many increments to nrequests, but only one
* each to nmalloc and ndalloc.
*/
uint64_t nmalloc;
uint64_t ndalloc;
/*
* Number of allocation requests that correspond to this size class.
* This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
uint64_t nrequests;
/* Current number of runs of this size class. */
size_t curruns;
};
struct arena_stats_s {
/* Number of bytes currently mapped. */
size_t mapped;
/*
* Total number of purge sweeps, total number of madvise calls made,
* and total pages purged in order to keep dirty unused memory under
* control.
*/
uint64_t npurge;
uint64_t nmadvise;
uint64_t purged;
/* Per-size-category statistics. */
size_t allocated_large;
uint64_t nmalloc_large;
uint64_t ndalloc_large;
uint64_t nrequests_large;
/*
* One element for each possible size class, including sizes that
* overlap with bin size classes. This is necessary because ipalloc()
* sometimes has to use such large objects in order to assure proper
* alignment.
*/
malloc_large_stats_t *lstats;
};
struct chunk_stats_s {
/* Number of chunks that were allocated. */
uint64_t nchunks;
/* High-water mark for number of chunks allocated. */
size_t highchunks;
/*
 * Current number of chunks allocated. This value is maintained solely
 * so that highchunks can be updated whenever curchunks reaches a new
 * maximum.
*/
size_t curchunks;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_stats_print;
extern size_t stats_cactive;
void stats_print(void (*write)(void *, const char *), void *cbopaque,
const char *opts);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
size_t stats_cactive_get(void);
void stats_cactive_add(size_t size);
void stats_cactive_sub(size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_))
JEMALLOC_INLINE size_t
stats_cactive_get(void)
{
return (atomic_read_z(&stats_cactive));
}
JEMALLOC_INLINE void
stats_cactive_add(size_t size)
{
atomic_add_z(&stats_cactive, size);
}
JEMALLOC_INLINE void
stats_cactive_sub(size_t size)
{
atomic_sub_z(&stats_cactive, size);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,442 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
/*
* tcache pointers close to NULL are used to encode state information that is
* used for two purposes: preventing thread caching on a per thread basis and
* cleaning up during thread shutdown.
*/
#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1)
#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2)
#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3)
#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY
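/*
* Illustrative sketch (not in the original source): because the three state
* values above are all <= 3, one unsigned comparison distinguishes a real
* cache pointer from an encoded state, e.g.:
*
*	tcache_t *tcache = *tcache_tsd_get();
*	if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
*		... NULL/disabled/reincarnated/purgatory handling ...
*	} else {
*		... tcache points to a live cache ...
*	}
*
* tcache_flush() and tcache_get() below use exactly this idiom.
*/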
/*
* Absolute maximum number of cache slots for each small bin in the thread
* cache. This is an additional constraint beyond the run-derived limit of
* twice the number of regions per run for this size class.
*
* This constant must be an even number.
*/
#define TCACHE_NSLOTS_SMALL_MAX 200
/* Number of cache slots for large size classes. */
#define TCACHE_NSLOTS_LARGE 20
/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
#define LG_TCACHE_MAXCLASS_DEFAULT 15
/*
* TCACHE_GC_SWEEP is the approximate number of allocation events between
* full GC sweeps. Integer rounding may cause the actual number to be
* slightly higher, since GC is performed incrementally.
*/
#define TCACHE_GC_SWEEP 8192
/* Number of tcache allocation/deallocation events between incremental GCs. */
#define TCACHE_GC_INCR \
((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
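/*
* Worked example (NBINS value assumed for illustration only): with
* TCACHE_GC_SWEEP == 8192 and NBINS == 36, TCACHE_GC_INCR is
* (8192 / 36) + 1 == 228, so one bin is garbage-collected roughly every 228
* allocation/deallocation events, and a full sweep of all 36 bins takes
* about 228 * 36 == 8208 events, slightly more than TCACHE_GC_SWEEP, as the
* comment above warns.
*/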
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
typedef enum {
tcache_enabled_false = 0, /* Enable cast to/from bool. */
tcache_enabled_true = 1,
tcache_enabled_default = 2
} tcache_enabled_t;
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
struct tcache_bin_info_s {
unsigned ncached_max; /* Upper limit on ncached. */
};
struct tcache_bin_s {
tcache_bin_stats_t tstats;
int low_water; /* Min # cached since last GC. */
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
unsigned ncached; /* # of cached objects. */
void **avail; /* Stack of available objects. */
};
struct tcache_s {
ql_elm(tcache_t) link; /* Used for aggregating stats. */
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
arena_t *arena; /* This thread's arena. */
unsigned ev_cnt; /* Event count since incremental GC. */
unsigned next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
* element of tbins is initialized to point to the proper offset within
* this array.
*/
};
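/*
* Layout sketch (illustrative, not part of the original source): a tcache is
* allocated as a single contiguous block,
*
*	| tcache_t header | tbins[0 .. nhbins-1] | avail pointer stacks |
*
* so the allocation size is roughly sizeof(tcache_t) plus
* (nhbins - 1) * sizeof(tcache_bin_t) plus room for each bin's ncached_max
* object pointers, and each tbins[i].avail is aimed at its slice of the
* trailing stack area during initialization.
*/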
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
extern tcache_bin_info_t *tcache_bin_info;
/*
* Number of tcache bins. There are NBINS small-object bins, plus 0 or more
* large-object bins.
*/
extern size_t nhbins;
/* Maximum cached size class. */
extern size_t tcache_maxclass;
size_t tcache_salloc(const void *ptr);
void tcache_event_hard(tcache_t *tcache);
void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
size_t binind);
void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
tcache_t *tcache);
void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
tcache_t *tcache);
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
void tcache_arena_dissociate(tcache_t *tcache);
tcache_t *tcache_create(arena_t *arena);
void tcache_destroy(tcache_t *tcache);
void tcache_thread_cleanup(void *arg);
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
bool tcache_boot0(void);
bool tcache_boot1(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *)
malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t)
void tcache_event(tcache_t *tcache);
void tcache_flush(void);
bool tcache_enabled_get(void);
tcache_t *tcache_get(bool create);
void tcache_enabled_set(bool enabled);
void *tcache_alloc_easy(tcache_bin_t *tbin);
void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind);
void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
/* Map of thread-specific caches. */
malloc_tsd_externs(tcache, tcache_t *)
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL,
tcache_thread_cleanup)
/* Per thread flag that allows thread caches to be disabled. */
malloc_tsd_externs(tcache_enabled, tcache_enabled_t)
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t,
tcache_enabled_default, malloc_tsd_no_cleanup)
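/*
* The two malloc_tsd_funcs() expansions above generate tcache_tsd_get()/
* tcache_tsd_set() and tcache_enabled_tsd_get()/tcache_enabled_tsd_set()
* (plus the corresponding *_tsd_boot() functions); the inline functions
* below rely on them. See tsd.h for the macro machinery.
*/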
JEMALLOC_INLINE void
tcache_flush(void)
{
tcache_t *tcache;
cassert(config_tcache);
tcache = *tcache_tsd_get();
if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)
return;
tcache_destroy(tcache);
tcache = NULL;
tcache_tsd_set(&tcache);
}
JEMALLOC_INLINE bool
tcache_enabled_get(void)
{
tcache_enabled_t tcache_enabled;
cassert(config_tcache);
tcache_enabled = *tcache_enabled_tsd_get();
if (tcache_enabled == tcache_enabled_default) {
tcache_enabled = (tcache_enabled_t)opt_tcache;
tcache_enabled_tsd_set(&tcache_enabled);
}
return ((bool)tcache_enabled);
}
JEMALLOC_INLINE void
tcache_enabled_set(bool enabled)
{
tcache_enabled_t tcache_enabled;
tcache_t *tcache;
cassert(config_tcache);
tcache_enabled = (tcache_enabled_t)enabled;
tcache_enabled_tsd_set(&tcache_enabled);
tcache = *tcache_tsd_get();
if (enabled) {
if (tcache == TCACHE_STATE_DISABLED) {
tcache = NULL;
tcache_tsd_set(&tcache);
}
} else /* disabled */ {
if (tcache > TCACHE_STATE_MAX) {
tcache_destroy(tcache);
tcache = NULL;
}
if (tcache == NULL) {
tcache = TCACHE_STATE_DISABLED;
tcache_tsd_set(&tcache);
}
}
}
JEMALLOC_ALWAYS_INLINE tcache_t *
tcache_get(bool create)
{
tcache_t *tcache;
if (config_tcache == false)
return (NULL);
if (config_lazy_lock && isthreaded == false)
return (NULL);
tcache = *tcache_tsd_get();
if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
if (tcache == TCACHE_STATE_DISABLED)
return (NULL);
if (tcache == NULL) {
if (create == false) {
/*
* Creating a tcache here would cause
* allocation as a side effect of free().
* Ordinarily that would be okay since
* tcache_create() failure is a soft failure
* that doesn't propagate. However, if TLS
* data are freed via free() as in glibc,
* subtle corruption could result from setting
* a TLS variable after its backing memory is
* freed.
*/
return (NULL);
}
if (tcache_enabled_get() == false) {
tcache_enabled_set(false); /* Memoize. */
return (NULL);
}
return (tcache_create(choose_arena(NULL)));
}
if (tcache == TCACHE_STATE_PURGATORY) {
/*
* Make a note that an allocator function was called
* after tcache_thread_cleanup() was called.
*/
tcache = TCACHE_STATE_REINCARNATED;
tcache_tsd_set(&tcache);
return (NULL);
}
if (tcache == TCACHE_STATE_REINCARNATED)
return (NULL);
not_reached();
}
return (tcache);
}
JEMALLOC_ALWAYS_INLINE void
tcache_event(tcache_t *tcache)
{
if (TCACHE_GC_INCR == 0)
return;
tcache->ev_cnt++;
assert(tcache->ev_cnt <= TCACHE_GC_INCR);
if (tcache->ev_cnt == TCACHE_GC_INCR)
tcache_event_hard(tcache);
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_easy(tcache_bin_t *tbin)
{
void *ret;
if (tbin->ncached == 0) {
tbin->low_water = -1;
return (NULL);
}
tbin->ncached--;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
ret = tbin->avail[tbin->ncached];
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
{
void *ret;
size_t binind;
tcache_bin_t *tbin;
binind = SMALL_SIZE2BIN(size);
assert(binind < NBINS);
tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin);
if (ret == NULL) {
ret = tcache_alloc_small_hard(tcache, tbin, binind);
if (ret == NULL)
return (NULL);
}
assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size);
if (zero == false) {
if (config_fill) {
if (opt_junk) {
arena_alloc_junk_small(ret,
&arena_bin_info[binind], false);
} else if (opt_zero)
memset(ret, 0, size);
}
} else {
if (config_fill && opt_junk) {
arena_alloc_junk_small(ret, &arena_bin_info[binind],
true);
}
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
memset(ret, 0, size);
}
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
if (config_stats)
tbin->tstats.nrequests++;
if (config_prof)
tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
tcache_event(tcache);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
{
void *ret;
size_t binind;
tcache_bin_t *tbin;
size = PAGE_CEILING(size);
assert(size <= tcache_maxclass);
binind = NBINS + (size >> LG_PAGE) - 1;
assert(binind < nhbins);
tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin);
if (ret == NULL) {
/*
* Only allocate one large object at a time, because it's quite
* expensive to create one and not use it.
*/
ret = arena_malloc_large(tcache->arena, size, zero);
if (ret == NULL)
return (NULL);
} else {
if (config_prof && prof_promote && size == PAGE) {
arena_chunk_t *chunk =
(arena_chunk_t *)CHUNK_ADDR2BASE(ret);
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
LG_PAGE);
arena_mapbits_large_binind_set(chunk, pageind,
BININD_INVALID);
}
if (zero == false) {
if (config_fill) {
if (opt_junk)
memset(ret, 0xa5, size);
else if (opt_zero)
memset(ret, 0, size);
}
} else {
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
memset(ret, 0, size);
}
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
if (config_stats)
tbin->tstats.nrequests++;
if (config_prof)
tcache->prof_accumbytes += size;
}
tcache_event(tcache);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
{
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
if (config_fill && opt_junk)
arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
1), tcache);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
tcache_event(tcache);
}
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
{
size_t binind;
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert((size & PAGE_MASK) == 0);
assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
assert(tcache_salloc(ptr) <= tcache_maxclass);
binind = NBINS + (size >> LG_PAGE) - 1;
if (config_fill && opt_junk)
memset(ptr, 0x5a, size);
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
if (tbin->ncached == tbin_info->ncached_max) {
tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
1), tcache);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
tcache_event(tcache);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,397 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/* Maximum number of malloc_tsd users with cleanup functions. */
#define MALLOC_TSD_CLEANUPS_MAX 8
typedef bool (*malloc_tsd_cleanup_t)(void);
/*
* TLS/TSD-agnostic macro-based implementation of thread-specific data. There
* are four macros that support (at least) three use cases: file-private,
* library-private, and library-private inlined. Following is an example
* library-private tsd variable:
*
* In example.h:
* typedef struct {
* int x;
* int y;
* } example_t;
* #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0})
* malloc_tsd_protos(, example, example_t *)
* malloc_tsd_externs(example, example_t *)
* In example.c:
* malloc_tsd_data(, example, example_t *, EX_INITIALIZER)
* malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER,
* example_tsd_cleanup)
*
* The result is a set of generated functions, e.g.:
*
* bool example_tsd_boot(void) {...}
* example_t **example_tsd_get() {...}
* void example_tsd_set(example_t **val) {...}
*
* Note that all of the functions deal in terms of (a_type *) rather than
* (a_type) so that it is possible to support non-pointer types (unlike
* pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is
* cast to (void *). This means that the cleanup function needs to cast *and*
* dereference the function argument, e.g.:
*
* void
* example_tsd_cleanup(void *arg)
* {
* example_t *example = *(example_t **)arg;
*
* [...]
* if ([want the cleanup function to be called again]) {
* example_tsd_set(&example);
* }
* }
*
* If example_tsd_set() is called within example_tsd_cleanup(), it will be
* called again. This is similar to how pthreads TSD destruction works, except
* that pthreads only calls the cleanup function again if the value was set to
* non-NULL.
*/
/* malloc_tsd_protos(). */
#define malloc_tsd_protos(a_attr, a_name, a_type) \
a_attr bool \
a_name##_tsd_boot(void); \
a_attr a_type * \
a_name##_tsd_get(void); \
a_attr void \
a_name##_tsd_set(a_type *val);
/* malloc_tsd_externs(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_externs(a_name, a_type) \
extern __thread a_type a_name##_tls; \
extern __thread bool a_name##_initialized; \
extern bool a_name##_booted;
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_externs(a_name, a_type) \
extern __thread a_type a_name##_tls; \
extern pthread_key_t a_name##_tsd; \
extern bool a_name##_booted;
#elif (defined(_WIN32))
#define malloc_tsd_externs(a_name, a_type) \
extern DWORD a_name##_tsd; \
extern bool a_name##_booted;
#else
#define malloc_tsd_externs(a_name, a_type) \
extern pthread_key_t a_name##_tsd; \
extern bool a_name##_booted;
#endif
/* malloc_tsd_data(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr __thread a_type JEMALLOC_TLS_MODEL \
a_name##_tls = a_initializer; \
a_attr __thread bool JEMALLOC_TLS_MODEL \
a_name##_initialized = false; \
a_attr bool a_name##_booted = false;
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr __thread a_type JEMALLOC_TLS_MODEL \
a_name##_tls = a_initializer; \
a_attr pthread_key_t a_name##_tsd; \
a_attr bool a_name##_booted = false;
#elif (defined(_WIN32))
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr DWORD a_name##_tsd; \
a_attr bool a_name##_booted = false;
#else
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr pthread_key_t a_name##_tsd; \
a_attr bool a_name##_booted = false;
#endif
/* malloc_tsd_funcs(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
/* Initialization/cleanup. */ \
a_attr bool \
a_name##_tsd_cleanup_wrapper(void) \
{ \
\
if (a_name##_initialized) { \
a_name##_initialized = false; \
a_cleanup(&a_name##_tls); \
} \
return (a_name##_initialized); \
} \
a_attr bool \
a_name##_tsd_boot(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
malloc_tsd_cleanup_register( \
&a_name##_tsd_cleanup_wrapper); \
} \
a_name##_booted = true; \
return (false); \
} \
/* Get/set. */ \
a_attr a_type * \
a_name##_tsd_get(void) \
{ \
\
assert(a_name##_booted); \
return (&a_name##_tls); \
} \
a_attr void \
a_name##_tsd_set(a_type *val) \
{ \
\
assert(a_name##_booted); \
a_name##_tls = (*val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
a_name##_initialized = true; \
}
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
/* Initialization/cleanup. */ \
a_attr bool \
a_name##_tsd_boot(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \
return (true); \
} \
a_name##_booted = true; \
return (false); \
} \
/* Get/set. */ \
a_attr a_type * \
a_name##_tsd_get(void) \
{ \
\
assert(a_name##_booted); \
return (&a_name##_tls); \
} \
a_attr void \
a_name##_tsd_set(a_type *val) \
{ \
\
assert(a_name##_booted); \
a_name##_tls = (*val); \
if (a_cleanup != malloc_tsd_no_cleanup) { \
if (pthread_setspecific(a_name##_tsd, \
(void *)(&a_name##_tls))) { \
malloc_write("<jemalloc>: Error" \
" setting TSD for "#a_name"\n"); \
if (opt_abort) \
abort(); \
} \
} \
}
#elif (defined(_WIN32))
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
/* Data structure. */ \
typedef struct { \
bool initialized; \
a_type val; \
} a_name##_tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr bool \
a_name##_tsd_cleanup_wrapper(void) \
{ \
a_name##_tsd_wrapper_t *wrapper; \
\
wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \
if (wrapper == NULL) \
return (false); \
if (a_cleanup != malloc_tsd_no_cleanup && \
wrapper->initialized) { \
a_type val = wrapper->val; \
a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
wrapper->val = tsd_static_data; \
a_cleanup(&val); \
if (wrapper->initialized) { \
/* Trigger another cleanup round. */ \
return (true); \
} \
} \
malloc_tsd_dalloc(wrapper); \
return (false); \
} \
a_attr bool \
a_name##_tsd_boot(void) \
{ \
\
a_name##_tsd = TlsAlloc(); \
if (a_name##_tsd == TLS_OUT_OF_INDEXES) \
return (true); \
if (a_cleanup != malloc_tsd_no_cleanup) { \
malloc_tsd_cleanup_register( \
&a_name##_tsd_cleanup_wrapper); \
} \
a_name##_booted = true; \
return (false); \
} \
/* Get/set. */ \
a_attr a_name##_tsd_wrapper_t * \
a_name##_tsd_get_wrapper(void) \
{ \
a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
TlsGetValue(a_name##_tsd); \
\
if (wrapper == NULL) { \
wrapper = (a_name##_tsd_wrapper_t *) \
malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
if (wrapper == NULL) { \
malloc_write("<jemalloc>: Error allocating" \
" TSD for "#a_name"\n"); \
abort(); \
} else { \
static a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
wrapper->val = tsd_static_data; \
} \
if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \
malloc_write("<jemalloc>: Error setting" \
" TSD for "#a_name"\n"); \
abort(); \
} \
} \
return (wrapper); \
} \
a_attr a_type * \
a_name##_tsd_get(void) \
{ \
a_name##_tsd_wrapper_t *wrapper; \
\
assert(a_name##_booted); \
wrapper = a_name##_tsd_get_wrapper(); \
return (&wrapper->val); \
} \
a_attr void \
a_name##_tsd_set(a_type *val) \
{ \
a_name##_tsd_wrapper_t *wrapper; \
\
assert(a_name##_booted); \
wrapper = a_name##_tsd_get_wrapper(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
}
#else
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
/* Data structure. */ \
typedef struct { \
bool initialized; \
a_type val; \
} a_name##_tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr void \
a_name##_tsd_cleanup_wrapper(void *arg) \
{ \
a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\
\
if (a_cleanup != malloc_tsd_no_cleanup && \
wrapper->initialized) { \
wrapper->initialized = false; \
a_cleanup(&wrapper->val); \
if (wrapper->initialized) { \
/* Trigger another cleanup round. */ \
if (pthread_setspecific(a_name##_tsd, \
(void *)wrapper)) { \
malloc_write("<jemalloc>: Error" \
" setting TSD for "#a_name"\n"); \
if (opt_abort) \
abort(); \
} \
return; \
} \
} \
malloc_tsd_dalloc(wrapper); \
} \
a_attr bool \
a_name##_tsd_boot(void) \
{ \
\
if (pthread_key_create(&a_name##_tsd, \
a_name##_tsd_cleanup_wrapper) != 0) \
return (true); \
a_name##_booted = true; \
return (false); \
} \
/* Get/set. */ \
a_attr a_name##_tsd_wrapper_t * \
a_name##_tsd_get_wrapper(void) \
{ \
a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
pthread_getspecific(a_name##_tsd); \
\
if (wrapper == NULL) { \
wrapper = (a_name##_tsd_wrapper_t *) \
malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
if (wrapper == NULL) { \
malloc_write("<jemalloc>: Error allocating" \
" TSD for "#a_name"\n"); \
abort(); \
} else { \
static a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
wrapper->val = tsd_static_data; \
} \
if (pthread_setspecific(a_name##_tsd, \
(void *)wrapper)) { \
malloc_write("<jemalloc>: Error setting" \
" TSD for "#a_name"\n"); \
abort(); \
} \
} \
return (wrapper); \
} \
a_attr a_type * \
a_name##_tsd_get(void) \
{ \
a_name##_tsd_wrapper_t *wrapper; \
\
assert(a_name##_booted); \
wrapper = a_name##_tsd_get_wrapper(); \
return (&wrapper->val); \
} \
a_attr void \
a_name##_tsd_set(a_type *val) \
{ \
a_name##_tsd_wrapper_t *wrapper; \
\
assert(a_name##_booted); \
wrapper = a_name##_tsd_get_wrapper(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
}
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
void malloc_tsd_no_cleanup(void *);
void malloc_tsd_cleanup_register(bool (*f)(void));
void malloc_tsd_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,160 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
/* Size of stack-allocated buffer passed to buferror(). */
#define BUFERROR_BUF 64
/*
* Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be
* large enough for all possible uses within jemalloc.
*/
#define MALLOC_PRINTF_BUFSIZE 4096
/*
* Wrap a cpp argument that contains commas such that it isn't broken up into
* multiple arguments.
*/
#define JEMALLOC_CONCAT(...) __VA_ARGS__
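/*
* For example, JEMALLOC_CONCAT({0, 0}) lets an aggregate initializer that
* contains a comma travel through another macro as a single argument; tsd.h
* uses this for initializers such as EX_INITIALIZER in its usage example.
*/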
/*
* Silence compiler warnings due to uninitialized values. This is used
* wherever the compiler fails to recognize that the variable is never used
* uninitialized.
*/
#ifdef JEMALLOC_CC_SILENCE
# define JEMALLOC_CC_SILENCE_INIT(v) = v
#else
# define JEMALLOC_CC_SILENCE_INIT(v)
#endif
/*
* Define a custom assert() in order to reduce the chances of deadlock during
* assertion failure.
*/
#ifndef assert
#define assert(e) do { \
if (config_debug && !(e)) { \
malloc_printf( \
"<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
__FILE__, __LINE__, #e); \
abort(); \
} \
} while (0)
#endif
/* Use to assert a particular configuration, e.g., cassert(config_debug). */
#define cassert(c) do { \
if ((c) == false) \
assert(false); \
} while (0)
#ifndef not_reached
#define not_reached() do { \
if (config_debug) { \
malloc_printf( \
"<jemalloc>: %s:%d: Unreachable code reached\n", \
__FILE__, __LINE__); \
abort(); \
} \
} while (0)
#endif
#ifndef not_implemented
#define not_implemented() do { \
if (config_debug) { \
malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \
__FILE__, __LINE__); \
abort(); \
} \
} while (0)
#endif
#define assert_not_implemented(e) do { \
if (config_debug && !(e)) \
not_implemented(); \
} while (0)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
int buferror(char *buf, size_t buflen);
uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base);
void malloc_write(const char *s);
/*
* malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
* point math.
*/
int malloc_vsnprintf(char *str, size_t size, const char *format,
va_list ap);
int malloc_snprintf(char *str, size_t size, const char *format, ...)
JEMALLOC_ATTR(format(printf, 3, 4));
void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, va_list ap);
void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
void malloc_printf(const char *format, ...)
JEMALLOC_ATTR(format(printf, 1, 2));
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
size_t pow2_ceil(size_t x);
void malloc_write(const char *s);
void set_errno(int errnum);
int get_errno(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_))
/* Compute the smallest power of 2 that is >= x. */
JEMALLOC_INLINE size_t
pow2_ceil(size_t x)
{
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
#if (LG_SIZEOF_PTR == 3)
x |= x >> 32;
#endif
x++;
return (x);
}
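/*
* Worked example (illustrative): pow2_ceil(1000) decrements to 999 (0x3e7),
* the shift/or cascade smears the highest set bit into all lower positions
* (yielding 0x3ff), and the final increment produces 0x400 == 1024. The
* initial decrement is what makes exact powers of 2 map to themselves.
*/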
/* Set the last error code. */
JEMALLOC_INLINE void
set_errno(int errnum)
{
#ifdef _WIN32
SetLastError(errnum);
#else
errno = errnum;
#endif
}
/* Get last error code */
JEMALLOC_INLINE int
get_errno(void)
{
#ifdef _WIN32
return (GetLastError());
#else
return (errno);
#endif
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/


@ -0,0 +1,157 @@
#ifndef JEMALLOC_H_
#define JEMALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <strings.h>
#define JEMALLOC_VERSION "@jemalloc_version@"
#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
#include "jemalloc_defs@install_suffix@.h"
#ifdef JEMALLOC_EXPERIMENTAL
#define ALLOCM_LG_ALIGN(la) (la)
#if LG_SIZEOF_PTR == 2
#define ALLOCM_ALIGN(a) (ffs(a)-1)
#else
#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
#endif
#define ALLOCM_ZERO ((int)0x40)
#define ALLOCM_NO_MOVE ((int)0x80)
/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */
#define ALLOCM_ARENA(a) ((int)(((a)+1) << 8))
#define ALLOCM_SUCCESS 0
#define ALLOCM_ERR_OOM 1
#define ALLOCM_ERR_NOT_MOVED 2
#endif
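/*
* Example use of the experimental flags above (illustrative only):
*
*	void *p;
*	if (je_allocm(&p, NULL, 4096, ALLOCM_ALIGN(64) | ALLOCM_ZERO) !=
*	    ALLOCM_SUCCESS)
*		... handle allocation failure ...
*
* ALLOCM_ALIGN(64) encodes lg(64) == 6 in the low flag bits via ffs().
*/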
/*
* The je_ prefix on the following public symbol declarations is an artifact of
* namespace management, and should be omitted in application code unless
* JEMALLOC_NO_DEMANGLE is defined (see below).
*/
extern JEMALLOC_EXPORT const char *je_malloc_conf;
extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque,
const char *s);
JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc);
JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size)
JEMALLOC_ATTR(malloc);
JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment,
size_t size) JEMALLOC_ATTR(nonnull(1));
JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size)
JEMALLOC_ATTR(malloc);
JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size);
JEMALLOC_EXPORT void je_free(void *ptr);
#ifdef JEMALLOC_OVERRIDE_MEMALIGN
JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size)
JEMALLOC_ATTR(malloc);
#endif
#ifdef JEMALLOC_OVERRIDE_VALLOC
JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc);
#endif
JEMALLOC_EXPORT size_t je_malloc_usable_size(
JEMALLOC_USABLE_SIZE_CONST void *ptr);
JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *,
const char *), void *je_cbopaque, const char *opts);
JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp,
size_t *oldlenp, void *newp, size_t newlen);
JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp,
size_t *miblenp);
JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen,
void *oldp, size_t *oldlenp, void *newp, size_t newlen);
#ifdef JEMALLOC_EXPERIMENTAL
JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size,
int flags) JEMALLOC_ATTR(nonnull(1));
JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size,
size_t extra, int flags) JEMALLOC_ATTR(nonnull(1));
JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags)
JEMALLOC_ATTR(nonnull(1));
JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags)
JEMALLOC_ATTR(nonnull(1));
JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags);
#endif
/*
* By default application code must explicitly refer to mangled symbol names,
* so that it is possible to use jemalloc in conjunction with another allocator
* in the same application. Define JEMALLOC_MANGLE in order to cause automatic
* name mangling that matches the API prefixing that happened as a result of
* --with-mangling and/or --with-jemalloc-prefix configuration settings.
*/
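/*
* For example (illustrative): configuring with --with-jemalloc-prefix=jm_
* makes the exported entry point jm_malloc(); defining JEMALLOC_MANGLE then
* lets application code call plain malloc(), which the macros below remap to
* the je_ (and hence prefixed) names.
*/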
#ifdef JEMALLOC_MANGLE
#ifndef JEMALLOC_NO_DEMANGLE
#define JEMALLOC_NO_DEMANGLE
#endif
#define malloc_conf je_malloc_conf
#define malloc_message je_malloc_message
#define malloc je_malloc
#define calloc je_calloc
#define posix_memalign je_posix_memalign
#define aligned_alloc je_aligned_alloc
#define realloc je_realloc
#define free je_free
#define malloc_usable_size je_malloc_usable_size
#define malloc_stats_print je_malloc_stats_print
#define mallctl je_mallctl
#define mallctlnametomib je_mallctlnametomib
#define mallctlbymib je_mallctlbymib
#define memalign je_memalign
#define valloc je_valloc
#ifdef JEMALLOC_EXPERIMENTAL
#define allocm je_allocm
#define rallocm je_rallocm
#define sallocm je_sallocm
#define dallocm je_dallocm
#define nallocm je_nallocm
#endif
#endif
/*
* The je_* macros can be used as stable alternative names for the public
* jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant
* for use in jemalloc itself, but it can be used by application code to
* provide isolation from the name mangling specified via --with-mangling
* and/or --with-jemalloc-prefix.
*/
#ifndef JEMALLOC_NO_DEMANGLE
#undef je_malloc_conf
#undef je_malloc_message
#undef je_malloc
#undef je_calloc
#undef je_posix_memalign
#undef je_aligned_alloc
#undef je_realloc
#undef je_free
#undef je_malloc_usable_size
#undef je_malloc_stats_print
#undef je_mallctl
#undef je_mallctlnametomib
#undef je_mallctlbymib
#undef je_memalign
#undef je_valloc
#ifdef JEMALLOC_EXPERIMENTAL
#undef je_allocm
#undef je_rallocm
#undef je_sallocm
#undef je_dallocm
#undef je_nallocm
#endif
#endif
#ifdef __cplusplus
};
#endif
#endif /* JEMALLOC_H_ */


@ -0,0 +1,267 @@
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
#undef JEMALLOC_PREFIX
#undef JEMALLOC_CPREFIX
/*
* Name mangling for public symbols is controlled by --with-mangling and
* --with-jemalloc-prefix. With default settings the je_ prefix is stripped by
* these macro definitions.
*/
#undef je_malloc_conf
#undef je_malloc_message
#undef je_malloc
#undef je_calloc
#undef je_posix_memalign
#undef je_aligned_alloc
#undef je_realloc
#undef je_free
#undef je_malloc_usable_size
#undef je_malloc_stats_print
#undef je_mallctl
#undef je_mallctlnametomib
#undef je_mallctlbymib
#undef je_memalign
#undef je_valloc
#undef je_allocm
#undef je_rallocm
#undef je_sallocm
#undef je_dallocm
#undef je_nallocm
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#undef JEMALLOC_PRIVATE_NAMESPACE
#undef JEMALLOC_N
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#undef CPU_SPINWAIT
/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */
#undef JEMALLOC_ATOMIC9
/*
* Defined if OSAtomic*() functions are available, as provided by Darwin, and
* documented in the atomic(3) manual page.
*/
#undef JEMALLOC_OSATOMIC
/*
* Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
* __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the
* functions are defined in libgcc instead of being inlines)
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4
/*
* Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and
* __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the
* functions are defined in libgcc instead of being inlines)
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8
/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
#undef JEMALLOC_OSSPIN
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
#undef JEMALLOC_MALLOC_THREAD_CLEANUP
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
#undef JEMALLOC_THREADED_INIT
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
#undef JEMALLOC_MUTEX_INIT_CB
/* Defined if __attribute__((...)) syntax is supported. */
#undef JEMALLOC_HAVE_ATTR
#ifdef JEMALLOC_HAVE_ATTR
# define JEMALLOC_ATTR(s) __attribute__((s))
# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s))
# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s))
# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline)
#elif _MSC_VER
# define JEMALLOC_ATTR(s)
# ifdef DLLEXPORT
# define JEMALLOC_EXPORT __declspec(dllexport)
# else
# define JEMALLOC_EXPORT __declspec(dllimport)
# endif
# define JEMALLOC_ALIGNED(s) __declspec(align(s))
# define JEMALLOC_SECTION(s) __declspec(allocate(s))
# define JEMALLOC_NOINLINE __declspec(noinline)
#else
# define JEMALLOC_ATTR(s)
# define JEMALLOC_EXPORT
# define JEMALLOC_ALIGNED(s)
# define JEMALLOC_SECTION(s)
# define JEMALLOC_NOINLINE
#endif
/* Defined if sbrk() is supported. */
#undef JEMALLOC_HAVE_SBRK
/* Non-empty if the tls_model attribute is supported. */
#undef JEMALLOC_TLS_MODEL
/* JEMALLOC_CC_SILENCE enables code that silences spurious compiler warnings. */
#undef JEMALLOC_CC_SILENCE
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
#undef JEMALLOC_DEBUG
/* JEMALLOC_STATS enables statistics calculation. */
#undef JEMALLOC_STATS
/* JEMALLOC_PROF enables allocation profiling. */
#undef JEMALLOC_PROF
/* Use libunwind for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBUNWIND
/* Use libgcc for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBGCC
/* Use gcc intrinsics for profile backtracing if defined. */
#undef JEMALLOC_PROF_GCC
/*
* JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
* This makes it possible to allocate/deallocate objects without any locking
* when the cache is in the steady state.
*/
#undef JEMALLOC_TCACHE
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
* segment (DSS).
*/
#undef JEMALLOC_DSS
/* Support memory filling (junk/zero/quarantine/redzone). */
#undef JEMALLOC_FILL
/* Support the experimental API. */
#undef JEMALLOC_EXPERIMENTAL
/* Support utrace(2)-based tracing. */
#undef JEMALLOC_UTRACE
/* Support Valgrind. */
#undef JEMALLOC_VALGRIND
/* Support optional abort() on OOM. */
#undef JEMALLOC_XMALLOC
/* Support lazy locking (avoid locking unless a second thread is launched). */
#undef JEMALLOC_LAZY_LOCK
/* One page is 2^STATIC_PAGE_SHIFT bytes. */
#undef STATIC_PAGE_SHIFT
/*
* If defined, use munmap() to unmap freed chunks, rather than storing them for
* later reuse. This is disabled by default on Linux because common sequences
* of mmap()/munmap() calls will cause virtual memory map holes.
*/
#undef JEMALLOC_MUNMAP
/*
* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is
* disabled by default because it is Linux-specific and it will cause virtual
* memory map holes, much like munmap(2) does.
*/
#undef JEMALLOC_MREMAP
/* TLS is used to map arenas and magazine caches to threads. */
#undef JEMALLOC_TLS
/*
* JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
* within jemalloc-owned chunks before dereferencing them.
*/
#undef JEMALLOC_IVSALLOC
/*
* Define overrides for non-standard allocator-related functions if they
* are present on the system.
*/
#undef JEMALLOC_OVERRIDE_MEMALIGN
#undef JEMALLOC_OVERRIDE_VALLOC
/*
* At least Linux omits the "const" in:
*
* size_t malloc_usable_size(const void *ptr);
*
* Match the operating system's prototype.
*/
#undef JEMALLOC_USABLE_SIZE_CONST
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
#undef JEMALLOC_ZONE_VERSION
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched.
* madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
* unused, such that they will be discarded rather
* than swapped out.
*/
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_FREE
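/*
* Sketch of the purge call these settings select between (illustrative; the
* actual dispatch lives in the allocator's page-purging code):
*
*	#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
*		madvise(addr, length, MADV_DONTNEED);
*	#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
*		madvise(addr, length, MADV_FREE);
*	#endif
*/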
/*
* Define if operating system has alloca.h header.
*/
#undef JEMALLOC_HAS_ALLOCA_H
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#undef LG_SIZEOF_PTR
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#undef LG_SIZEOF_INT
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#undef LG_SIZEOF_LONG
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#undef LG_SIZEOF_INTMAX_T


@ -0,0 +1,313 @@
// ISO C9x compliant inttypes.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_INTTYPES_H_ // [
#define _MSC_INTTYPES_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include "stdint.h"
// 7.8 Format conversion of integer types
typedef struct {
intmax_t quot;
intmax_t rem;
} imaxdiv_t;
// 7.8.1 Macros for format specifiers
#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198
#ifdef _WIN64
# define __PRI64_PREFIX "l"
# define __PRIPTR_PREFIX "l"
#else
# define __PRI64_PREFIX "ll"
# define __PRIPTR_PREFIX
#endif
// The fprintf macros for signed integers are:
#define PRId8 "d"
#define PRIi8 "i"
#define PRIdLEAST8 "d"
#define PRIiLEAST8 "i"
#define PRIdFAST8 "d"
#define PRIiFAST8 "i"
#define PRId16 "hd"
#define PRIi16 "hi"
#define PRIdLEAST16 "hd"
#define PRIiLEAST16 "hi"
#define PRIdFAST16 "hd"
#define PRIiFAST16 "hi"
#define PRId32 "d"
#define PRIi32 "i"
#define PRIdLEAST32 "d"
#define PRIiLEAST32 "i"
#define PRIdFAST32 "d"
#define PRIiFAST32 "i"
#define PRId64 __PRI64_PREFIX "d"
#define PRIi64 __PRI64_PREFIX "i"
#define PRIdLEAST64 __PRI64_PREFIX "d"
#define PRIiLEAST64 __PRI64_PREFIX "i"
#define PRIdFAST64 __PRI64_PREFIX "d"
#define PRIiFAST64 __PRI64_PREFIX "i"
#define PRIdMAX __PRI64_PREFIX "d"
#define PRIiMAX __PRI64_PREFIX "i"
#define PRIdPTR __PRIPTR_PREFIX "d"
#define PRIiPTR __PRIPTR_PREFIX "i"
// The fprintf macros for unsigned integers are:
#define PRIo8 "o"
#define PRIu8 "u"
#define PRIx8 "x"
#define PRIX8 "X"
#define PRIoLEAST8 "o"
#define PRIuLEAST8 "u"
#define PRIxLEAST8 "x"
#define PRIXLEAST8 "X"
#define PRIoFAST8 "o"
#define PRIuFAST8 "u"
#define PRIxFAST8 "x"
#define PRIXFAST8 "X"
#define PRIo16 "ho"
#define PRIu16 "hu"
#define PRIx16 "hx"
#define PRIX16 "hX"
#define PRIoLEAST16 "ho"
#define PRIuLEAST16 "hu"
#define PRIxLEAST16 "hx"
#define PRIXLEAST16 "hX"
#define PRIoFAST16 "ho"
#define PRIuFAST16 "hu"
#define PRIxFAST16 "hx"
#define PRIXFAST16 "hX"
#define PRIo32 "o"
#define PRIu32 "u"
#define PRIx32 "x"
#define PRIX32 "X"
#define PRIoLEAST32 "o"
#define PRIuLEAST32 "u"
#define PRIxLEAST32 "x"
#define PRIXLEAST32 "X"
#define PRIoFAST32 "o"
#define PRIuFAST32 "u"
#define PRIxFAST32 "x"
#define PRIXFAST32 "X"
#define PRIo64 __PRI64_PREFIX "o"
#define PRIu64 __PRI64_PREFIX "u"
#define PRIx64 __PRI64_PREFIX "x"
#define PRIX64 __PRI64_PREFIX "X"
#define PRIoLEAST64 __PRI64_PREFIX "o"
#define PRIuLEAST64 __PRI64_PREFIX "u"
#define PRIxLEAST64 __PRI64_PREFIX "x"
#define PRIXLEAST64 __PRI64_PREFIX "X"
#define PRIoFAST64 __PRI64_PREFIX "o"
#define PRIuFAST64 __PRI64_PREFIX "u"
#define PRIxFAST64 __PRI64_PREFIX "x"
#define PRIXFAST64 __PRI64_PREFIX "X"
#define PRIoMAX __PRI64_PREFIX "o"
#define PRIuMAX __PRI64_PREFIX "u"
#define PRIxMAX __PRI64_PREFIX "x"
#define PRIXMAX __PRI64_PREFIX "X"
#define PRIoPTR __PRIPTR_PREFIX "o"
#define PRIuPTR __PRIPTR_PREFIX "u"
#define PRIxPTR __PRIPTR_PREFIX "x"
#define PRIXPTR __PRIPTR_PREFIX "X"
// The fscanf macros for signed integers are:
#define SCNd8 "d"
#define SCNi8 "i"
#define SCNdLEAST8 "d"
#define SCNiLEAST8 "i"
#define SCNdFAST8 "d"
#define SCNiFAST8 "i"
#define SCNd16 "hd"
#define SCNi16 "hi"
#define SCNdLEAST16 "hd"
#define SCNiLEAST16 "hi"
#define SCNdFAST16 "hd"
#define SCNiFAST16 "hi"
#define SCNd32 "ld"
#define SCNi32 "li"
#define SCNdLEAST32 "ld"
#define SCNiLEAST32 "li"
#define SCNdFAST32 "ld"
#define SCNiFAST32 "li"
#define SCNd64 "I64d"
#define SCNi64 "I64i"
#define SCNdLEAST64 "I64d"
#define SCNiLEAST64 "I64i"
#define SCNdFAST64 "I64d"
#define SCNiFAST64 "I64i"
#define SCNdMAX "I64d"
#define SCNiMAX "I64i"
#ifdef _WIN64 // [
# define SCNdPTR "I64d"
# define SCNiPTR "I64i"
#else // _WIN64 ][
# define SCNdPTR "ld"
# define SCNiPTR "li"
#endif // _WIN64 ]
// The fscanf macros for unsigned integers are:
#define SCNo8 "o"
#define SCNu8 "u"
#define SCNx8 "x"
#define SCNX8 "X"
#define SCNoLEAST8 "o"
#define SCNuLEAST8 "u"
#define SCNxLEAST8 "x"
#define SCNXLEAST8 "X"
#define SCNoFAST8 "o"
#define SCNuFAST8 "u"
#define SCNxFAST8 "x"
#define SCNXFAST8 "X"
#define SCNo16 "ho"
#define SCNu16 "hu"
#define SCNx16 "hx"
#define SCNX16 "hX"
#define SCNoLEAST16 "ho"
#define SCNuLEAST16 "hu"
#define SCNxLEAST16 "hx"
#define SCNXLEAST16 "hX"
#define SCNoFAST16 "ho"
#define SCNuFAST16 "hu"
#define SCNxFAST16 "hx"
#define SCNXFAST16 "hX"
#define SCNo32 "lo"
#define SCNu32 "lu"
#define SCNx32 "lx"
#define SCNX32 "lX"
#define SCNoLEAST32 "lo"
#define SCNuLEAST32 "lu"
#define SCNxLEAST32 "lx"
#define SCNXLEAST32 "lX"
#define SCNoFAST32 "lo"
#define SCNuFAST32 "lu"
#define SCNxFAST32 "lx"
#define SCNXFAST32 "lX"
#define SCNo64 "I64o"
#define SCNu64 "I64u"
#define SCNx64 "I64x"
#define SCNX64 "I64X"
#define SCNoLEAST64 "I64o"
#define SCNuLEAST64 "I64u"
#define SCNxLEAST64 "I64x"
#define SCNXLEAST64 "I64X"
#define SCNoFAST64 "I64o"
#define SCNuFAST64 "I64u"
#define SCNxFAST64 "I64x"
#define SCNXFAST64 "I64X"
#define SCNoMAX "I64o"
#define SCNuMAX "I64u"
#define SCNxMAX "I64x"
#define SCNXMAX "I64X"
#ifdef _WIN64 // [
# define SCNoPTR "I64o"
# define SCNuPTR "I64u"
# define SCNxPTR "I64x"
# define SCNXPTR "I64X"
#else // _WIN64 ][
# define SCNoPTR "lo"
# define SCNuPTR "lu"
# define SCNxPTR "lx"
# define SCNXPTR "lX"
#endif // _WIN64 ]
#endif // __STDC_FORMAT_MACROS ]
// 7.8.2 Functions for greatest-width integer types
// 7.8.2.1 The imaxabs function
#define imaxabs _abs64
// 7.8.2.2 The imaxdiv function
// This is modified version of div() function from Microsoft's div.c found
// in %MSVC.NET%\crt\src\div.c
#ifdef STATIC_IMAXDIV // [
static
#else // STATIC_IMAXDIV ][
_inline
#endif // STATIC_IMAXDIV ]
imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
{
imaxdiv_t result;
result.quot = numer / denom;
result.rem = numer % denom;
if (numer < 0 && result.rem > 0) {
// did division wrong; must fix up
++result.quot;
result.rem -= denom;
}
return result;
}
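// Worked example (illustrative): with numer == -7 and denom == 2, a divide
// that rounds toward negative infinity gives quot == -4, rem == 1; the fixup
// above converts this to the C99-required truncation toward zero, quot == -3,
// rem == -1, preserving the identity quot * denom + rem == numer.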
// 7.8.2.3 The strtoimax and strtoumax functions
#define strtoimax _strtoi64
#define strtoumax _strtoui64
// 7.8.2.4 The wcstoimax and wcstoumax functions
#define wcstoimax _wcstoi64
#define wcstoumax _wcstoui64
#endif // _MSC_INTTYPES_H_ ]


@ -0,0 +1,16 @@
#ifndef stdbool_h
#define stdbool_h
#include <wtypes.h>
/* MSVC doesn't define _Bool or bool in C, but does have BOOL */
/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */
typedef BOOL _Bool;
#define bool _Bool
#define true 1
#define false 0
#define __bool_true_false_are_defined 1
#endif /* stdbool_h */


@ -0,0 +1,247 @@
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006-2008 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h>
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
// or the compiler gives many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#ifdef __cplusplus
extern "C" {
#endif
# include <wchar.h>
#ifdef __cplusplus
}
#endif
// Define _W64 macros to mark types changing their size, like intptr_t.
#ifndef _W64
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
# define _W64 __w64
# else
# define _W64
# endif
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
// Visual Studio 6 and Embedded Visual C++ 4 don't
// realize that, e.g., char has the same size as __int8,
// so we give up on __intX for them.
#if (_MSC_VER < 1300)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#else
typedef signed __int8 int8_t;
typedef signed __int16 int16_t;
typedef signed __int32 int32_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
#endif
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
#ifdef _WIN64 // [
typedef signed __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef _W64 signed int intptr_t;
typedef _W64 unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Limits of other integer types
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// 7.18.4.1 Macros for minimum-width integer constants
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
#define INTMAX_C INT64_C
#define UINTMAX_C UINT64_C
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]


@ -0,0 +1,23 @@
#ifndef strings_h
#define strings_h
/* MSVC doesn't define ffs/ffsl. This dummy strings.h header provides
 * both. */
#include <intrin.h>
#pragma intrinsic(_BitScanForward)
static __forceinline int ffsl(long x)
{
unsigned long i;
if (_BitScanForward(&i, x))
return (i + 1);
return (0);
}
static __forceinline int ffs(int x)
{
return (ffsl(x));
}
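/*
 * Worked example (illustrative): ffsl(0x18) finds the lowest set bit at
 * zero-based index 3 and returns 4; ffsl(0) returns 0 because
 * _BitScanForward reports that no bit is set. This matches the POSIX ffs()
 * contract of 1-based bit positions, with 0 meaning "no bits set".
 */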
#endif

src/rt/jemalloc/install-sh Executable file

@ -0,0 +1,250 @@
#! /bin/sh
#
# install - install a program, script, or datafile
# This comes from X11R5 (mit/util/scripts/install.sh).
#
# Copyright 1991 by the Massachusetts Institute of Technology
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of M.I.T. not be used in advertising or
# publicity pertaining to distribution of the software without specific,
# written prior permission. M.I.T. makes no representations about the
# suitability of this software for any purpose. It is provided "as is"
# without express or implied warranty.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch. It can only install one file at a time, a restriction
# shared with many OS's install programs.
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
# put in absolute paths if you don't have them in your path; or use env. vars.
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"
transformbasename=""
transform_arg=""
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""
dir_arg=""
while [ x"$1" != x ]; do
case $1 in
-c) instcmd="$cpprog"
shift
continue;;
-d) dir_arg=true
shift
continue;;
-m) chmodcmd="$chmodprog $2"
shift
shift
continue;;
-o) chowncmd="$chownprog $2"
shift
shift
continue;;
-g) chgrpcmd="$chgrpprog $2"
shift
shift
continue;;
-s) stripcmd="$stripprog"
shift
continue;;
-t=*) transformarg=`echo $1 | sed 's/-t=//'`
shift
continue;;
-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
shift
continue;;
*) if [ x"$src" = x ]
then
src=$1
else
# this colon is to work around a 386BSD /bin/sh bug
:
dst=$1
fi
shift
continue;;
esac
done
if [ x"$src" = x ]
then
echo "install: no input file specified"
exit 1
else
true
fi
if [ x"$dir_arg" != x ]; then
dst=$src
src=""
if [ -d $dst ]; then
instcmd=:
else
instcmd=mkdir
fi
else
# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if [ -f $src -o -d $src ]
then
true
else
echo "install: $src does not exist"
exit 1
fi
if [ x"$dst" = x ]
then
echo "install: no destination specified"
exit 1
else
true
fi
# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic
if [ -d $dst ]
then
dst="$dst"/`basename $src`
else
true
fi
fi
## this sed command emulates the dirname command
dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
# Make sure that the destination directory exists.
# this part is taken from Noah Friedman's mkinstalldirs script
# Skip lots of stat calls in the usual case.
if [ ! -d "$dstdir" ]; then
defaultIFS='
'
IFS="${IFS-${defaultIFS}}"
oIFS="${IFS}"
# Some sh's can't handle IFS=/ for some reason.
IFS='%'
set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
IFS="${oIFS}"
pathcomp=''
while [ $# -ne 0 ] ; do
pathcomp="${pathcomp}${1}"
shift
if [ ! -d "${pathcomp}" ] ;
then
$mkdirprog "${pathcomp}"
else
true
fi
pathcomp="${pathcomp}/"
done
fi
if [ x"$dir_arg" != x ]
then
$doit $instcmd $dst &&
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
else
# If we're going to rename the final executable, determine the name now.
if [ x"$transformarg" = x ]
then
dstfile=`basename $dst`
else
dstfile=`basename $dst $transformbasename |
sed $transformarg`$transformbasename
fi
# don't allow the sed command to completely eliminate the filename
if [ x"$dstfile" = x ]
then
dstfile=`basename $dst`
else
true
fi
# Make a temp file name in the proper directory.
dsttmp=$dstdir/#inst.$$#
# Move or copy the file name to the temp name
$doit $instcmd $src $dsttmp &&
trap "rm -f ${dsttmp}" 0 &&
# and set any options; do chmod last to preserve setuid bits
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $instcmd $src $dsttmp" command.
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
# Now rename the file to the real destination.
$doit $rmcmd -f $dstdir/$dstfile &&
$doit $mvcmd $dsttmp $dstdir/$dstfile
fi &&
exit 0

2365
src/rt/jemalloc/src/arena.c Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,2 @@
#define JEMALLOC_ATOMIC_C_
#include "jemalloc/internal/jemalloc_internal.h"

142
src/rt/jemalloc/src/base.c Normal file
View File

@ -0,0 +1,142 @@
#define JEMALLOC_BASE_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
static malloc_mutex_t base_mtx;
/*
* Current pages that are being used for internal memory allocations. These
* pages are carved up in cacheline-size quanta, so that there is no chance of
* false cache line sharing.
*/
static void *base_pages;
static void *base_next_addr;
static void *base_past_addr; /* Addr immediately past base_pages. */
static extent_node_t *base_nodes;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static bool base_pages_alloc(size_t minsize);
/******************************************************************************/
static bool
base_pages_alloc(size_t minsize)
{
size_t csize;
bool zero;
assert(minsize != 0);
csize = CHUNK_CEILING(minsize);
zero = false;
base_pages = chunk_alloc(csize, chunksize, true, &zero,
chunk_dss_prec_get());
if (base_pages == NULL)
return (true);
base_next_addr = base_pages;
base_past_addr = (void *)((uintptr_t)base_pages + csize);
return (false);
}
void *
base_alloc(size_t size)
{
void *ret;
size_t csize;
/* Round size up to nearest multiple of the cacheline size. */
csize = CACHELINE_CEILING(size);
malloc_mutex_lock(&base_mtx);
/* Make sure there's enough space for the allocation. */
if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
if (base_pages_alloc(csize)) {
malloc_mutex_unlock(&base_mtx);
return (NULL);
}
}
/* Allocate. */
ret = base_next_addr;
base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
malloc_mutex_unlock(&base_mtx);
VALGRIND_MAKE_MEM_UNDEFINED(ret, csize);
return (ret);
}
void *
base_calloc(size_t number, size_t size)
{
void *ret = base_alloc(number * size);
if (ret != NULL)
memset(ret, 0, number * size);
return (ret);
}
extent_node_t *
base_node_alloc(void)
{
extent_node_t *ret;
malloc_mutex_lock(&base_mtx);
if (base_nodes != NULL) {
ret = base_nodes;
base_nodes = *(extent_node_t **)ret;
malloc_mutex_unlock(&base_mtx);
VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t));
} else {
malloc_mutex_unlock(&base_mtx);
ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
}
return (ret);
}
void
base_node_dealloc(extent_node_t *node)
{
VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
malloc_mutex_lock(&base_mtx);
*(extent_node_t **)node = base_nodes;
base_nodes = node;
malloc_mutex_unlock(&base_mtx);
}
bool
base_boot(void)
{
base_nodes = NULL;
if (malloc_mutex_init(&base_mtx))
return (true);
return (false);
}
void
base_prefork(void)
{
malloc_mutex_prefork(&base_mtx);
}
void
base_postfork_parent(void)
{
malloc_mutex_postfork_parent(&base_mtx);
}
void
base_postfork_child(void)
{
malloc_mutex_postfork_child(&base_mtx);
}
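A standalone sketch of the cacheline rounding base_alloc() performs; CACHELINE is assumed to be 64 bytes here (the real constant comes from jemalloc_internal.h):

#include <stdio.h>
#define CACHELINE ((size_t)64)
#define CACHELINE_CEILING(s) (((s) + CACHELINE - 1) & ~(CACHELINE - 1))
int main(void)
{
    printf("%zu\n", CACHELINE_CEILING((size_t)1));  /* 64 */
    printf("%zu\n", CACHELINE_CEILING((size_t)64)); /* 64 */
    printf("%zu\n", CACHELINE_CEILING((size_t)65)); /* 128 */
    return (0);
}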

View File

@ -0,0 +1,90 @@
#define JEMALLOC_BITMAP_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static size_t bits2groups(size_t nbits);
/******************************************************************************/
static size_t
bits2groups(size_t nbits)
{
return ((nbits >> LG_BITMAP_GROUP_NBITS) +
!!(nbits & BITMAP_GROUP_NBITS_MASK));
}
void
bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
{
unsigned i;
size_t group_count;
assert(nbits > 0);
assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
/*
* Compute the number of groups necessary to store nbits bits, and
* progressively work upward through the levels until reaching a level
* that requires only one group.
*/
binfo->levels[0].group_offset = 0;
group_count = bits2groups(nbits);
for (i = 1; group_count > 1; i++) {
assert(i < BITMAP_MAX_LEVELS);
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
group_count = bits2groups(group_count);
}
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
binfo->nlevels = i;
binfo->nbits = nbits;
}
size_t
bitmap_info_ngroups(const bitmap_info_t *binfo)
{
return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
}
size_t
bitmap_size(size_t nbits)
{
bitmap_info_t binfo;
bitmap_info_init(&binfo, nbits);
return (bitmap_info_ngroups(&binfo));
}
void
bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
{
size_t extra;
unsigned i;
/*
* Bits are actually inverted with regard to the external bitmap
* interface, so the bitmap starts out with all 1 bits, except for
* trailing unused bits (if any). Note that each group uses bit 0 to
* correspond to the first logical bit in the group, so extra bits
* are the most significant bits of the last group.
*/
memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
LG_SIZEOF_BITMAP);
extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
& BITMAP_GROUP_NBITS_MASK;
if (extra != 0)
bitmap[binfo->levels[1].group_offset - 1] >>= extra;
for (i = 1; i < binfo->nlevels; i++) {
size_t group_count = binfo->levels[i].group_offset -
binfo->levels[i-1].group_offset;
extra = (BITMAP_GROUP_NBITS - (group_count &
BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
if (extra != 0)
bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
}
}
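A standalone sketch of the level layout that bitmap_info_init() computes, assuming 64-bit groups (LG_BITMAP_GROUP_NBITS == 6), as on typical 64-bit targets:

#include <stdio.h>
#define LG_GROUP_NBITS 6
#define GROUP_NBITS    ((size_t)1 << LG_GROUP_NBITS)
#define GROUP_MASK     (GROUP_NBITS - 1)
static size_t bits2groups(size_t nbits)
{
    return ((nbits >> LG_GROUP_NBITS) + !!(nbits & GROUP_MASK));
}
int main(void)
{
    /* 100 bits need 2 groups at level 0 and 1 group at level 1. */
    size_t g0 = bits2groups(100); /* 2 */
    size_t g1 = bits2groups(g0);  /* 1 */
    printf("level0=%zu level1=%zu total=%zu\n", g0, g1, g0 + g1);
    return (0);
}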

385
src/rt/jemalloc/src/chunk.c Normal file
View File

@ -0,0 +1,385 @@
#define JEMALLOC_CHUNK_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
const char *opt_dss = DSS_DEFAULT;
size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
malloc_mutex_t chunks_mtx;
chunk_stats_t stats_chunks;
/*
* Trees of chunks that were previously allocated (trees differ only in node
* ordering). These are used when allocating chunks, in an attempt to re-use
* address space. Depending on function, different tree orderings are needed,
* which is why there are two trees with the same contents.
*/
static extent_tree_t chunks_szad_mmap;
static extent_tree_t chunks_ad_mmap;
static extent_tree_t chunks_szad_dss;
static extent_tree_t chunks_ad_dss;
rtree_t *chunks_rtree;
/* Various chunk-related settings. */
size_t chunksize;
size_t chunksize_mask; /* (chunksize - 1). */
size_t chunk_npages;
size_t map_bias;
size_t arena_maxclass; /* Max size class for arenas. */
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void *chunk_recycle(extent_tree_t *chunks_szad,
extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base,
bool *zero);
static void chunk_record(extent_tree_t *chunks_szad,
extent_tree_t *chunks_ad, void *chunk, size_t size);
/******************************************************************************/
static void *
chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
size_t alignment, bool base, bool *zero)
{
void *ret;
extent_node_t *node;
extent_node_t key;
size_t alloc_size, leadsize, trailsize;
bool zeroed;
if (base) {
/*
* This function may need to call base_node_{,de}alloc(), but
* the current chunk allocation request is on behalf of the
* base allocator. Avoid deadlock (and if that weren't an
* issue, potential for infinite recursion) by returning NULL.
*/
return (NULL);
}
alloc_size = size + alignment - chunksize;
/* Beware size_t wrap-around. */
if (alloc_size < size)
return (NULL);
key.addr = NULL;
key.size = alloc_size;
malloc_mutex_lock(&chunks_mtx);
node = extent_tree_szad_nsearch(chunks_szad, &key);
if (node == NULL) {
malloc_mutex_unlock(&chunks_mtx);
return (NULL);
}
leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
(uintptr_t)node->addr;
assert(node->size >= leadsize + size);
trailsize = node->size - leadsize - size;
ret = (void *)((uintptr_t)node->addr + leadsize);
zeroed = node->zeroed;
if (zeroed)
*zero = true;
/* Remove node from the tree. */
extent_tree_szad_remove(chunks_szad, node);
extent_tree_ad_remove(chunks_ad, node);
if (leadsize != 0) {
/* Insert the leading space as a smaller chunk. */
node->size = leadsize;
extent_tree_szad_insert(chunks_szad, node);
extent_tree_ad_insert(chunks_ad, node);
node = NULL;
}
if (trailsize != 0) {
/* Insert the trailing space as a smaller chunk. */
if (node == NULL) {
/*
* An additional node is required, but
* base_node_alloc() can cause a new base chunk to be
* allocated. Drop chunks_mtx in order to avoid
* deadlock, and if node allocation fails, deallocate
* the result before returning an error.
*/
malloc_mutex_unlock(&chunks_mtx);
node = base_node_alloc();
if (node == NULL) {
chunk_dealloc(ret, size, true);
return (NULL);
}
malloc_mutex_lock(&chunks_mtx);
}
node->addr = (void *)((uintptr_t)(ret) + size);
node->size = trailsize;
node->zeroed = zeroed;
extent_tree_szad_insert(chunks_szad, node);
extent_tree_ad_insert(chunks_ad, node);
node = NULL;
}
malloc_mutex_unlock(&chunks_mtx);
if (node != NULL)
base_node_dealloc(node);
if (*zero) {
if (zeroed == false)
memset(ret, 0, size);
else if (config_debug) {
size_t i;
size_t *p = (size_t *)(uintptr_t)ret;
VALGRIND_MAKE_MEM_DEFINED(ret, size);
for (i = 0; i < size / sizeof(size_t); i++)
assert(p[i] == 0);
}
}
return (ret);
}
/*
* If the caller specifies (*zero == false), it is still possible to receive
* zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc()
* takes advantage of this to avoid demanding zeroed chunks, but taking
* advantage of them if they are returned.
*/
void *
chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
dss_prec_t dss_prec)
{
void *ret;
assert(size != 0);
assert((size & chunksize_mask) == 0);
assert(alignment != 0);
assert((alignment & chunksize_mask) == 0);
/* "primary" dss. */
if (config_dss && dss_prec == dss_prec_primary) {
if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
alignment, base, zero)) != NULL)
goto label_return;
if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
goto label_return;
}
/* mmap. */
if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size,
alignment, base, zero)) != NULL)
goto label_return;
if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
goto label_return;
/* "secondary" dss. */
if (config_dss && dss_prec == dss_prec_secondary) {
if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
alignment, base, zero)) != NULL)
goto label_return;
if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
goto label_return;
}
/* All strategies for allocation failed. */
ret = NULL;
label_return:
if (ret != NULL) {
if (config_ivsalloc && base == false) {
if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
chunk_dealloc(ret, size, true);
return (NULL);
}
}
if (config_stats || config_prof) {
bool gdump;
malloc_mutex_lock(&chunks_mtx);
if (config_stats)
stats_chunks.nchunks += (size / chunksize);
stats_chunks.curchunks += (size / chunksize);
if (stats_chunks.curchunks > stats_chunks.highchunks) {
stats_chunks.highchunks =
stats_chunks.curchunks;
if (config_prof)
gdump = true;
} else if (config_prof)
gdump = false;
malloc_mutex_unlock(&chunks_mtx);
if (config_prof && opt_prof && opt_prof_gdump && gdump)
prof_gdump();
}
if (config_valgrind)
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
}
assert(CHUNK_ADDR2BASE(ret) == ret);
return (ret);
}
static void
chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk,
size_t size)
{
bool unzeroed;
extent_node_t *xnode, *node, *prev, key;
unzeroed = pages_purge(chunk, size);
VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
/*
* Allocate a node before acquiring chunks_mtx even though it might not
* be needed, because base_node_alloc() may cause a new base chunk to
* be allocated, which could cause deadlock if chunks_mtx were already
* held.
*/
xnode = base_node_alloc();
malloc_mutex_lock(&chunks_mtx);
key.addr = (void *)((uintptr_t)chunk + size);
node = extent_tree_ad_nsearch(chunks_ad, &key);
/* Try to coalesce forward. */
if (node != NULL && node->addr == key.addr) {
/*
* Coalesce chunk with the following address range. This does
* not change the position within chunks_ad, so only
* remove/insert from/into chunks_szad.
*/
extent_tree_szad_remove(chunks_szad, node);
node->addr = chunk;
node->size += size;
node->zeroed = (node->zeroed && (unzeroed == false));
extent_tree_szad_insert(chunks_szad, node);
if (xnode != NULL)
base_node_dealloc(xnode);
} else {
/* Coalescing forward failed, so insert a new node. */
if (xnode == NULL) {
/*
* base_node_alloc() failed, which is an exceedingly
* unlikely failure. Leak chunk; its pages have
* already been purged, so this is only a virtual
* memory leak.
*/
malloc_mutex_unlock(&chunks_mtx);
return;
}
node = xnode;
node->addr = chunk;
node->size = size;
node->zeroed = (unzeroed == false);
extent_tree_ad_insert(chunks_ad, node);
extent_tree_szad_insert(chunks_szad, node);
}
/* Try to coalesce backward. */
prev = extent_tree_ad_prev(chunks_ad, node);
if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
chunk) {
/*
* Coalesce chunk with the previous address range. This does
* not change the position within chunks_ad, so only
* remove/insert node from/into chunks_szad.
*/
extent_tree_szad_remove(chunks_szad, prev);
extent_tree_ad_remove(chunks_ad, prev);
extent_tree_szad_remove(chunks_szad, node);
node->addr = prev->addr;
node->size += prev->size;
node->zeroed = (node->zeroed && prev->zeroed);
extent_tree_szad_insert(chunks_szad, node);
base_node_dealloc(prev);
}
malloc_mutex_unlock(&chunks_mtx);
}
void
chunk_unmap(void *chunk, size_t size)
{
assert(chunk != NULL);
assert(CHUNK_ADDR2BASE(chunk) == chunk);
assert(size != 0);
assert((size & chunksize_mask) == 0);
if (config_dss && chunk_in_dss(chunk))
chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size);
else if (chunk_dealloc_mmap(chunk, size))
chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size);
}
void
chunk_dealloc(void *chunk, size_t size, bool unmap)
{
assert(chunk != NULL);
assert(CHUNK_ADDR2BASE(chunk) == chunk);
assert(size != 0);
assert((size & chunksize_mask) == 0);
if (config_ivsalloc)
rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
if (config_stats || config_prof) {
malloc_mutex_lock(&chunks_mtx);
assert(stats_chunks.curchunks >= (size / chunksize));
stats_chunks.curchunks -= (size / chunksize);
malloc_mutex_unlock(&chunks_mtx);
}
if (unmap)
chunk_unmap(chunk, size);
}
bool
chunk_boot(void)
{
/* Set variables according to the value of opt_lg_chunk. */
chunksize = (ZU(1) << opt_lg_chunk);
assert(chunksize >= PAGE);
chunksize_mask = chunksize - 1;
chunk_npages = (chunksize >> LG_PAGE);
if (config_stats || config_prof) {
if (malloc_mutex_init(&chunks_mtx))
return (true);
memset(&stats_chunks, 0, sizeof(chunk_stats_t));
}
if (config_dss && chunk_dss_boot())
return (true);
extent_tree_szad_new(&chunks_szad_mmap);
extent_tree_ad_new(&chunks_ad_mmap);
extent_tree_szad_new(&chunks_szad_dss);
extent_tree_ad_new(&chunks_ad_dss);
if (config_ivsalloc) {
chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
opt_lg_chunk);
if (chunks_rtree == NULL)
return (true);
}
return (false);
}
void
chunk_prefork(void)
{
malloc_mutex_lock(&chunks_mtx);
if (config_ivsalloc)
rtree_prefork(chunks_rtree);
chunk_dss_prefork();
}
void
chunk_postfork_parent(void)
{
chunk_dss_postfork_parent();
if (config_ivsalloc)
rtree_postfork_parent(chunks_rtree);
malloc_mutex_postfork_parent(&chunks_mtx);
}
void
chunk_postfork_child(void)
{
chunk_dss_postfork_child();
if (config_ivsalloc)
rtree_postfork_child(chunks_rtree);
malloc_mutex_postfork_child(&chunks_mtx);
}
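A sketch of the over-allocation bound chunk_recycle() relies on: an extent of size + alignment - chunksize is always large enough to contain an aligned region of the requested size, because recycled extents are chunk-aligned. Chunksize is assumed to be 4 MiB here and the addresses are hypothetical:

#include <stdio.h>
#include <stdint.h>
#define CHUNKSIZE ((size_t)4 << 20)
static size_t alignment_ceiling(size_t addr, size_t alignment)
{
    /* Valid for power-of-two alignments. */
    return ((addr + alignment - 1) & ~(alignment - 1));
}
int main(void)
{
    size_t size = 8 * CHUNKSIZE, alignment = 4 * CHUNKSIZE;
    size_t alloc_size = size + alignment - CHUNKSIZE;
    size_t addr = 3 * CHUNKSIZE; /* hypothetical chunk-aligned extent base */
    size_t lead = alignment_ceiling(addr, alignment) - addr;
    printf("alloc=%zu lead=%zu fits=%d\n", alloc_size, lead,
        lead + size <= alloc_size);
    return (0);
}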

View File

@ -0,0 +1,197 @@
#define JEMALLOC_CHUNK_DSS_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
const char *dss_prec_names[] = {
"disabled",
"primary",
"secondary",
"N/A"
};
/* Current dss precedence default, used when creating new arenas. */
static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT;
/*
* Protects sbrk() calls. This avoids malloc races among threads, though it
* does not protect against races with threads that call sbrk() directly.
*/
static malloc_mutex_t dss_mtx;
/* Base address of the DSS. */
static void *dss_base;
/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
static void *dss_prev;
/* Current upper limit on DSS addresses. */
static void *dss_max;
/******************************************************************************/
#ifndef JEMALLOC_HAVE_SBRK
static void *
sbrk(intptr_t increment)
{
not_implemented();
return (NULL);
}
#endif
dss_prec_t
chunk_dss_prec_get(void)
{
dss_prec_t ret;
if (config_dss == false)
return (dss_prec_disabled);
malloc_mutex_lock(&dss_mtx);
ret = dss_prec_default;
malloc_mutex_unlock(&dss_mtx);
return (ret);
}
bool
chunk_dss_prec_set(dss_prec_t dss_prec)
{
if (config_dss == false)
return (true);
malloc_mutex_lock(&dss_mtx);
dss_prec_default = dss_prec;
malloc_mutex_unlock(&dss_mtx);
return (false);
}
void *
chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
{
void *ret;
cassert(config_dss);
assert(size > 0 && (size & chunksize_mask) == 0);
assert(alignment > 0 && (alignment & chunksize_mask) == 0);
/*
* sbrk() uses a signed increment argument, so take care not to
* interpret a huge allocation request as a negative increment.
*/
if ((intptr_t)size < 0)
return (NULL);
malloc_mutex_lock(&dss_mtx);
if (dss_prev != (void *)-1) {
size_t gap_size, cpad_size;
void *cpad, *dss_next;
intptr_t incr;
/*
* The loop is necessary to recover from races with other
* threads that are using the DSS for something other than
* malloc.
*/
do {
/* Get the current end of the DSS. */
dss_max = sbrk(0);
/*
* Calculate how much padding is necessary to
* chunk-align the end of the DSS.
*/
gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) &
chunksize_mask;
/*
* Compute how much chunk-aligned pad space (if any) is
* necessary to satisfy alignment. This space can be
* recycled for later use.
*/
cpad = (void *)((uintptr_t)dss_max + gap_size);
ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max,
alignment);
cpad_size = (uintptr_t)ret - (uintptr_t)cpad;
dss_next = (void *)((uintptr_t)ret + size);
if ((uintptr_t)ret < (uintptr_t)dss_max ||
(uintptr_t)dss_next < (uintptr_t)dss_max) {
/* Wrap-around. */
malloc_mutex_unlock(&dss_mtx);
return (NULL);
}
incr = gap_size + cpad_size + size;
dss_prev = sbrk(incr);
if (dss_prev == dss_max) {
/* Success. */
dss_max = dss_next;
malloc_mutex_unlock(&dss_mtx);
if (cpad_size != 0)
chunk_unmap(cpad, cpad_size);
if (*zero) {
VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
memset(ret, 0, size);
}
return (ret);
}
} while (dss_prev != (void *)-1);
}
malloc_mutex_unlock(&dss_mtx);
return (NULL);
}
bool
chunk_in_dss(void *chunk)
{
bool ret;
cassert(config_dss);
malloc_mutex_lock(&dss_mtx);
if ((uintptr_t)chunk >= (uintptr_t)dss_base
&& (uintptr_t)chunk < (uintptr_t)dss_max)
ret = true;
else
ret = false;
malloc_mutex_unlock(&dss_mtx);
return (ret);
}
bool
chunk_dss_boot(void)
{
cassert(config_dss);
if (malloc_mutex_init(&dss_mtx))
return (true);
dss_base = sbrk(0);
dss_prev = dss_base;
dss_max = dss_base;
return (false);
}
void
chunk_dss_prefork(void)
{
if (config_dss)
malloc_mutex_prefork(&dss_mtx);
}
void
chunk_dss_postfork_parent(void)
{
if (config_dss)
malloc_mutex_postfork_parent(&dss_mtx);
}
void
chunk_dss_postfork_child(void)
{
if (config_dss)
malloc_mutex_postfork_child(&dss_mtx);
}
/******************************************************************************/
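A sketch of the padding math chunk_alloc_dss() uses to chunk-align the sbrk break; chunksize is assumed to be 4 MiB and the break address is hypothetical:

#include <stdio.h>
#include <stdint.h>
#define CHUNKSIZE ((uintptr_t)4 << 20)
#define CHUNK_ADDR2OFFSET(a) ((uintptr_t)(a) & (CHUNKSIZE - 1))
int main(void)
{
    uintptr_t dss_max = 0x1234567; /* unaligned current break */
    uintptr_t gap_size = (CHUNKSIZE - CHUNK_ADDR2OFFSET(dss_max)) &
        (CHUNKSIZE - 1);
    printf("gap=%#lx aligned=%#lx\n", (unsigned long)gap_size,
        (unsigned long)(dss_max + gap_size));
    return (0);
}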

View File

@ -0,0 +1,210 @@
#define JEMALLOC_CHUNK_MMAP_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void *pages_map(void *addr, size_t size);
static void pages_unmap(void *addr, size_t size);
static void *chunk_alloc_mmap_slow(size_t size, size_t alignment,
bool *zero);
/******************************************************************************/
static void *
pages_map(void *addr, size_t size)
{
void *ret;
assert(size != 0);
#ifdef _WIN32
/*
* If VirtualAlloc can't allocate at the given address when one is
* given, it fails and returns NULL.
*/
ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
PAGE_READWRITE);
#else
/*
* We don't use MAP_FIXED here, because it can cause the *replacement*
* of existing mappings, and we only want to create new mappings.
*/
ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
-1, 0);
assert(ret != NULL);
if (ret == MAP_FAILED)
ret = NULL;
else if (addr != NULL && ret != addr) {
/*
* We succeeded in mapping memory, but not in the right place.
*/
if (munmap(ret, size) == -1) {
char buf[BUFERROR_BUF];
buferror(buf, sizeof(buf));
malloc_printf("<jemalloc: Error in munmap(): %s\n",
buf);
if (opt_abort)
abort();
}
ret = NULL;
}
#endif
assert(ret == NULL || (addr == NULL && ret != addr)
|| (addr != NULL && ret == addr));
return (ret);
}
static void
pages_unmap(void *addr, size_t size)
{
#ifdef _WIN32
if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
if (munmap(addr, size) == -1)
#endif
{
char buf[BUFERROR_BUF];
buferror(buf, sizeof(buf));
malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
"VirtualFree"
#else
"munmap"
#endif
"(): %s\n", buf);
if (opt_abort)
abort();
}
}
static void *
pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
{
void *ret = (void *)((uintptr_t)addr + leadsize);
assert(alloc_size >= leadsize + size);
#ifdef _WIN32
{
void *new_addr;
pages_unmap(addr, alloc_size);
new_addr = pages_map(ret, size);
if (new_addr == ret)
return (ret);
if (new_addr)
pages_unmap(new_addr, size);
return (NULL);
}
#else
{
size_t trailsize = alloc_size - leadsize - size;
if (leadsize != 0)
pages_unmap(addr, leadsize);
if (trailsize != 0)
pages_unmap((void *)((uintptr_t)ret + size), trailsize);
return (ret);
}
#endif
}
bool
pages_purge(void *addr, size_t length)
{
bool unzeroed;
#ifdef _WIN32
VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE);
unzeroed = true;
#else
# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
# define JEMALLOC_MADV_PURGE MADV_DONTNEED
# define JEMALLOC_MADV_ZEROS true
# elif defined(JEMALLOC_PURGE_MADVISE_FREE)
# define JEMALLOC_MADV_PURGE MADV_FREE
# define JEMALLOC_MADV_ZEROS false
# else
# error "No method defined for purging unused dirty pages."
# endif
int err = madvise(addr, length, JEMALLOC_MADV_PURGE);
unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0);
# undef JEMALLOC_MADV_PURGE
# undef JEMALLOC_MADV_ZEROS
#endif
return (unzeroed);
}
static void *
chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero)
{
void *ret, *pages;
size_t alloc_size, leadsize;
alloc_size = size + alignment - PAGE;
/* Beware size_t wrap-around. */
if (alloc_size < size)
return (NULL);
do {
pages = pages_map(NULL, alloc_size);
if (pages == NULL)
return (NULL);
leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) -
(uintptr_t)pages;
ret = pages_trim(pages, alloc_size, leadsize, size);
} while (ret == NULL);
assert(ret != NULL);
*zero = true;
return (ret);
}
void *
chunk_alloc_mmap(size_t size, size_t alignment, bool *zero)
{
void *ret;
size_t offset;
/*
* Ideally, there would be a way to specify alignment to mmap() (like
* NetBSD has), but in the absence of such a feature, we have to work
* hard to efficiently create aligned mappings. The reliable, but
* slow method is to create a mapping that is over-sized, then trim the
* excess. However, that always results in one or two calls to
* pages_unmap().
*
* Optimistically try mapping precisely the right amount before falling
* back to the slow method, with the expectation that the optimistic
* approach works most of the time.
*/
assert(alignment != 0);
assert((alignment & chunksize_mask) == 0);
ret = pages_map(NULL, size);
if (ret == NULL)
return (NULL);
offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
if (offset != 0) {
pages_unmap(ret, size);
return (chunk_alloc_mmap_slow(size, alignment, zero));
}
assert(ret != NULL);
*zero = true;
return (ret);
}
bool
chunk_dealloc_mmap(void *chunk, size_t size)
{
if (config_munmap)
pages_unmap(chunk, size);
return (config_munmap == false);
}
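A sketch of the fast path chunk_alloc_mmap() tries first: map exactly size bytes and keep the mapping only if the kernel happens to return an aligned address; otherwise fall back to the over-map-and-trim slow path. The addresses below are hypothetical:

#include <stdio.h>
#include <stdint.h>
static uintptr_t addr2offset(uintptr_t addr, uintptr_t alignment)
{
    /* Valid for power-of-two alignments. */
    return (addr & (alignment - 1));
}
int main(void)
{
    uintptr_t alignment = (uintptr_t)1 << 22; /* 4 MiB */
    uintptr_t lucky = (uintptr_t)5 << 22;     /* aligned mmap() result */
    uintptr_t unlucky = ((uintptr_t)5 << 22) + 4096;
    printf("lucky offset=%lu unlucky offset=%lu\n",
        (unsigned long)addr2offset(lucky, alignment),
        (unsigned long)addr2offset(unlucky, alignment));
    return (0);
}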

563
src/rt/jemalloc/src/ckh.c Normal file
View File

@ -0,0 +1,563 @@
/*
*******************************************************************************
* Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
* hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
* functions are employed. The original cuckoo hashing algorithm was described
* in:
*
* Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms
* 51(2):122-144.
*
* Generalization of cuckoo hashing was discussed in:
*
* Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
* alternative to traditional hash tables. In Proceedings of the 7th
* Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
* January 2006.
*
* This implementation uses precisely two hash functions because that is the
* fewest that can work, and supporting multiple hashes is an implementation
* burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
* that shows approximate expected maximum load factors for various
* configurations:
*
* | #cells/bucket |
* #hashes | 1 | 2 | 4 | 8 |
* --------+-------+-------+-------+-------+
* 1 | 0.006 | 0.006 | 0.03 | 0.12 |
* 2 | 0.49 | 0.86 |>0.93< |>0.96< |
* 3 | 0.91 | 0.97 | 0.98 | 0.999 |
* 4 | 0.97 | 0.99 | 0.999 | |
*
* The number of cells per bucket is chosen such that a bucket fits in one cache
* line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
* respectively.
*
******************************************************************************/
#define JEMALLOC_CKH_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static bool ckh_grow(ckh_t *ckh);
static void ckh_shrink(ckh_t *ckh);
/******************************************************************************/
/*
* Search bucket for key and return the cell number if found; SIZE_T_MAX
* otherwise.
*/
JEMALLOC_INLINE size_t
ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
{
ckhc_t *cell;
unsigned i;
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
if (cell->key != NULL && ckh->keycomp(key, cell->key))
return ((bucket << LG_CKH_BUCKET_CELLS) + i);
}
return (SIZE_T_MAX);
}
/*
* Search table for key and return cell number if found; SIZE_T_MAX otherwise.
*/
JEMALLOC_INLINE size_t
ckh_isearch(ckh_t *ckh, const void *key)
{
size_t hashes[2], bucket, cell;
assert(ckh != NULL);
ckh->hash(key, hashes);
/* Search primary bucket. */
bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
if (cell != SIZE_T_MAX)
return (cell);
/* Search secondary bucket. */
bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
return (cell);
}
JEMALLOC_INLINE bool
ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
const void *data)
{
ckhc_t *cell;
unsigned offset, i;
/*
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
if (cell->key == NULL) {
cell->key = key;
cell->data = data;
ckh->count++;
return (false);
}
}
return (true);
}
/*
* No space is available in bucket. Randomly evict an item, then try to find an
* alternate location for that item. Iteratively repeat this
* eviction/relocation procedure until either success or detection of an
* eviction/relocation bucket cycle.
*/
JEMALLOC_INLINE bool
ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
void const **argdata)
{
const void *key, *data, *tkey, *tdata;
ckhc_t *cell;
size_t hashes[2], bucket, tbucket;
unsigned i;
bucket = argbucket;
key = *argkey;
data = *argdata;
while (true) {
/*
* Choose a random item within the bucket to evict. This is
* critical to correct function, because without (eventually)
* evicting all items within a bucket during iteration, it
* would be possible to get stuck in an infinite loop if there
* were an item for which both hashes indicated the same
* bucket.
*/
prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
/* Swap cell->{key,data} and {key,data} (evict). */
tkey = cell->key; tdata = cell->data;
cell->key = key; cell->data = data;
key = tkey; data = tdata;
#ifdef CKH_COUNT
ckh->nrelocs++;
#endif
/* Find the alternate bucket for the evicted item. */
ckh->hash(key, hashes);
tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (tbucket == bucket) {
tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
- 1);
/*
* It may be that (tbucket == bucket) still, if the
* item's hashes both indicate this bucket. However,
* we are guaranteed to eventually escape this bucket
* during iteration, assuming pseudo-random item
* selection (true randomness would make infinite
* looping a remote possibility). The reason we can
* never get trapped forever is that there are two
* cases:
*
* 1) This bucket == argbucket, so we will quickly
* detect an eviction cycle and terminate.
* 2) An item was evicted to this bucket from another,
* which means that at least one item in this bucket
* has hashes that indicate distinct buckets.
*/
}
/* Check for a cycle. */
if (tbucket == argbucket) {
*argkey = key;
*argdata = data;
return (true);
}
bucket = tbucket;
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
}
}
JEMALLOC_INLINE bool
ckh_try_insert(ckh_t *ckh, void const **argkey, void const **argdata)
{
size_t hashes[2], bucket;
const void *key = *argkey;
const void *data = *argdata;
ckh->hash(key, hashes);
/* Try to insert in primary bucket. */
bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
/* Try to insert in secondary bucket. */
bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
return (false);
/*
* Try to find a place for this item via iterative eviction/relocation.
*/
return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
}
/*
* Try to rebuild the hash table from scratch by inserting all items from the
* old table into the new.
*/
JEMALLOC_INLINE bool
ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
{
size_t count, i, nins;
const void *key, *data;
count = ckh->count;
ckh->count = 0;
for (i = nins = 0; nins < count; i++) {
if (aTab[i].key != NULL) {
key = aTab[i].key;
data = aTab[i].data;
if (ckh_try_insert(ckh, &key, &data)) {
ckh->count = count;
return (true);
}
nins++;
}
}
return (false);
}
static bool
ckh_grow(ckh_t *ckh)
{
bool ret;
ckhc_t *tab, *ttab;
size_t lg_curcells;
unsigned lg_prevbuckets;
#ifdef CKH_COUNT
ckh->ngrows++;
#endif
/*
* It is possible (though unlikely, given well behaved hashes) that the
* table will have to be doubled more than once in order to create a
* usable table.
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
while (true) {
size_t usize;
lg_curcells++;
usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
if (usize == 0) {
ret = true;
goto label_return;
}
tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) {
ret = true;
goto label_return;
}
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (ckh_rebuild(ckh, tab) == false) {
idalloc(tab);
break;
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloc(ckh->tab);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
}
ret = false;
label_return:
return (ret);
}
static void
ckh_shrink(ckh_t *ckh)
{
ckhc_t *tab, *ttab;
size_t lg_curcells, usize;
unsigned lg_prevbuckets;
/*
* It is possible (though unlikely, given well behaved hashes) that the
* table rebuild will fail.
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
if (usize == 0)
return;
tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
* prevent this or future operations from proceeding.
*/
return;
}
/* Swap in new table. */
ttab = ckh->tab;
ckh->tab = tab;
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (ckh_rebuild(ckh, tab) == false) {
idalloc(tab);
#ifdef CKH_COUNT
ckh->nshrinks++;
#endif
return;
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloc(ckh->tab);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
ckh->nshrinkfails++;
#endif
}
bool
ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
{
bool ret;
size_t mincells, usize;
unsigned lg_mincells;
assert(minitems > 0);
assert(hash != NULL);
assert(keycomp != NULL);
#ifdef CKH_COUNT
ckh->ngrows = 0;
ckh->nshrinks = 0;
ckh->nshrinkfails = 0;
ckh->ninserts = 0;
ckh->nrelocs = 0;
#endif
ckh->prng_state = 42; /* Value doesn't really matter. */
ckh->count = 0;
/*
* Find the minimum power of 2 that is large enough to fit minitems
* entries. We are using (2+,2) cuckoo hashing, which has an expected
* maximum load factor of at least ~0.86, so 0.75 is a conservative load
* factor that will typically allow minitems to fit without ever
* growing the table.
*/
assert(LG_CKH_BUCKET_CELLS > 0);
mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
for (lg_mincells = LG_CKH_BUCKET_CELLS;
(ZU(1) << lg_mincells) < mincells;
lg_mincells++)
; /* Do nothing. */
ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->hash = hash;
ckh->keycomp = keycomp;
usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
if (usize == 0) {
ret = true;
goto label_return;
}
ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
if (ckh->tab == NULL) {
ret = true;
goto label_return;
}
ret = false;
label_return:
return (ret);
}
void
ckh_delete(ckh_t *ckh)
{
assert(ckh != NULL);
#ifdef CKH_VERBOSE
malloc_printf(
"%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
" nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
" nrelocs: %"PRIu64"\n", __func__, ckh,
(unsigned long long)ckh->ngrows,
(unsigned long long)ckh->nshrinks,
(unsigned long long)ckh->nshrinkfails,
(unsigned long long)ckh->ninserts,
(unsigned long long)ckh->nrelocs);
#endif
idalloc(ckh->tab);
if (config_debug)
memset(ckh, 0x5a, sizeof(ckh_t));
}
size_t
ckh_count(ckh_t *ckh)
{
assert(ckh != NULL);
return (ckh->count);
}
bool
ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
{
size_t i, ncells;
for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
if (ckh->tab[i].key != NULL) {
if (key != NULL)
*key = (void *)ckh->tab[i].key;
if (data != NULL)
*data = (void *)ckh->tab[i].data;
*tabind = i + 1;
return (false);
}
}
return (true);
}
bool
ckh_insert(ckh_t *ckh, const void *key, const void *data)
{
bool ret;
assert(ckh != NULL);
assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT
ckh->ninserts++;
#endif
while (ckh_try_insert(ckh, &key, &data)) {
if (ckh_grow(ckh)) {
ret = true;
goto label_return;
}
}
ret = false;
label_return:
return (ret);
}
bool
ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
{
size_t cell;
assert(ckh != NULL);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
if (key != NULL)
*key = (void *)ckh->tab[cell].key;
if (data != NULL)
*data = (void *)ckh->tab[cell].data;
ckh->tab[cell].key = NULL;
ckh->tab[cell].data = NULL; /* Not necessary. */
ckh->count--;
/* Try to halve the table if it is less than 1/4 full. */
if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
+ LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
> ckh->lg_minbuckets) {
/* Ignore error due to OOM. */
ckh_shrink(ckh);
}
return (false);
}
return (true);
}
bool
ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
{
size_t cell;
assert(ckh != NULL);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
if (key != NULL)
*key = (void *)ckh->tab[cell].key;
if (data != NULL)
*data = (void *)ckh->tab[cell].data;
return (false);
}
return (true);
}
void
ckh_string_hash(const void *key, size_t r_hash[2])
{
hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
}
bool
ckh_string_keycomp(const void *k1, const void *k2)
{
assert(k1 != NULL);
assert(k2 != NULL);
return (strcmp((char *)k1, (char *)k2) ? false : true);
}
void
ckh_pointer_hash(const void *key, size_t r_hash[2])
{
union {
const void *v;
size_t i;
} u;
assert(sizeof(u.v) == sizeof(u.i));
u.v = key;
hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash);
}
bool
ckh_pointer_keycomp(const void *k1, const void *k2)
{
return ((k1 == k2) ? true : false);
}
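A minimal caller of the table above (a sketch; ckh is an internal interface, so jemalloc's internal headers are assumed to be available):

#include "jemalloc/internal/jemalloc_internal.h"
static void
ckh_demo(void)
{
    ckh_t ckh;
    void *data;
    /* ckh_new() and ckh_insert() return true on failure. */
    if (ckh_new(&ckh, 16, ckh_string_hash, ckh_string_keycomp))
        return;
    if (ckh_insert(&ckh, "key", "value") == false) {
        /* ckh_search() returns false when the key is found. */
        if (ckh_search(&ckh, "key", NULL, &data) == false)
            malloc_printf("found: %s\n", (const char *)data);
    }
    ckh_delete(&ckh);
}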

1673
src/rt/jemalloc/src/ctl.c Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,39 @@
#define JEMALLOC_EXTENT_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
static inline int
extent_szad_comp(extent_node_t *a, extent_node_t *b)
{
int ret;
size_t a_size = a->size;
size_t b_size = b->size;
ret = (a_size > b_size) - (a_size < b_size);
if (ret == 0) {
uintptr_t a_addr = (uintptr_t)a->addr;
uintptr_t b_addr = (uintptr_t)b->addr;
ret = (a_addr > b_addr) - (a_addr < b_addr);
}
return (ret);
}
/* Generate red-black tree functions. */
rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad,
extent_szad_comp)
static inline int
extent_ad_comp(extent_node_t *a, extent_node_t *b)
{
uintptr_t a_addr = (uintptr_t)a->addr;
uintptr_t b_addr = (uintptr_t)b->addr;
return ((a_addr > b_addr) - (a_addr < b_addr));
}
/* Generate red-black tree functions. */
rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
extent_ad_comp)
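The (a > b) - (a < b) idiom above yields -1, 0, or 1 without branches and without the overflow risk of plain subtraction; a standalone check:

#include <assert.h>
static int
cmp(unsigned long a, unsigned long b)
{
    return ((a > b) - (a < b));
}
int main(void)
{
    assert(cmp(1, 2) == -1);
    assert(cmp(2, 2) == 0);
    assert(cmp(3, 2) == 1);
    return (0);
}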

View File

@ -0,0 +1,2 @@
#define JEMALLOC_HASH_C_
#include "jemalloc/internal/jemalloc_internal.h"

313
src/rt/jemalloc/src/huge.c Normal file
View File

@ -0,0 +1,313 @@
#define JEMALLOC_HUGE_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
uint64_t huge_nmalloc;
uint64_t huge_ndalloc;
size_t huge_allocated;
malloc_mutex_t huge_mtx;
/******************************************************************************/
/* Tree of chunks that are stand-alone huge allocations. */
static extent_tree_t huge;
void *
huge_malloc(size_t size, bool zero)
{
return (huge_palloc(size, chunksize, zero));
}
void *
huge_palloc(size_t size, size_t alignment, bool zero)
{
void *ret;
size_t csize;
extent_node_t *node;
bool is_zeroed;
/* Allocate one or more contiguous chunks for this request. */
csize = CHUNK_CEILING(size);
if (csize == 0) {
/* size is large enough to cause size_t wrap-around. */
return (NULL);
}
/* Allocate an extent node with which to track the chunk. */
node = base_node_alloc();
if (node == NULL)
return (NULL);
/*
* Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that
* it is possible to make correct junk/zero fill decisions below.
*/
is_zeroed = zero;
ret = chunk_alloc(csize, alignment, false, &is_zeroed,
chunk_dss_prec_get());
if (ret == NULL) {
base_node_dealloc(node);
return (NULL);
}
/* Insert node into huge. */
node->addr = ret;
node->size = csize;
malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node);
if (config_stats) {
stats_cactive_add(csize);
huge_nmalloc++;
huge_allocated += csize;
}
malloc_mutex_unlock(&huge_mtx);
if (config_fill && zero == false) {
if (opt_junk)
memset(ret, 0xa5, csize);
else if (opt_zero && is_zeroed == false)
memset(ret, 0, csize);
}
return (ret);
}
void *
huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
{
/*
* Avoid moving the allocation if the size class can be left the same.
*/
if (oldsize > arena_maxclass
&& CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size)
&& CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) {
assert(CHUNK_CEILING(oldsize) == oldsize);
if (config_fill && opt_junk && size < oldsize) {
memset((void *)((uintptr_t)ptr + size), 0x5a,
oldsize - size);
}
return (ptr);
}
/* Reallocation would require a move. */
return (NULL);
}
void *
huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_dalloc)
{
void *ret;
size_t copysize;
/* Try to avoid moving the allocation. */
ret = huge_ralloc_no_move(ptr, oldsize, size, extra);
if (ret != NULL)
return (ret);
/*
* size and oldsize are different enough that we need to use a
* different size class. In that case, fall back to allocating new
* space and copying.
*/
if (alignment > chunksize)
ret = huge_palloc(size + extra, alignment, zero);
else
ret = huge_malloc(size + extra, zero);
if (ret == NULL) {
if (extra == 0)
return (NULL);
/* Try again, this time without extra. */
if (alignment > chunksize)
ret = huge_palloc(size, alignment, zero);
else
ret = huge_malloc(size, zero);
if (ret == NULL)
return (NULL);
}
/*
* Copy at most size bytes (not size+extra), since the caller has no
* expectation that the extra bytes will be reliably preserved.
*/
copysize = (size < oldsize) ? size : oldsize;
#ifdef JEMALLOC_MREMAP
/*
* Use mremap(2) if this is a huge-->huge reallocation, and neither the
* source nor the destination are in dss.
*/
if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr)
== false && chunk_in_dss(ret) == false))) {
size_t newsize = huge_salloc(ret);
/*
* Remove ptr from the tree of huge allocations before
* performing the remap operation, in order to avoid the
* possibility of another thread acquiring that mapping before
* this one removes it from the tree.
*/
huge_dalloc(ptr, false);
if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
ret) == MAP_FAILED) {
/*
* Assuming no chunk management bugs in the allocator,
* the only documented way an error can occur here is
* if the application changed the map type for a
* portion of the old allocation. This is firmly in
* undefined behavior territory, so write a diagnostic
* message, and optionally abort.
*/
char buf[BUFERROR_BUF];
buferror(buf, sizeof(buf));
malloc_printf("<jemalloc>: Error in mremap(): %s\n",
buf);
if (opt_abort)
abort();
memcpy(ret, ptr, copysize);
chunk_dealloc_mmap(ptr, oldsize);
}
} else
#endif
{
memcpy(ret, ptr, copysize);
iqallocx(ptr, try_tcache_dalloc);
}
return (ret);
}
void
huge_dalloc(void *ptr, bool unmap)
{
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = ptr;
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
assert(node->addr == ptr);
extent_tree_ad_remove(&huge, node);
if (config_stats) {
stats_cactive_sub(node->size);
huge_ndalloc++;
huge_allocated -= node->size;
}
malloc_mutex_unlock(&huge_mtx);
if (unmap && config_fill && config_dss && opt_junk)
memset(node->addr, 0x5a, node->size);
chunk_dealloc(node->addr, node->size, unmap);
base_node_dealloc(node);
}
size_t
huge_salloc(const void *ptr)
{
size_t ret;
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
ret = node->size;
malloc_mutex_unlock(&huge_mtx);
return (ret);
}
prof_ctx_t *
huge_prof_ctx_get(const void *ptr)
{
prof_ctx_t *ret;
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
ret = node->prof_ctx;
malloc_mutex_unlock(&huge_mtx);
return (ret);
}
void
huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
/* Extract from tree of huge allocations. */
key.addr = __DECONST(void *, ptr);
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
node->prof_ctx = ctx;
malloc_mutex_unlock(&huge_mtx);
}
bool
huge_boot(void)
{
/* Initialize chunks data. */
if (malloc_mutex_init(&huge_mtx))
return (true);
extent_tree_ad_new(&huge);
if (config_stats) {
huge_nmalloc = 0;
huge_ndalloc = 0;
huge_allocated = 0;
}
return (false);
}
void
huge_prefork(void)
{
malloc_mutex_prefork(&huge_mtx);
}
void
huge_postfork_parent(void)
{
malloc_mutex_postfork_parent(&huge_mtx);
}
void
huge_postfork_child(void)
{
malloc_mutex_postfork_child(&huge_mtx);
}
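A sketch of the size_t wrap-around guard in huge_palloc(): CHUNK_CEILING of a near-SIZE_MAX request wraps to 0, which the code treats as failure. Chunksize is assumed to be 4 MiB here:

#include <stdio.h>
#include <stdint.h>
#define CHUNKSIZE ((size_t)4 << 20)
#define CHUNK_CEILING(s) (((s) + CHUNKSIZE - 1) & ~(CHUNKSIZE - 1))
int main(void)
{
    printf("%zu\n", CHUNK_CEILING((size_t)1));      /* one chunk: 4194304 */
    printf("%zu\n", CHUNK_CEILING(SIZE_MAX - 100)); /* wraps around to 0 */
    return (0);
}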

File diff suppressed because it is too large

2
src/rt/jemalloc/src/mb.c Normal file
View File

@ -0,0 +1,2 @@
#define JEMALLOC_MB_C_
#include "jemalloc/internal/jemalloc_internal.h"

149
src/rt/jemalloc/src/mutex.c Normal file
View File

@ -0,0 +1,149 @@
#define JEMALLOC_MUTEX_C_
#include "jemalloc/internal/jemalloc_internal.h"
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
#include <dlfcn.h>
#endif
#ifndef _CRT_SPINCOUNT
#define _CRT_SPINCOUNT 4000
#endif
/******************************************************************************/
/* Data. */
#ifdef JEMALLOC_LAZY_LOCK
bool isthreaded = false;
#endif
#ifdef JEMALLOC_MUTEX_INIT_CB
static bool postpone_init = true;
static malloc_mutex_t *postponed_mutexes = NULL;
#endif
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
static void pthread_create_once(void);
#endif
/******************************************************************************/
/*
* We intercept pthread_create() calls in order to toggle isthreaded if the
* process goes multi-threaded.
*/
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
static void
pthread_create_once(void)
{
pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
if (pthread_create_fptr == NULL) {
malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
"\"pthread_create\")\n");
abort();
}
isthreaded = true;
}
JEMALLOC_EXPORT int
pthread_create(pthread_t *__restrict thread,
const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
void *__restrict arg)
{
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
pthread_once(&once_control, pthread_create_once);
return (pthread_create_fptr(thread, attr, start_routine, arg));
}
#endif
/******************************************************************************/
#ifdef JEMALLOC_MUTEX_INIT_CB
JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
void *(calloc_cb)(size_t, size_t));
#endif
bool
malloc_mutex_init(malloc_mutex_t *mutex)
{
#ifdef _WIN32
if (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
_CRT_SPINCOUNT))
return (true);
#elif (defined(JEMALLOC_OSSPIN))
mutex->lock = 0;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
if (postpone_init) {
mutex->postponed_next = postponed_mutexes;
postponed_mutexes = mutex;
} else {
if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) !=
0)
return (true);
}
#else
pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr) != 0)
return (true);
pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE);
if (pthread_mutex_init(&mutex->lock, &attr) != 0) {
pthread_mutexattr_destroy(&attr);
return (true);
}
pthread_mutexattr_destroy(&attr);
#endif
return (false);
}
void
malloc_mutex_prefork(malloc_mutex_t *mutex)
{
malloc_mutex_lock(mutex);
}
void
malloc_mutex_postfork_parent(malloc_mutex_t *mutex)
{
malloc_mutex_unlock(mutex);
}
void
malloc_mutex_postfork_child(malloc_mutex_t *mutex)
{
#ifdef JEMALLOC_MUTEX_INIT_CB
malloc_mutex_unlock(mutex);
#else
if (malloc_mutex_init(mutex)) {
malloc_printf("<jemalloc>: Error re-initializing mutex in "
"child\n");
if (opt_abort)
abort();
}
#endif
}
bool
mutex_boot(void)
{
#ifdef JEMALLOC_MUTEX_INIT_CB
postpone_init = false;
while (postponed_mutexes != NULL) {
if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
base_calloc) != 0)
return (true);
postponed_mutexes = postponed_mutexes->postponed_next;
}
#endif
return (false);
}
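The prefork/postfork trio above follows the usual pthread_atfork discipline: acquire every mutex before fork() so no lock is held mid-operation in the child, then release in the parent and release (or, without JEMALLOC_MUTEX_INIT_CB, reinitialize) in the child. A generic sketch of the same pattern; install_fork_handlers is a hypothetical helper, not part of the diff:

#include <pthread.h>
static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static void prefork(void)         { pthread_mutex_lock(&mtx); }
static void postfork_parent(void) { pthread_mutex_unlock(&mtx); }
static void postfork_child(void)  { pthread_mutex_unlock(&mtx); }
static void
install_fork_handlers(void) /* hypothetical; jemalloc wires this up itself */
{
    pthread_atfork(prefork, postfork_parent, postfork_child);
}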

1283
src/rt/jemalloc/src/prof.c Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,190 @@
#define JEMALLOC_QUARANTINE_C_
#include "jemalloc/internal/jemalloc_internal.h"
/*
* quarantine pointers close to NULL are used to encode state information that
* is used for cleaning up during thread shutdown.
*/
#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1)
#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2)
#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY
/******************************************************************************/
/* Data. */
malloc_tsd_data(, quarantine, quarantine_t *, NULL)
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static quarantine_t *quarantine_grow(quarantine_t *quarantine);
static void quarantine_drain_one(quarantine_t *quarantine);
static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound);
/******************************************************************************/
quarantine_t *
quarantine_init(size_t lg_maxobjs)
{
quarantine_t *quarantine;
quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) +
((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)));
if (quarantine == NULL)
return (NULL);
quarantine->curbytes = 0;
quarantine->curobjs = 0;
quarantine->first = 0;
quarantine->lg_maxobjs = lg_maxobjs;
quarantine_tsd_set(&quarantine);
return (quarantine);
}
static quarantine_t *
quarantine_grow(quarantine_t *quarantine)
{
quarantine_t *ret;
ret = quarantine_init(quarantine->lg_maxobjs + 1);
if (ret == NULL) {
quarantine_drain_one(quarantine);
return (quarantine);
}
ret->curbytes = quarantine->curbytes;
ret->curobjs = quarantine->curobjs;
if (quarantine->first + quarantine->curobjs <= (ZU(1) <<
quarantine->lg_maxobjs)) {
/* objs ring buffer data are contiguous. */
memcpy(ret->objs, &quarantine->objs[quarantine->first],
quarantine->curobjs * sizeof(quarantine_obj_t));
} else {
/* objs ring buffer data wrap around. */
size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) -
quarantine->first;
size_t ncopy_b = quarantine->curobjs - ncopy_a;
memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a
* sizeof(quarantine_obj_t));
memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
sizeof(quarantine_obj_t));
}
idalloc(quarantine);
return (ret);
}
static void
quarantine_drain_one(quarantine_t *quarantine)
{
quarantine_obj_t *obj = &quarantine->objs[quarantine->first];
assert(obj->usize == isalloc(obj->ptr, config_prof));
idalloc(obj->ptr);
quarantine->curbytes -= obj->usize;
quarantine->curobjs--;
quarantine->first = (quarantine->first + 1) & ((ZU(1) <<
quarantine->lg_maxobjs) - 1);
}
static void
quarantine_drain(quarantine_t *quarantine, size_t upper_bound)
{
while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0)
quarantine_drain_one(quarantine);
}
void
quarantine(void *ptr)
{
quarantine_t *quarantine;
size_t usize = isalloc(ptr, config_prof);
cassert(config_fill);
assert(opt_quarantine);
quarantine = *quarantine_tsd_get();
if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) {
if (quarantine == QUARANTINE_STATE_PURGATORY) {
/*
* Make a note that quarantine() was called after
* quarantine_cleanup() was called.
*/
quarantine = QUARANTINE_STATE_REINCARNATED;
quarantine_tsd_set(&quarantine);
}
idalloc(ptr);
return;
}
/*
* Drain one or more objects if the quarantine size limit would be
* exceeded by appending ptr.
*/
if (quarantine->curbytes + usize > opt_quarantine) {
size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine
- usize : 0;
quarantine_drain(quarantine, upper_bound);
}
/* Grow the quarantine ring buffer if it's full. */
if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs))
quarantine = quarantine_grow(quarantine);
/* quarantine_grow() must free a slot if it fails to grow. */
assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs));
/* Append ptr if its size doesn't exceed the quarantine size. */
if (quarantine->curbytes + usize <= opt_quarantine) {
size_t offset = (quarantine->first + quarantine->curobjs) &
((ZU(1) << quarantine->lg_maxobjs) - 1);
quarantine_obj_t *obj = &quarantine->objs[offset];
obj->ptr = ptr;
obj->usize = usize;
quarantine->curbytes += usize;
quarantine->curobjs++;
if (opt_junk)
memset(ptr, 0x5a, usize);
} else {
assert(quarantine->curbytes == 0);
idalloc(ptr);
}
}
void
quarantine_cleanup(void *arg)
{
quarantine_t *quarantine = *(quarantine_t **)arg;
if (quarantine == QUARANTINE_STATE_REINCARNATED) {
/*
* Another destructor deallocated memory after this destructor
* was called. Reset quarantine to QUARANTINE_STATE_PURGATORY
* in order to receive another callback.
*/
quarantine = QUARANTINE_STATE_PURGATORY;
quarantine_tsd_set(&quarantine);
} else if (quarantine == QUARANTINE_STATE_PURGATORY) {
/*
* The previous time this destructor was called, we set the key
* to QUARANTINE_STATE_PURGATORY so that other destructors
* wouldn't cause re-creation of the quarantine. This time, do
* nothing, so that the destructor will not be called again.
*/
} else if (quarantine != NULL) {
quarantine_drain(quarantine, 0);
idalloc(quarantine);
quarantine = QUARANTINE_STATE_PURGATORY;
quarantine_tsd_set(&quarantine);
}
}
bool
quarantine_boot(void)
{
cassert(config_fill);
if (quarantine_tsd_boot())
return (true);
return (false);
}
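A sketch of the power-of-two ring-buffer indexing used above: head and tail positions wrap with a mask rather than a modulo, which is why lg_maxobjs is stored as a log:

#include <stdio.h>
#include <stddef.h>
int main(void)
{
    unsigned lg_maxobjs = 3; /* 8 slots */
    size_t mask = ((size_t)1 << lg_maxobjs) - 1;
    size_t first = 6, curobjs = 3;
    size_t tail = (first + curobjs) & mask; /* (6 + 3) & 7 == 1: wrapped */
    printf("tail slot = %zu\n", tail);
    return (0);
}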

View File

@ -0,0 +1,67 @@
#define JEMALLOC_RTREE_C_
#include "jemalloc/internal/jemalloc_internal.h"
rtree_t *
rtree_new(unsigned bits)
{
rtree_t *ret;
unsigned bits_per_level, height, i;
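/*
 * Size each tree level so that a node's pointer slots fill RTREE_NODESIZE
 * bytes; ffs(pow2_ceil(x)) - 1 computes lg() of the power-of-two slot count.
 */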
bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
height = bits / bits_per_level;
if (height * bits_per_level != bits)
height++;
assert(height * bits_per_level >= bits);
ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) +
(sizeof(unsigned) * height));
if (ret == NULL)
return (NULL);
memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
height));
if (malloc_mutex_init(&ret->mutex)) {
/* Leak the rtree. */
return (NULL);
}
ret->height = height;
if (bits_per_level * height > bits)
ret->level2bits[0] = bits % bits_per_level;
else
ret->level2bits[0] = bits_per_level;
for (i = 1; i < height; i++)
ret->level2bits[i] = bits_per_level;
ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]);
if (ret->root == NULL) {
/*
* We leak the rtree here, since there's no generic base
* deallocation.
*/
return (NULL);
}
memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
return (ret);
}
void
rtree_prefork(rtree_t *rtree)
{
malloc_mutex_prefork(&rtree->mutex);
}
void
rtree_postfork_parent(rtree_t *rtree)
{
malloc_mutex_postfork_parent(&rtree->mutex);
}
void
rtree_postfork_child(rtree_t *rtree)
{
malloc_mutex_postfork_child(&rtree->mutex);
}

549
src/rt/jemalloc/src/stats.c Normal file

@ -0,0 +1,549 @@
#define JEMALLOC_STATS_C_
#include "jemalloc/internal/jemalloc_internal.h"
#define CTL_GET(n, v, t) do { \
size_t sz = sizeof(t); \
xmallctl(n, v, &sz, NULL, 0); \
} while (0)
#define CTL_I_GET(n, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
mib[2] = i; \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
#define CTL_J_GET(n, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
mib[2] = j; \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
#define CTL_IJ_GET(n, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
mib[2] = i; \
mib[4] = j; \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
/******************************************************************************/
/* Data. */
bool opt_stats_print = false;
size_t stats_cactive = 0;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void stats_arena_bins_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i);
static void stats_arena_lruns_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i);
static void stats_arena_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i, bool bins, bool large);
/******************************************************************************/
static void
stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
size_t page;
bool config_tcache;
unsigned nbins, j, gap_start;
CTL_GET("arenas.page", &page, size_t);
CTL_GET("config.tcache", &config_tcache, bool);
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
"bins: bin size regs pgs allocated nmalloc"
" ndalloc nrequests nfills nflushes"
" newruns reruns curruns\n");
} else {
malloc_cprintf(write_cb, cbopaque,
"bins: bin size regs pgs allocated nmalloc"
" ndalloc newruns reruns curruns\n");
}
CTL_GET("arenas.nbins", &nbins, unsigned);
for (j = 0, gap_start = UINT_MAX; j < nbins; j++) {
uint64_t nruns;
CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t);
if (nruns == 0) {
if (gap_start == UINT_MAX)
gap_start = j;
} else {
size_t reg_size, run_size, allocated;
uint32_t nregs;
uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
uint64_t reruns;
size_t curruns;
if (gap_start != UINT_MAX) {
if (j > gap_start + 1) {
/* Gap of more than one size class. */
malloc_cprintf(write_cb, cbopaque,
"[%u..%u]\n", gap_start,
j - 1);
} else {
/* Gap of one size class. */
malloc_cprintf(write_cb, cbopaque,
"[%u]\n", gap_start);
}
gap_start = UINT_MAX;
}
CTL_J_GET("arenas.bin.0.size", &reg_size, size_t);
CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t);
CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
CTL_IJ_GET("stats.arenas.0.bins.0.allocated",
&allocated, size_t);
CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc",
&nmalloc, uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc",
&ndalloc, uint64_t);
if (config_tcache) {
CTL_IJ_GET("stats.arenas.0.bins.0.nrequests",
&nrequests, uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.nfills",
&nfills, uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.nflushes",
&nflushes, uint64_t);
}
CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns,
uint64_t);
CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns,
size_t);
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
"%13u %5zu %4u %3zu %12zu %12"PRIu64
" %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12zu\n",
j, reg_size, nregs, run_size / page,
allocated, nmalloc, ndalloc, nrequests,
nfills, nflushes, nruns, reruns, curruns);
} else {
malloc_cprintf(write_cb, cbopaque,
"%13u %5zu %4u %3zu %12zu %12"PRIu64
" %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12zu\n",
j, reg_size, nregs, run_size / page,
allocated, nmalloc, ndalloc, nruns, reruns,
curruns);
}
}
}
if (gap_start != UINT_MAX) {
if (j > gap_start + 1) {
/* Gap of more than one size class. */
malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n",
gap_start, j - 1);
} else {
/* Gap of one size class. */
malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start);
}
}
}
static void
stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
size_t page, nlruns, j;
ssize_t gap_start;
CTL_GET("arenas.page", &page, size_t);
malloc_cprintf(write_cb, cbopaque,
"large: size pages nmalloc ndalloc nrequests"
" curruns\n");
CTL_GET("arenas.nlruns", &nlruns, size_t);
for (j = 0, gap_start = -1; j < nlruns; j++) {
uint64_t nmalloc, ndalloc, nrequests;
size_t run_size, curruns;
CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
uint64_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc,
uint64_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests,
uint64_t);
if (nrequests == 0) {
if (gap_start == -1)
gap_start = j;
} else {
CTL_J_GET("arenas.lrun.0.size", &run_size, size_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns,
size_t);
if (gap_start != -1) {
malloc_cprintf(write_cb, cbopaque, "[%zu]\n",
j - gap_start);
gap_start = -1;
}
malloc_cprintf(write_cb, cbopaque,
"%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
" %12zu\n",
run_size, run_size / page, nmalloc, ndalloc,
nrequests, curruns);
}
}
if (gap_start != -1)
malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start);
}
static void
stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i, bool bins, bool large)
{
unsigned nthreads;
const char *dss;
size_t page, pactive, pdirty, mapped;
uint64_t npurge, nmadvise, purged;
size_t small_allocated;
uint64_t small_nmalloc, small_ndalloc, small_nrequests;
size_t large_allocated;
uint64_t large_nmalloc, large_ndalloc, large_nrequests;
CTL_GET("arenas.page", &page, size_t);
CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
malloc_cprintf(write_cb, cbopaque,
"assigned threads: %u\n", nthreads);
CTL_I_GET("stats.arenas.0.dss", &dss, const char *);
malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n",
dss);
CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t);
CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t);
malloc_cprintf(write_cb, cbopaque,
"dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
" %"PRIu64" madvise%s, %"PRIu64" purged\n",
pactive, pdirty, npurge, npurge == 1 ? "" : "s",
nmadvise, nmadvise == 1 ? "" : "s", purged);
malloc_cprintf(write_cb, cbopaque,
" allocated nmalloc ndalloc nrequests\n");
CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t);
CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t);
CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t);
CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t);
malloc_cprintf(write_cb, cbopaque,
"small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t);
CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t);
CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t);
CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t);
malloc_cprintf(write_cb, cbopaque,
"large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
malloc_cprintf(write_cb, cbopaque,
"total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
small_allocated + large_allocated,
small_nmalloc + large_nmalloc,
small_ndalloc + large_ndalloc,
small_nrequests + large_nrequests);
malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page);
CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped);
if (bins)
stats_arena_bins_print(write_cb, cbopaque, i);
if (large)
stats_arena_lruns_print(write_cb, cbopaque, i);
}
void
stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
const char *opts)
{
int err;
uint64_t epoch;
size_t u64sz;
bool general = true;
bool merged = true;
bool unmerged = true;
bool bins = true;
bool large = true;
/*
* Refresh stats, in case mallctl() was called by the application.
*
* Check for OOM here, since refreshing the ctl cache can trigger
* allocation. In practice, none of the subsequent mallctl()-related
* calls in this function will cause OOM if this one succeeds.
*/
epoch = 1;
u64sz = sizeof(uint64_t);
err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t));
if (err != 0) {
if (err == EAGAIN) {
malloc_write("<jemalloc>: Memory allocation failure in "
"mallctl(\"epoch\", ...)\n");
return;
}
malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", "
"...)\n");
abort();
}
if (opts != NULL) {
unsigned i;
for (i = 0; opts[i] != '\0'; i++) {
switch (opts[i]) {
case 'g':
general = false;
break;
case 'm':
merged = false;
break;
case 'a':
unmerged = false;
break;
case 'b':
bins = false;
break;
case 'l':
large = false;
break;
default:;
}
}
}
malloc_cprintf(write_cb, cbopaque,
"___ Begin jemalloc statistics ___\n");
if (general) {
int err;
const char *cpv;
bool bv;
unsigned uv;
ssize_t ssv;
size_t sv, bsz, ssz, sssz, cpsz;
bsz = sizeof(bool);
ssz = sizeof(size_t);
sssz = sizeof(ssize_t);
cpsz = sizeof(const char *);
CTL_GET("version", &cpv, const char *);
malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv);
CTL_GET("config.debug", &bv, bool);
malloc_cprintf(write_cb, cbopaque, "Assertions %s\n",
bv ? "enabled" : "disabled");
#define OPT_WRITE_BOOL(n) \
if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \
== 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": %s\n", bv ? "true" : "false"); \
}
#define OPT_WRITE_SIZE_T(n) \
if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \
== 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": %zu\n", sv); \
}
#define OPT_WRITE_SSIZE_T(n) \
if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \
== 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": %zd\n", ssv); \
}
#define OPT_WRITE_CHAR_P(n) \
if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \
== 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": \"%s\"\n", cpv); \
}
malloc_cprintf(write_cb, cbopaque,
"Run-time option settings:\n");
OPT_WRITE_BOOL(abort)
OPT_WRITE_SIZE_T(lg_chunk)
OPT_WRITE_CHAR_P(dss)
OPT_WRITE_SIZE_T(narenas)
OPT_WRITE_SSIZE_T(lg_dirty_mult)
OPT_WRITE_BOOL(stats_print)
OPT_WRITE_BOOL(junk)
OPT_WRITE_SIZE_T(quarantine)
OPT_WRITE_BOOL(redzone)
OPT_WRITE_BOOL(zero)
OPT_WRITE_BOOL(utrace)
OPT_WRITE_BOOL(valgrind)
OPT_WRITE_BOOL(xmalloc)
OPT_WRITE_BOOL(tcache)
OPT_WRITE_SSIZE_T(lg_tcache_max)
OPT_WRITE_BOOL(prof)
OPT_WRITE_CHAR_P(prof_prefix)
OPT_WRITE_BOOL(prof_active)
OPT_WRITE_SSIZE_T(lg_prof_sample)
OPT_WRITE_BOOL(prof_accum)
OPT_WRITE_SSIZE_T(lg_prof_interval)
OPT_WRITE_BOOL(prof_gdump)
OPT_WRITE_BOOL(prof_final)
OPT_WRITE_BOOL(prof_leak)
#undef OPT_WRITE_BOOL
#undef OPT_WRITE_SIZE_T
#undef OPT_WRITE_SSIZE_T
#undef OPT_WRITE_CHAR_P
malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus);
CTL_GET("arenas.narenas", &uv, unsigned);
malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv);
malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n",
sizeof(void *));
CTL_GET("arenas.quantum", &sv, size_t);
malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv);
CTL_GET("arenas.page", &sv, size_t);
malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv);
CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
if (ssv >= 0) {
malloc_cprintf(write_cb, cbopaque,
"Min active:dirty page ratio per arena: %u:1\n",
(1U << ssv));
} else {
malloc_cprintf(write_cb, cbopaque,
"Min active:dirty page ratio per arena: N/A\n");
}
if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0))
== 0) {
malloc_cprintf(write_cb, cbopaque,
"Maximum thread-cached size class: %zu\n", sv);
}
if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 &&
bv) {
CTL_GET("opt.lg_prof_sample", &sv, size_t);
malloc_cprintf(write_cb, cbopaque,
"Average profile sample interval: %"PRIu64
" (2^%zu)\n", (((uint64_t)1U) << sv), sv);
CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
if (ssv >= 0) {
malloc_cprintf(write_cb, cbopaque,
"Average profile dump interval: %"PRIu64
" (2^%zd)\n",
(((uint64_t)1U) << ssv), ssv);
} else {
malloc_cprintf(write_cb, cbopaque,
"Average profile dump interval: N/A\n");
}
}
CTL_GET("opt.lg_chunk", &sv, size_t);
malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n",
(ZU(1) << sv), sv);
}
if (config_stats) {
size_t *cactive;
size_t allocated, active, mapped;
size_t chunks_current, chunks_high;
uint64_t chunks_total;
size_t huge_allocated;
uint64_t huge_nmalloc, huge_ndalloc;
CTL_GET("stats.cactive", &cactive, size_t *);
CTL_GET("stats.allocated", &allocated, size_t);
CTL_GET("stats.active", &active, size_t);
CTL_GET("stats.mapped", &mapped, size_t);
malloc_cprintf(write_cb, cbopaque,
"Allocated: %zu, active: %zu, mapped: %zu\n",
allocated, active, mapped);
malloc_cprintf(write_cb, cbopaque,
"Current active ceiling: %zu\n", atomic_read_z(cactive));
/* Print chunk stats. */
CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
CTL_GET("stats.chunks.high", &chunks_high, size_t);
CTL_GET("stats.chunks.current", &chunks_current, size_t);
malloc_cprintf(write_cb, cbopaque, "chunks: nchunks "
"highchunks curchunks\n");
malloc_cprintf(write_cb, cbopaque,
" %13"PRIu64" %12zu %12zu\n",
chunks_total, chunks_high, chunks_current);
/* Print huge stats. */
CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t);
CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t);
CTL_GET("stats.huge.allocated", &huge_allocated, size_t);
malloc_cprintf(write_cb, cbopaque,
"huge: nmalloc ndalloc allocated\n");
malloc_cprintf(write_cb, cbopaque,
" %12"PRIu64" %12"PRIu64" %12zu\n",
huge_nmalloc, huge_ndalloc, huge_allocated);
if (merged) {
unsigned narenas;
CTL_GET("arenas.narenas", &narenas, unsigned);
{
VARIABLE_ARRAY(bool, initialized, narenas);
size_t isz;
unsigned i, ninitialized;
isz = sizeof(bool) * narenas;
xmallctl("arenas.initialized", initialized,
&isz, NULL, 0);
for (i = ninitialized = 0; i < narenas; i++) {
if (initialized[i])
ninitialized++;
}
if (ninitialized > 1 || unmerged == false) {
/* Print merged arena stats. */
malloc_cprintf(write_cb, cbopaque,
"\nMerged arenas stats:\n");
stats_arena_print(write_cb, cbopaque,
narenas, bins, large);
}
}
}
if (unmerged) {
unsigned narenas;
/* Print stats for each arena. */
CTL_GET("arenas.narenas", &narenas, unsigned);
{
VARIABLE_ARRAY(bool, initialized, narenas);
size_t isz;
unsigned i;
isz = sizeof(bool) * narenas;
xmallctl("arenas.initialized", initialized,
&isz, NULL, 0);
for (i = 0; i < narenas; i++) {
if (initialized[i]) {
malloc_cprintf(write_cb,
cbopaque,
"\narenas[%u]:\n", i);
stats_arena_print(write_cb,
cbopaque, i, bins, large);
}
}
}
}
}
malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n");
}

src/rt/jemalloc/src/tcache.c Normal file

@ -0,0 +1,476 @@
#define JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
malloc_tsd_data(, tcache, tcache_t *, NULL)
malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default)
bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
tcache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */
size_t nhbins;
size_t tcache_maxclass;
/******************************************************************************/
size_t tcache_salloc(const void *ptr)
{
return (arena_salloc(ptr, false));
}
void
tcache_event_hard(tcache_t *tcache)
{
size_t binind = tcache->next_gc_bin;
tcache_bin_t *tbin = &tcache->tbins[binind];
tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
if (tbin->low_water > 0) {
/*
* Flush (ceiling) 3/4 of the objects below the low water mark.
*/
if (binind < NBINS) {
tcache_bin_flush_small(tbin, binind, tbin->ncached -
tbin->low_water + (tbin->low_water >> 2), tcache);
} else {
tcache_bin_flush_large(tbin, binind, tbin->ncached -
tbin->low_water + (tbin->low_water >> 2), tcache);
}
/*
* Reduce fill count by 2X. Limit lg_fill_div such that the
* fill count is always at least 1.
*/
if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1)
tbin->lg_fill_div++;
} else if (tbin->low_water < 0) {
/*
* Increase fill count by 2X. Make sure lg_fill_div stays
* greater than 0.
*/
if (tbin->lg_fill_div > 1)
tbin->lg_fill_div--;
}
tbin->low_water = tbin->ncached;
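/* Rotate to the next bin; bins are garbage-collected round-robin. */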
tcache->next_gc_bin++;
if (tcache->next_gc_bin == nhbins)
tcache->next_gc_bin = 0;
tcache->ev_cnt = 0;
}
void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
void *ret;
arena_tcache_fill_small(tcache->arena, tbin, binind,
config_prof ? tcache->prof_accumbytes : 0);
if (config_prof)
tcache->prof_accumbytes = 0;
ret = tcache_alloc_easy(tbin);
return (ret);
}
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
tcache_t *tcache)
{
void *ptr;
unsigned i, nflush, ndeferred;
bool merged_stats = false;
assert(binind < NBINS);
assert(rem <= tbin->ncached);
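/*
 * Flush in passes. Each pass locks the arena bin that owns the first
 * remaining object and frees every stashed object from that arena;
 * objects belonging to other arenas are deferred to a later pass.
 */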
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena bin associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
arena_t *arena = chunk->arena;
arena_bin_t *bin = &arena->bins[binind];
if (config_prof && arena == tcache->arena) {
if (arena_prof_accum(arena, tcache->prof_accumbytes))
prof_idump();
tcache->prof_accumbytes = 0;
}
malloc_mutex_lock(&bin->lock);
if (config_stats && arena == tcache->arena) {
assert(merged_stats == false);
merged_stats = true;
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
ndeferred = 0;
for (i = 0; i < nflush; i++) {
ptr = tbin->avail[i];
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena) {
size_t pageind = ((uintptr_t)ptr -
(uintptr_t)chunk) >> LG_PAGE;
arena_chunk_map_t *mapelm =
arena_mapp_get(chunk, pageind);
if (config_fill && opt_junk) {
arena_alloc_junk_small(ptr,
&arena_bin_info[binind], true);
}
arena_dalloc_bin_locked(arena, chunk, ptr,
mapelm);
} else {
/*
* This object was allocated via a different
* arena bin than the one that is currently
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&bin->lock);
}
if (config_stats && merged_stats == false) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
arena_bin_t *bin = &tcache->arena->bins[binind];
malloc_mutex_lock(&bin->lock);
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
malloc_mutex_unlock(&bin->lock);
}
memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
rem * sizeof(void *));
tbin->ncached = rem;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
tcache_t *tcache)
{
void *ptr;
unsigned i, nflush, ndeferred;
bool merged_stats = false;
assert(binind < nhbins);
assert(rem <= tbin->ncached);
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
arena_t *arena = chunk->arena;
UNUSED bool idump;
if (config_prof)
idump = false;
malloc_mutex_lock(&arena->lock);
if ((config_prof || config_stats) && arena == tcache->arena) {
if (config_prof) {
idump = arena_prof_accum_locked(arena,
tcache->prof_accumbytes);
tcache->prof_accumbytes = 0;
}
if (config_stats) {
merged_stats = true;
arena->stats.nrequests_large +=
tbin->tstats.nrequests;
arena->stats.lstats[binind - NBINS].nrequests +=
tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
}
ndeferred = 0;
for (i = 0; i < nflush; i++) {
ptr = tbin->avail[i];
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena)
arena_dalloc_large_locked(arena, chunk, ptr);
else {
/*
* This object was allocated via a different
* arena than the one that is currently locked.
* Stash the object, so that it can be handled
* in a future pass.
*/
tbin->avail[ndeferred] = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&arena->lock);
if (config_prof && idump)
prof_idump();
}
if (config_stats && merged_stats == false) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
arena_t *arena = tcache->arena;
malloc_mutex_lock(&arena->lock);
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - NBINS].nrequests +=
tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
malloc_mutex_unlock(&arena->lock);
}
memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
rem * sizeof(void *));
tbin->ncached = rem;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
void
tcache_arena_associate(tcache_t *tcache, arena_t *arena)
{
if (config_stats) {
/* Link into list of extant tcaches. */
malloc_mutex_lock(&arena->lock);
ql_elm_new(tcache, link);
ql_tail_insert(&arena->tcache_ql, tcache, link);
malloc_mutex_unlock(&arena->lock);
}
tcache->arena = arena;
}
void
tcache_arena_dissociate(tcache_t *tcache)
{
if (config_stats) {
/* Unlink from list of extant tcaches. */
malloc_mutex_lock(&tcache->arena->lock);
ql_remove(&tcache->arena->tcache_ql, tcache, link);
malloc_mutex_unlock(&tcache->arena->lock);
tcache_stats_merge(tcache, tcache->arena);
}
}
tcache_t *
tcache_create(arena_t *arena)
{
tcache_t *tcache;
size_t size, stack_offset;
unsigned i;
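/*
 * A tcache is a single contiguous allocation: the header, the tbins
 * array, and then each bin's pointer stack starting at stack_offset.
 */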
size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
/* Naturally align the pointer stacks. */
size = PTR_CEILING(size);
stack_offset = size;
size += stack_nelms * sizeof(void *);
/*
* Round up to the nearest multiple of the cacheline size, in order to
* avoid the possibility of false cacheline sharing.
*
* That this works relies on the same logic as in ipalloc(), but we
* cannot directly call ipalloc() here due to tcache bootstrapping
* issues.
*/
size = (size + CACHELINE_MASK) & (-CACHELINE);
if (size <= SMALL_MAXCLASS)
tcache = (tcache_t *)arena_malloc_small(arena, size, true);
else if (size <= tcache_maxclass)
tcache = (tcache_t *)arena_malloc_large(arena, size, true);
else
tcache = (tcache_t *)icallocx(size, false, arena);
if (tcache == NULL)
return (NULL);
tcache_arena_associate(tcache, arena);
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
for (i = 0; i < nhbins; i++) {
tcache->tbins[i].lg_fill_div = 1;
tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
(uintptr_t)stack_offset);
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
}
tcache_tsd_set(&tcache);
return (tcache);
}
void
tcache_destroy(tcache_t *tcache)
{
unsigned i;
size_t tcache_size;
tcache_arena_dissociate(tcache);
for (i = 0; i < NBINS; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
tcache_bin_flush_small(tbin, i, 0, tcache);
if (config_stats && tbin->tstats.nrequests != 0) {
arena_t *arena = tcache->arena;
arena_bin_t *bin = &arena->bins[i];
malloc_mutex_lock(&bin->lock);
bin->stats.nrequests += tbin->tstats.nrequests;
malloc_mutex_unlock(&bin->lock);
}
}
for (; i < nhbins; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
tcache_bin_flush_large(tbin, i, 0, tcache);
if (config_stats && tbin->tstats.nrequests != 0) {
arena_t *arena = tcache->arena;
malloc_mutex_lock(&arena->lock);
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[i - NBINS].nrequests +=
tbin->tstats.nrequests;
malloc_mutex_unlock(&arena->lock);
}
}
if (config_prof && tcache->prof_accumbytes > 0 &&
arena_prof_accum(tcache->arena, tcache->prof_accumbytes))
prof_idump();
tcache_size = arena_salloc(tcache, false);
if (tcache_size <= SMALL_MAXCLASS) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
LG_PAGE;
arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm);
} else if (tcache_size <= tcache_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
arena_dalloc_large(arena, chunk, tcache);
} else
idallocx(tcache, false);
}
void
tcache_thread_cleanup(void *arg)
{
tcache_t *tcache = *(tcache_t **)arg;
if (tcache == TCACHE_STATE_DISABLED) {
/* Do nothing. */
} else if (tcache == TCACHE_STATE_REINCARNATED) {
/*
* Another destructor called an allocator function after this
* destructor was called. Reset tcache to
* TCACHE_STATE_PURGATORY in order to receive another callback.
*/
tcache = TCACHE_STATE_PURGATORY;
tcache_tsd_set(&tcache);
} else if (tcache == TCACHE_STATE_PURGATORY) {
/*
* The previous time this destructor was called, we set the key
* to TCACHE_STATE_PURGATORY so that other destructors wouldn't
* cause re-creation of the tcache. This time, do nothing, so
* that the destructor will not be called again.
*/
} else if (tcache != NULL) {
assert(tcache != TCACHE_STATE_PURGATORY);
tcache_destroy(tcache);
tcache = TCACHE_STATE_PURGATORY;
tcache_tsd_set(&tcache);
}
}
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
unsigned i;
/* Merge and reset tcache stats. */
for (i = 0; i < NBINS; i++) {
arena_bin_t *bin = &arena->bins[i];
tcache_bin_t *tbin = &tcache->tbins[i];
malloc_mutex_lock(&bin->lock);
bin->stats.nrequests += tbin->tstats.nrequests;
malloc_mutex_unlock(&bin->lock);
tbin->tstats.nrequests = 0;
}
for (; i < nhbins; i++) {
malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
tcache_bin_t *tbin = &tcache->tbins[i];
arena->stats.nrequests_large += tbin->tstats.nrequests;
lstats->nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
}
bool
tcache_boot0(void)
{
unsigned i;
/*
* If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is
* known.
*/
if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS)
tcache_maxclass = SMALL_MAXCLASS;
else if ((1U << opt_lg_tcache_max) > arena_maxclass)
tcache_maxclass = arena_maxclass;
else
tcache_maxclass = (1U << opt_lg_tcache_max);
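/*
 * Cache bins cover every small size class plus one large size class per
 * page multiple up to tcache_maxclass.
 */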
nhbins = NBINS + (tcache_maxclass >> LG_PAGE);
/* Initialize tcache_bin_info. */
tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
sizeof(tcache_bin_info_t));
if (tcache_bin_info == NULL)
return (true);
stack_nelms = 0;
for (i = 0; i < NBINS; i++) {
if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
tcache_bin_info[i].ncached_max =
(arena_bin_info[i].nregs << 1);
} else {
tcache_bin_info[i].ncached_max =
TCACHE_NSLOTS_SMALL_MAX;
}
stack_nelms += tcache_bin_info[i].ncached_max;
}
for (; i < nhbins; i++) {
tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
stack_nelms += tcache_bin_info[i].ncached_max;
}
return (false);
}
bool
tcache_boot1(void)
{
if (tcache_tsd_boot() || tcache_enabled_tsd_boot())
return (true);
return (false);
}

107
src/rt/jemalloc/src/tsd.c Normal file

@ -0,0 +1,107 @@
#define JEMALLOC_TSD_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Data. */
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
/******************************************************************************/
void *
malloc_tsd_malloc(size_t size)
{
/* Avoid choose_arena() in order to dodge bootstrapping issues. */
return (arena_malloc(arenas[0], size, false, false));
}
void
malloc_tsd_dalloc(void *wrapper)
{
idalloc(wrapper);
}
void
malloc_tsd_no_cleanup(void *arg)
{
not_reached();
}
#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void)
{
bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
unsigned i;
for (i = 0; i < ncleanups; i++)
pending[i] = true;
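/*
 * Iterate until no cleanup reports further pending work, since one
 * destructor can re-create state that another destructor must tear down.
 */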
do {
again = false;
for (i = 0; i < ncleanups; i++) {
if (pending[i]) {
pending[i] = cleanups[i]();
if (pending[i])
again = true;
}
}
} while (again);
}
#endif
void
malloc_tsd_cleanup_register(bool (*f)(void))
{
assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
cleanups[ncleanups] = f;
ncleanups++;
}
void
malloc_tsd_boot(void)
{
ncleanups = 0;
}
#ifdef _WIN32
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
{
switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
case DLL_THREAD_ATTACH:
isthreaded = true;
break;
#endif
case DLL_THREAD_DETACH:
_malloc_thread_cleanup();
break;
default:
break;
}
return (true);
}
#ifdef _MSC_VER
# ifdef _M_IX86
# pragma comment(linker, "/INCLUDE:__tls_used")
# else
# pragma comment(linker, "/INCLUDE:_tls_used")
# endif
# pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL,
DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

641
src/rt/jemalloc/src/util.c Normal file

@ -0,0 +1,641 @@
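/*
 * Define simple versions of the assertion macros before anything else, so
 * that an assertion failure inside malloc_*printf() itself cannot recurse.
 */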
#define assert(e) do { \
if (config_debug && !(e)) { \
malloc_write("<jemalloc>: Failed assertion\n"); \
abort(); \
} \
} while (0)
#define not_reached() do { \
if (config_debug) { \
malloc_write("<jemalloc>: Unreachable code reached\n"); \
abort(); \
} \
} while (0)
#define not_implemented() do { \
if (config_debug) { \
malloc_write("<jemalloc>: Not implemented\n"); \
abort(); \
} \
} while (0)
#define JEMALLOC_UTIL_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static void wrtmessage(void *cbopaque, const char *s);
#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1)
static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s,
size_t *slen_p);
#define D2S_BUFSIZE (1 + U2S_BUFSIZE)
static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p);
#define O2S_BUFSIZE (1 + U2S_BUFSIZE)
static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p);
#define X2S_BUFSIZE (2 + U2S_BUFSIZE)
static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
size_t *slen_p);
/******************************************************************************/
/* malloc_message() setup. */
static void
wrtmessage(void *cbopaque, const char *s)
{
#ifdef SYS_write
/*
* Use syscall(2) rather than write(2) when possible in order to avoid
* the possibility of memory allocation within libc. This is necessary
* on FreeBSD; most operating systems do not have this problem though.
*/
UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s));
#else
UNUSED int result = write(STDERR_FILENO, s, strlen(s));
#endif
}
JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s);
/*
* Wrapper around malloc_message() that avoids the need for
* je_malloc_message(...) throughout the code.
*/
void
malloc_write(const char *s)
{
if (je_malloc_message != NULL)
je_malloc_message(NULL, s);
else
wrtmessage(NULL, s);
}
/*
* glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so
* provide a wrapper.
*/
int
buferror(char *buf, size_t buflen)
{
#ifdef _WIN32
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0,
(LPSTR)buf, buflen, NULL);
return (0);
#elif defined(_GNU_SOURCE)
char *b = strerror_r(errno, buf, buflen);
if (b != buf) {
strncpy(buf, b, buflen);
buf[buflen-1] = '\0';
}
return (0);
#else
return (strerror_r(errno, buf, buflen));
#endif
}
uintmax_t
malloc_strtoumax(const char *nptr, char **endptr, int base)
{
uintmax_t ret, digit;
int b;
bool neg;
const char *p, *ns;
if (base < 0 || base == 1 || base > 36) {
set_errno(EINVAL);
return (UINTMAX_MAX);
}
b = base;
/* Swallow leading whitespace and get sign, if any. */
neg = false;
p = nptr;
while (true) {
switch (*p) {
case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
p++;
break;
case '-':
neg = true;
/* Fall through. */
case '+':
p++;
/* Fall through. */
default:
goto label_prefix;
}
}
/* Get prefix, if any. */
label_prefix:
/*
* Note where the first non-whitespace/sign character is so that it is
* possible to tell whether any digits are consumed (e.g., " 0" vs.
* " -x").
*/
ns = p;
if (*p == '0') {
switch (p[1]) {
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7':
if (b == 0)
b = 8;
if (b == 8)
p++;
break;
case 'x':
switch (p[2]) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
if (b == 0)
b = 16;
if (b == 16)
p += 2;
break;
default:
break;
}
break;
default:
break;
}
}
if (b == 0)
b = 10;
/* Convert. */
ret = 0;
while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b)
|| (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b)
|| (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) {
uintmax_t pret = ret;
ret *= b;
ret += digit;
if (ret < pret) {
/* Overflow. */
set_errno(ERANGE);
return (UINTMAX_MAX);
}
p++;
}
if (neg)
ret = -ret;
if (endptr != NULL) {
if (p == ns) {
/* No characters were converted. */
*endptr = (char *)nptr;
} else
*endptr = (char *)p;
}
return (ret);
}
static char *
u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p)
{
unsigned i;
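/*
 * Digits are generated least-significant first, working backward from the
 * end of the caller-supplied buffer; the return value points at the most
 * significant digit within that buffer.
 */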
i = U2S_BUFSIZE - 1;
s[i] = '\0';
switch (base) {
case 10:
do {
i--;
s[i] = "0123456789"[x % (uint64_t)10];
x /= (uint64_t)10;
} while (x > 0);
break;
case 16: {
const char *digits = (uppercase)
? "0123456789ABCDEF"
: "0123456789abcdef";
do {
i--;
s[i] = digits[x & 0xf];
x >>= 4;
} while (x > 0);
break;
} default: {
const char *digits = (uppercase)
? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
: "0123456789abcdefghijklmnopqrstuvwxyz";
assert(base >= 2 && base <= 36);
do {
i--;
s[i] = digits[x % (uint64_t)base];
x /= (uint64_t)base;
} while (x > 0);
}}
*slen_p = U2S_BUFSIZE - 1 - i;
return (&s[i]);
}
static char *
d2s(intmax_t x, char sign, char *s, size_t *slen_p)
{
bool neg;
if ((neg = (x < 0)))
x = -x;
s = u2s(x, 10, false, s, slen_p);
if (neg)
sign = '-';
switch (sign) {
case '-':
if (neg == false)
break;
/* Fall through. */
case ' ':
case '+':
s--;
(*slen_p)++;
*s = sign;
break;
default: not_reached();
}
return (s);
}
static char *
o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p)
{
s = u2s(x, 8, false, s, slen_p);
if (alt_form && *s != '0') {
s--;
(*slen_p)++;
*s = '0';
}
return (s);
}
static char *
x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p)
{
s = u2s(x, 16, uppercase, s, slen_p);
if (alt_form) {
s -= 2;
(*slen_p) += 2;
memcpy(s, uppercase ? "0X" : "0x", 2);
}
return (s);
}
int
malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
{
int ret;
size_t i;
const char *f;
#define APPEND_C(c) do { \
if (i < size) \
str[i] = (c); \
i++; \
} while (0)
#define APPEND_S(s, slen) do { \
if (i < size) { \
size_t cpylen = (slen <= size - i) ? slen : size - i; \
memcpy(&str[i], s, cpylen); \
} \
i += slen; \
} while (0)
#define APPEND_PADDED_S(s, slen, width, left_justify) do { \
/* Left padding. */ \
size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \
(size_t)width - slen : 0); \
if (left_justify == false && pad_len != 0) { \
size_t j; \
for (j = 0; j < pad_len; j++) \
APPEND_C(' '); \
} \
/* Value. */ \
APPEND_S(s, slen); \
/* Right padding. */ \
if (left_justify && pad_len != 0) { \
size_t j; \
for (j = 0; j < pad_len; j++) \
APPEND_C(' '); \
} \
} while (0)
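/*
 * In GET_ARG_NUMERIC, the 0x80 bit on the length character selects the
 * unsigned variant of the corresponding integer type.
 */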
#define GET_ARG_NUMERIC(val, len) do { \
switch (len) { \
case '?': \
val = va_arg(ap, int); \
break; \
case '?' | 0x80: \
val = va_arg(ap, unsigned int); \
break; \
case 'l': \
val = va_arg(ap, long); \
break; \
case 'l' | 0x80: \
val = va_arg(ap, unsigned long); \
break; \
case 'q': \
val = va_arg(ap, long long); \
break; \
case 'q' | 0x80: \
val = va_arg(ap, unsigned long long); \
break; \
case 'j': \
val = va_arg(ap, intmax_t); \
break; \
case 't': \
val = va_arg(ap, ptrdiff_t); \
break; \
case 'z': \
val = va_arg(ap, ssize_t); \
break; \
case 'z' | 0x80: \
val = va_arg(ap, size_t); \
break; \
case 'p': /* Synthetic; used for %p. */ \
val = va_arg(ap, uintptr_t); \
break; \
default: not_reached(); \
} \
} while (0)
i = 0;
f = format;
while (true) {
switch (*f) {
case '\0': goto label_out;
case '%': {
bool alt_form = false;
bool left_justify = false;
bool plus_space = false;
bool plus_plus = false;
int prec = -1;
int width = -1;
unsigned char len = '?';
f++;
if (*f == '%') {
/* %% */
APPEND_C(*f);
break;
}
/* Flags. */
while (true) {
switch (*f) {
case '#':
assert(alt_form == false);
alt_form = true;
break;
case '-':
assert(left_justify == false);
left_justify = true;
break;
case ' ':
assert(plus_space == false);
plus_space = true;
break;
case '+':
assert(plus_plus == false);
plus_plus = true;
break;
default: goto label_width;
}
f++;
}
/* Width. */
label_width:
switch (*f) {
case '*':
width = va_arg(ap, int);
f++;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': {
uintmax_t uwidth;
set_errno(0);
uwidth = malloc_strtoumax(f, (char **)&f, 10);
assert(uwidth != UINTMAX_MAX || get_errno() !=
ERANGE);
width = (int)uwidth;
if (*f == '.') {
f++;
goto label_precision;
} else
goto label_length;
break;
} case '.':
f++;
goto label_precision;
default: goto label_length;
}
/* Precision. */
label_precision:
switch (*f) {
case '*':
prec = va_arg(ap, int);
f++;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': {
uintmax_t uprec;
set_errno(0);
uprec = malloc_strtoumax(f, (char **)&f, 10);
assert(uprec != UINTMAX_MAX || get_errno() !=
ERANGE);
prec = (int)uprec;
break;
}
default: break;
}
/* Length. */
label_length:
switch (*f) {
case 'l':
f++;
if (*f == 'l') {
len = 'q';
f++;
} else
len = 'l';
break;
case 'j':
len = 'j';
f++;
break;
case 't':
len = 't';
f++;
break;
case 'z':
len = 'z';
f++;
break;
default: break;
}
/* Conversion specifier. */
switch (*f) {
char *s;
size_t slen;
case 'd': case 'i': {
intmax_t val JEMALLOC_CC_SILENCE_INIT(0);
char buf[D2S_BUFSIZE];
GET_ARG_NUMERIC(val, len);
s = d2s(val, (plus_plus ? '+' : (plus_space ?
' ' : '-')), buf, &slen);
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
} case 'o': {
uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
char buf[O2S_BUFSIZE];
GET_ARG_NUMERIC(val, len | 0x80);
s = o2s(val, alt_form, buf, &slen);
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
} case 'u': {
uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
char buf[U2S_BUFSIZE];
GET_ARG_NUMERIC(val, len | 0x80);
s = u2s(val, 10, false, buf, &slen);
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
} case 'x': case 'X': {
uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
char buf[X2S_BUFSIZE];
GET_ARG_NUMERIC(val, len | 0x80);
s = x2s(val, alt_form, *f == 'X', buf, &slen);
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
} case 'c': {
unsigned char val;
char buf[2];
assert(len == '?' || len == 'l');
assert_not_implemented(len != 'l');
val = va_arg(ap, int);
buf[0] = val;
buf[1] = '\0';
APPEND_PADDED_S(buf, 1, width, left_justify);
f++;
break;
} case 's':
assert(len == '?' || len == 'l');
assert_not_implemented(len != 'l');
s = va_arg(ap, char *);
slen = (prec == -1) ? strlen(s) : prec;
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
case 'p': {
uintmax_t val;
char buf[X2S_BUFSIZE];
GET_ARG_NUMERIC(val, 'p');
s = x2s(val, true, false, buf, &slen);
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
}
default: not_implemented();
}
break;
} default: {
APPEND_C(*f);
f++;
break;
}}
}
label_out:
if (i < size)
str[i] = '\0';
else
str[size - 1] = '\0';
ret = i;
#undef APPEND_C
#undef APPEND_S
#undef APPEND_PADDED_S
#undef GET_ARG_NUMERIC
return (ret);
}
JEMALLOC_ATTR(format(printf, 3, 4))
int
malloc_snprintf(char *str, size_t size, const char *format, ...)
{
int ret;
va_list ap;
va_start(ap, format);
ret = malloc_vsnprintf(str, size, format, ap);
va_end(ap);
return (ret);
}
void
malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, va_list ap)
{
char buf[MALLOC_PRINTF_BUFSIZE];
if (write_cb == NULL) {
/*
* The caller did not provide an alternate write_cb callback
* function, so use the default one. malloc_write() is an
* inline function, so use malloc_message() directly here.
*/
write_cb = (je_malloc_message != NULL) ? je_malloc_message :
wrtmessage;
cbopaque = NULL;
}
malloc_vsnprintf(buf, sizeof(buf), format, ap);
write_cb(cbopaque, buf);
}
/*
* Print to a callback function in such a way as to (hopefully) avoid memory
* allocation.
*/
JEMALLOC_ATTR(format(printf, 3, 4))
void
malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, ...)
{
va_list ap;
va_start(ap, format);
malloc_vcprintf(write_cb, cbopaque, format, ap);
va_end(ap);
}
/* Print to stderr in such a way as to avoid memory allocation. */
JEMALLOC_ATTR(format(printf, 1, 2))
void
malloc_printf(const char *format, ...)
{
va_list ap;
va_start(ap, format);
malloc_vcprintf(NULL, NULL, format, ap);
va_end(ap);
}

258
src/rt/jemalloc/src/zone.c Normal file

@ -0,0 +1,258 @@
#include "jemalloc/internal/jemalloc_internal.h"
#ifndef JEMALLOC_ZONE
# error "This source file is for zones on Darwin (OS X)."
#endif
/*
* The malloc_default_purgeable_zone function is only available on >= 10.6.
* We need to check whether it is present at runtime, thus the weak_import.
*/
extern malloc_zone_t *malloc_default_purgeable_zone(void)
JEMALLOC_ATTR(weak_import);
/******************************************************************************/
/* Data. */
static malloc_zone_t zone;
static struct malloc_introspection_t zone_introspect;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static size_t zone_size(malloc_zone_t *zone, void *ptr);
static void *zone_malloc(malloc_zone_t *zone, size_t size);
static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
static void *zone_valloc(malloc_zone_t *zone, size_t size);
static void zone_free(malloc_zone_t *zone, void *ptr);
static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
#if (JEMALLOC_ZONE_VERSION >= 5)
static void *zone_memalign(malloc_zone_t *zone, size_t alignment,
size_t size);
#endif
#if (JEMALLOC_ZONE_VERSION >= 6)
static void zone_free_definite_size(malloc_zone_t *zone, void *ptr,
size_t size);
#endif
static void *zone_destroy(malloc_zone_t *zone);
static size_t zone_good_size(malloc_zone_t *zone, size_t size);
static void zone_force_lock(malloc_zone_t *zone);
static void zone_force_unlock(malloc_zone_t *zone);
/******************************************************************************/
/*
* Functions.
*/
static size_t
zone_size(malloc_zone_t *zone, void *ptr)
{
/*
* There appear to be places within Darwin (such as setenv(3)) that
* cause calls to this function with pointers that *no* zone owns. If
* we knew that all pointers were owned by *some* zone, we could split
* our zone into two parts, and use one as the default allocator and
* the other as the default deallocator/reallocator. Since that will
* not work in practice, we must check all pointers to assure that they
* reside within a mapped chunk before determining size.
*/
return (ivsalloc(ptr, config_prof));
}
static void *
zone_malloc(malloc_zone_t *zone, size_t size)
{
return (je_malloc(size));
}
static void *
zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
{
return (je_calloc(num, size));
}
static void *
zone_valloc(malloc_zone_t *zone, size_t size)
{
void *ret = NULL; /* Assignment avoids useless compiler warning. */
je_posix_memalign(&ret, PAGE, size);
return (ret);
}
static void
zone_free(malloc_zone_t *zone, void *ptr)
{
if (ivsalloc(ptr, config_prof) != 0) {
je_free(ptr);
return;
}
free(ptr);
}
static void *
zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
{
if (ivsalloc(ptr, config_prof) != 0)
return (je_realloc(ptr, size));
return (realloc(ptr, size));
}
#if (JEMALLOC_ZONE_VERSION >= 5)
static void *
zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
{
void *ret = NULL; /* Assignment avoids useless compiler warning. */
je_posix_memalign(&ret, alignment, size);
return (ret);
}
#endif
#if (JEMALLOC_ZONE_VERSION >= 6)
static void
zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
{
if (ivsalloc(ptr, config_prof) != 0) {
assert(ivsalloc(ptr, config_prof) == size);
je_free(ptr);
return;
}
free(ptr);
}
#endif
static void *
zone_destroy(malloc_zone_t *zone)
{
/* This function should never be called. */
assert(false);
return (NULL);
}
static size_t
zone_good_size(malloc_zone_t *zone, size_t size)
{
if (size == 0)
size = 1;
return (s2u(size));
}
static void
zone_force_lock(malloc_zone_t *zone)
{
if (isthreaded)
jemalloc_prefork();
}
static void
zone_force_unlock(malloc_zone_t *zone)
{
if (isthreaded)
jemalloc_postfork_parent();
}
JEMALLOC_ATTR(constructor)
void
register_zone(void)
{
/*
* If something else replaced the system default zone allocator, don't
* register jemalloc's.
*/
malloc_zone_t *default_zone = malloc_default_zone();
if (!default_zone->zone_name ||
strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) {
return;
}
zone.size = (void *)zone_size;
zone.malloc = (void *)zone_malloc;
zone.calloc = (void *)zone_calloc;
zone.valloc = (void *)zone_valloc;
zone.free = (void *)zone_free;
zone.realloc = (void *)zone_realloc;
zone.destroy = (void *)zone_destroy;
zone.zone_name = "jemalloc_zone";
zone.batch_malloc = NULL;
zone.batch_free = NULL;
zone.introspect = &zone_introspect;
zone.version = JEMALLOC_ZONE_VERSION;
#if (JEMALLOC_ZONE_VERSION >= 5)
zone.memalign = zone_memalign;
#endif
#if (JEMALLOC_ZONE_VERSION >= 6)
zone.free_definite_size = zone_free_definite_size;
#endif
#if (JEMALLOC_ZONE_VERSION >= 8)
zone.pressure_relief = NULL;
#endif
zone_introspect.enumerator = NULL;
zone_introspect.good_size = (void *)zone_good_size;
zone_introspect.check = NULL;
zone_introspect.print = NULL;
zone_introspect.log = NULL;
zone_introspect.force_lock = (void *)zone_force_lock;
zone_introspect.force_unlock = (void *)zone_force_unlock;
zone_introspect.statistics = NULL;
#if (JEMALLOC_ZONE_VERSION >= 6)
zone_introspect.zone_locked = NULL;
#endif
#if (JEMALLOC_ZONE_VERSION >= 7)
zone_introspect.enable_discharge_checking = NULL;
zone_introspect.disable_discharge_checking = NULL;
zone_introspect.discharge = NULL;
#ifdef __BLOCKS__
zone_introspect.enumerate_discharged_pointers = NULL;
#else
zone_introspect.enumerate_unavailable_without_blocks = NULL;
#endif
#endif
/*
* The default purgeable zone is created lazily by OSX's libc. It uses
* the default zone when it is created for "small" allocations
* (< 15 KiB), but assumes the default zone is a scalable_zone. This
* obviously fails when the default zone is the jemalloc zone, so
* malloc_default_purgeable_zone is called beforehand so that the
* default purgeable zone is created when the default zone is still
* a scalable_zone. As purgeable zones only exist on >= 10.6, we need
* to check for the existence of malloc_default_purgeable_zone() at
* run time.
*/
if (malloc_default_purgeable_zone != NULL)
malloc_default_purgeable_zone();
/* Register the custom zone. At this point it won't be the default. */
malloc_zone_register(&zone);
/*
* Unregister and reregister the default zone. On OSX >= 10.6,
* unregistering takes the last registered zone and places it at the
* location of the specified zone. Unregistering the default zone thus
* makes the last registered one the default. On OSX < 10.6,
* unregistering shifts all registered zones. The first registered zone
* then becomes the default.
*/
do {
default_zone = malloc_default_zone();
malloc_zone_unregister(default_zone);
malloc_zone_register(default_zone);
} while (malloc_default_zone() != &zone);
}


@ -0,0 +1,67 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#define NTHREADS 10
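/*
 * Each thread creates a fresh arena via "arenas.extend", optionally sets
 * its dss precedence, and then allocates from it with ALLOCM_ARENA.
 */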
void *
je_thread_start(void *arg)
{
unsigned thread_ind = (unsigned)(uintptr_t)arg;
unsigned arena_ind;
int r;
void *p;
size_t rsz, sz;
sz = sizeof(arena_ind);
if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0)
!= 0) {
malloc_printf("Error in arenas.extend\n");
abort();
}
if (thread_ind % 4 != 3) {
size_t mib[3];
size_t miblen = sizeof(mib) / sizeof(size_t);
const char *dss_precs[] = {"disabled", "primary", "secondary"};
const char *dss = dss_precs[thread_ind % 4];
if (mallctlnametomib("arena.0.dss", mib, &miblen) != 0) {
malloc_printf("Error in mallctlnametomib()\n");
abort();
}
mib[1] = arena_ind;
if (mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss,
sizeof(const char *))) {
malloc_printf("Error in mallctlbymib()\n");
abort();
}
}
r = allocm(&p, &rsz, 1, ALLOCM_ARENA(arena_ind));
if (r != ALLOCM_SUCCESS) {
malloc_printf("Unexpected allocm() error\n");
abort();
}
dallocm(p, 0);
return (NULL);
}
int
main(void)
{
je_thread_t threads[NTHREADS];
unsigned i;
malloc_printf("Test begin\n");
for (i = 0; i < NTHREADS; i++) {
je_thread_create(&threads[i], je_thread_start,
(void *)(uintptr_t)i);
}
for (i = 0; i < NTHREADS; i++)
je_thread_join(threads[i], NULL);
malloc_printf("Test end\n");
return (0);
}


@ -0,0 +1,2 @@
Test begin
Test end


@ -0,0 +1,119 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#define CHUNK 0x400000
/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
#define MAXALIGN ((size_t)0x2000000LU)
#define NITER 4
int
main(void)
{
size_t alignment, size, total;
unsigned i;
void *p, *ps[NITER];
malloc_printf("Test begin\n");
/* Test error conditions. */
alignment = 0;
set_errno(0);
p = aligned_alloc(alignment, 1);
if (p != NULL || get_errno() != EINVAL) {
malloc_printf(
"Expected error for invalid alignment %zu\n", alignment);
}
for (alignment = sizeof(size_t); alignment < MAXALIGN;
alignment <<= 1) {
set_errno(0);
p = aligned_alloc(alignment + 1, 1);
if (p != NULL || get_errno() != EINVAL) {
malloc_printf(
"Expected error for invalid alignment %zu\n",
alignment + 1);
}
}
#if LG_SIZEOF_PTR == 3
alignment = UINT64_C(0x8000000000000000);
size = UINT64_C(0x8000000000000000);
#else
alignment = 0x80000000LU;
size = 0x80000000LU;
#endif
set_errno(0);
p = aligned_alloc(alignment, size);
if (p != NULL || get_errno() != ENOMEM) {
malloc_printf(
"Expected error for aligned_alloc(%zu, %zu)\n",
alignment, size);
}
#if LG_SIZEOF_PTR == 3
alignment = UINT64_C(0x4000000000000000);
size = UINT64_C(0x8400000000000001);
#else
alignment = 0x40000000LU;
size = 0x84000001LU;
#endif
set_errno(0);
p = aligned_alloc(alignment, size);
if (p != NULL || get_errno() != ENOMEM) {
malloc_printf(
"Expected error for aligned_alloc(%zu, %zu)\n",
alignment, size);
}
alignment = 0x10LU;
#if LG_SIZEOF_PTR == 3
size = UINT64_C(0xfffffffffffffff0);
#else
size = 0xfffffff0LU;
#endif
set_errno(0);
p = aligned_alloc(alignment, size);
if (p != NULL || get_errno() != ENOMEM) {
malloc_printf(
"Expected error for aligned_alloc(&p, %zu, %zu)\n",
alignment, size);
}
for (i = 0; i < NITER; i++)
ps[i] = NULL;
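/*
 * Sweep alignments from 8 bytes up to MAXALIGN, allocating NITER objects
 * per size and capping total usable bytes at 2*MAXALIGN per alignment.
 */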
for (alignment = 8;
alignment <= MAXALIGN;
alignment <<= 1) {
total = 0;
malloc_printf("Alignment: %zu\n", alignment);
for (size = 1;
size < 3 * alignment && size < (1U << 31);
size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
for (i = 0; i < NITER; i++) {
ps[i] = aligned_alloc(alignment, size);
if (ps[i] == NULL) {
char buf[BUFERROR_BUF];
buferror(buf, sizeof(buf));
malloc_printf(
"Error for size %zu (%#zx): %s\n",
size, size, buf);
exit(1);
}
total += malloc_usable_size(ps[i]);
if (total >= (MAXALIGN << 1))
break;
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
free(ps[i]);
ps[i] = NULL;
}
}
}
}
malloc_printf("Test end\n");
return (0);
}


@ -0,0 +1,25 @@
Test begin
Alignment: 8
Alignment: 16
Alignment: 32
Alignment: 64
Alignment: 128
Alignment: 256
Alignment: 512
Alignment: 1024
Alignment: 2048
Alignment: 4096
Alignment: 8192
Alignment: 16384
Alignment: 32768
Alignment: 65536
Alignment: 131072
Alignment: 262144
Alignment: 524288
Alignment: 1048576
Alignment: 2097152
Alignment: 4194304
Alignment: 8388608
Alignment: 16777216
Alignment: 33554432
Test end


@ -0,0 +1,118 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
void *
je_thread_start(void *arg)
{
int err;
void *p;
uint64_t a0, a1, d0, d1;
uint64_t *ap0, *ap1, *dp0, *dp1;
size_t sz, usize;
sz = sizeof(a0);
if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto label_return;
}
malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
sz = sizeof(ap0);
if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto label_return;
}
malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
assert(*ap0 == a0);
sz = sizeof(d0);
if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto label_return;
}
malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
sz = sizeof(dp0);
if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto label_return;
}
malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
assert(*dp0 == d0);
p = malloc(1);
if (p == NULL) {
malloc_printf("%s(): Error in malloc()\n", __func__);
exit(1);
}
sz = sizeof(a1);
mallctl("thread.allocated", &a1, &sz, NULL, 0);
sz = sizeof(ap1);
mallctl("thread.allocatedp", &ap1, &sz, NULL, 0);
assert(*ap1 == a1);
assert(ap0 == ap1);
usize = malloc_usable_size(p);
assert(a0 + usize <= a1);
free(p);
sz = sizeof(d1);
mallctl("thread.deallocated", &d1, &sz, NULL, 0);
sz = sizeof(dp1);
mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0);
assert(*dp1 == d1);
assert(dp0 == dp1);
assert(d0 + usize <= d1);
label_return:
return (NULL);
}
int
main(void)
{
int ret = 0;
je_thread_t thread;
malloc_printf("Test begin\n");
je_thread_start(NULL);
je_thread_create(&thread, je_thread_start, NULL);
je_thread_join(thread, (void *)&ret);
je_thread_start(NULL);
je_thread_create(&thread, je_thread_start, NULL);
je_thread_join(thread, (void *)&ret);
je_thread_start(NULL);
malloc_printf("Test end\n");
return (ret);
}


@ -0,0 +1,2 @@
Test begin
Test end


@ -0,0 +1,194 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#define CHUNK 0x400000
/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
#define MAXALIGN ((size_t)0x2000000LU)
#define NITER 4
int
main(void)
{
int r;
void *p;
size_t nsz, rsz, sz, alignment, total;
unsigned i;
void *ps[NITER];
malloc_printf("Test begin\n");
sz = 42;
nsz = 0;
r = nallocm(&nsz, sz, 0);
if (r != ALLOCM_SUCCESS) {
malloc_printf("Unexpected nallocm() error\n");
abort();
}
rsz = 0;
r = allocm(&p, &rsz, sz, 0);
if (r != ALLOCM_SUCCESS) {
malloc_printf("Unexpected allocm() error\n");
abort();
}
if (rsz < sz)
malloc_printf("Real size smaller than expected\n");
if (nsz != rsz)
malloc_printf("nallocm()/allocm() rsize mismatch\n");
if (dallocm(p, 0) != ALLOCM_SUCCESS)
malloc_printf("Unexpected dallocm() error\n");
r = allocm(&p, NULL, sz, 0);
if (r != ALLOCM_SUCCESS) {
malloc_printf("Unexpected allocm() error\n");
abort();
}
if (dallocm(p, 0) != ALLOCM_SUCCESS)
malloc_printf("Unexpected dallocm() error\n");
nsz = 0;
r = nallocm(&nsz, sz, ALLOCM_ZERO);
if (r != ALLOCM_SUCCESS) {
malloc_printf("Unexpected nallocm() error\n");
abort();
}
rsz = 0;
r = allocm(&p, &rsz, sz, ALLOCM_ZERO);
if (r != ALLOCM_SUCCESS) {
malloc_printf("Unexpected allocm() error\n");
abort();
}
if (nsz != rsz)
malloc_printf("nallocm()/allocm() rsize mismatch\n");
if (dallocm(p, 0) != ALLOCM_SUCCESS)
malloc_printf("Unexpected dallocm() error\n");
#if LG_SIZEOF_PTR == 3
alignment = UINT64_C(0x8000000000000000);
sz = UINT64_C(0x8000000000000000);
#else
alignment = 0x80000000LU;
sz = 0x80000000LU;
#endif
nsz = 0;
r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
malloc_printf(
"Expected error for nallocm(&nsz, %zu, %#x)\n",
sz, ALLOCM_ALIGN(alignment));
}
rsz = 0;
r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
malloc_printf(
"Expected error for allocm(&p, %zu, %#x)\n",
sz, ALLOCM_ALIGN(alignment));
}
if (nsz != rsz)
malloc_printf("nallocm()/allocm() rsize mismatch\n");
#if LG_SIZEOF_PTR == 3
alignment = UINT64_C(0x4000000000000000);
sz = UINT64_C(0x8400000000000001);
#else
alignment = 0x40000000LU;
sz = 0x84000001LU;
#endif
nsz = 0;
r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected nallocm() error\n");
rsz = 0;
r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
malloc_printf(
"Expected error for allocm(&p, %zu, %#x)\n",
sz, ALLOCM_ALIGN(alignment));
}
alignment = 0x10LU;
#if LG_SIZEOF_PTR == 3
sz = UINT64_C(0xfffffffffffffff0);
#else
sz = 0xfffffff0LU;
#endif
nsz = 0;
r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
malloc_printf(
"Expected error for nallocm(&nsz, %zu, %#x)\n",
sz, ALLOCM_ALIGN(alignment));
}
rsz = 0;
r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
if (r == ALLOCM_SUCCESS) {
malloc_printf(
"Expected error for allocm(&p, %zu, %#x)\n",
sz, ALLOCM_ALIGN(alignment));
}
if (nsz != rsz)
malloc_printf("nallocm()/allocm() rsize mismatch\n");
for (i = 0; i < NITER; i++)
ps[i] = NULL;
for (alignment = 8;
alignment <= MAXALIGN;
alignment <<= 1) {
total = 0;
malloc_printf("Alignment: %zu\n", alignment);
for (sz = 1;
sz < 3 * alignment && sz < (1U << 31);
sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
for (i = 0; i < NITER; i++) {
nsz = 0;
r = nallocm(&nsz, sz,
ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
if (r != ALLOCM_SUCCESS) {
malloc_printf(
"nallocm() error for size %zu"
" (%#zx): %d\n",
sz, sz, r);
exit(1);
}
rsz = 0;
r = allocm(&ps[i], &rsz, sz,
ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
if (r != ALLOCM_SUCCESS) {
malloc_printf(
"allocm() error for size %zu"
" (%#zx): %d\n",
sz, sz, r);
exit(1);
}
if (rsz < sz) {
malloc_printf(
"Real size smaller than"
" expected\n");
}
if (nsz != rsz) {
malloc_printf(
"nallocm()/allocm() rsize"
" mismatch\n");
}
if ((uintptr_t)ps[i] & (alignment-1)) {
malloc_printf(
"%p inadequately aligned for"
" alignment: %zu\n", ps[i], alignment);
}
sallocm(ps[i], &rsz, 0);
total += rsz;
if (total >= (MAXALIGN << 1))
break;
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
dallocm(ps[i], 0);
ps[i] = NULL;
}
}
}
}
malloc_printf("Test end\n");
return (0);
}
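
For orientation, the experimental *allocm() calls exercised above follow one pattern: nallocm() computes the real size a request would receive without allocating, allocm() performs the allocation, and dallocm() frees it. A minimal sketch under those semantics (the size and 64-byte alignment are illustrative only):

#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

int
main(void)
{
    void *p;
    size_t nsz = 0, rsz = 0;

    /* Size the request, perform it, confirm the two agree, release. */
    if (nallocm(&nsz, 4096, ALLOCM_ALIGN(64) | ALLOCM_ZERO) == ALLOCM_SUCCESS &&
        allocm(&p, &rsz, 4096, ALLOCM_ALIGN(64) | ALLOCM_ZERO) == ALLOCM_SUCCESS) {
        assert(nsz == rsz);
        assert(((uintptr_t)p & 63) == 0);
        dallocm(p, 0);
    }
    return (0);
}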

View File

@ -0,0 +1,25 @@
Test begin
Alignment: 8
Alignment: 16
Alignment: 32
Alignment: 64
Alignment: 128
Alignment: 256
Alignment: 512
Alignment: 1024
Alignment: 2048
Alignment: 4096
Alignment: 8192
Alignment: 16384
Alignment: 32768
Alignment: 65536
Alignment: 131072
Alignment: 262144
Alignment: 524288
Alignment: 1048576
Alignment: 2097152
Alignment: 4194304
Alignment: 8388608
Alignment: 16777216
Alignment: 33554432
Test end

View File

@ -0,0 +1,153 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#if (LG_BITMAP_MAXBITS > 12)
# define MAXBITS 4500
#else
# define MAXBITS (1U << LG_BITMAP_MAXBITS)
#endif
static void
test_bitmap_size(void)
{
size_t i, prev_size;
prev_size = 0;
for (i = 1; i <= MAXBITS; i++) {
size_t size = bitmap_size(i);
assert(size >= prev_size);
prev_size = size;
}
}
static void
test_bitmap_init(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
assert(bitmap_get(bitmap, &binfo, j) == false);
free(bitmap);
}
}
}
static void
test_bitmap_set(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
free(bitmap);
}
}
}
static void
test_bitmap_unset(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
for (j = 0; j < i; j++)
bitmap_unset(bitmap, &binfo, j);
for (j = 0; j < i; j++)
bitmap_set(bitmap, &binfo, j);
assert(bitmap_full(bitmap, &binfo));
free(bitmap);
}
}
}
static void
test_bitmap_sfu(void)
{
size_t i;
for (i = 1; i <= MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
ssize_t j;
bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
/* Iteratively set bits starting at the beginning. */
for (j = 0; j < (ssize_t)i; j++)
assert(bitmap_sfu(bitmap, &binfo) == j);
assert(bitmap_full(bitmap, &binfo));
/*
* Iteratively unset bits starting at the end, and
* verify that bitmap_sfu() reaches the unset bits.
*/
for (j = i - 1; j >= 0; j--) {
bitmap_unset(bitmap, &binfo, j);
assert(bitmap_sfu(bitmap, &binfo) == j);
bitmap_unset(bitmap, &binfo, j);
}
assert(bitmap_get(bitmap, &binfo, 0) == false);
/*
* Iteratively set bits starting at the beginning, and
* verify that bitmap_sfu() looks past them.
*/
for (j = 1; j < (ssize_t)i; j++) {
bitmap_set(bitmap, &binfo, j - 1);
assert(bitmap_sfu(bitmap, &binfo) == j);
bitmap_unset(bitmap, &binfo, j);
}
assert(bitmap_sfu(bitmap, &binfo) == i - 1);
assert(bitmap_full(bitmap, &binfo));
free(bitmap);
}
}
}
int
main(void)
{
malloc_printf("Test begin\n");
test_bitmap_size();
test_bitmap_init();
test_bitmap_set();
test_bitmap_unset();
test_bitmap_sfu();
malloc_printf("Test end\n");
return (0);
}
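
Each test above repeats the same bitmap lifecycle, sketched here using only the internal calls the tests themselves exercise: bitmap_info_init() describes the bitmap, bitmap_info_ngroups() sizes its backing storage, and bitmap_init() must run before any bitmap_get()/bitmap_set().

#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

int
main(void)
{
    bitmap_info_t binfo;
    bitmap_t *bitmap;

    bitmap_info_init(&binfo, 100);  /* 100 logical bits. */
    bitmap = malloc(sizeof(bitmap_t) * bitmap_info_ngroups(&binfo));
    bitmap_init(bitmap, &binfo);    /* All bits start unset. */
    bitmap_set(bitmap, &binfo, 7);
    assert(bitmap_get(bitmap, &binfo, 7));
    free(bitmap);
    return (0);
}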

View File

@ -0,0 +1,2 @@
Test begin
Test end

View File

@ -0,0 +1,53 @@
/*
* This header should be included by tests, rather than directly including
* jemalloc/jemalloc.h, because --with-install-suffix may cause the header to
* have a different name.
*/
#include "jemalloc/jemalloc@install_suffix@.h"
#include "jemalloc/internal/jemalloc_internal.h"
/* Abstraction layer for threading in tests */
#ifdef _WIN32
#include <windows.h>
typedef HANDLE je_thread_t;
void
je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg)
{
LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc;
*thread = CreateThread(NULL, 0, routine, arg, 0, NULL);
if (*thread == NULL) {
malloc_printf("Error in CreateThread()\n");
exit(1);
}
}
void
je_thread_join(je_thread_t thread, void **ret)
{
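/* The Windows path does not collect the thread's exit status; ret is unused. */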
WaitForSingleObject(thread, INFINITE);
}
#else
#include <pthread.h>
typedef pthread_t je_thread_t;
void
je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg)
{
if (pthread_create(thread, NULL, proc, arg) != 0) {
malloc_printf("Error in pthread_create()\n");
exit(1);
}
}
void
je_thread_join(je_thread_t thread, void **ret)
{
pthread_join(thread, ret);
}
#endif
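
A minimal usage sketch for the shim (worker name hypothetical); on both paths je_thread_join() tolerates a NULL ret when the thread's return value is not needed:

#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

static void *
worker(void *arg)
{
    (void)arg;
    /* ... per-thread work ... */
    return (NULL);
}

int
main(void)
{
    je_thread_t thread;

    je_thread_create(&thread, worker, NULL);
    je_thread_join(thread, NULL);
    return (0);
}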

View File

@ -0,0 +1,60 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
int
main(void)
{
int ret, err;
size_t sz, lg_chunk, chunksize, i;
char *p, *q;
malloc_printf("Test begin\n");
sz = sizeof(lg_chunk);
if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) {
assert(err != ENOENT);
malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
ret = 1;
goto label_return;
}
chunksize = ((size_t)1U) << lg_chunk;
p = (char *)malloc(chunksize);
if (p == NULL) {
malloc_printf("malloc(%zu) --> %p\n", chunksize, p);
ret = 1;
goto label_return;
}
memset(p, 'a', chunksize);
q = (char *)realloc(p, chunksize * 2);
if (q == NULL) {
malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize * 2,
q);
ret = 1;
goto label_return;
}
for (i = 0; i < chunksize; i++) {
assert(q[i] == 'a');
}
p = q;
q = (char *)realloc(p, chunksize);
if (q == NULL) {
malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize, q);
ret = 1;
goto label_return;
}
for (i = 0; i < chunksize; i++) {
assert(q[i] == 'a');
}
free(q);
ret = 0;
label_return:
malloc_printf("Test end\n");
return (ret);
}

View File

@ -0,0 +1,2 @@
Test begin
Test end

View File

@ -0,0 +1,115 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
#define CHUNK 0x400000
/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
#define MAXALIGN ((size_t)0x2000000LU)
#define NITER 4
int
main(void)
{
size_t alignment, size, total;
unsigned i;
int err;
void *p, *ps[NITER];
malloc_printf("Test begin\n");
/* Test error conditions. */
for (alignment = 0; alignment < sizeof(void *); alignment++) {
err = posix_memalign(&p, alignment, 1);
if (err != EINVAL) {
malloc_printf(
"Expected error for invalid alignment %zu\n",
alignment);
}
}
for (alignment = sizeof(size_t); alignment < MAXALIGN;
alignment <<= 1) {
err = posix_memalign(&p, alignment + 1, 1);
if (err == 0) {
malloc_printf(
"Expected error for invalid alignment %zu\n",
alignment + 1);
}
}
#if LG_SIZEOF_PTR == 3
alignment = UINT64_C(0x8000000000000000);
size = UINT64_C(0x8000000000000000);
#else
alignment = 0x80000000LU;
size = 0x80000000LU;
#endif
err = posix_memalign(&p, alignment, size);
if (err == 0) {
malloc_printf(
"Expected error for posix_memalign(&p, %zu, %zu)\n",
alignment, size);
}
#if LG_SIZEOF_PTR == 3
alignment = UINT64_C(0x4000000000000000);
size = UINT64_C(0x8400000000000001);
#else
alignment = 0x40000000LU;
size = 0x84000001LU;
#endif
err = posix_memalign(&p, alignment, size);
if (err == 0) {
malloc_printf(
"Expected error for posix_memalign(&p, %zu, %zu)\n",
alignment, size);
}
alignment = 0x10LU;
#if LG_SIZEOF_PTR == 3
size = UINT64_C(0xfffffffffffffff0);
#else
size = 0xfffffff0LU;
#endif
err = posix_memalign(&p, alignment, size);
if (err == 0) {
malloc_printf(
"Expected error for posix_memalign(&p, %zu, %zu)\n",
alignment, size);
}
for (i = 0; i < NITER; i++)
ps[i] = NULL;
for (alignment = 8;
alignment <= MAXALIGN;
alignment <<= 1) {
total = 0;
malloc_printf("Alignment: %zu\n", alignment);
for (size = 1;
size < 3 * alignment && size < (1U << 31);
size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
for (i = 0; i < NITER; i++) {
err = posix_memalign(&ps[i],
alignment, size);
if (err) {
malloc_printf(
"Error for size %zu (%#zx): %s\n",
size, size, strerror(err));
exit(1);
}
total += malloc_usable_size(ps[i]);
if (total >= (MAXALIGN << 1))
break;
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
free(ps[i]);
ps[i] = NULL;
}
}
}
}
malloc_printf("Test end\n");
return (0);
}
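
Unlike malloc(), posix_memalign() reports failure through its return value (an errno code) rather than by setting errno, and the alignment must be a power of two that is a multiple of sizeof(void *). A minimal sketch of the convention the test above relies on:

#include <errno.h>
#include <stdlib.h>
#include <assert.h>

int
main(void)
{
    void *p;
    int err;

    err = posix_memalign(&p, 64, 1024); /* 64 is a valid alignment. */
    if (err == 0)
        free(p);
    else
        assert(err == EINVAL || err == ENOMEM);
    return (0);
}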

View File

@ -0,0 +1,25 @@
Test begin
Alignment: 8
Alignment: 16
Alignment: 32
Alignment: 64
Alignment: 128
Alignment: 256
Alignment: 512
Alignment: 1024
Alignment: 2048
Alignment: 4096
Alignment: 8192
Alignment: 16384
Alignment: 32768
Alignment: 65536
Alignment: 131072
Alignment: 262144
Alignment: 524288
Alignment: 1048576
Alignment: 2097152
Alignment: 4194304
Alignment: 8388608
Alignment: 16777216
Alignment: 33554432
Test end

View File

@ -0,0 +1,127 @@
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
int
main(void)
{
size_t pagesize;
void *p, *q;
size_t sz, tsz;
int r;
malloc_printf("Test begin\n");
/* Get page size. */
{
#ifdef _WIN32
SYSTEM_INFO si;
GetSystemInfo(&si);
pagesize = (size_t)si.dwPageSize;
#else
long result = sysconf(_SC_PAGESIZE);
assert(result != -1);
pagesize = (size_t)result;
#endif
}
r = allocm(&p, &sz, 42, 0);
if (r != ALLOCM_SUCCESS) {
malloc_printf("Unexpected allocm() error\n");
abort();
}
q = p;
r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected rallocm() error\n");
if (q != p)
malloc_printf("Unexpected object move\n");
if (tsz != sz) {
malloc_printf("Unexpected size change: %zu --> %zu\n",
sz, tsz);
}
q = p;
r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected rallocm() error\n");
if (q != p)
malloc_printf("Unexpected object move\n");
if (tsz != sz) {
malloc_printf("Unexpected size change: %zu --> %zu\n",
sz, tsz);
}
q = p;
r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_ERR_NOT_MOVED)
malloc_printf("Unexpected rallocm() result\n");
if (q != p)
malloc_printf("Unexpected object move\n");
if (tsz != sz) {
malloc_printf("Unexpected size change: %zu --> %zu\n",
sz, tsz);
}
q = p;
r = rallocm(&q, &tsz, sz + 5, 0, 0);
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected rallocm() error\n");
if (q == p)
malloc_printf("Expected object move\n");
if (tsz == sz) {
malloc_printf("Expected size change: %zu --> %zu\n",
sz, tsz);
}
p = q;
sz = tsz;
r = rallocm(&q, &tsz, pagesize*2, 0, 0);
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected rallocm() error\n");
if (q == p)
malloc_printf("Expected object move\n");
if (tsz == sz) {
malloc_printf("Expected size change: %zu --> %zu\n",
sz, tsz);
}
p = q;
sz = tsz;
r = rallocm(&q, &tsz, pagesize*4, 0, 0);
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected rallocm() error\n");
if (tsz == sz) {
malloc_printf("Expected size change: %zu --> %zu\n",
sz, tsz);
}
p = q;
sz = tsz;
r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected rallocm() error\n");
if (q != p)
malloc_printf("Unexpected object move\n");
if (tsz == sz) {
malloc_printf("Expected size change: %zu --> %zu\n",
sz, tsz);
}
sz = tsz;
r = rallocm(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE);
if (r != ALLOCM_SUCCESS)
malloc_printf("Unexpected rallocm() error\n");
if (q != p)
malloc_printf("Unexpected object move\n");
if (tsz == sz) {
malloc_printf("Expected size change: %zu --> %zu\n",
sz, tsz);
}
sz = tsz;
dallocm(p, 0);
malloc_printf("Test end\n");
return (0);
}
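
The ALLOCM_NO_MOVE sequences above reduce to one contract: with the flag set, rallocm() either resizes in place (ALLOCM_SUCCESS) or refuses (ALLOCM_ERR_NOT_MOVED), but never relocates the object. A minimal sketch (sizes illustrative):

#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

int
main(void)
{
    void *p, *q;
    size_t sz, tsz;
    int r;

    if (allocm(&p, &sz, 4096, 0) != ALLOCM_SUCCESS)
        return (1);
    q = p;
    r = rallocm(&q, &tsz, sz * 2, 0, ALLOCM_NO_MOVE);
    /* In-place growth or refusal; either way the object stays put. */
    assert(r == ALLOCM_SUCCESS || r == ALLOCM_ERR_NOT_MOVED);
    assert(q == p);
    dallocm(q, 0);
    return (0);
}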

View File

@ -0,0 +1,2 @@
Test begin
Test end

Some files were not shown because too many files have changed in this diff