From 12a68e6af31a32c52785298f036543caa96d24d3 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sat, 6 Feb 2016 22:56:25 -0800 Subject: [PATCH 1/3] rustc: Ensure FNV hashing is inlined across crates Right now the primary hashing algorithm of the compiler isn't actually inlined across crates, meaning that it may be missing out on some crucial optimizations in a few places (perhaps unrolling smaller loops, etc). This commit made the hashing function disappear from a profiled version of the compiler, but that's likely because it was just inlined elsewhere. When compiling winapi, however, this decreased compile time from 18.3 to 17.8 seconds (a 3% improvement). --- src/librustc_data_structures/fnv.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/librustc_data_structures/fnv.rs b/src/librustc_data_structures/fnv.rs index 6f4dc28e122..da5f9f20892 100644 --- a/src/librustc_data_structures/fnv.rs +++ b/src/librustc_data_structures/fnv.rs @@ -35,10 +35,12 @@ pub fn FnvHashSet() -> FnvHashSet { pub struct FnvHasher(u64); impl Default for FnvHasher { + #[inline] fn default() -> FnvHasher { FnvHasher(0xcbf29ce484222325) } } impl Hasher for FnvHasher { + #[inline] fn write(&mut self, bytes: &[u8]) { let FnvHasher(mut hash) = *self; for byte in bytes { @@ -47,5 +49,7 @@ impl Hasher for FnvHasher { } *self = FnvHasher(hash); } + + #[inline] fn finish(&self) -> u64 { self.0 } } From cc719d2d7d1967b92e38b1dec6d19f10c5b42891 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 11 Feb 2016 11:06:31 -0800 Subject: [PATCH 2/3] trans: Don't link whole rlibs to executables Back in 9bc8e6d14 the linking of rlibs changed to using the `link_whole_rlib` function. This change, however, was only intended to affect dylibs, not executables. For executables we don't actually want to link entire rlibs because we want the linker to strip out as much as possible. 
This commit adds a conditional to this logic to only link entire rlibs if we're creating a dylib, and otherwise an executable just links an rlib as usual. A test is included which will fail to link if this behavior is reverted. --- src/librustc_trans/back/link.rs | 6 ++++- .../run-make/lto-no-link-whole-rlib/Makefile | 18 +++++++++++++++ .../run-make/lto-no-link-whole-rlib/bar.c | 13 +++++++++++ .../run-make/lto-no-link-whole-rlib/foo.c | 13 +++++++++++ .../run-make/lto-no-link-whole-rlib/lib1.rs | 20 ++++++++++++++++ .../run-make/lto-no-link-whole-rlib/lib2.rs | 23 +++++++++++++++++++ .../run-make/lto-no-link-whole-rlib/main.rs | 17 ++++++++++++++ 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 src/test/run-make/lto-no-link-whole-rlib/Makefile create mode 100644 src/test/run-make/lto-no-link-whole-rlib/bar.c create mode 100644 src/test/run-make/lto-no-link-whole-rlib/foo.c create mode 100644 src/test/run-make/lto-no-link-whole-rlib/lib1.rs create mode 100644 src/test/run-make/lto-no-link-whole-rlib/lib2.rs create mode 100644 src/test/run-make/lto-no-link-whole-rlib/main.rs diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index 69a70cdf144..33734d615a6 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -1253,7 +1253,11 @@ fn add_upstream_rust_crates(cmd: &mut Linker, sess: &Session, if any_objects { archive.build(); - cmd.link_whole_rlib(&fix_windows_verbatim_for_gcc(&dst)); + if dylib { + cmd.link_whole_rlib(&fix_windows_verbatim_for_gcc(&dst)); + } else { + cmd.link_rlib(&fix_windows_verbatim_for_gcc(&dst)); + } } }); } diff --git a/src/test/run-make/lto-no-link-whole-rlib/Makefile b/src/test/run-make/lto-no-link-whole-rlib/Makefile new file mode 100644 index 00000000000..1d45cb413c5 --- /dev/null +++ b/src/test/run-make/lto-no-link-whole-rlib/Makefile @@ -0,0 +1,18 @@ +# Copyright 2016 The Rust Project Developers. 
See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +-include ../tools.mk + +all: $(call NATIVE_STATICLIB,foo) $(call NATIVE_STATICLIB,bar) + $(RUSTC) lib1.rs + $(RUSTC) lib2.rs + $(RUSTC) main.rs -Clto + $(call RUN,main) + diff --git a/src/test/run-make/lto-no-link-whole-rlib/bar.c b/src/test/run-make/lto-no-link-whole-rlib/bar.c new file mode 100644 index 00000000000..716d1abcf34 --- /dev/null +++ b/src/test/run-make/lto-no-link-whole-rlib/bar.c @@ -0,0 +1,13 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +int foo() { + return 2; +} diff --git a/src/test/run-make/lto-no-link-whole-rlib/foo.c b/src/test/run-make/lto-no-link-whole-rlib/foo.c new file mode 100644 index 00000000000..1b36874581a --- /dev/null +++ b/src/test/run-make/lto-no-link-whole-rlib/foo.c @@ -0,0 +1,13 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +int foo() { + return 1; +} diff --git a/src/test/run-make/lto-no-link-whole-rlib/lib1.rs b/src/test/run-make/lto-no-link-whole-rlib/lib1.rs new file mode 100644 index 00000000000..0a87c8e4725 --- /dev/null +++ b/src/test/run-make/lto-no-link-whole-rlib/lib1.rs @@ -0,0 +1,20 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![crate_type = "rlib"] + +#[link(name = "foo", kind = "static")] +extern { + fn foo() -> i32; +} + +pub fn foo1() -> i32 { + unsafe { foo() } +} diff --git a/src/test/run-make/lto-no-link-whole-rlib/lib2.rs b/src/test/run-make/lto-no-link-whole-rlib/lib2.rs new file mode 100644 index 00000000000..6e3f382b3fd --- /dev/null +++ b/src/test/run-make/lto-no-link-whole-rlib/lib2.rs @@ -0,0 +1,23 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![crate_type = "rlib"] + +extern crate lib1; + +#[link(name = "bar", kind = "static")] +extern { + fn foo() -> i32; +} + +pub fn foo2() -> i32 { + unsafe { foo() } +} + diff --git a/src/test/run-make/lto-no-link-whole-rlib/main.rs b/src/test/run-make/lto-no-link-whole-rlib/main.rs new file mode 100644 index 00000000000..8417af63be9 --- /dev/null +++ b/src/test/run-make/lto-no-link-whole-rlib/main.rs @@ -0,0 +1,17 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate lib1; +extern crate lib2; + +fn main() { + assert_eq!(lib1::foo1(), 2); + assert_eq!(lib2::foo2(), 2); +} From e3b414d8612314e74e2b0ebde1ed5c6997d28e8d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sat, 6 Feb 2016 22:54:35 -0800 Subject: [PATCH 3/3] std: Stop prefixing jemalloc symbols Now that we properly only link in jemalloc when building executables, we have far less to worry about in terms of polluting the global namespace with the `free` and `malloc` symbols on Linux. This commit will primarily allow LLVM to use jemalloc so the compiler will only be using one allocator overall. Locally this took compile time for libsyntax from 95 seconds to 89 (a 6% improvement). --- mk/rt.mk | 12 ++++++++++-- src/liballoc_jemalloc/build.rs | 17 ++++++++++++++--- src/liballoc_jemalloc/lib.rs | 33 +++++++++++++++++++++++---------- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/mk/rt.mk b/mk/rt.mk index cfb210952bc..bd17490955d 100644 --- a/mk/rt.mk +++ b/mk/rt.mk @@ -148,7 +148,15 @@ ifeq ($$(CFG_WINDOWSY_$(1)),1) else ifeq ($(OSTYPE_$(1)), apple-ios) JEMALLOC_ARGS_$(1) := --disable-tls else ifeq ($(findstring android, $(OSTYPE_$(1))), android) - JEMALLOC_ARGS_$(1) := --disable-tls + # We force android to have prefixed symbols because apparently replacement of + # the libc allocator doesn't quite work. When this was tested (unprefixed + # symbols), it was found that the `realpath` function in libc would allocate + # with libc malloc (not jemalloc malloc), and then the standard library would + # free with jemalloc free, causing a segfault. + # + # If the test suite passes, however, without symbol prefixes then we should be + # good to go! 
+ JEMALLOC_ARGS_$(1) := --disable-tls --with-jemalloc-prefix=je_ endif ifdef CFG_ENABLE_DEBUG_JEMALLOC @@ -186,7 +194,7 @@ JEMALLOC_LOCAL_$(1) := $$(JEMALLOC_BUILD_DIR_$(1))/lib/$$(JEMALLOC_REAL_NAME_$(1 $$(JEMALLOC_LOCAL_$(1)): $$(JEMALLOC_DEPS) $$(MKFILE_DEPS) @$$(call E, make: jemalloc) cd "$$(JEMALLOC_BUILD_DIR_$(1))"; "$(S)src/jemalloc/configure" \ - $$(JEMALLOC_ARGS_$(1)) --with-jemalloc-prefix=je_ $(CFG_JEMALLOC_FLAGS) \ + $$(JEMALLOC_ARGS_$(1)) $(CFG_JEMALLOC_FLAGS) \ --build=$$(CFG_GNU_TRIPLE_$(CFG_BUILD)) --host=$$(CFG_GNU_TRIPLE_$(1)) \ CC="$$(CC_$(1)) $$(CFG_JEMALLOC_CFLAGS_$(1))" \ AR="$$(AR_$(1))" \ diff --git a/src/liballoc_jemalloc/build.rs b/src/liballoc_jemalloc/build.rs index 4bc752af48e..c9508322a31 100644 --- a/src/liballoc_jemalloc/build.rs +++ b/src/liballoc_jemalloc/build.rs @@ -50,7 +50,7 @@ fn main() { .env("AR", &ar) .env("RANLIB", format!("{} s", ar.display())); - if target.contains("windows-gnu") { + if target.contains("windows") { // A bit of history here, this used to be --enable-lazy-lock added in // #14006 which was filed with jemalloc in jemalloc/jemalloc#83 which // was also reported to MinGW: @@ -72,7 +72,19 @@ fn main() { // locking, but requires passing an option due to a historical // default with jemalloc. cmd.arg("--disable-lazy-lock"); - } else if target.contains("ios") || target.contains("android") { + } else if target.contains("ios") { + cmd.arg("--disable-tls"); + } else if target.contains("android") { + // We force android to have prefixed symbols because apparently + // replacement of the libc allocator doesn't quite work. When this was + // tested (unprefixed symbols), it was found that the `realpath` + // function in libc would allocate with libc malloc (not jemalloc + // malloc), and then the standard library would free with jemalloc free, + // causing a segfault. + // + // If the test suite passes, however, without symbol prefixes then we + // should be good to go! 
+ cmd.arg("--with-jemalloc-prefix=je_"); cmd.arg("--disable-tls"); } @@ -82,7 +94,6 @@ fn main() { // Turn off broken quarantine (see jemalloc/jemalloc#161) cmd.arg("--disable-fill"); - cmd.arg("--with-jemalloc-prefix=je_"); cmd.arg(format!("--host={}", build_helper::gnu_target(&target))); cmd.arg(format!("--build={}", build_helper::gnu_target(&host))); diff --git a/src/liballoc_jemalloc/lib.rs b/src/liballoc_jemalloc/lib.rs index 2c46e37ac32..bda001eb4f4 100644 --- a/src/liballoc_jemalloc/lib.rs +++ b/src/liballoc_jemalloc/lib.rs @@ -41,12 +41,25 @@ use libc::{c_int, c_void, size_t}; #[cfg(not(cargobuild))] extern {} +// Note that the symbols here are prefixed by default on OSX (we don't +// explicitly request it), and on Android we explicitly request it as +// unprefixing causes segfaults (mismatches in allocators). extern { - fn je_mallocx(size: size_t, flags: c_int) -> *mut c_void; - fn je_rallocx(ptr: *mut c_void, size: size_t, flags: c_int) -> *mut c_void; - fn je_xallocx(ptr: *mut c_void, size: size_t, extra: size_t, flags: c_int) -> size_t; - fn je_sdallocx(ptr: *mut c_void, size: size_t, flags: c_int); - fn je_nallocx(size: size_t, flags: c_int) -> size_t; + #[cfg_attr(any(target_os = "macos", target_os = "android"), + link_name = "je_mallocx")] + fn mallocx(size: size_t, flags: c_int) -> *mut c_void; + #[cfg_attr(any(target_os = "macos", target_os = "android"), + link_name = "je_rallocx")] + fn rallocx(ptr: *mut c_void, size: size_t, flags: c_int) -> *mut c_void; + #[cfg_attr(any(target_os = "macos", target_os = "android"), + link_name = "je_xallocx")] + fn xallocx(ptr: *mut c_void, size: size_t, extra: size_t, flags: c_int) -> size_t; + #[cfg_attr(any(target_os = "macos", target_os = "android"), + link_name = "je_sdallocx")] + fn sdallocx(ptr: *mut c_void, size: size_t, flags: c_int); + #[cfg_attr(any(target_os = "macos", target_os = "android"), + link_name = "je_nallocx")] + fn nallocx(size: size_t, flags: c_int) -> size_t; } // The minimum alignment 
guaranteed by the architecture. This value is used to @@ -78,7 +91,7 @@ fn align_to_flags(align: usize) -> c_int { #[no_mangle] pub extern "C" fn __rust_allocate(size: usize, align: usize) -> *mut u8 { let flags = align_to_flags(align); - unsafe { je_mallocx(size as size_t, flags) as *mut u8 } + unsafe { mallocx(size as size_t, flags) as *mut u8 } } #[no_mangle] @@ -88,7 +101,7 @@ pub extern "C" fn __rust_reallocate(ptr: *mut u8, align: usize) -> *mut u8 { let flags = align_to_flags(align); - unsafe { je_rallocx(ptr as *mut c_void, size as size_t, flags) as *mut u8 } + unsafe { rallocx(ptr as *mut c_void, size as size_t, flags) as *mut u8 } } #[no_mangle] @@ -98,19 +111,19 @@ pub extern "C" fn __rust_reallocate_inplace(ptr: *mut u8, align: usize) -> usize { let flags = align_to_flags(align); - unsafe { je_xallocx(ptr as *mut c_void, size as size_t, 0, flags) as usize } + unsafe { xallocx(ptr as *mut c_void, size as size_t, 0, flags) as usize } } #[no_mangle] pub extern "C" fn __rust_deallocate(ptr: *mut u8, old_size: usize, align: usize) { let flags = align_to_flags(align); - unsafe { je_sdallocx(ptr as *mut c_void, old_size as size_t, flags) } + unsafe { sdallocx(ptr as *mut c_void, old_size as size_t, flags) } } #[no_mangle] pub extern "C" fn __rust_usable_size(size: usize, align: usize) -> usize { let flags = align_to_flags(align); - unsafe { je_nallocx(size as size_t, flags) as usize } + unsafe { nallocx(size as size_t, flags) as usize } } // These symbols are used by jemalloc on android but the really old android