From a35a93f09cc111a53d00efc567ad678583dd5ac7 Mon Sep 17 00:00:00 2001 From: Mingye Wang Date: Thu, 17 Sep 2020 17:39:26 +0800 Subject: [PATCH] Pass tune-cpu to LLVM I think this is how it should work... --- compiler/rustc_codegen_llvm/src/attributes.rs | 15 +++++++++++++ compiler/rustc_codegen_llvm/src/context.rs | 3 ++- compiler/rustc_codegen_llvm/src/lib.rs | 3 +++ compiler/rustc_codegen_llvm/src/llvm_util.rs | 22 ++++++++++++++----- .../rustc_codegen_ssa/src/traits/backend.rs | 1 + compiler/rustc_interface/src/tests.rs | 1 + compiler/rustc_session/src/options.rs | 2 ++ src/doc/rustc/src/codegen-options/index.md | 20 +++++++++++++++-- src/test/codegen/tune-cpu-on-functions.rs | 21 ++++++++++++++++++ 9 files changed, 80 insertions(+), 8 deletions(-) create mode 100644 src/test/codegen/tune-cpu-on-functions.rs diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 227a87ff819..434a42607d6 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) { ); } +pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) { + if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) { + let tune_cpu = SmallCStr::new(tune); + llvm::AddFunctionAttrStringValue( + llfn, + llvm::AttributePlace::Function, + const_cstr!("tune-cpu"), + tune_cpu.as_c_str(), + ); + } +} + /// Sets the `NonLazyBind` LLVM attribute on a given function, /// assuming the codegen options allow skipping the PLT. pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) { @@ -300,6 +312,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty:: // Without this, ThinLTO won't inline Rust functions into Clang generated // functions (because Clang annotates functions this way too). apply_target_cpu_attr(cx, llfn); + // tune-cpu is only conveyed through the attribute for our purpose. + // The target doesn't care; the subtarget reads our attribute. + apply_tune_cpu_attr(cx, llfn); let features = llvm_target_features(cx.tcx.sess) .map(|s| s.to_string()) diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 1c51a9df5d8..6e6b2cacc03 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> { } fn apply_target_cpu_attr(&self, llfn: &'ll Value) { - attributes::apply_target_cpu_attr(self, llfn) + attributes::apply_target_cpu_attr(self, llfn); + attributes::apply_tune_cpu_attr(self, llfn); } fn create_used_variable(&self) { diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 2e2abe9fb30..28e49e823c0 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend { fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str { llvm_util::target_cpu(sess) } + fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> { + llvm_util::tune_cpu(sess) + } } impl WriteBackendMethods for LlvmCodegenBackend { diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index f0b50459837..7fe30fc8ac0 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -351,11 +351,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) { } } -pub fn target_cpu(sess: &Session) -> &str { - let name = match sess.opts.cg.target_cpu { - Some(ref s) => &**s, - None => &*sess.target.target.options.cpu, - }; +fn handle_native(name: &str) -> &str { if name != "native" { return name; } @@ -366,3 +362,19 @@ pub fn target_cpu(sess: &Session) -> &str { str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap() } } + +pub fn target_cpu(sess: &Session) -> &str { + let name = match sess.opts.cg.target_cpu { + Some(ref s) => &**s, + None => &*sess.target.target.options.cpu, + }; + + handle_native(name) +} + +pub fn tune_cpu(sess: &Session) -> Option<&str> { + match sess.opts.debugging_opts.tune_cpu { + Some(ref s) => Some(handle_native(&**s)), + None => None, + } +} diff --git a/compiler/rustc_codegen_ssa/src/traits/backend.rs b/compiler/rustc_codegen_ssa/src/traits/backend.rs index 3522ea01153..98cd5d82813 100644 --- a/compiler/rustc_codegen_ssa/src/traits/backend.rs +++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs @@ -116,4 +116,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se opt_level: config::OptLevel, ) -> Arc Result + Send + Sync>; fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str; + fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>; } diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index dc4fa807f78..a0f2929c7e3 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -583,6 +583,7 @@ fn test_debugging_options_tracking_hash() { tracked!(symbol_mangling_version, SymbolManglingVersion::V0); tracked!(teach, true); tracked!(thinlto, Some(true)); + tracked!(tune_cpu, Some(String::from("abc"))); tracked!(tls_model, Some(TlsModel::GeneralDynamic)); tracked!(treat_err_as_bug, Some(1)); tracked!(unleash_the_miri_inside_of_you, true); diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 8cc55f4ebe8..611150f64f5 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1079,6 +1079,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options, "show extended diagnostic help (default: no)"), terminal_width: Option = (None, parse_opt_uint, [UNTRACKED], "set the current terminal width"), + tune_cpu: Option = (None, parse_opt_string, [TRACKED], + "select processor to schedule for (`rustc --print target-cpus` for details)"), thinlto: Option = (None, parse_opt_bool, [TRACKED], "enable ThinLTO when possible"), // We default to 1 here since we want to behave like diff --git a/src/doc/rustc/src/codegen-options/index.md b/src/doc/rustc/src/codegen-options/index.md index bed10ca16d3..f6493e49c3c 100644 --- a/src/doc/rustc/src/codegen-options/index.md +++ b/src/doc/rustc/src/codegen-options/index.md @@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values: This instructs `rustc` to generate code specifically for a particular processor. You can run `rustc --print target-cpus` to see the valid options to pass -here. Additionally, `native` can be passed to use the processor of the host -machine. Each target has a default base CPU. +here. Each target has a default base CPU. Special values include: + +* `native` can be passed to use the processor of the host machine. +* `generic` refers to an LLVM target with minimal features but modern tuning. ## target-feature @@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control Each target and [`target-cpu`](#target-cpu) has a default set of enabled features. +## tune-cpu + +This instructs `rustc` to schedule code specifically for a particular +processor. This does not affect the compatibility (instruction sets or ABI), +but should make your code slightly more efficient on the selected CPU. + +The valid options are the same as those for [`target-cpu`](#target-cpu). +The default is `None`, which LLVM translates as the `target-cpu`. + +This is an unstable option. Use `-Z tune-cpu=machine` to specify a value. + +Due to limitations in LLVM (12.0.0-git9218f92), this option is currently +effective only for x86 targets. + [option-emit]: ../command-line-arguments.md#option-emit [option-o-optimize]: ../command-line-arguments.md#option-o-optimize [profile-guided optimization]: ../profile-guided-optimization.md diff --git a/src/test/codegen/tune-cpu-on-functions.rs b/src/test/codegen/tune-cpu-on-functions.rs new file mode 100644 index 00000000000..9121799cdbf --- /dev/null +++ b/src/test/codegen/tune-cpu-on-functions.rs @@ -0,0 +1,21 @@ +// This test makes sure that functions get annotated with the proper +// "tune-cpu" attribute in LLVM. + +// no-prefer-dynamic +// ignore-tidy-linelength +// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic + +#![crate_type = "staticlib"] + +// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0 +#[no_mangle] +pub extern fn exported() { + not_exported(); +} + +// CHECK-LABEL: ; tune_cpu_on_functions::not_exported +// CHECK-NEXT: ; Function Attrs: +// CHECK-NEXT: define {{.*}}() {{.*}} #0 +fn not_exported() {} + +// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"