From 2c17bfc20cf2140adf2d7c3fff0f02db6799b93e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Steinbrink?= Date: Fri, 18 Sep 2015 15:46:58 +0200 Subject: [PATCH] Skip no-op adjustments in trans That allows us to keep using trans_into() in case of adjustments that may actually be ignored in trans because they are a plain deref/ref pair with no overloaded deref or unsizing. Unoptimized(!) benchmarks from servo/servo#7638 Before ``` test goser::bench_clone ... bench: 17,701 ns/iter (+/- 58) = 30 MB/s test goser::bincode::bench_decoder ... bench: 33,715 ns/iter (+/- 300) = 11 MB/s test goser::bincode::bench_deserialize ... bench: 36,804 ns/iter (+/- 329) = 9 MB/s test goser::bincode::bench_encoder ... bench: 34,695 ns/iter (+/- 149) = 11 MB/s test goser::bincode::bench_populate ... bench: 18,879 ns/iter (+/- 88) test goser::bincode::bench_serialize ... bench: 31,668 ns/iter (+/- 156) = 11 MB/s test goser::capnp::bench_deserialize ... bench: 2,049 ns/iter (+/- 87) = 218 MB/s test goser::capnp::bench_deserialize_packed ... bench: 10,707 ns/iter (+/- 258) = 31 MB/s test goser::capnp::bench_populate ... bench: 635 ns/iter (+/- 5) test goser::capnp::bench_serialize ... bench: 35,657 ns/iter (+/- 155) = 12 MB/s test goser::capnp::bench_serialize_packed ... bench: 37,881 ns/iter (+/- 146) = 8 MB/s test goser::msgpack::bench_decoder ... bench: 50,634 ns/iter (+/- 307) = 5 MB/s test goser::msgpack::bench_encoder ... bench: 25,738 ns/iter (+/- 90) = 11 MB/s test goser::msgpack::bench_populate ... bench: 18,900 ns/iter (+/- 138) test goser::protobuf::bench_decoder ... bench: 2,791 ns/iter (+/- 29) = 102 MB/s test goser::protobuf::bench_encoder ... bench: 75,414 ns/iter (+/- 358) = 3 MB/s test goser::protobuf::bench_populate ... bench: 19,248 ns/iter (+/- 92) test goser::rustc_serialize_json::bench_decoder ... bench: 109,999 ns/iter (+/- 797) = 5 MB/s test goser::rustc_serialize_json::bench_encoder ... 
bench: 58,777 ns/iter (+/- 418) = 10 MB/s test goser::rustc_serialize_json::bench_populate ... bench: 18,887 ns/iter (+/- 76) test goser::serde_json::bench_deserializer ... bench: 104,803 ns/iter (+/- 770) = 5 MB/s test goser::serde_json::bench_populate ... bench: 18,890 ns/iter (+/- 69) test goser::serde_json::bench_serializer ... bench: 75,046 ns/iter (+/- 435) = 8 MB/s ``` After ``` test goser::bench_clone ... bench: 16,052 ns/iter (+/- 188) = 34 MB/s test goser::bincode::bench_decoder ... bench: 31,194 ns/iter (+/- 941) = 12 MB/s test goser::bincode::bench_deserialize ... bench: 33,934 ns/iter (+/- 352) = 10 MB/s test goser::bincode::bench_encoder ... bench: 30,737 ns/iter (+/- 1,969) = 13 MB/s test goser::bincode::bench_populate ... bench: 17,234 ns/iter (+/- 176) test goser::bincode::bench_serialize ... bench: 28,269 ns/iter (+/- 452) = 12 MB/s test goser::capnp::bench_deserialize ... bench: 2,019 ns/iter (+/- 85) = 221 MB/s test goser::capnp::bench_deserialize_packed ... bench: 10,662 ns/iter (+/- 527) = 31 MB/s test goser::capnp::bench_populate ... bench: 607 ns/iter (+/- 2) test goser::capnp::bench_serialize ... bench: 30,488 ns/iter (+/- 219) = 14 MB/s test goser::capnp::bench_serialize_packed ... bench: 33,731 ns/iter (+/- 201) = 9 MB/s test goser::msgpack::bench_decoder ... bench: 46,921 ns/iter (+/- 461) = 6 MB/s test goser::msgpack::bench_encoder ... bench: 22,315 ns/iter (+/- 96) = 12 MB/s test goser::msgpack::bench_populate ... bench: 17,268 ns/iter (+/- 73) test goser::protobuf::bench_decoder ... bench: 2,658 ns/iter (+/- 44) = 107 MB/s test goser::protobuf::bench_encoder ... bench: 71,024 ns/iter (+/- 359) = 4 MB/s test goser::protobuf::bench_populate ... bench: 17,704 ns/iter (+/- 104) test goser::rustc_serialize_json::bench_decoder ... bench: 107,867 ns/iter (+/- 759) = 5 MB/s test goser::rustc_serialize_json::bench_encoder ... bench: 52,327 ns/iter (+/- 479) = 11 MB/s test goser::rustc_serialize_json::bench_populate ... 
bench: 17,262 ns/iter (+/- 68) test goser::serde_json::bench_deserializer ... bench: 99,156 ns/iter (+/- 657) = 6 MB/s test goser::serde_json::bench_populate ... bench: 17,264 ns/iter (+/- 77) test goser::serde_json::bench_serializer ... bench: 66,135 ns/iter (+/- 392) = 9 MB/s ``` --- src/librustc_trans/trans/expr.rs | 33 +++++++++++++++++++++++++++++++- src/test/codegen/adjustments.rs | 9 +++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/librustc_trans/trans/expr.rs b/src/librustc_trans/trans/expr.rs index 5b0dae23807..370e5667c1f 100644 --- a/src/librustc_trans/trans/expr.rs +++ b/src/librustc_trans/trans/expr.rs @@ -119,7 +119,7 @@ pub fn trans_into<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, debuginfo::set_source_location(bcx.fcx, expr.id, expr.span); - if bcx.tcx().tables.borrow().adjustments.contains_key(&expr.id) { + if adjustment_required(bcx, expr) { // use trans, which may be less efficient but // which will perform the adjustments: let datum = unpack_datum!(bcx, trans(bcx, expr)); @@ -334,6 +334,37 @@ pub fn unsized_info<'ccx, 'tcx>(ccx: &CrateContext<'ccx, 'tcx>, } } +fn adjustment_required<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, + expr: &hir::Expr) -> bool { + let adjustment = match bcx.tcx().tables.borrow().adjustments.get(&expr.id).cloned() { + None => { return false; } + Some(adj) => adj + }; + + // Don't skip a conversion from Box<T> to &T, etc. + if bcx.tcx().is_overloaded_autoderef(expr.id, 0) { + return true; + } + + match adjustment { + AdjustReifyFnPointer => { + // FIXME(#19925) once fn item types are + // zero-sized, we'll need to return true here + false + } + AdjustUnsafeFnPointer => { + // purely a type-level thing + false + } + AdjustDerefRef(ref adj) => { + // We are a bit paranoid about adjustments and thus might have a re- + // borrow here which merely derefs and then refs again (it might have + // a different region or mutability, but we don't care here).
+ !(adj.autoderefs == 1 && adj.autoref.is_some() && adj.unsize.is_none()) + } + } +} + /// Helper for trans that apply adjustments from `expr` to `datum`, which should be the unadjusted /// translation of `expr`. fn apply_adjustments<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, diff --git a/src/test/codegen/adjustments.rs b/src/test/codegen/adjustments.rs index b0438f561b9..d9a7acf1129 100644 --- a/src/test/codegen/adjustments.rs +++ b/src/test/codegen/adjustments.rs @@ -26,3 +26,12 @@ pub fn no_op_slice_adjustment(x: &[u8]) -> &[u8] { // CHECK: call void @llvm.memcpy.{{.*}}(i8* [[DST]], i8* [[SRC]], { x } } + +// CHECK-LABEL: @no_op_slice_adjustment2 +#[no_mangle] +pub fn no_op_slice_adjustment2(x: &[u8]) -> &[u8] { + // We used to generate an extra alloca and memcpy for the function's return value, so check + // that there's no memcpy (the slice is written to sret_slot element-wise) +// CHECK-NOT: call void @llvm.memcpy. + no_op_slice_adjustment(x) +}