auto merge of #16662 : pczarn/rust/format-fmtstr-opt, r=brson

This change is based on the observation that string pieces and arguments are always interleaved, thanks to #15832. Additionally, it optimizes invocations where formatting parameters are unspecified for all arguments, e.g. `"{} {:?} {:x}"`, by emptying the `__STATIC_FMTARGS` array. Next, `Arguments::new` replaces an empty slice with `None` so that passing an empty `__STATIC_FMTARGS` generates slightly less machine code when `Arguments::new` is inlined. Furthermore, formatting itself treats these cases separately without making redundant copies of formatting parameters.

All in all, this adds a single mov instruction per `write!` in most cases. That's why code size has increased.
This commit is contained in:
bors 2014-09-09 23:55:43 +00:00
commit a1f4973090
5 changed files with 232 additions and 66 deletions

View File

@ -113,6 +113,33 @@ impl<'a> Arguments<'a> {
/// Arguments structure. The compiler inserts an `unsafe` block to call this, /// Arguments structure. The compiler inserts an `unsafe` block to call this,
/// which is valid because the compiler performs all necessary validation to /// which is valid because the compiler performs all necessary validation to
/// ensure that the resulting call to format/write would be safe. /// ensure that the resulting call to format/write would be safe.
#[cfg(not(stage0))]
#[doc(hidden)] #[inline]
// Constructor for format strings whose placeholders all use the default
// formatting parameters: only string pieces and arguments are stored.
pub unsafe fn new<'a>(pieces: &'static [&'static str],
args: &'a [Argument<'a>]) -> Arguments<'a> {
Arguments {
// Reinterpret the `'static` slice of pieces at the `'a` lifetime of the
// struct's `pieces` field.
pieces: mem::transmute(pieces),
// `None` records that no placeholder specs are stored, i.e. all specs
// are default (as in "{}{}").
fmt: None,
args: args
}
}
/// This function is used to specify nonstandard formatting parameters.
/// The `pieces` array must be at least as long as `fmt` to construct
/// a valid Arguments structure.
///
/// # Arguments
///
/// * pieces - the literal string pieces of the format string
/// * fmt - the placeholder specs; `write` pairs each spec with the string
///         piece at the same index and prints that piece first
/// * args - the dynamic arguments the specs refer to
#[cfg(not(stage0))]
#[doc(hidden)] #[inline]
pub unsafe fn with_placeholders<'a>(pieces: &'static [&'static str],
fmt: &'static [rt::Argument<'static>],
args: &'a [Argument<'a>]) -> Arguments<'a> {
Arguments {
// Both `'static` slices are reinterpreted at the `'a` lifetime used by
// the struct's fields.
pieces: mem::transmute(pieces),
fmt: Some(mem::transmute(fmt)),
args: args
}
}
#[cfg(stage0)]
#[doc(hidden)] #[inline] #[doc(hidden)] #[inline]
pub unsafe fn new<'a>(fmt: &'static [rt::Piece<'static>], pub unsafe fn new<'a>(fmt: &'static [rt::Piece<'static>],
args: &'a [Argument<'a>]) -> Arguments<'a> { args: &'a [Argument<'a>]) -> Arguments<'a> {
@ -129,6 +156,20 @@ impl<'a> Arguments<'a> {
/// and pass it to a function or closure, passed as the first argument. The /// and pass it to a function or closure, passed as the first argument. The
/// macro validates the format string at compile-time so usage of the `write` /// macro validates the format string at compile-time so usage of the `write`
/// and `format` functions can be safely performed. /// and `format` functions can be safely performed.
#[cfg(not(stage0))]
pub struct Arguments<'a> {
// Format string pieces to print.
pieces: &'a [&'a str],
// Placeholder specs, or `None` if all specs are default (as in "{}{}").
// When `None`, `write` formats every argument with `DEFAULT_ARGUMENT`.
fmt: Option<&'a [rt::Argument<'a>]>,
// Dynamic arguments for interpolation, to be interleaved with string
// pieces. (Every argument is preceded by a string piece.)
// `write` emits at most one trailing piece after the last argument.
args: &'a [Argument<'a>],
}
#[cfg(stage0)] #[doc(hidden)]
pub struct Arguments<'a> { pub struct Arguments<'a> {
fmt: &'a [rt::Piece<'a>], fmt: &'a [rt::Piece<'a>],
args: &'a [Argument<'a>], args: &'a [Argument<'a>],
@ -255,6 +296,18 @@ uniform_fn_call_workaround! {
secret_upper_exp, UpperExp; secret_upper_exp, UpperExp;
} }
// Placeholder spec that `write` applies to every argument when
// `Arguments.fmt` is `None`: take the next argument in order, fill with a
// space, no explicit alignment, no flags, and implied width and precision.
#[cfg(not(stage0))]
static DEFAULT_ARGUMENT: rt::Argument<'static> = rt::Argument {
position: rt::ArgumentNext,
format: rt::FormatSpec {
fill: ' ',
align: rt::AlignUnknown,
flags: 0,
precision: rt::CountImplied,
width: rt::CountImplied,
}
};
/// The `write` function takes an output stream, a precompiled format string, /// The `write` function takes an output stream, a precompiled format string,
/// and a list of arguments. The arguments will be formatted according to the /// and a list of arguments. The arguments will be formatted according to the
/// specified format string into the output stream provided. /// specified format string into the output stream provided.
@ -263,6 +316,51 @@ uniform_fn_call_workaround! {
/// ///
/// * output - the buffer to write output to /// * output - the buffer to write output to
/// * args - the precompiled arguments generated by `format_args!` /// * args - the precompiled arguments generated by `format_args!`
#[cfg(not(stage0))]
pub fn write(output: &mut FormatWriter, args: &Arguments) -> Result {
// Initial formatting state; `run` overwrites fill, align, flags, width and
// precision from the placeholder spec before each argument is formatted.
let mut formatter = Formatter {
flags: 0,
width: None,
precision: None,
buf: output,
align: rt::AlignUnknown,
fill: ' ',
args: args.args,
// Cursor consumed by placeholders whose position is `rt::ArgumentNext`.
curarg: args.args.iter(),
};

let mut pieces = args.pieces.iter();

match args.fmt {
None => {
// We can use default formatting parameters for all arguments.
// One string piece is written before each argument; the `unwrap`
// relies on every argument being preceded by a string piece.
for _ in range(0, args.args.len()) {
try!(formatter.buf.write(pieces.next().unwrap().as_bytes()));
try!(formatter.run(&DEFAULT_ARGUMENT));
}
}
Some(fmt) => {
// Every spec has a corresponding argument that is preceded by
// a string piece.
// `by_ref` only borrows `pieces`, so whatever the loop leaves
// unconsumed is still available for the trailing piece below.
// NOTE(review): presumably `zip` polls `fmt` (the left side) first, so
// no piece is pulled and dropped once `fmt` is exhausted - confirm.
for (arg, piece) in fmt.iter().zip(pieces.by_ref()) {
try!(formatter.buf.write(piece.as_bytes()));
try!(formatter.run(arg));
}
}
}

// There can be only one trailing string piece left.
// Only a single `next()` is taken: any further pieces would not be written.
match pieces.next() {
Some(piece) => {
try!(formatter.buf.write(piece.as_bytes()));
}
None => {}
}

Ok(())
}
#[cfg(stage0)] #[doc(hidden)]
pub fn write(output: &mut FormatWriter, args: &Arguments) -> Result { pub fn write(output: &mut FormatWriter, args: &Arguments) -> Result {
let mut formatter = Formatter { let mut formatter = Formatter {
flags: 0, flags: 0,
@ -285,7 +383,26 @@ impl<'a> Formatter<'a> {
// First up is the collection of functions used to execute a format string // First up is the collection of functions used to execute a format string
// at runtime. This consumes all of the compile-time statics generated by // at runtime. This consumes all of the compile-time statics generated by
// the format! syntax extension. // the format! syntax extension.
#[cfg(not(stage0))]
fn run(&mut self, arg: &rt::Argument) -> Result {
// Fill in the format parameters into the formatter
// Every field is assigned unconditionally, so state set while formatting
// a previous argument is replaced rather than merged.
self.fill = arg.format.fill;
self.align = arg.format.align;
self.flags = arg.format.flags;
// NOTE(review): `getcount` is defined outside this view; presumably it
// resolves an `rt::Count` into the optional width/precision value, and
// the width-then-precision call order may matter - confirm.
self.width = self.getcount(&arg.format.width);
self.precision = self.getcount(&arg.format.precision);

// Extract the correct argument
// `ArgumentNext` advances the shared `curarg` cursor (the `unwrap` fails
// if it is exhausted); `ArgumentIs(i)` indexes `args` directly and leaves
// the cursor untouched.
let value = match arg.position {
rt::ArgumentNext => { *self.curarg.next().unwrap() }
rt::ArgumentIs(i) => self.args[i],
};

// Then actually do some printing
// Call the argument's stored formatting function with its value and this
// formatter.
(value.formatter)(value.value, self)
}
#[cfg(stage0)] #[doc(hidden)]
fn run(&mut self, piece: &rt::Piece) -> Result { fn run(&mut self, piece: &rt::Piece) -> Result {
match *piece { match *piece {
rt::String(s) => self.buf.write(s.as_bytes()), rt::String(s) => self.buf.write(s.as_bytes()),

View File

@ -14,7 +14,7 @@
//! These definitions are similar to their `ct` equivalents, but differ in that //! These definitions are similar to their `ct` equivalents, but differ in that
//! these can be statically allocated and are slightly optimized for the runtime //! these can be statically allocated and are slightly optimized for the runtime
#[cfg(stage0)]
#[doc(hidden)] #[doc(hidden)]
pub enum Piece<'a> { pub enum Piece<'a> {
String(&'a str), String(&'a str),

View File

@ -9,3 +9,10 @@
// except according to those terms. // except according to those terms.
mod num; mod num;
#[test]
fn test_format_flags() {
// No residual flags left by pointer formatting
// The `{:x}` placeholder that follows `{:p}` must render 16 as plain "10";
// the expected string spells that "10" out literally after the same
// pointer output.
// NOTE(review): presumably `{:p}` alters the formatter's flags while
// printing, which is what could leak into the next argument - confirm.
let p = "".as_ptr();
assert_eq!(format!("{:p} {:x}", p, 16u), format!("{:p} 10", p));
}

View File

@ -49,11 +49,16 @@ struct Context<'a, 'b:'a> {
name_types: HashMap<String, ArgumentType>, name_types: HashMap<String, ArgumentType>,
name_ordering: Vec<String>, name_ordering: Vec<String>,
/// The latest consecutive literal strings /// The latest consecutive literal strings, or empty if there weren't any.
literal: Option<String>, literal: String,
/// Collection of the compiled `rt::Piece` structures /// Collection of the compiled `rt::Argument` structures
pieces: Vec<Gc<ast::Expr>>, pieces: Vec<Gc<ast::Expr>>,
/// Collection of string literals
str_pieces: Vec<Gc<ast::Expr>>,
/// Stays `true` if all formatting parameters are default (as in "{}{}").
all_pieces_simple: bool,
name_positions: HashMap<String, uint>, name_positions: HashMap<String, uint>,
method_statics: Vec<Gc<ast::Item>>, method_statics: Vec<Gc<ast::Item>>,
@ -370,28 +375,21 @@ impl<'a, 'b> Context<'a, 'b> {
} }
} }
/// Translate the accumulated string literals to a static `rt::Piece` /// Translate the accumulated string literals to a literal expression
fn trans_literal_string(&mut self) -> Option<Gc<ast::Expr>> { fn trans_literal_string(&mut self) -> Gc<ast::Expr> {
let sp = self.fmtsp; let sp = self.fmtsp;
self.literal.take().map(|s| { let s = token::intern_and_get_ident(self.literal.as_slice());
let s = token::intern_and_get_ident(s.as_slice()); self.literal.clear();
self.ecx.expr_call_global(sp, self.ecx.expr_str(sp, s)
self.rtpath("String"),
vec!(
self.ecx.expr_str(sp, s)
))
})
} }
/// Translate a `parse::Piece` to a static `rt::Piece` /// Translate a `parse::Piece` to a static `rt::Argument` or append
/// to the `literal` string.
fn trans_piece(&mut self, piece: &parse::Piece) -> Option<Gc<ast::Expr>> { fn trans_piece(&mut self, piece: &parse::Piece) -> Option<Gc<ast::Expr>> {
let sp = self.fmtsp; let sp = self.fmtsp;
match *piece { match *piece {
parse::String(s) => { parse::String(s) => {
match self.literal { self.literal.push_str(s);
Some(ref mut sb) => sb.push_str(s),
ref mut empty => *empty = Some(String::from_str(s)),
}
None None
} }
parse::Argument(ref arg) => { parse::Argument(ref arg) => {
@ -420,8 +418,25 @@ impl<'a, 'b> Context<'a, 'b> {
} }
}; };
// Translate the format let simple_arg = parse::Argument {
position: parse::ArgumentNext,
format: parse::FormatSpec {
fill: arg.format.fill,
align: parse::AlignUnknown,
flags: 0,
precision: parse::CountImplied,
width: parse::CountImplied,
ty: arg.format.ty
}
};
let fill = match arg.format.fill { Some(c) => c, None => ' ' }; let fill = match arg.format.fill { Some(c) => c, None => ' ' };
if *arg != simple_arg || fill != ' ' {
self.all_pieces_simple = false;
}
// Translate the format
let fill = self.ecx.expr_lit(sp, ast::LitChar(fill)); let fill = self.ecx.expr_lit(sp, ast::LitChar(fill));
let align = match arg.format.align { let align = match arg.format.align {
parse::AlignLeft => { parse::AlignLeft => {
@ -450,14 +465,33 @@ impl<'a, 'b> Context<'a, 'b> {
self.ecx.field_imm(sp, self.ecx.ident_of("width"), width))); self.ecx.field_imm(sp, self.ecx.ident_of("width"), width)));
let path = self.ecx.path_global(sp, self.rtpath("Argument")); let path = self.ecx.path_global(sp, self.rtpath("Argument"));
let s = self.ecx.expr_struct(sp, path, vec!( Some(self.ecx.expr_struct(sp, path, vec!(
self.ecx.field_imm(sp, self.ecx.ident_of("position"), pos), self.ecx.field_imm(sp, self.ecx.ident_of("position"), pos),
self.ecx.field_imm(sp, self.ecx.ident_of("format"), fmt))); self.ecx.field_imm(sp, self.ecx.ident_of("format"), fmt))))
Some(self.ecx.expr_call_global(sp, self.rtpath("Argument"), vec!(s)))
} }
} }
} }
fn item_static_array(&self,
name: ast::Ident,
piece_ty: Gc<ast::Ty>,
pieces: Vec<Gc<ast::Expr>>)
-> ast::Stmt
{
let pieces_len = self.ecx.expr_uint(self.fmtsp, pieces.len());
let fmt = self.ecx.expr_vec(self.fmtsp, pieces);
let ty = ast::TyFixedLengthVec(
piece_ty,
pieces_len
);
let ty = self.ecx.ty(self.fmtsp, ty);
let st = ast::ItemStatic(ty, ast::MutImmutable, fmt);
let item = self.ecx.item(self.fmtsp, name,
self.static_attrs(), st);
let decl = respan(self.fmtsp, ast::DeclItem(item));
respan(self.fmtsp, ast::StmtDecl(box(GC) decl, ast::DUMMY_NODE_ID))
}
/// Actually builds the expression which the iformat! block will be expanded /// Actually builds the expression which the iformat! block will be expanded
/// to /// to
fn to_expr(&self, invocation: Invocation) -> Gc<ast::Expr> { fn to_expr(&self, invocation: Invocation) -> Gc<ast::Expr> {
@ -476,30 +510,31 @@ impl<'a, 'b> Context<'a, 'b> {
// Next, build up the static array which will become our precompiled // Next, build up the static array which will become our precompiled
// format "string" // format "string"
let fmt = self.ecx.expr_vec(self.fmtsp, self.pieces.clone()); let static_str_name = self.ecx.ident_of("__STATIC_FMTSTR");
let piece_ty = self.ecx.ty_path(self.ecx.path_all( let static_lifetime = self.ecx.lifetime(self.fmtsp, self.ecx.ident_of("'static").name);
let piece_ty = self.ecx.ty_rptr(
self.fmtsp, self.fmtsp,
true, vec!( self.ecx.ty_ident(self.fmtsp, self.ecx.ident_of("str")),
self.ecx.ident_of("std"), Some(static_lifetime),
self.ecx.ident_of("fmt"), ast::MutImmutable);
self.ecx.ident_of("rt"), lets.push(box(GC) self.item_static_array(static_str_name,
self.ecx.ident_of("Piece")), piece_ty,
vec!(self.ecx.lifetime(self.fmtsp, self.str_pieces.clone()));
self.ecx.ident_of("'static").name)),
Vec::new() // Then, build up the static array which will store our precompiled
), None); // nonstandard placeholders, if there are any.
let ty = ast::TyFixedLengthVec( let static_args_name = self.ecx.ident_of("__STATIC_FMTARGS");
piece_ty, if !self.all_pieces_simple {
self.ecx.expr_uint(self.fmtsp, self.pieces.len()) let piece_ty = self.ecx.ty_path(self.ecx.path_all(
); self.fmtsp,
let ty = self.ecx.ty(self.fmtsp, ty); true, self.rtpath("Argument"),
let st = ast::ItemStatic(ty, ast::MutImmutable, fmt); vec![static_lifetime],
let static_name = self.ecx.ident_of("__STATIC_FMTSTR"); vec![]
let item = self.ecx.item(self.fmtsp, static_name, ), None);
self.static_attrs(), st); lets.push(box(GC) self.item_static_array(static_args_name,
let decl = respan(self.fmtsp, ast::DeclItem(item)); piece_ty,
lets.push(box(GC) respan(self.fmtsp, self.pieces.clone()));
ast::StmtDecl(box(GC) decl, ast::DUMMY_NODE_ID))); }
// Right now there is a bug such that for the expression: // Right now there is a bug such that for the expression:
// foo(bar(&1)) // foo(bar(&1))
@ -545,13 +580,21 @@ impl<'a, 'b> Context<'a, 'b> {
} }
// Now create the fmt::Arguments struct with all our locals we created. // Now create the fmt::Arguments struct with all our locals we created.
let fmt = self.ecx.expr_ident(self.fmtsp, static_name); let pieces = self.ecx.expr_ident(self.fmtsp, static_str_name);
let args_slice = self.ecx.expr_ident(self.fmtsp, slicename); let args_slice = self.ecx.expr_ident(self.fmtsp, slicename);
let (fn_name, fn_args) = if self.all_pieces_simple {
("new", vec![pieces, args_slice])
} else {
let fmt = self.ecx.expr_ident(self.fmtsp, static_args_name);
("with_placeholders", vec![pieces, fmt, args_slice])
};
let result = self.ecx.expr_call_global(self.fmtsp, vec!( let result = self.ecx.expr_call_global(self.fmtsp, vec!(
self.ecx.ident_of("std"), self.ecx.ident_of("std"),
self.ecx.ident_of("fmt"), self.ecx.ident_of("fmt"),
self.ecx.ident_of("Arguments"), self.ecx.ident_of("Arguments"),
self.ecx.ident_of("new")), vec!(fmt, args_slice)); self.ecx.ident_of(fn_name)), fn_args);
// We did all the work of making sure that the arguments // We did all the work of making sure that the arguments
// structure is safe, so we can safely have an unsafe block. // structure is safe, so we can safely have an unsafe block.
@ -718,8 +761,10 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span,
name_ordering: name_ordering, name_ordering: name_ordering,
nest_level: 0, nest_level: 0,
next_arg: 0, next_arg: 0,
literal: None, literal: String::new(),
pieces: Vec::new(), pieces: Vec::new(),
str_pieces: Vec::new(),
all_pieces_simple: true,
method_statics: Vec::new(), method_statics: Vec::new(),
fmtsp: sp, fmtsp: sp,
}; };
@ -739,8 +784,8 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span,
cx.verify_piece(&piece); cx.verify_piece(&piece);
match cx.trans_piece(&piece) { match cx.trans_piece(&piece) {
Some(piece) => { Some(piece) => {
cx.trans_literal_string().map(|piece| let s = cx.trans_literal_string();
cx.pieces.push(piece)); cx.str_pieces.push(s);
cx.pieces.push(piece); cx.pieces.push(piece);
} }
None => {} None => {}
@ -758,7 +803,10 @@ pub fn expand_preparsed_format_args(ecx: &mut ExtCtxt, sp: Span,
} }
None => {} None => {}
} }
cx.trans_literal_string().map(|piece| cx.pieces.push(piece)); if !cx.literal.is_empty() {
let s = cx.trans_literal_string();
cx.str_pieces.push(s);
}
// Make sure that all arguments were used and all arguments have types. // Make sure that all arguments were used and all arguments have types.
for (i, ty) in cx.arg_types.iter().enumerate() { for (i, ty) in cx.arg_types.iter().enumerate() {

View File

@ -39,26 +39,20 @@ pub fn bar() {
() => { () => {
#[inline] #[inline]
#[allow(dead_code)] #[allow(dead_code)]
static __STATIC_FMTSTR: static __STATIC_FMTSTR: [&'static str, ..(1u as uint)] =
[::std::fmt::rt::Piece<'static>, ..(1u as uint)] = ([("test" as &'static str)] as [&'static str, .. 1]);
([((::std::fmt::rt::String as
fn(&'static str) -> core::fmt::rt::Piece<'static>)(("test"
as
&'static str))
as core::fmt::rt::Piece<'static>)] as
[core::fmt::rt::Piece<'static>, .. 1]);
let __args_vec = let __args_vec =
(&([] as [core::fmt::Argument<'_>, .. 0]) as (&([] as [core::fmt::Argument<'_>, .. 0]) as
&[core::fmt::Argument<'_>, .. 0]); &[core::fmt::Argument<'_>, .. 0]);
let __args = let __args =
(unsafe { (unsafe {
((::std::fmt::Arguments::new as ((::std::fmt::Arguments::new as
unsafe fn(&'static [core::fmt::rt::Piece<'static>], &'a [core::fmt::Argument<'a>]) -> core::fmt::Arguments<'a>)((__STATIC_FMTSTR unsafe fn(&'static [&'static str], &'a [core::fmt::Argument<'a>]) -> core::fmt::Arguments<'a>)((__STATIC_FMTSTR
as as
[core::fmt::rt::Piece<'static>, .. 1]), [&'static str, .. 1]),
(__args_vec (__args_vec
as as
&[core::fmt::Argument<'_>, .. 0])) &[core::fmt::Argument<'_>, .. 0]))
as core::fmt::Arguments<'_>) as core::fmt::Arguments<'_>)
} as core::fmt::Arguments<'_>); } as core::fmt::Arguments<'_>);