From 37494d39d38be33a589a1f46dae38fe2ceb9d94f Mon Sep 17 00:00:00 2001 From: Gary Linscott Date: Fri, 5 Jul 2013 19:01:57 -0400 Subject: [PATCH 1/2] Switch json parsing to read_chars for performance Avoids the overhead of read_char for every character. Benchmark reading example.json 10 times from https://code.google.com/p/rapidjson/wiki/Performance Before: 2.55s After: 0.16s Regression testing is already done by isrustfastyet. --- src/libextra/json.rs | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/libextra/json.rs b/src/libextra/json.rs index 71d99479693..5b9cc338b37 100644 --- a/src/libextra/json.rs +++ b/src/libextra/json.rs @@ -481,9 +481,13 @@ pub fn to_pretty_str(json: &Json) -> ~str { io::with_str_writer(|wr| to_pretty_writer(wr, json)) } +static BUF_SIZE : uint = 64000; + #[allow(missing_doc)] pub struct Parser { priv rdr: @io::Reader, + priv buf: ~[char], + priv buf_idx: uint, priv ch: char, priv line: uint, priv col: uint, @@ -491,12 +495,16 @@ pub struct Parser { /// Decode a json value from an io::reader pub fn Parser(rdr: @io::Reader) -> Parser { - Parser { + let mut p = Parser { rdr: rdr, - ch: rdr.read_char(), + buf: rdr.read_chars(BUF_SIZE), + buf_idx: 0, + ch: 0 as char, line: 1, - col: 1, - } + col: 0, + }; + p.bump(); + p } impl Parser { @@ -521,13 +529,26 @@ impl Parser { fn eof(&self) -> bool { self.ch == -1 as char } fn bump(&mut self) { - self.ch = self.rdr.read_char(); + if self.eof() { + return; + } + + self.col += 1u; + + if self.buf_idx >= self.buf.len() { + self.buf = self.rdr.read_chars(BUF_SIZE); + if self.buf.len() == 0 { + self.ch = -1 as char; + return; + } + self.buf_idx = 0; + } + self.ch = self.buf[self.buf_idx]; + self.buf_idx += 1; if self.ch == '\n' { self.line += 1u; self.col = 1u; - } else { - self.col += 1u; } } From 52949fbf1876ecd03303006c534a74c5e29bc90d Mon Sep 17 00:00:00 2001 From: Gary Linscott Date: Sat, 6 Jul 2013 01:54:29 -0400 Subject: [PATCH 2/2] Faster 
check for ascii-space Since ' ' is by far one of the most common characters, it is worthwhile to put it first, and short-circuit the rest of the function. On the same JSON benchmark as the json_perf improvement (reading example.json 10 times from https://code.google.com/p/rapidjson/wiki/Performance): Before: 0.16s After: 0.11s --- src/libstd/char.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libstd/char.rs b/src/libstd/char.rs index 6a9555f4efc..47473c2faba 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -82,7 +82,8 @@ pub fn is_uppercase(c: char) -> bool { general_category::Lu(c) } /// #[inline] pub fn is_whitespace(c: char) -> bool { - ('\x09' <= c && c <= '\x0d') + c == ' ' + || ('\x09' <= c && c <= '\x0d') || general_category::Zs(c) || general_category::Zl(c) || general_category::Zp(c)