Lexer: Fix offset handling in get_str_from()
As the old comment suspected, subtracting 1 from the byte offsets is bogus when the preceding character is multibyte. Fortunately, we can just use `last_pos` instead of `pos` to get the correct position without any subtraction hackery.
This commit is contained in:
parent
b8cf2f8056
commit
43cae88079
|
@ -347,7 +347,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let bstart = rdr.pos;
|
let bstart = rdr.last_pos;
|
||||||
rdr.next_token();
|
rdr.next_token();
|
||||||
//discard, and look ahead; we're working with internal state
|
//discard, and look ahead; we're working with internal state
|
||||||
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
|
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
|
||||||
|
|
|
@ -161,22 +161,20 @@ fn string_advance_token(r: @mut StringReader) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn byte_offset(rdr: &StringReader) -> BytePos {
|
fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
|
||||||
(rdr.pos - rdr.filemap.start_pos)
|
(pos - rdr.filemap.start_pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
|
pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
|
||||||
// I'm pretty skeptical about this subtraction. What if there's a
|
return str::slice(*rdr.src, start.to_uint(),
|
||||||
// multi-byte character before the mark?
|
byte_offset(rdr, rdr.last_pos).to_uint()).to_owned();
|
||||||
return str::slice(*rdr.src, start.to_uint() - 1u,
|
|
||||||
byte_offset(rdr).to_uint() - 1u).to_owned();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// EFFECT: advance the StringReader by one character. If a newline is
|
// EFFECT: advance the StringReader by one character. If a newline is
|
||||||
// discovered, add it to the FileMap's list of line start offsets.
|
// discovered, add it to the FileMap's list of line start offsets.
|
||||||
pub fn bump(rdr: &mut StringReader) {
|
pub fn bump(rdr: &mut StringReader) {
|
||||||
rdr.last_pos = rdr.pos;
|
rdr.last_pos = rdr.pos;
|
||||||
let current_byte_offset = byte_offset(rdr).to_uint();;
|
let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
|
||||||
if current_byte_offset < (*rdr.src).len() {
|
if current_byte_offset < (*rdr.src).len() {
|
||||||
assert!(rdr.curr != -1 as char);
|
assert!(rdr.curr != -1 as char);
|
||||||
let last_char = rdr.curr;
|
let last_char = rdr.curr;
|
||||||
|
@ -202,7 +200,7 @@ pub fn is_eof(rdr: @mut StringReader) -> bool {
|
||||||
rdr.curr == -1 as char
|
rdr.curr == -1 as char
|
||||||
}
|
}
|
||||||
pub fn nextch(rdr: @mut StringReader) -> char {
|
pub fn nextch(rdr: @mut StringReader) -> char {
|
||||||
let offset = byte_offset(rdr).to_uint();
|
let offset = byte_offset(rdr, rdr.pos).to_uint();
|
||||||
if offset < (*rdr.src).len() {
|
if offset < (*rdr.src).len() {
|
||||||
return str::char_at(*rdr.src, offset);
|
return str::char_at(*rdr.src, offset);
|
||||||
} else { return -1 as char; }
|
} else { return -1 as char; }
|
||||||
|
@ -692,7 +690,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
|
||||||
return token::LIT_INT(c2 as i64, ast::ty_char);
|
return token::LIT_INT(c2 as i64, ast::ty_char);
|
||||||
}
|
}
|
||||||
'"' => {
|
'"' => {
|
||||||
let n = byte_offset(rdr);
|
let n = byte_offset(rdr, rdr.last_pos);
|
||||||
bump(rdr);
|
bump(rdr);
|
||||||
while rdr.curr != '"' {
|
while rdr.curr != '"' {
|
||||||
if is_eof(rdr) {
|
if is_eof(rdr) {
|
||||||
|
|
Loading…
Reference in New Issue