Preserve comments when pretty-printing.

The patch also includes a number of smaller fixes for pretty-printer
problems that were encountered along the way.
This commit is contained in:
Marijn Haverbeke 2011-03-24 16:33:20 +01:00 committed by Graydon Hoare
parent ffc188a4dd
commit e7e6f396d8
5 changed files with 1124 additions and 852 deletions

View File

@ -77,7 +77,7 @@ impure fn pretty_print_input(session.session sess,
auto def = tup(0, 0);
auto p = front.parser.new_parser(sess, env, def, input);
auto crate = front.parser.parse_crate_from_source_file(p);
pretty.pprust.print_ast(crate.node.module, std.io.stdout());
pretty.pprust.print_file(crate.node.module, input, std.io.stdout());
}
fn warn_wrong_compiler() {

View File

@ -1,5 +1,6 @@
import std.io;
import std._str;
import std._vec;
import std._int;
import std.map;
import std.map.hashmap;
@ -781,6 +782,85 @@ impure fn next_token(reader rdr) -> token.token {
fail;
}
tag cmnt_ {
cmnt_line(str);
cmnt_block(vec[str]);
}
type cmnt = rec(cmnt_ val, common.pos pos, bool space_after);
// Advances the reader past a run of whitespace characters and returns
// how many of the skipped characters were newlines.
impure fn consume_whitespace(reader rdr) -> uint {
    auto newlines = 0u;
    while (is_whitespace(rdr.curr())) {
        auto c = rdr.curr();
        rdr.bump();
        if (c == '\n') { newlines += 1u; }
    }
    ret newlines;
}
// Reads a `//` comment that starts at the reader's current position.
//
// The result records the position of the comment marker, the comment
// text (without the marker and without the blanks that directly follow
// it, up to but not including the end of the line), and `space_after`,
// which is true when more than one newline follows the text, i.e. when
// the comment is followed by at least one blank line.
impure fn read_line_comment(reader rdr) -> cmnt {
    auto p = rdr.get_curr_pos();
    // Step over the two characters of the `//` marker.
    rdr.bump(); rdr.bump();
    // Skip blanks between the marker and the text, but never a newline:
    // an empty comment must not swallow the line that follows it.
    while (rdr.curr() != '\n' && is_whitespace(rdr.curr())) {
        rdr.bump();
    }
    auto val = "";
    // Stop at end of input as well as at a newline, so that a comment on
    // the last line of a file without a trailing newline terminates.
    while (!rdr.is_eof() && rdr.curr() != '\n') {
        _str.push_char(val, rdr.curr());
        rdr.bump();
    }
    ret rec(val=cmnt_line(val),
            pos=p,
            space_after=consume_whitespace(rdr) > 1u);
}
// Reads a `/* ... */` comment that starts at the reader's current
// position.
//
// The text is split into one string per source line. Whitespace after
// the opening marker and at the start of every following line is
// dropped; because that skipping also consumes newlines, blank lines
// inside the comment are not preserved. Nested `/* ... */` pairs are
// tracked with a depth counter and their delimiters are kept in the
// text; only the final closing `*/` is left out.
//
// `space_after` is true when more than one newline follows the comment.
impure fn read_block_comment(reader rdr) -> cmnt {
    auto p = rdr.get_curr_pos();
    // Step over the two characters of the `/*` marker.
    rdr.bump(); rdr.bump();
    consume_whitespace(rdr);
    let vec[str] lines = vec();
    auto val = "";
    auto level = 1;
    while (true) {
        if (rdr.is_eof()) {
            // Unterminated comment: keep what was read so far instead of
            // looping forever on the end-of-input character.
            _vec.push[str](lines, val);
            break;
        }
        if (rdr.curr() == '\n') {
            // End of a comment line: store it and skip the indentation
            // of the next one.
            _vec.push[str](lines, val);
            val = "";
            consume_whitespace(rdr);
        } else {
            if (rdr.curr() == '*' && rdr.next() == '/') {
                level -= 1;
                if (level == 0) {
                    // Outermost close: consume it and finish.
                    rdr.bump(); rdr.bump();
                    _vec.push[str](lines, val);
                    break;
                }
            } else if (rdr.curr() == '/' && rdr.next() == '*') {
                level += 1;
            }
            _str.push_char(val, rdr.curr());
            rdr.bump();
        }
    }
    ret rec(val=cmnt_block(lines),
            pos=p,
            space_after=consume_whitespace(rdr) > 1u);
}
// Scans the file at `path` and returns every comment found in it, in
// source order.
//
// Between tokens, any run of whitespace and comments is consumed and
// each comment is recorded; everything else is skipped one token at a
// time with next_token.
impure fn gather_comments(str path) -> vec[cmnt] {
    auto file_rdr = io.file_reader(path);
    auto rdr = lexer.new_reader(file_rdr, path);
    let vec[cmnt] found = vec();
    while (!rdr.is_eof()) {
        // Collect all comments that precede the next token.
        auto in_comments = true;
        while (in_comments) {
            consume_whitespace(rdr);
            if (rdr.curr() == '/' && rdr.next() == '/') {
                _vec.push[cmnt](found, read_line_comment(rdr));
            } else if (rdr.curr() == '/' && rdr.next() == '*') {
                _vec.push[cmnt](found, read_block_comment(rdr));
            } else {
                in_comments = false;
            }
        }
        // Not a comment: step over one ordinary token.
        next_token(rdr);
    }
    ret found;
}
//
// Local Variables:

View File

@ -116,6 +116,8 @@ impure fn new_parser(session.session sess,
}
auto srdr = io.file_reader(path);
auto rdr = lexer.new_reader(srdr, path);
// Make sure npos points at first actual token.
lexer.consume_any_whitespace(rdr);
auto npos = rdr.get_curr_pos();
ret stdio_parser(sess, env, ftype, lexer.next_token(rdr),
npos, npos, initial_def._1, UNRESTRICTED, initial_def._0,
@ -1748,8 +1750,8 @@ impure fn parse_block(parser p) -> ast.block {
}
}
p.bump();
auto hi = p.get_span();
p.bump();
auto bloc = index_block(stmts, expr);
ret spanned[ast.block_](lo, hi, bloc);

View File

@ -4,9 +4,11 @@ import std._str;
tag boxtype {box_h; box_v; box_hv; box_align;}
tag contexttype {cx_h; cx_v;}
tag scantype {scan_hv; scan_h; scan_none;}
tag token {
brk(uint);
hardbrk;
word(str);
cword(str); // closing token
open(boxtype, uint);
@ -18,29 +20,45 @@ type context = rec(contexttype tp, uint indent);
type ps = @rec(mutable vec[context] context,
uint width,
io.writer out,
mutable vec[token] buffered,
mutable uint scandepth,
mutable uint bufferedcol,
mutable uint col,
mutable bool start_of_line);
mutable uint spaces,
mutable vec[token] buffered,
mutable scantype scanning,
mutable vec[boxtype] scandepth,
mutable uint scancol,
mutable bool start_of_line,
mutable bool start_of_box,
mutable bool potential_brk);
fn mkstate(io.writer out, uint width) -> ps {
let vec[context] stack = vec(rec(tp=cx_v, indent=0u));
let vec[token] buff = vec();
let vec[boxtype] sd = vec();
ret @rec(mutable context=stack,
width=width,
out=out,
mutable buffered=buff,
mutable scandepth=0u,
mutable bufferedcol=0u,
mutable col=0u,
mutable start_of_line=true);
mutable spaces=0u,
mutable buffered=buff,
mutable scanning=scan_none,
mutable scandepth=sd,
mutable scancol=0u,
mutable start_of_line=true,
mutable start_of_box=true,
mutable potential_brk=false);
}
// Writes `i` space characters to the printer's output stream. Does not
// touch the printer's column bookkeeping.
impure fn write_spaces(ps p, uint i) {
    auto remaining = i;
    while (remaining > 0u) {
        p.out.write_str(" ");
        remaining -= 1u;
    }
}
impure fn push_context(ps p, contexttype tp, uint indent) {
before_print(p, false);
_vec.push[context](p.context, rec(tp=tp, indent=base_indent(p)
+ indent));
_vec.push[context](p.context, rec(tp=tp, indent=indent));
p.start_of_box = true;
}
fn pop_context(ps p) {
@ -49,15 +67,13 @@ fn pop_context(ps p) {
impure fn add_token(ps p, token tok) {
if (p.width == 0u) {direct_token(p, tok);}
else if (p.scandepth == 0u) {do_token(p, tok);}
else if (p.scanning == scan_none) {do_token(p, tok);}
else {buffer_token(p, tok);}
}
impure fn direct_token(ps p, token tok) {
alt (tok) {
case (brk(?sz)) {
while (sz > 0u) {p.out.write_str(" "); sz -= 1u;}
}
case (brk(?sz)) {write_spaces(p, sz);}
case (word(?w)) {p.out.write_str(w);}
case (cword(?w)) {p.out.write_str(w);}
case (_) {}
@ -66,78 +82,75 @@ impure fn direct_token(ps p, token tok) {
impure fn buffer_token(ps p, token tok) {
p.buffered += vec(tok);
p.bufferedcol += token_size(tok);
alt (p.buffered.(0)) {
case (brk(_)) {
alt (tok) {
case (brk(_)) {
if (p.scandepth == 1u) {finish_break_scan(p);}
}
case (open(box_h,_)) {p.scandepth += 1u;}
case (open(_,_)) {finish_break_scan(p);}
case (close) {
p.scandepth -= 1u;
if (p.scandepth == 0u) {finish_break_scan(p);}
}
case (_) {}
}
}
case (open(_,_)) {
if (p.bufferedcol > p.width) {finish_block_scan(p, cx_v);}
else {
alt (tok) {
case (open(_,_)) {p.scandepth += 1u;}
case (close) {
p.scandepth -= 1u;
if (p.scandepth == 0u) {finish_block_scan(p, cx_h);}
auto col = p.scancol;
p.scancol = col + token_size(tok);
if (p.scancol > p.width) {
finish_scan(p, false);
} else {
alt (tok) {
case (open(?tp,_)) {
_vec.push[boxtype](p.scandepth, tp);
if (p.scanning == scan_h) {
if (tp == box_h) {
check_potential_brk(p);
}
case (_) {}
}
}
case (close) {
_vec.pop[boxtype](p.scandepth);
if (_vec.len[boxtype](p.scandepth) == 0u) {
finish_scan(p, true);
}
}
case (brk(_)) {
if (p.scanning == scan_h) {
if (p.scandepth.(_vec.len[boxtype](p.scandepth)-1u) == box_v) {
finish_scan(p, true);
}
}
}
case (_) {}
}
}
}
impure fn finish_block_scan(ps p, contexttype tp) {
// Sets p.potential_brk when every box on the scan stack is a box_h
// (trivially so when the stack is empty); otherwise leaves it unchanged.
impure fn check_potential_brk(ps p) {
    auto only_h = true;
    for (boxtype open_box in p.scandepth) {
        if (open_box != box_h) { only_h = false; }
    }
    if (only_h) { p.potential_brk = true; }
}
impure fn finish_scan(ps p, bool fits) {
auto buf = p.buffered;
auto front = _vec.shift[token](buf);
auto indent;
alt (front){
case (open(box_hv,?ind)) {
indent = ind;
auto chosen_tp = cx_h;
if (!fits) {chosen_tp = cx_v;}
alt (front) {
case (open(box_hv, ?ind)) {
push_context(p, chosen_tp, base_indent(p) + ind);
}
case (open(box_align, _)) {
indent = p.col - base_indent(p);
push_context(p, chosen_tp, p.col);
}
case (open(box_h, ?ind)) {
if (!fits && !p.start_of_box && !p.start_of_line && !p.potential_brk) {
line_break(p);
}
push_context(p, cx_h, base_indent(p) + ind);
}
}
p.scandepth = 0u;
p.buffered = vec();
push_context(p, tp, indent);
p.scandepth = vec();
p.scanning = scan_none;
for (token t in buf) { add_token(p, t); }
}
impure fn finish_break_scan(ps p) {
auto buf = p.buffered;
auto front = _vec.shift[token](buf);
if (p.bufferedcol > p.width) {
line_break(p);
}
else {
auto width;
alt (front) {case(brk(?w)) {width = w;}}
auto i = 0u;
while (i < width) {p.out.write_str(" "); i+=1u;}
p.col += width;
}
p.scandepth = 0u;
impure fn start_scan(ps p, token tok, scantype tp) {
p.buffered = vec();
for (token t in buf) { add_token(p, t); }
}
impure fn start_scan(ps p, token tok) {
p.buffered = vec(tok);
p.scandepth = 1u;
p.bufferedcol = p.col;
p.scancol = p.col;
p.scanning = tp;
buffer_token(p, tok);
p.potential_brk = false;
}
fn cur_context(ps p) -> context {
@ -152,64 +165,91 @@ fn base_indent(ps p) -> uint {
}
}
fn cx_is(contexttype a, contexttype b) -> bool {
if (a == b) {ret true;}
else {ret false;}
}
fn box_is(boxtype a, boxtype b) -> bool {
if (a == b) {ret true;}
else {ret false;}
}
impure fn do_token(ps p, token tok) {
auto start_of_box = p.start_of_box;
p.start_of_box = false;
alt (tok) {
case (brk(?sz)) {
alt (cur_context(p).tp) {
case (cx_h) {
before_print(p, false);
start_scan(p, tok);
}
case (cx_v) {
line_break(p);
}
if (cx_is(cur_context(p).tp, cx_v) || sz + p.col > p.width) {
line_break(p);
}
else {
p.spaces += sz;
}
}
case (hardbrk) {
line_break(p);
}
case (word(?w)) {
auto len = _str.char_len(w);
if (len + p.col + p.spaces > p.width && !start_of_box &&
!p.start_of_line) {
line_break(p);
}
before_print(p, false);
p.out.write_str(w);
p.col += _str.byte_len(w); // TODO char_len
p.col += len;
}
case (cword(?w)) {
before_print(p, true);
p.out.write_str(w);
p.col += _str.byte_len(w); // TODO char_len
p.col += _str.char_len(w);
}
case (open(?tp, ?indent)) {
alt (tp) {
case (box_hv) {start_scan(p, tok);}
case (box_align) {start_scan(p, tok);}
case (box_h) {push_context(p, cx_h, indent);}
case (box_v) {push_context(p, cx_v, indent);}
if (tp == box_v) {
push_context(p, cx_v, base_indent(p) + indent);
} else if (box_is(tp, box_h) && cx_is(cur_context(p).tp, cx_v)) {
push_context(p, cx_h, base_indent(p) + indent);
} else if (tp == box_h) {
p.start_of_box = start_of_box;
start_scan(p, tok, scan_h);
} else {
p.start_of_box = start_of_box;
start_scan(p, tok, scan_hv);
}
}
case (close) {pop_context(p);}
case (close) {
pop_context(p);
}
}
}
// Emits a newline and resets the line state: the column goes back to
// zero, the pending indentation is set to the current context's indent,
// and the printer is marked as being at the start of a line.
impure fn line_break(ps p) {
    p.out.write_str("\n");
    p.start_of_line = true;
    p.spaces = cur_context(p).indent;
    p.col = 0u;
}
impure fn before_print(ps p, bool closing) {
if (p.start_of_line) {
p.start_of_line = false;
auto ind;
if (closing) {ind = base_indent(p);}
else {ind = cur_context(p).indent;}
p.col = ind;
while (ind > 0u) {p.out.write_str(" "); ind -= 1u;}
if (closing) {p.spaces = base_indent(p);}
else {p.spaces = cur_context(p).indent;}
}
if (p.spaces > 0u) {
write_spaces(p, p.spaces);
p.col += p.spaces;
p.spaces = 0u;
}
}
fn token_size(token tok) -> uint {
alt (tok) {
case (brk(?sz)) {ret sz;}
case (word(?w)) {ret _str.byte_len(w);}
case (cword(?w)) {ret _str.byte_len(w);}
case (open(_, _)) {ret 0u;} // TODO exception for V blocks?
case (hardbrk) {ret 0xFFFFFFu;}
case (word(?w)) {ret _str.char_len(w);}
case (cword(?w)) {ret _str.char_len(w);}
case (open(_, _)) {ret 0u;}
case (close) {ret 0u;}
}
}
@ -224,3 +264,4 @@ impure fn cwrd(ps p, str wrd) {add_token(p, cword(wrd));}
// Convenience wrappers that feed break tokens to the printer.

// Adds a break that is one column wide.
impure fn space(ps p) {add_token(p, brk(1u));}
// Adds a break that is `n` columns wide.
impure fn spaces(ps p, uint n) {add_token(p, brk(n));}
// Adds a zero-width break.
impure fn line(ps p) {add_token(p, brk(0u));}
// Adds a hardbrk token.
impure fn hardbreak(ps p) {add_token(p, hardbrk);}

File diff suppressed because it is too large Load Diff