diff --git a/src/libsyntax/fold.rs b/src/libsyntax/fold.rs
index d419ce6f188..7562930b655 100644
--- a/src/libsyntax/fold.rs
+++ b/src/libsyntax/fold.rs
@@ -13,6 +13,7 @@ use core::prelude::*;
 use ast::*;
 use ast;
 use codemap::{span, spanned};
+use parse::token;
 use opt_vec::OptVec;
 
 use core::vec;
@@ -904,3 +905,74 @@ impl AstFoldExtensions for @ast_fold {
 pub fn make_fold(afp: ast_fold_fns) -> @ast_fold {
     afp as @ast_fold
 }
+
+#[cfg(test)]
+mod test {
+    use ast;
+    use util::parser_testing::{string_to_crate, matches_codepattern};
+    use parse::token;
+    use print::pprust;
+    use super::*;
+
+    // taken from expand
+    // given a function from idents to idents, produce
+    // an ast_fold that applies that function:
+    pub fn fun_to_ident_folder(f: @fn(ast::ident)->ast::ident) -> @ast_fold{
+        let afp = default_ast_fold();
+        let f_pre = @AstFoldFns{
+            fold_ident : |id, _| f(id),
+            .. *afp
+        };
+        make_fold(f_pre)
+    }
+
+    // this version doesn't care about getting comments or docstrings in.
+    fn fake_print_crate(s: @pprust::ps, crate: ast::crate) {
+        pprust::print_mod(s, &crate.node.module, crate.node.attrs);
+    }
+
+    // change every identifier to "zz"
+    pub fn to_zz() -> @fn(ast::ident)->ast::ident {
+        let zz_id = token::str_to_ident("zz");
+        |id| {zz_id}
+    }
+
+    // maybe add to expand.rs...
+    macro_rules! assert_pred (
+        ($pred:expr, $predname:expr, $a:expr , $b:expr) => (
+            {
+                let pred_val = $pred;
+                let a_val = $a;
+                let b_val = $b;
+                if !(pred_val(a_val,b_val)) {
+                    fail!("expected args satisfying %s, got %? and %?",
+                          $predname, a_val, b_val);
+                }
+            }
+        )
+    )
+
+    // make sure idents get transformed everywhere
+    #[test] fn ident_transformation () {
+        let zz_fold = fun_to_ident_folder(to_zz());
+        let ast = string_to_crate(@~"#[a] mod b {fn c (d : e, f : g) {h!(i,j,k);l;m}}");
+        assert_pred!(matches_codepattern,
+                     "matches_codepattern",
+                     pprust::to_str(zz_fold.fold_crate(ast),fake_print_crate,
+                                    token::get_ident_interner()),
+                     ~"#[a]mod zz{fn zz(zz:zz,zz:zz){zz!(zz,zz,zz);zz;zz}}");
+    }
+
+    // even inside macro defs....
+    #[test] fn ident_transformation_in_defs () {
+        let zz_fold = fun_to_ident_folder(to_zz());
+        let ast = string_to_crate(@~"macro_rules! a {(b $c:expr $(d $e:token)f+
+=> (g $(d $d $e)+))} ");
+        assert_pred!(matches_codepattern,
+                     "matches_codepattern",
+                     pprust::to_str(zz_fold.fold_crate(ast),fake_print_crate,
+                                    token::get_ident_interner()),
+                     ~"zz!zz((zz$zz:zz$(zz $zz:zz)zz+=>(zz$(zz$zz$zz)+)))");
+    }
+
+}
diff --git a/src/libsyntax/util/parser_testing.rs b/src/libsyntax/util/parser_testing.rs
index c5528069926..76055ca7914 100644
--- a/src/libsyntax/util/parser_testing.rs
+++ b/src/libsyntax/util/parser_testing.rs
@@ -69,3 +69,81 @@ pub fn string_to_pat(source_str : @~str) -> @ast::pat {
 pub fn strs_to_idents(ids: ~[&str]) -> ~[ast::ident] {
     ids.map(|u| token::str_to_ident(*u))
 }
+
+// does the given string match the pattern? whitespace in the first string
+// may be deleted or replaced with other whitespace to match the pattern.
+// this function is unicode-ignorant; fortunately, the careful design of
+// UTF-8 mitigates this ignorance. In particular, this function only collapses
+// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode
+// chars. Unsurprisingly, it doesn't attempt NFKC normalization.
+pub fn matches_codepattern(a : &str, b : &str) -> bool {
+    let mut idx_a = 0;
+    let mut idx_b = 0;
+    loop {
+        if (idx_a == a.len() && idx_b == b.len()) {
+            return true;
+        }
+        else if (idx_a == a.len()) {return false;}
+        else if (idx_b == b.len()) {
+            // maybe the stuff left in a is all ws?
+            if (is_whitespace(a.char_at(idx_a))) {
+                return (scan_for_non_ws_or_end(a,idx_a) == a.len());
+            } else {
+                return false;
+            }
+        }
+        // ws in both given and pattern:
+        else if (is_whitespace(a.char_at(idx_a))
+                 && is_whitespace(b.char_at(idx_b))) {
+            idx_a = scan_for_non_ws_or_end(a,idx_a);
+            idx_b = scan_for_non_ws_or_end(b,idx_b);
+        }
+        // ws in given only:
+        else if (is_whitespace(a.char_at(idx_a))) {
+            idx_a = scan_for_non_ws_or_end(a,idx_a);
+        }
+        // *don't* silently eat ws in expected only.
+        else if (a.char_at(idx_a) == b.char_at(idx_b)) {
+            idx_a += 1;
+            idx_b += 1;
+        }
+        else {
+            return false;
+        }
+    }
+}
+
+// given a string and an index, return the first uint >= idx
+// that is a non-ws-char or is outside of the legal range of
+// the string.
+fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint {
+    let mut i = idx;
+    let len = a.len();
+    while ((i < len) && (is_whitespace(a.char_at(i)))) {
+        i += 1;
+    }
+    i
+}
+
+// copied from lexer.
+pub fn is_whitespace(c: char) -> bool {
+    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test] fn eqmodws() {
+        assert_eq!(matches_codepattern("",""),true);
+        assert_eq!(matches_codepattern("","a"),false);
+        assert_eq!(matches_codepattern("a",""),false);
+        assert_eq!(matches_codepattern("a","a"),true);
+        assert_eq!(matches_codepattern("a b","a \n\t\r b"),true);
+        assert_eq!(matches_codepattern("a b ","a \n\t\r b"),true);
+        assert_eq!(matches_codepattern("a b","a \n\t\r b "),false);
+        assert_eq!(matches_codepattern("a b","a b"),true);
+        assert_eq!(matches_codepattern("ab","a b"),false);
+        assert_eq!(matches_codepattern("a b","ab"),true);
+    }
+}
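
For readers who want to try the whitespace-collapsing comparison outside this 0.x-era tree, the following is a minimal standalone sketch of the same idea in present-day Rust. It is not part of the patch: it walks chars instead of byte indices and uses char::is_whitespace (full Unicode whitespace) where the patched function only collapses ' ', '\t', '\r', and '\n', but the asymmetry is the same — whitespace in the given string may collapse or vanish, whitespace demanded by the pattern may not.

// Standalone sketch, not part of the patch: collapse whitespace on the
// "given" side only, never invent whitespace the pattern requires.
fn matches_codepattern(given: &str, pattern: &str) -> bool {
    let mut a = given.chars().peekable();
    let mut b = pattern.chars().peekable();
    loop {
        match (a.peek().copied(), b.peek().copied()) {
            (None, None) => return true,
            (None, Some(_)) => return false,
            // pattern exhausted: the rest of `given` must be all whitespace
            (Some(_), None) => return a.all(|c| c.is_whitespace()),
            // whitespace on both sides: collapse both runs and keep going
            (Some(ca), Some(cb)) if ca.is_whitespace() && cb.is_whitespace() => {
                while a.peek().map_or(false, |c| c.is_whitespace()) { a.next(); }
                while b.peek().map_or(false, |c| c.is_whitespace()) { b.next(); }
            }
            // whitespace only in `given`: it may simply be dropped
            (Some(ca), Some(_)) if ca.is_whitespace() => {
                while a.peek().map_or(false, |c| c.is_whitespace()) { a.next(); }
            }
            // ordinary characters must match exactly
            (Some(ca), Some(cb)) if ca == cb => { a.next(); b.next(); }
            // whitespace required by the pattern, or any other mismatch, fails
            _ => return false,
        }
    }
}

fn main() {
    assert!(matches_codepattern("a   b", "a b")); // extra whitespace in `given` collapses
    assert!(matches_codepattern("a b", "ab"));    // whitespace in `given` may disappear entirely
    assert!(!matches_codepattern("ab", "a b"));   // whitespace demanded by the pattern is mandatory
}

The asserts in main mirror the eqmodws cases above, so the sketch can be checked directly against the behaviour the patch's own tests pin down.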