regex: Remove in-tree version
The regex library was largely used for non-critical aspects of the compiler and various external tooling. At this point it is duplicated by its out-of-tree counterpart, so it imposes a maintenance overhead as well as a compile-time hit for the compiler itself. The last major user of the in-tree library was libtest, which used regexes for the filters passed when running tests. With this removal, test filtering goes back to substring matching rather than regexes.
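In practice the filtering change looks like the sketch below. It is illustrative only, written in current Rust; the test names and filter value are made up and this is not libtest's actual code. A filter now selects a test when the test name contains the filter string, so plain substrings keep working but regex patterns are no longer interpreted.

// Illustrative sketch of the new substring-based test filtering.
// Not libtest's real code; the names here are made up.
fn main() {
    let tests = ["vec::test_push", "vec::test_pop", "str::test_push_str"];
    let filter = "push"; // what used to be compiled as a regex

    // New behavior: a test runs if its name contains the filter string.
    let selected: Vec<_> = tests.iter()
                                .filter(|name| name.contains(filter))
                                .collect();

    // A plain substring still selects both *_push tests, but a pattern such
    // as "^vec::.*push$" is now treated as a literal string and matches nothing.
    println!("{:?}", selected);
}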
commit 6c29708bf9
parent 494896f2dd

mk/crates.mk | 12
@@ -51,7 +51,7 @@
 TARGET_CRATES := libc std flate arena term \
                  serialize getopts collections test rand \
-                 log regex graphviz core rbml alloc \
+                 log graphviz core rbml alloc \
                  unicode rustc_bitflags
 RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
                 rustc_trans rustc_back rustc_llvm rustc_privacy
@@ -95,16 +95,15 @@ DEPS_term := std log
 DEPS_getopts := std
 DEPS_collections := core alloc unicode
 DEPS_num := std
-DEPS_test := std getopts serialize rbml term regex native:rust_test_helpers
+DEPS_test := std getopts serialize rbml term native:rust_test_helpers
 DEPS_rand := core
-DEPS_log := std regex
-DEPS_regex := std
+DEPS_log := std
 DEPS_fmt_macros = std

 TOOL_DEPS_compiletest := test getopts
 TOOL_DEPS_rustdoc := rustdoc
 TOOL_DEPS_rustc := rustc_driver
-TOOL_DEPS_rustbook := std regex rustdoc
+TOOL_DEPS_rustbook := std rustdoc
 TOOL_SOURCE_compiletest := $(S)src/compiletest/compiletest.rs
 TOOL_SOURCE_rustdoc := $(S)src/driver/driver.rs
 TOOL_SOURCE_rustc := $(S)src/driver/driver.rs
@@ -130,9 +129,8 @@ DOC_CRATES := $(filter-out rustc, \
               $(filter-out rustc_driver, \
               $(filter-out rustc_privacy, \
               $(filter-out log, \
-              $(filter-out regex, \
               $(filter-out getopts, \
-              $(filter-out syntax, $(CRATES))))))))))))
+              $(filter-out syntax, $(CRATES)))))))))))
 COMPILER_DOC_CRATES := rustc rustc_trans rustc_borrowck rustc_resolve \
                        rustc_typeck rustc_driver syntax rustc_privacy
@@ -11,7 +11,6 @@ pub use self::Mode::*;

 use std::fmt;
 use std::str::FromStr;
-use regex::Regex;

 #[derive(Clone, PartialEq, Debug)]
 pub enum Mode {
@@ -101,10 +100,7 @@ pub struct Config {
     pub run_ignored: bool,

     // Only run tests that match this filter
-    pub filter: Option<Regex>,
-
-    // Precompiled regex for finding expected errors in cfail
-    pub cfail_regex: Regex,
+    pub filter: Option<String>,

     // Write out a parseable log of tests that were run
     pub logfile: Option<Path>,
@@ -22,7 +22,6 @@ extern crate getopts;

 #[macro_use]
 extern crate log;
-extern crate regex;

 use std::os;
 use std::io;
@@ -33,7 +32,6 @@ use getopts::{optopt, optflag, reqopt};
 use common::Config;
 use common::{Pretty, DebugInfoGdb, DebugInfoLldb, Codegen};
 use util::logv;
-use regex::Regex;

 pub mod procsrv;
 pub mod util;
@@ -116,14 +114,7 @@ pub fn parse_config(args: Vec<String> ) -> Config {
     }

     let filter = if !matches.free.is_empty() {
-        let s = matches.free[0].as_slice();
-        match regex::Regex::new(s) {
-            Ok(re) => Some(re),
-            Err(e) => {
-                println!("failed to parse filter /{}/: {:?}", s, e);
-                panic!()
-            }
-        }
+        Some(matches.free[0].clone())
     } else {
         None
     };
@@ -145,7 +136,6 @@ pub fn parse_config(args: Vec<String> ) -> Config {
                           .as_slice()).expect("invalid mode"),
         run_ignored: matches.opt_present("ignored"),
         filter: filter,
-        cfail_regex: Regex::new(errors::EXPECTED_PATTERN).unwrap(),
         logfile: matches.opt_str("logfile").map(|s| Path::new(s)),
         runtool: matches.opt_str("runtool"),
         host_rustcflags: matches.opt_str("host-rustcflags"),
@@ -374,18 +364,24 @@ fn extract_gdb_version(full_version_line: Option<String>) -> Option<String> {
             if full_version_line.as_slice().trim().len() > 0 => {
             let full_version_line = full_version_line.as_slice().trim();

-            let re = Regex::new(r"(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)").unwrap();
-
-            match re.captures(full_version_line) {
-                Some(captures) => {
-                    Some(captures.at(2).unwrap_or("").to_string())
-                }
-                None => {
-                    println!("Could not extract GDB version from line '{}'",
-                             full_version_line);
-                    None
-                }
+            // used to be a regex "(^|[^0-9])([0-9]\.[0-9])([^0-9]|$)"
+            for (pos, c) in full_version_line.char_indices() {
+                if !c.is_digit(10) { continue }
+                if pos + 2 >= full_version_line.len() { continue }
+                if full_version_line.char_at(pos + 1) != '.' { continue }
+                if !full_version_line.char_at(pos + 2).is_digit(10) { continue }
+                if pos > 0 && full_version_line.char_at_reverse(pos).is_digit(10) {
+                    continue
+                }
+                if pos + 3 < full_version_line.len() &&
+                   full_version_line.char_at(pos + 3).is_digit(10) {
+                    continue
+                }
+                return Some(full_version_line[pos..pos+3].to_string());
             }
+            println!("Could not extract GDB version from line '{}'",
+                     full_version_line);
+            None
         },
         _ => None
     }
@@ -408,18 +404,26 @@ fn extract_lldb_version(full_version_line: Option<String>) -> Option<String> {
             if full_version_line.as_slice().trim().len() > 0 => {
             let full_version_line = full_version_line.as_slice().trim();

-            let re = Regex::new(r"[Ll][Ll][Dd][Bb]-([0-9]+)").unwrap();
-
-            match re.captures(full_version_line) {
-                Some(captures) => {
-                    Some(captures.at(1).unwrap_or("").to_string())
-                }
-                None => {
-                    println!("Could not extract LLDB version from line '{}'",
-                             full_version_line);
-                    None
-                }
+            for (pos, l) in full_version_line.char_indices() {
+                if l != 'l' && l != 'L' { continue }
+                if pos + 5 >= full_version_line.len() { continue }
+                let l = full_version_line.char_at(pos + 1);
+                if l != 'l' && l != 'L' { continue }
+                let d = full_version_line.char_at(pos + 2);
+                if d != 'd' && d != 'D' { continue }
+                let b = full_version_line.char_at(pos + 3);
+                if b != 'b' && b != 'B' { continue }
+                let dash = full_version_line.char_at(pos + 4);
+                if dash != '-' { continue }
+
+                let vers = full_version_line[pos + 5..].chars().take_while(|c| {
+                    c.is_digit(10)
+                }).collect::<String>();
+                if vers.len() > 0 { return Some(vers) }
             }
+            println!("Could not extract LLDB version from line '{}'",
+                     full_version_line);
+            None
         },
         _ => None
     }
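For readers following along today: `char_at` and `char_at_reverse` no longer exist, so the following is a rough modern-Rust equivalent of the GDB-version scanning loop above. It is an illustration under that assumption (scanning ASCII bytes instead of chars), not part of the commit.

// Rough modern equivalent of the "find N.N not embedded in a longer number"
// scan used above. Illustrative only.
fn extract_gdb_version(line: &str) -> Option<String> {
    let bytes = line.as_bytes();
    for pos in 0..bytes.len() {
        // Look for a digit, a '.', and another digit ...
        if !bytes[pos].is_ascii_digit() { continue }
        if pos + 2 >= bytes.len() { continue }
        if bytes[pos + 1] != b'.' { continue }
        if !bytes[pos + 2].is_ascii_digit() { continue }
        // ... that is not part of a longer number on either side.
        if pos > 0 && bytes[pos - 1].is_ascii_digit() { continue }
        if pos + 3 < bytes.len() && bytes[pos + 3].is_ascii_digit() { continue }
        return Some(line[pos..pos + 3].to_string());
    }
    None
}

fn main() {
    assert_eq!(extract_gdb_version("GNU gdb (GDB) 7.4.1"), Some("7.4".to_string()));
}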
@@ -9,9 +9,7 @@
 // except according to those terms.
 use self::WhichLine::*;

 use std::ascii::AsciiExt;
 use std::io::{BufferedReader, File};
-use regex::Regex;
-
 pub struct ExpectedError {
     pub line: uint,
@@ -19,6 +17,9 @@ pub struct ExpectedError {
     pub msg: String,
 }

+#[derive(PartialEq, Show)]
+enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }
+
 /// Looks for either "//~| KIND MESSAGE" or "//~^^... KIND MESSAGE"
 /// The former is a "follow" that inherits its target from the preceding line;
 /// the latter is an "adjusts" that goes that many lines up.
@@ -26,15 +27,8 @@ pub struct ExpectedError {
 /// Goal is to enable tests both like: //~^^^ ERROR go up three
 /// and also //~^ ERROR message one for the preceding line, and
 ///          //~| ERROR message two for that same line.
-
-pub static EXPECTED_PATTERN : &'static str =
-    r"//~(?P<follow>\|)?(?P<adjusts>\^*)\s*(?P<kind>\S*)\s*(?P<msg>.*)";
-
-#[derive(PartialEq, Show)]
-enum WhichLine { ThisLine, FollowPrevious(uint), AdjustBackward(uint) }
-
 // Load any test directives embedded in the file
-pub fn load_errors(re: &Regex, testfile: &Path) -> Vec<ExpectedError> {
+pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
     let mut rdr = BufferedReader::new(File::open(testfile).unwrap());

     // `last_nonfollow_error` tracks the most recently seen
@@ -50,7 +44,7 @@ pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
     rdr.lines().enumerate().filter_map(|(line_no, ln)| {
         parse_expected(last_nonfollow_error,
                        line_no + 1,
-                       ln.unwrap().as_slice(), re)
+                       ln.unwrap().as_slice())
             .map(|(which, error)| {
                 match which {
                     FollowPrevious(_) => {}
@@ -63,30 +57,39 @@ pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {

 fn parse_expected(last_nonfollow_error: Option<uint>,
                   line_num: uint,
-                  line: &str,
-                  re: &Regex) -> Option<(WhichLine, ExpectedError)> {
-    re.captures(line).and_then(|caps| {
-        let adjusts = caps.name("adjusts").unwrap_or("").len();
-        let kind = caps.name("kind").unwrap_or("").to_ascii_lowercase();
-        let msg = caps.name("msg").unwrap_or("").trim().to_string();
-        let follow = caps.name("follow").unwrap_or("").len() > 0;
+                  line: &str) -> Option<(WhichLine, ExpectedError)> {
+    let start = match line.find_str("//~") { Some(i) => i, None => return None };
+    let (follow, adjusts) = if line.char_at(start + 3) == '|' {
+        (true, 0)
+    } else {
+        (false, line[start + 3..].chars().take_while(|c| *c == '^').count())
+    };
+    let kind_start = start + 3 + adjusts + (follow as usize);
+    let letters = line[kind_start..].chars();
+    let kind = letters.skip_while(|c| c.is_whitespace())
+                      .take_while(|c| !c.is_whitespace())
+                      .map(|c| c.to_lowercase())
+                      .collect::<String>();
+    let letters = line[kind_start..].chars();
+    let msg = letters.skip_while(|c| c.is_whitespace())
+                     .skip_while(|c| !c.is_whitespace())
+                     .collect::<String>().trim().to_string();

-        let (which, line) = if follow {
-            assert!(adjusts == 0, "use either //~| or //~^, not both.");
-            let line = last_nonfollow_error.unwrap_or_else(|| {
-                panic!("encountered //~| without preceding //~^ line.")
-            });
-            (FollowPrevious(line), line)
-        } else {
-            let which =
-                if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
-            let line = line_num - adjusts;
-            (which, line)
-        };
+    let (which, line) = if follow {
+        assert!(adjusts == 0, "use either //~| or //~^, not both.");
+        let line = last_nonfollow_error.unwrap_or_else(|| {
+            panic!("encountered //~| without preceding //~^ line.")
+        });
+        (FollowPrevious(line), line)
+    } else {
+        let which =
+            if adjusts > 0 { AdjustBackward(adjusts) } else { ThisLine };
+        let line = line_num - adjusts;
+        (which, line)
+    };

-        debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
-        Some((which, ExpectedError { line: line,
-                                     kind: kind,
-                                     msg: msg, }))
-    })
+    debug!("line={} which={:?} kind={:?} msg={:?}", line_num, which, kind, msg);
+    Some((which, ExpectedError { line: line,
+                                 kind: kind,
+                                 msg: msg, }))
 }
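A compact sketch of what the rewritten directive parser extracts from a test line, using current string APIs (`find_str` and `char_at` are pre-1.0). `parse_marker` is a made-up name and the `WhichLine` bookkeeping is omitted; it only shows the follow/adjusts/kind/msg extraction.

// Illustrative only: extracts (follow, adjusts, kind, msg) from a "//~" marker.
fn parse_marker(line: &str) -> Option<(bool, usize, String, String)> {
    let start = line.find("//~")?;
    let rest = &line[start + 3..];
    let follow = rest.starts_with('|');
    let adjusts = if follow { 0 } else { rest.chars().take_while(|c| *c == '^').count() };
    let after = &rest[(follow as usize) + adjusts..];
    // First whitespace-delimited word is the kind; the remainder is the message.
    let mut words = after.trim_start().splitn(2, char::is_whitespace);
    let kind = words.next().unwrap_or("").to_lowercase();
    let msg = words.next().unwrap_or("").trim().to_string();
    Some((follow, adjusts, kind, msg))
}

fn main() {
    let line = "    let x: u8 = y; //~^^ ERROR mismatched types";
    assert_eq!(parse_marker(line),
               Some((false, 2, "error".to_string(), "mismatched types".to_string())));
}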
@@ -99,7 +99,7 @@ fn run_cfail_test(config: &Config, props: &TestProps, testfile: &Path) {
     }

     let output_to_check = get_output(props, &proc_res);
-    let expected_errors = errors::load_errors(&config.cfail_regex, testfile);
+    let expected_errors = errors::load_errors(testfile);
     if !expected_errors.is_empty() {
         if !props.error_patterns.is_empty() {
             fatal("both error pattern and expected errors specified");
@@ -13,14 +13,11 @@
 extern crate syntax;
 extern crate rustc;

-extern crate regex;
-
 #[macro_use]
 extern crate log;

 use std::collections::HashMap;
 use std::io::File;
-use regex::Regex;

 use syntax::parse;
 use syntax::parse::lexer;
@@ -167,15 +164,19 @@ fn count(lit: &str) -> usize {
 }

 fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {
-    let re = Regex::new(
-      r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
-    ).unwrap();
+    // old regex:
+    // \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
+    let start = s.find_str("[@").unwrap();
+    let comma = start + s[start..].find_str(",").unwrap();
+    let colon = comma + s[comma..].find_str(":").unwrap();
+    let content_start = colon + s[colon..].find_str("='").unwrap();
+    let content_end = content_start + s[content_start..].find_str("',<").unwrap();
+    let toknum_end = content_end + s[content_end..].find_str(">,").unwrap();

-    let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
-    let start = m.name("start").unwrap_or("");
-    let end = m.name("end").unwrap_or("");
-    let toknum = m.name("toknum").unwrap_or("");
-    let content = m.name("content").unwrap_or("");
+    let start = &s[comma + 1 .. colon];
+    let end = &s[colon + 1 .. content_start];
+    let content = &s[content_start + 2 .. content_end];
+    let toknum = &s[content_end + 3 .. toknum_end];

     let proto_tok = tokens.get(toknum).expect(format!("didn't find token {:?} in the map",
                                                       toknum).as_slice());
@@ -8,7 +8,6 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-use regex::Regex;
 use std::ascii::AsciiExt;
 use std::cmp;

@@ -34,7 +33,7 @@ fn parse_log_level(level: &str) -> Option<u32> {
 ///
 /// Valid log levels are 0-255, with the most likely ones being 1-4 (defined in
 /// std::). Also supports string log levels of error, warn, info, and debug
-pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<Regex>) {
+pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<String>) {
     let mut dirs = Vec::new();

     let mut parts = spec.split('/');
@@ -80,17 +79,7 @@ pub fn parse_logging_spec(spec: &str) -> (Vec<LogDirective>, Option<String>) {
         });
     }});

-    let filter = filter.map_or(None, |filter| {
-        match Regex::new(filter) {
-            Ok(re) => Some(re),
-            Err(e) => {
-                println!("warning: invalid regex filter - {:?}", e);
-                None
-            }
-        }
-    });
-
-    return (dirs, filter);
+    (dirs, filter.map(|s| s.to_string()))
 }

 #[cfg(test)]
@@ -123,11 +123,11 @@
 //!
 //! # Filtering results
 //!
-//! A RUST_LOG directive may include a regex filter. The syntax is to append `/`
-//! followed by a regex. Each message is checked against the regex, and is only
-//! logged if it matches. Note that the matching is done after formatting the log
-//! string but before adding any logging meta-data. There is a single filter for all
-//! modules.
+//! A RUST_LOG directive may include a string filter. The syntax is to append
+//! `/` followed by a string. Each message is checked against the string and is
+//! only logged if it contains the string. Note that the matching is done after
+//! formatting the log string but before adding any logging meta-data. There is
+//! a single filter for all modules.
 //!
 //! Some examples:
 //!
@@ -172,8 +172,6 @@
 #![allow(unstable)]
 #![deny(missing_docs)]

-extern crate regex;
-
 use std::cell::RefCell;
 use std::fmt;
 use std::io::LineBufferedWriter;
@@ -185,8 +183,6 @@ use std::rt;
 use std::slice;
 use std::sync::{Once, ONCE_INIT};

-use regex::Regex;
-
 use directive::LOG_LEVEL_NAMES;

 #[macro_use]
@@ -209,8 +205,8 @@ static mut LOG_LEVEL: u32 = MAX_LOG_LEVEL;
 static mut DIRECTIVES: *const Vec<directive::LogDirective> =
     0 as *const Vec<directive::LogDirective>;

-/// Optional regex filter.
-static mut FILTER: *const Regex = 0 as *const _;
+/// Optional filter.
+static mut FILTER: *const String = 0 as *const _;

 /// Debug log level
 pub const DEBUG: u32 = 4;
@@ -288,7 +284,7 @@ pub fn log(level: u32, loc: &'static LogLocation, args: fmt::Arguments) {
     // Test the literal string from args against the current filter, if there
     // is one.
     match unsafe { FILTER.as_ref() } {
-        Some(filter) if !filter.is_match(&args.to_string()[]) => return,
+        Some(filter) if !args.to_string().contains(&filter[]) => return,
         _ => {}
     }

@@ -435,8 +431,8 @@ fn init() {
         DIRECTIVES = ptr::null();

         if !FILTER.is_null() {
-            let _filter: Box<Regex> = mem::transmute(FILTER);
-            FILTER = ptr::null();
+            let _filter: Box<String> = mem::transmute(FILTER);
+            FILTER = 0 as *const _;
         }
     });
 }
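The doc-comment change above is the user-visible part of the liblog change: the text after `/` in RUST_LOG is now a plain string, and a formatted message passes the filter only if it contains that string. A minimal sketch of those semantics (illustrative, not liblog's implementation):

// Sketch of the new RUST_LOG string-filter behavior. Illustrative only.
fn passes_filter(formatted_message: &str, filter: Option<&str>) -> bool {
    match filter {
        Some(f) => formatted_message.contains(f), // substring check, no regex
        None => true,
    }
}

fn main() {
    // e.g. RUST_LOG="info/timer" -> level directive "info", filter Some("timer")
    let filter = Some("timer");
    assert!(passes_filter("timer expired after 5s", filter));
    assert!(!passes_filter("connection closed", filter));
}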
@@ -1,275 +0,0 @@
(entire file deleted: the in-tree regex library's compiler module, containing the `Inst` instruction set (`Match`, `OneChar`, `CharClass`, `Any`, `EmptyBegin`, `EmptyEnd`, `EmptyWordBoundary`, `Save`, `Jump`, `Split`), the compiled `Program` with its literal-prefix optimization and `num_captures`, and the `Compiler` that lowers the parsed AST (`Literal`, `Dot`, `AstClass`, `Begin`, `End`, `WordBoundary`, `Capture`, `Cat`, `Alt`, `Rep`) into those instructions.)
@@ -1,93 +0,0 @@
(entire file deleted: the crate root of the in-tree `regex` library, marked `#![unstable = "use the crates.io `regex` library instead"]`; it declared the `compile`, `parse`, `re`, `vm` and `test` modules, re-exported `Regex`, `Captures`, the match/split/replace iterators, `quote` and `is_match`, and exposed a hidden `native` module used by the `regex!` syntax extension.)
File diff suppressed because it is too large
@@ -1,684 +0,0 @@
(entire file deleted: the public API of the in-tree regex library, i.e. the `Regex` type with `new`, `is_match`, `find`, `find_iter`, `captures`, `captures_iter`, `split`, `splitn`, `replace`, `replace_all`, `replacen` and `as_str`; the `Captures` type with its `pos`/`at`/`name`/`iter`/`iter_pos`/`expand` accessors; the `SubCaptures`, `SubCapturesPos`, `FindCaptures`, `FindMatches`, `RegexSplits` and `RegexSplitsN` iterators; the `Replacer` trait with `NoExpand`; and the free functions `quote` and `is_match`.)
@ -1,183 +0,0 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use std::rand::{Rng, thread_rng};
|
||||
use stdtest::Bencher;
|
||||
use std::iter::repeat;
|
||||
|
||||
use regex::{Regex, NoExpand};
|
||||
|
||||
fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) {
|
||||
b.iter(|| if !re.is_match(text) { panic!("no match") });
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn no_exponential(b: &mut Bencher) {
|
||||
let n = 100;
|
||||
let re = Regex::new(format!("{}{}",
|
||||
repeat("a?").take(n).collect::<String>(),
|
||||
repeat("a").take(n).collect::<String>()).as_slice()).unwrap();
|
||||
let text = repeat("a").take(n).collect::<String>();
|
||||
bench_assert_match(b, re, text.as_slice());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn literal(b: &mut Bencher) {
|
||||
let re = regex!("y");
|
||||
let text = format!("{}y", repeat("x").take(50).collect::<String>());
|
||||
bench_assert_match(b, re, text.as_slice());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn not_literal(b: &mut Bencher) {
|
||||
let re = regex!(".y");
|
||||
let text = format!("{}y", repeat("x").take(50).collect::<String>());
|
||||
bench_assert_match(b, re, text.as_slice());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn match_class(b: &mut Bencher) {
|
||||
let re = regex!("[abcdw]");
|
||||
let text = format!("{}w", repeat("xxxx").take(20).collect::<String>());
|
||||
bench_assert_match(b, re, text.as_slice());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn match_class_in_range(b: &mut Bencher) {
|
||||
// 'b' is between 'a' and 'c', so the class range checking doesn't help.
|
||||
let re = regex!("[ac]");
|
||||
let text = format!("{}c", repeat("bbbb").take(20).collect::<String>());
|
||||
bench_assert_match(b, re, text.as_slice());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn replace_all(b: &mut Bencher) {
|
||||
let re = regex!("[cjrw]");
|
||||
let text = "abcdefghijklmnopqrstuvwxyz";
|
||||
// FIXME: This isn't using the $name expand stuff.
|
||||
// It's possible RE2/Go is using it, but currently, the expand in this
|
||||
// crate is actually compiling a regex, so it's incredibly slow.
|
||||
b.iter(|| re.replace_all(text, NoExpand("")));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn anchored_literal_short_non_match(b: &mut Bencher) {
|
||||
let re = regex!("^zbc(d|e)");
|
||||
let text = "abcdefghijklmnopqrstuvwxyz";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn anchored_literal_long_non_match(b: &mut Bencher) {
|
||||
let re = regex!("^zbc(d|e)");
|
||||
let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
|
||||
b.iter(|| re.is_match(text.as_slice()));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn anchored_literal_short_match(b: &mut Bencher) {
|
||||
let re = regex!("^.bc(d|e)");
|
||||
let text = "abcdefghijklmnopqrstuvwxyz";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn anchored_literal_long_match(b: &mut Bencher) {
|
||||
let re = regex!("^.bc(d|e)");
|
||||
let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
|
||||
b.iter(|| re.is_match(text.as_slice()));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn one_pass_short_a(b: &mut Bencher) {
|
||||
let re = regex!("^.bc(d|e)*$");
|
||||
let text = "abcddddddeeeededd";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn one_pass_short_a_not(b: &mut Bencher) {
|
||||
let re = regex!(".bc(d|e)*$");
|
||||
let text = "abcddddddeeeededd";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn one_pass_short_b(b: &mut Bencher) {
|
||||
let re = regex!("^.bc(?:d|e)*$");
|
||||
let text = "abcddddddeeeededd";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn one_pass_short_b_not(b: &mut Bencher) {
|
||||
let re = regex!(".bc(?:d|e)*$");
|
||||
let text = "abcddddddeeeededd";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn one_pass_long_prefix(b: &mut Bencher) {
|
||||
let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$");
|
||||
let text = "abcdefghijklmnopqrstuvwxyz";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn one_pass_long_prefix_not(b: &mut Bencher) {
|
||||
let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$");
|
||||
let text = "abcdefghijklmnopqrstuvwxyz";
|
||||
b.iter(|| re.is_match(text));
|
||||
}
|
||||
|
||||
macro_rules! throughput {
|
||||
($name:ident, $regex:expr, $size:expr) => (
|
||||
#[bench]
|
||||
fn $name(b: &mut Bencher) {
|
||||
let text = gen_text($size);
|
||||
b.bytes = $size;
|
||||
b.iter(|| if $regex.is_match(text.as_slice()) { panic!("match") });
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
|
||||
fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") }
|
||||
fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
|
||||
fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
|
||||
|
||||
fn gen_text(n: uint) -> String {
|
||||
let mut rng = thread_rng();
|
||||
let mut bytes = rng.gen_ascii_chars().map(|n| n as u8).take(n)
|
||||
.collect::<Vec<u8>>();
|
||||
for (i, b) in bytes.iter_mut().enumerate() {
|
||||
if i % 20 == 0 {
|
||||
*b = b'\n'
|
||||
}
|
||||
}
|
||||
String::from_utf8(bytes).unwrap()
|
||||
}
|
||||
|
||||
throughput!{easy0_32, easy0(), 32}
|
||||
throughput!{easy0_1K, easy0(), 1<<10}
|
||||
throughput!{easy0_32K, easy0(), 32<<10}
|
||||
|
||||
throughput!{easy1_32, easy1(), 32}
|
||||
throughput!{easy1_1K, easy1(), 1<<10}
|
||||
throughput!{easy1_32K, easy1(), 32<<10}
|
||||
|
||||
throughput!{medium_32, medium(), 32}
|
||||
throughput!{medium_1K, medium(), 1<<10}
|
||||
throughput!{medium_32K,medium(), 32<<10}
|
||||
|
||||
throughput!{hard_32, hard(), 32}
|
||||
throughput!{hard_1K, hard(), 1<<10}
|
||||
throughput!{hard_32K,hard(), 32<<10}
|
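As a reading aid, roughly what one of the `throughput!` invocations above expands to (an illustrative, hand-written expansion under the macro defined above, not generated output):

// Approximate expansion of `throughput!{easy0_32, easy0(), 32}`:
// sets bytes-per-iteration so the bench harness can report throughput,
// and panics if the "easy" pattern unexpectedly matches the random text.
#[bench]
fn easy0_32(b: &mut Bencher) {
    let text = gen_text(32);
    b.bytes = 32;
    b.iter(|| if easy0().is_match(text.as_slice()) { panic!("match") });
}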
@@ -1,373 +0,0 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// ignore-tidy-linelength
|
||||
|
||||
// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests'
|
||||
// on 2014-04-23 01:33:36.539280.
|
||||
|
||||
// Tests from basic.dat
|
||||
mat!{match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18))}
|
||||
mat!{match_basic_4, r"a...b", r"abababbb", Some((2, 7))}
|
||||
mat!{match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8))}
|
||||
mat!{match_basic_6, r"\)", r"()", Some((1, 2))}
|
||||
mat!{match_basic_7, r"a]", r"a]a", Some((0, 2))}
|
||||
mat!{match_basic_9, r"\}", r"}", Some((0, 1))}
|
||||
mat!{match_basic_10, r"\]", r"]", Some((0, 1))}
|
||||
mat!{match_basic_12, r"]", r"]", Some((0, 1))}
|
||||
mat!{match_basic_15, r"^a", r"ax", Some((0, 1))}
|
||||
mat!{match_basic_16, r"\^a", r"a^a", Some((1, 3))}
|
||||
mat!{match_basic_17, r"a\^", r"a^", Some((0, 2))}
|
||||
mat!{match_basic_18, r"a$", r"aa", Some((1, 2))}
|
||||
mat!{match_basic_19, r"a\$", r"a$", Some((0, 2))}
|
||||
mat!{match_basic_20, r"^$", r"", Some((0, 0))}
|
||||
mat!{match_basic_21, r"$^", r"", Some((0, 0))}
|
||||
mat!{match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2))}
|
||||
mat!{match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_basic_24, r"(..)*(...)*", r"a", Some((0, 0))}
|
||||
mat!{match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4))}
|
||||
mat!{match_basic_26, r"(ab|a)(bc|c)", r"abc", Some((0, 3)), Some((0, 2)), Some((2, 3))}
|
||||
mat!{match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2))}
|
||||
mat!{match_basic_28, r"a{0}b", r"ab", Some((1, 2))}
|
||||
mat!{match_basic_29, r"(a*)(b?)(b+)b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))}
|
||||
mat!{match_basic_30, r"(a*)(b{0,1})(b{1,})b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7))}
|
||||
mat!{match_basic_32, r"((a|a)|a)", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_basic_33, r"(a*)(a|aa)", r"aaaa", Some((0, 4)), Some((0, 3)), Some((3, 4))}
|
||||
mat!{match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4))}
|
||||
mat!{match_basic_35, r"a(b)|c(d)|a(e)f", r"aef", Some((0, 3)), None, None, Some((1, 2))}
|
||||
mat!{match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1))}
|
||||
mat!{match_basic_38, r"(a|b)c|a(b|c)", r"ab", Some((0, 2)), None, Some((1, 2))}
|
||||
mat!{match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2))}
|
||||
mat!{match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2))}
|
||||
mat!{match_basic_41, r"(.a|.b).*|.*(.a|.b)", r"xa", Some((0, 2)), Some((0, 2))}
|
||||
mat!{match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2))}
|
||||
mat!{match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2))}
|
||||
mat!{match_basic_44, r"ab|abab", r"abbabab", Some((0, 2))}
|
||||
mat!{match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8))}
|
||||
mat!{match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9))}
|
||||
mat!{match_basic_47, r"(aa|aaa)*|(a|aaaaa)", r"aa", Some((0, 2)), Some((0, 2))}
|
||||
mat!{match_basic_48, r"(a.|.a.)*|(a|.a...)", r"aa", Some((0, 2)), Some((0, 2))}
|
||||
mat!{match_basic_49, r"ab|a", r"xabc", Some((1, 3))}
|
||||
mat!{match_basic_50, r"ab|a", r"xxabc", Some((2, 4))}
|
||||
mat!{match_basic_51, r"(?i)(Ab|cD)*", r"aBcD", Some((0, 4)), Some((2, 4))}
|
||||
mat!{match_basic_52, r"[^-]", r"--a", Some((2, 3))}
|
||||
mat!{match_basic_53, r"[a-]*", r"--a", Some((0, 3))}
|
||||
mat!{match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4))}
|
||||
mat!{match_basic_55, r":::1:::0:|:::1:1:0:", r":::0:::1:::1:::0:", Some((8, 17))}
|
||||
mat!{match_basic_56, r":::1:::0:|:::1:1:1:", r":::0:::1:::1:::0:", Some((8, 17))}
|
||||
mat!{match_basic_57, r"[[:upper:]]", r"A", Some((0, 1))}
|
||||
mat!{match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3))}
|
||||
mat!{match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3))}
|
||||
mat!{match_basic_65, r"
|
||||
", r"
|
||||
", Some((0, 1))}
|
||||
mat!{match_basic_66, r"
|
||||
", r"
|
||||
", Some((0, 1))}
|
||||
mat!{match_basic_67, r"[^a]", r"
|
||||
", Some((0, 1))}
|
||||
mat!{match_basic_68, r"
|
||||
a", r"
|
||||
a", Some((0, 2))}
|
||||
mat!{match_basic_69, r"(a)(b)(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((2, 3))}
|
||||
mat!{match_basic_70, r"xxx", r"xxx", Some((0, 3))}
|
||||
mat!{match_basic_71, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 6,", Some((0, 6))}
|
||||
mat!{match_basic_72, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"2/7", Some((0, 3))}
|
||||
mat!{match_basic_73, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 1,Feb 6", Some((5, 11))}
|
||||
mat!{match_basic_74, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", r"x", Some((0, 1)), Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_basic_75, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", r"xx", Some((0, 2)), Some((1, 2)), Some((1, 2))}
|
||||
mat!{match_basic_76, r"a?(ab|ba)*", r"ababababababababababababababababababababababababababababababababababababababababa", Some((0, 81)), Some((79, 81))}
|
||||
mat!{match_basic_77, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabbbbaa", Some((18, 25))}
|
||||
mat!{match_basic_78, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabaa", Some((18, 22))}
|
||||
mat!{match_basic_79, r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", r"baaabbbabac", Some((7, 11))}
|
||||
mat!{match_basic_80, r".*", r"", Some((0, 2))}
|
||||
mat!{match_basic_81, r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", Some((53, 57))}
|
||||
mat!{match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10))}
|
||||
mat!{match_basic_84, r"^", r"", Some((0, 0))}
|
||||
mat!{match_basic_85, r"$", r"", Some((0, 0))}
|
||||
mat!{match_basic_86, r"^$", r"", Some((0, 0))}
|
||||
mat!{match_basic_87, r"^a$", r"a", Some((0, 1))}
|
||||
mat!{match_basic_88, r"abc", r"abc", Some((0, 3))}
|
||||
mat!{match_basic_89, r"abc", r"xabcy", Some((1, 4))}
|
||||
mat!{match_basic_90, r"abc", r"ababc", Some((2, 5))}
|
||||
mat!{match_basic_91, r"ab*c", r"abc", Some((0, 3))}
|
||||
mat!{match_basic_92, r"ab*bc", r"abc", Some((0, 3))}
|
||||
mat!{match_basic_93, r"ab*bc", r"abbc", Some((0, 4))}
|
||||
mat!{match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6))}
|
||||
mat!{match_basic_95, r"ab+bc", r"abbc", Some((0, 4))}
|
||||
mat!{match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6))}
|
||||
mat!{match_basic_97, r"ab?bc", r"abbc", Some((0, 4))}
|
||||
mat!{match_basic_98, r"ab?bc", r"abc", Some((0, 3))}
|
||||
mat!{match_basic_99, r"ab?c", r"abc", Some((0, 3))}
|
||||
mat!{match_basic_100, r"^abc$", r"abc", Some((0, 3))}
|
||||
mat!{match_basic_101, r"^abc", r"abcc", Some((0, 3))}
|
||||
mat!{match_basic_102, r"abc$", r"aabc", Some((1, 4))}
|
||||
mat!{match_basic_103, r"^", r"abc", Some((0, 0))}
|
||||
mat!{match_basic_104, r"$", r"abc", Some((3, 3))}
|
||||
mat!{match_basic_105, r"a.c", r"abc", Some((0, 3))}
|
||||
mat!{match_basic_106, r"a.c", r"axc", Some((0, 3))}
|
||||
mat!{match_basic_107, r"a.*c", r"axyzc", Some((0, 5))}
|
||||
mat!{match_basic_108, r"a[bc]d", r"abd", Some((0, 3))}
|
||||
mat!{match_basic_109, r"a[b-d]e", r"ace", Some((0, 3))}
|
||||
mat!{match_basic_110, r"a[b-d]", r"aac", Some((1, 3))}
|
||||
mat!{match_basic_111, r"a[-b]", r"a-", Some((0, 2))}
|
||||
mat!{match_basic_112, r"a[b-]", r"a-", Some((0, 2))}
|
||||
mat!{match_basic_113, r"a]", r"a]", Some((0, 2))}
|
||||
mat!{match_basic_114, r"a[]]b", r"a]b", Some((0, 3))}
|
||||
mat!{match_basic_115, r"a[^bc]d", r"aed", Some((0, 3))}
|
||||
mat!{match_basic_116, r"a[^-b]c", r"adc", Some((0, 3))}
|
||||
mat!{match_basic_117, r"a[^]b]c", r"adc", Some((0, 3))}
|
||||
mat!{match_basic_118, r"ab|cd", r"abc", Some((0, 2))}
|
||||
mat!{match_basic_119, r"ab|cd", r"abcd", Some((0, 2))}
|
||||
mat!{match_basic_120, r"a\(b", r"a(b", Some((0, 3))}
|
||||
mat!{match_basic_121, r"a\(*b", r"ab", Some((0, 2))}
|
||||
mat!{match_basic_122, r"a\(*b", r"a((b", Some((0, 4))}
|
||||
mat!{match_basic_123, r"((a))", r"abc", Some((0, 1)), Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_basic_124, r"(a)b(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((2, 3))}
|
||||
mat!{match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7))}
|
||||
mat!{match_basic_126, r"a*", r"aaa", Some((0, 3))}
|
||||
mat!{match_basic_128, r"(a*)*", r"-", Some((0, 0)), None}
|
||||
mat!{match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0))}
|
||||
mat!{match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None}
|
||||
mat!{match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2))}
|
||||
mat!{match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2))}
|
||||
mat!{match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_basic_135, r"[^ab]*", r"cde", Some((0, 3))}
|
||||
mat!{match_basic_137, r"(^)*", r"-", Some((0, 0)), None}
|
||||
mat!{match_basic_138, r"a*", r"", Some((0, 0))}
|
||||
mat!{match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5))}
|
||||
mat!{match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1))}
|
||||
mat!{match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1))}
|
||||
mat!{match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1))}
|
||||
mat!{match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None}
|
||||
mat!{match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7))}
|
||||
mat!{match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3))}
|
||||
mat!{match_basic_147, r"ab*", r"xayabbbz", Some((1, 2))}
|
||||
mat!{match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4))}
|
||||
mat!{match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3))}
|
||||
mat!{match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2))}
|
||||
mat!{match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1))}
|
||||
mat!{match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3))}
|
||||
mat!{match_basic_153, r"a([bc]*)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))}
|
||||
mat!{match_basic_154, r"a([bc]+)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4))}
|
||||
mat!{match_basic_155, r"a([bc]*)(c+d)", r"abcd", Some((0, 4)), Some((1, 2)), Some((2, 4))}
|
||||
mat!{match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7))}
|
||||
mat!{match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2))}
|
||||
mat!{match_basic_158, r"((a)(b)c)(d)", r"abcd", Some((0, 4)), Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((3, 4))}
|
||||
mat!{match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5))}
|
||||
mat!{match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3))}
|
||||
mat!{match_basic_161, r"(bc+d$|ef*g.|h?i(j|k))", r"effgz", Some((0, 5)), Some((0, 5))}
|
||||
mat!{match_basic_162, r"(bc+d$|ef*g.|h?i(j|k))", r"ij", Some((0, 2)), Some((0, 2)), Some((1, 2))}
|
||||
mat!{match_basic_163, r"(bc+d$|ef*g.|h?i(j|k))", r"reffgz", Some((1, 6)), Some((1, 6))}
|
||||
mat!{match_basic_164, r"(((((((((a)))))))))", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_basic_165, r"multiple words", r"multiple words yeah", Some((0, 14))}
|
||||
mat!{match_basic_166, r"(.*)c(.*)", r"abcde", Some((0, 5)), Some((0, 2)), Some((3, 5))}
|
||||
mat!{match_basic_167, r"abcd", r"abcd", Some((0, 4))}
|
||||
mat!{match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3))}
|
||||
mat!{match_basic_169, r"a[-]?c", r"ac", Some((0, 3))}
|
||||
mat!{match_basic_170, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qaddafi", Some((0, 15)), None, Some((10, 12))}
|
||||
mat!{match_basic_171, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mo'ammar Gadhafi", Some((0, 16)), None, Some((11, 13))}
|
||||
mat!{match_basic_172, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Kaddafi", Some((0, 15)), None, Some((10, 12))}
|
||||
mat!{match_basic_173, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qadhafi", Some((0, 15)), None, Some((10, 12))}
|
||||
mat!{match_basic_174, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gadafi", Some((0, 14)), None, Some((10, 11))}
|
||||
mat!{match_basic_175, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadafi", Some((0, 15)), None, Some((11, 12))}
|
||||
mat!{match_basic_176, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moamar Gaddafi", Some((0, 14)), None, Some((9, 11))}
|
||||
mat!{match_basic_177, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadhdhafi", Some((0, 18)), None, Some((13, 15))}
|
||||
mat!{match_basic_178, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Khaddafi", Some((0, 16)), None, Some((11, 13))}
|
||||
mat!{match_basic_179, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafy", Some((0, 16)), None, Some((11, 13))}
|
||||
mat!{match_basic_180, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghadafi", Some((0, 15)), None, Some((11, 12))}
|
||||
mat!{match_basic_181, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafi", Some((0, 16)), None, Some((11, 13))}
|
||||
mat!{match_basic_182, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muamar Kaddafi", Some((0, 14)), None, Some((9, 11))}
|
||||
mat!{match_basic_183, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Quathafi", Some((0, 16)), None, Some((11, 13))}
|
||||
mat!{match_basic_184, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gheddafi", Some((0, 16)), None, Some((11, 13))}
|
||||
mat!{match_basic_185, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Khadafy", Some((0, 15)), None, Some((11, 12))}
|
||||
mat!{match_basic_186, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Qudhafi", Some((0, 15)), None, Some((10, 12))}
|
||||
mat!{match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4))}
|
||||
mat!{match_basic_188, r"^.+$", r"vivi", Some((0, 4))}
|
||||
mat!{match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4))}
|
||||
mat!{match_basic_190, r"^([^!.]+).att.com!(.+)$", r"gryphon.att.com!eby", Some((0, 19)), Some((0, 7)), Some((16, 19))}
|
||||
mat!{match_basic_191, r"^([^!]+!)?([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))}
|
||||
mat!{match_basic_192, r"^([^!]+!)?([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
|
||||
mat!{match_basic_193, r"^([^!]+!)?([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
|
||||
mat!{match_basic_194, r"^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), Some((4, 8)), Some((8, 11))}
|
||||
mat!{match_basic_195, r"((foo)|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), None, Some((0, 3))}
|
||||
mat!{match_basic_196, r"((foo)|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), None, Some((4, 7))}
|
||||
mat!{match_basic_197, r"((foo)|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
|
||||
mat!{match_basic_198, r"((foo)|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))}
|
||||
mat!{match_basic_199, r"((foo)|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))}
|
||||
mat!{match_basic_200, r"((foo)|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
|
||||
mat!{match_basic_201, r"(foo|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), Some((0, 3))}
|
||||
mat!{match_basic_202, r"(foo|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), Some((4, 7))}
|
||||
mat!{match_basic_203, r"(foo|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3))}
|
||||
mat!{match_basic_204, r"(foo|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3))}
|
||||
mat!{match_basic_205, r"(foo|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7))}
|
||||
mat!{match_basic_206, r"(foo|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3))}
|
||||
mat!{match_basic_207, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
|
||||
mat!{match_basic_208, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3))}
|
||||
mat!{match_basic_209, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
|
||||
mat!{match_basic_210, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
|
||||
mat!{match_basic_211, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7))}
|
||||
mat!{match_basic_212, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bas", Some((0, 3)), Some((0, 3)), None, Some((0, 3))}
|
||||
mat!{match_basic_213, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bar!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))}
|
||||
mat!{match_basic_214, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11))}
|
||||
mat!{match_basic_215, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7))}
|
||||
mat!{match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4))}
|
||||
mat!{match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4))}
|
||||
mat!{match_basic_218, r"\\XXX", r"\XXX", Some((0, 4))}
|
||||
mat!{match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4))}
|
||||
mat!{match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4))}
|
||||
mat!{match_basic_221, r"\\000", r"\000", Some((0, 4))}
|
||||
|
||||
// Tests from nullsubexpr.dat
|
||||
mat!{match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None}
|
||||
mat!{match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0))}
|
||||
mat!{match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0))}
|
||||
mat!{match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_17, r"(a+)+", r"x", None}
|
||||
mat!{match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None}
|
||||
mat!{match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0))}
|
||||
mat!{match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None}
|
||||
mat!{match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_34, r"([^b]*)*", r"aaaaaab", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_41, r"([ab]*)*", r"aaaabcde", Some((0, 5)), Some((0, 5))}
|
||||
mat!{match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None}
|
||||
mat!{match_nullsubexpr_46, r"([^ab]*)*", r"ccccxx", Some((0, 6)), Some((0, 6))}
|
||||
mat!{match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None}
|
||||
mat!{match_nullsubexpr_50, r"((z)+|a)*", r"zabcde", Some((0, 2)), Some((1, 2))}
|
||||
mat!{match_nullsubexpr_69, r"(a*)*(x)", r"x", Some((0, 1)), None, Some((0, 1))}
|
||||
mat!{match_nullsubexpr_70, r"(a*)*(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))}
|
||||
mat!{match_nullsubexpr_71, r"(a*)*(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))}
|
||||
mat!{match_nullsubexpr_73, r"(a*)+(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_74, r"(a*)+(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2))}
|
||||
mat!{match_nullsubexpr_75, r"(a*)+(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2))}
|
||||
mat!{match_nullsubexpr_77, r"(a*){2}(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1))}
|
||||
mat!{match_nullsubexpr_78, r"(a*){2}(x)", r"ax", Some((0, 2)), Some((1, 1)), Some((1, 2))}
|
||||
mat!{match_nullsubexpr_79, r"(a*){2}(x)", r"axa", Some((0, 2)), Some((1, 1)), Some((1, 2))}
|
||||
|
||||
// Tests from repetition.dat
|
||||
mat!{match_repetition_10, r"((..)|(.))", r"", None}
|
||||
mat!{match_repetition_11, r"((..)|(.))((..)|(.))", r"", None}
|
||||
mat!{match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None}
|
||||
mat!{match_repetition_14, r"((..)|(.)){1}", r"", None}
|
||||
mat!{match_repetition_15, r"((..)|(.)){2}", r"", None}
|
||||
mat!{match_repetition_16, r"((..)|(.)){3}", r"", None}
|
||||
mat!{match_repetition_18, r"((..)|(.))*", r"", Some((0, 0))}
|
||||
mat!{match_repetition_20, r"((..)|(.))", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
|
||||
mat!{match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None}
|
||||
mat!{match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None}
|
||||
mat!{match_repetition_24, r"((..)|(.)){1}", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
|
||||
mat!{match_repetition_25, r"((..)|(.)){2}", r"a", None}
|
||||
mat!{match_repetition_26, r"((..)|(.)){3}", r"a", None}
|
||||
mat!{match_repetition_28, r"((..)|(.))*", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1))}
|
||||
mat!{match_repetition_30, r"((..)|(.))", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_31, r"((..)|(.))((..)|(.))", r"aa", Some((0, 2)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2))}
|
||||
mat!{match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None}
|
||||
mat!{match_repetition_34, r"((..)|(.)){1}", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_35, r"((..)|(.)){2}", r"aa", Some((0, 2)), Some((1, 2)), None, Some((1, 2))}
|
||||
mat!{match_repetition_36, r"((..)|(.)){3}", r"aa", None}
|
||||
mat!{match_repetition_38, r"((..)|(.))*", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_40, r"((..)|(.))", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_41, r"((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3))}
|
||||
mat!{match_repetition_42, r"((..)|(.))((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)), Some((2, 3)), None, Some((2, 3))}
|
||||
mat!{match_repetition_44, r"((..)|(.)){1}", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_46, r"((..)|(.)){2}", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))}
|
||||
mat!{match_repetition_47, r"((..)|(.)){3}", r"aaa", Some((0, 3)), Some((2, 3)), None, Some((2, 3))}
|
||||
mat!{match_repetition_50, r"((..)|(.))*", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3))}
|
||||
mat!{match_repetition_52, r"((..)|(.))", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_53, r"((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
|
||||
mat!{match_repetition_54, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)), Some((3, 4)), None, Some((3, 4))}
|
||||
mat!{match_repetition_56, r"((..)|(.)){1}", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_57, r"((..)|(.)){2}", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
|
||||
mat!{match_repetition_59, r"((..)|(.)){3}", r"aaaa", Some((0, 4)), Some((3, 4)), Some((0, 2)), Some((3, 4))}
|
||||
mat!{match_repetition_61, r"((..)|(.))*", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
|
||||
mat!{match_repetition_63, r"((..)|(.))", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_64, r"((..)|(.))((..)|(.))", r"aaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
|
||||
mat!{match_repetition_65, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaa", Some((0, 5)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 5)), None, Some((4, 5))}
|
||||
mat!{match_repetition_67, r"((..)|(.)){1}", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_68, r"((..)|(.)){2}", r"aaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
|
||||
mat!{match_repetition_70, r"((..)|(.)){3}", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))}
|
||||
mat!{match_repetition_73, r"((..)|(.))*", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5))}
|
||||
mat!{match_repetition_75, r"((..)|(.))", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_76, r"((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None}
|
||||
mat!{match_repetition_77, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 6)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 6)), Some((4, 6)), None}
|
||||
mat!{match_repetition_79, r"((..)|(.)){1}", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None}
|
||||
mat!{match_repetition_80, r"((..)|(.)){2}", r"aaaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None}
|
||||
mat!{match_repetition_81, r"((..)|(.)){3}", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None}
|
||||
mat!{match_repetition_83, r"((..)|(.))*", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None}
|
||||
mat!{match_repetition_90, r"X(.?){0,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_91, r"X(.?){1,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_92, r"X(.?){2,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_93, r"X(.?){3,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_94, r"X(.?){4,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_95, r"X(.?){5,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_96, r"X(.?){6,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_97, r"X(.?){7,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8))}
|
||||
mat!{match_repetition_98, r"X(.?){8,}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_100, r"X(.?){0,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_102, r"X(.?){1,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_104, r"X(.?){2,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_106, r"X(.?){3,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_108, r"X(.?){4,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_110, r"X(.?){5,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_112, r"X(.?){6,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_114, r"X(.?){7,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_115, r"X(.?){8,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8))}
|
||||
mat!{match_repetition_126, r"(a|ab|c|bcd){0,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
|
||||
mat!{match_repetition_127, r"(a|ab|c|bcd){1,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
|
||||
mat!{match_repetition_128, r"(a|ab|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
|
||||
mat!{match_repetition_129, r"(a|ab|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
|
||||
mat!{match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None}
|
||||
mat!{match_repetition_131, r"(a|ab|c|bcd){0,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
|
||||
mat!{match_repetition_132, r"(a|ab|c|bcd){1,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
|
||||
mat!{match_repetition_133, r"(a|ab|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
|
||||
mat!{match_repetition_134, r"(a|ab|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6))}
|
||||
mat!{match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None}
|
||||
mat!{match_repetition_136, r"(a|ab|c|bcd)*(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
|
||||
mat!{match_repetition_137, r"(a|ab|c|bcd)+(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1))}
|
||||
mat!{match_repetition_143, r"(ab|a|c|bcd){0,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_145, r"(ab|a|c|bcd){1,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_147, r"(ab|a|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_149, r"(ab|a|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None}
|
||||
mat!{match_repetition_152, r"(ab|a|c|bcd){0,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_154, r"(ab|a|c|bcd){1,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_156, r"(ab|a|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_158, r"(ab|a|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None}
|
||||
mat!{match_repetition_161, r"(ab|a|c|bcd)*(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
mat!{match_repetition_163, r"(ab|a|c|bcd)+(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6))}
|
||||
|
@@ -1,24 +0,0 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

macro_rules! regex {
    ($re:expr) => (
        match ::regex::Regex::new($re) {
            Ok(re) => re,
            Err(err) => panic!("{:?}", err),
        }
    );
}
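This shim mirrors the `regex!` syntax extension so the shared test and bench sources pulled in below can run unchanged against the dynamic `Regex::new` path. A hypothetical call site, purely for illustration:

// With the shim above, this expands to a runtime parse via ::regex::Regex::new
// (panicking on an invalid pattern) rather than a compile-time expansion.
let re = regex!(r"\d{4}-\d{2}-\d{2}");
assert!(re.is_match("2014-04-23"));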

#[path = "bench.rs"]
mod dynamic_bench;
#[path = "tests.rs"]
mod dynamic_tests;
@@ -1,26 +0,0 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use regex::Regex;
static RE: Regex = regex!(r"\d+");

#[test]
fn static_splitn() {
    let text = "cauchy123plato456tyler789binx";
    let subs: Vec<&str> = RE.splitn(text, 2).collect();
    assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
}

#[test]
fn static_split() {
    let text = "cauchy123plato456tyler789binx";
    let subs: Vec<&str> = RE.split(text).collect();
    assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
}
@@ -1,245 +0,0 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// ignore-tidy-linelength
|
||||
// ignore-lexer-test FIXME #15679
|
||||
|
||||
use regex::{Regex, NoExpand};
|
||||
|
||||
#[test]
|
||||
fn splitn() {
|
||||
let re = regex!(r"\d+");
|
||||
let text = "cauchy123plato456tyler789binx";
|
||||
let subs: Vec<&str> = re.splitn(text, 2).collect();
|
||||
assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split() {
|
||||
let re = regex!(r"\d+");
|
||||
let text = "cauchy123plato456tyler789binx";
|
||||
let subs: Vec<&str> = re.split(text).collect();
|
||||
assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_regex_empty_match() {
|
||||
let re = regex!("");
|
||||
let ms = re.find_iter("").collect::<Vec<(uint, uint)>>();
|
||||
assert_eq!(ms, vec![(0, 0)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_regex_nonempty_match() {
|
||||
let re = regex!("");
|
||||
let ms = re.find_iter("abc").collect::<Vec<(uint, uint)>>();
|
||||
assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn quoted_bracket_set() {
|
||||
let re = regex!(r"([\x{5b}\x{5d}])");
|
||||
let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
|
||||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||||
let re = regex!(r"([\[\]])");
|
||||
let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
|
||||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn first_range_starts_with_left_bracket() {
|
||||
let re = regex!(r"([[-z])");
|
||||
let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
|
||||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn range_ends_with_escape() {
|
||||
let re = regex!(r"([\[-\x{5d}])");
|
||||
let ms = re.find_iter("[]").collect::<Vec<(uint, uint)>>();
|
||||
assert_eq!(ms, vec![(0, 1), (1, 2)]);
|
||||
}
|
||||
|
||||
macro_rules! replace {
|
||||
($name:ident, $which:ident, $re:expr,
|
||||
$search:expr, $replace:expr, $result:expr) => (
|
||||
#[test]
|
||||
fn $name() {
|
||||
let re = regex!($re);
|
||||
assert_eq!(re.$which($search, $replace), String::from_str($result));
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
replace!{rep_first, replace, r"\d", "age: 26", "Z", "age: Z6"}
|
||||
replace!{rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z"}
|
||||
replace!{rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ"}
|
||||
replace!{rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1"}
|
||||
replace!{rep_double_dollar, replace,
|
||||
r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1"}
|
||||
replace!{rep_no_expand, replace,
|
||||
r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1"}
|
||||
replace!{rep_named, replace_all,
|
||||
r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
|
||||
"w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3"}
|
||||
replace!{rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t",
|
||||
"", "trim me"}
|
||||
|
||||
macro_rules! noparse {
|
||||
($name:ident, $re:expr) => (
|
||||
#[test]
|
||||
fn $name() {
|
||||
let re = $re;
|
||||
match Regex::new(re) {
|
||||
Err(_) => {},
|
||||
Ok(_) => panic!("Regex '{}' should cause a parse error.", re),
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
noparse!{fail_double_repeat, "a**"}
|
||||
noparse!{fail_no_repeat_arg, "*"}
|
||||
noparse!{fail_no_repeat_arg_begin, "^*"}
|
||||
noparse!{fail_incomplete_escape, "\\"}
|
||||
noparse!{fail_class_incomplete, "[A-"}
|
||||
noparse!{fail_class_not_closed, "[A"}
|
||||
noparse!{fail_class_no_begin, r"[\A]"}
|
||||
noparse!{fail_class_no_end, r"[\z]"}
|
||||
noparse!{fail_class_no_boundary, r"[\b]"}
|
||||
noparse!{fail_open_paren, "("}
|
||||
noparse!{fail_close_paren, ")"}
|
||||
noparse!{fail_invalid_range, "[a-Z]"}
|
||||
noparse!{fail_empty_capture_name, "(?P<>a)"}
|
||||
noparse!{fail_empty_capture_exp, "(?P<name>)"}
|
||||
noparse!{fail_bad_capture_name, "(?P<na-me>)"}
|
||||
noparse!{fail_bad_flag, "(?a)a"}
|
||||
noparse!{fail_empty_alt_before, "|a"}
|
||||
noparse!{fail_empty_alt_after, "a|"}
|
||||
noparse!{fail_counted_big_exact, "a{1001}"}
|
||||
noparse!{fail_counted_big_min, "a{1001,}"}
|
||||
noparse!{fail_counted_no_close, "a{1001"}
|
||||
noparse!{fail_unfinished_cap, "(?"}
|
||||
noparse!{fail_unfinished_escape, "\\"}
|
||||
noparse!{fail_octal_digit, r"\8"}
|
||||
noparse!{fail_hex_digit, r"\xG0"}
|
||||
noparse!{fail_hex_short, r"\xF"}
|
||||
noparse!{fail_hex_long_digits, r"\x{fffg}"}
|
||||
noparse!{fail_flag_bad, "(?a)"}
|
||||
noparse!{fail_flag_empty, "(?)"}
|
||||
noparse!{fail_double_neg, "(?-i-i)"}
|
||||
noparse!{fail_neg_empty, "(?i-)"}
|
||||
noparse!{fail_empty_group, "()"}
|
||||
noparse!{fail_dupe_named, "(?P<a>.)(?P<a>.)"}
|
||||
noparse!{fail_range_end_no_class, "[a-[:lower:]]"}
|
||||
noparse!{fail_range_end_no_begin, r"[a-\A]"}
|
||||
noparse!{fail_range_end_no_end, r"[a-\z]"}
|
||||
noparse!{fail_range_end_no_boundary, r"[a-\b]"}
|
||||
noparse!{fail_repeat_no_expr, r"-|+"}
|
||||
|
||||
macro_rules! mat {
|
||||
($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
|
||||
#[test]
|
||||
fn $name() {
|
||||
let text = $text;
|
||||
let expected: Vec<Option<(uint, uint)>> = vec!($($loc)+);
|
||||
let r = regex!($re);
|
||||
let got = match r.captures(text) {
|
||||
Some(c) => c.iter_pos().collect::<Vec<Option<(uint, uint)>>>(),
|
||||
None => vec!(None),
|
||||
};
|
||||
// The test set sometimes leave out capture groups, so truncate
|
||||
// actual capture groups to match test set.
|
||||
let mut sgot = got.as_slice();
|
||||
if sgot.len() > expected.len() {
|
||||
sgot = &sgot[..expected.len()]
|
||||
}
|
||||
if expected != sgot {
|
||||
panic!("For RE '{}' against '{}', expected '{:?}' but got '{:?}'",
|
||||
$re, text, expected, sgot);
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
// Some crazy expressions from regular-expressions.info.
|
||||
mat!{match_ranges,
|
||||
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
|
||||
"num: 255", Some((5, 8))}
|
||||
mat!{match_ranges_not,
|
||||
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
|
||||
"num: 256", None}
|
||||
mat!{match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))}
|
||||
mat!{match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))}
|
||||
mat!{match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))}
|
||||
mat!{match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None}
|
||||
mat!{match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
|
||||
"mine is jam.slam@gmail.com ", Some((8, 26))}
|
||||
mat!{match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
|
||||
"mine is jam.slam@gmail ", None}
|
||||
mat!{match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
|
||||
"mine is jam.slam@gmail.com ", Some((8, 26))}
|
||||
mat!{match_date1,
|
||||
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||||
"1900-01-01", Some((0, 10))}
|
||||
mat!{match_date2,
|
||||
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||||
"1900-00-01", None}
|
||||
mat!{match_date3,
|
||||
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
|
||||
"1900-13-01", None}
|
||||
|
||||
// Exercise the flags.
|
||||
mat!{match_flag_case, "(?i)abc", "ABC", Some((0, 3))}
|
||||
mat!{match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3))}
|
||||
mat!{match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None}
|
||||
mat!{match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2))}
|
||||
mat!{match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4))}
|
||||
mat!{match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None}
|
||||
mat!{match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2))}
|
||||
mat!{match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11))}
|
||||
mat!{match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))}
|
||||
mat!{match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))}
|
||||
mat!{match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))}
|
||||
|
||||
// Some Unicode tests.
|
||||
// A couple of these are commented out because something in the guts of macro expansion is creating
|
||||
// invalid byte strings.
|
||||
//mat!{uni_literal, r"Ⅰ", "Ⅰ", Some((0, 3))}
|
||||
mat!{uni_one, r"\pN", "Ⅰ", Some((0, 3))}
|
||||
mat!{uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8))}
|
||||
mat!{uni_not, r"\PN+", "abⅠ", Some((0, 2))}
|
||||
mat!{uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2))}
|
||||
mat!{uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5))}
|
||||
mat!{uni_case, r"(?i)Δ", "δ", Some((0, 2))}
|
||||
//mat!{uni_case_not, r"Δ", "δ", None}
|
||||
mat!{uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))}
|
||||
mat!{uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))}
|
||||
mat!{uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))}
|
||||
mat!{uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))}
|
||||
|
||||
// Test the Unicode friendliness of Perl character classes.
|
||||
mat!{uni_perl_w, r"\w+", "dδd", Some((0, 4))}
|
||||
mat!{uni_perl_w_not, r"\w+", "⥡", None}
|
||||
mat!{uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))}
|
||||
mat!{uni_perl_d, r"\d+", "1२३9", Some((0, 8))}
|
||||
mat!{uni_perl_d_not, r"\d+", "Ⅱ", None}
|
||||
mat!{uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3))}
|
||||
mat!{uni_perl_s, r"\s+", " ", Some((0, 3))}
|
||||
mat!{uni_perl_s_not, r"\s+", "☃", None}
|
||||
mat!{uni_perl_s_neg, r"\S+", "☃", Some((0, 3))}
|
||||
|
||||
// And do the same for word boundaries.
|
||||
mat!{uni_boundary_none, r"\d\b", "6δ", None}
|
||||
mat!{uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))}
|
||||
|
||||
// A whole mess of tests from Glenn Fowler's regex test suite.
|
||||
// Generated by the 'src/etc/regex-match-tests' program.
|
||||
mod matches;
|
19
src/libregex/testdata/LICENSE
vendored
@@ -1,19 +0,0 @@
The following license covers testregex.c and all associated test data.

Permission is hereby granted, free of charge, to any person obtaining a
copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of the
Software, and to permit persons to whom the Software is furnished to do
so, subject to the following disclaimer:

THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17
src/libregex/testdata/README
vendored
@@ -1,17 +0,0 @@
Test data was taken from the Go distribution, which was in turn taken from the
testregex test suite:

http://www2.research.att.com/~astopen/testregex/testregex.html

The LICENSE in this directory corresponds to the LICENSE that the data was
released under.

The tests themselves were modified for RE2/Go. A couple were modified further
by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
have been a bad idea, but I think being consistent with an established Regex
library is worth something.

Note that these files are read by 'src/etc/regexp-match-tests' and turned into
Rust tests found in 'src/libregexp/tests/matches.rs'.
221
src/libregex/testdata/basic.dat
vendored
@@ -1,221 +0,0 @@
|
||||
NOTE all standard compliant implementations should pass these : 2002-05-31
|
||||
|
||||
BE abracadabra$ abracadabracadabra (7,18)
|
||||
BE a...b abababbb (2,7)
|
||||
BE XXXXXX ..XXXXXX (2,8)
|
||||
E \) () (1,2)
|
||||
BE a] a]a (0,2)
|
||||
B } } (0,1)
|
||||
E \} } (0,1)
|
||||
BE \] ] (0,1)
|
||||
B ] ] (0,1)
|
||||
E ] ] (0,1)
|
||||
B { { (0,1)
|
||||
B } } (0,1)
|
||||
BE ^a ax (0,1)
|
||||
BE \^a a^a (1,3)
|
||||
BE a\^ a^ (0,2)
|
||||
BE a$ aa (1,2)
|
||||
BE a\$ a$ (0,2)
|
||||
BE ^$ NULL (0,0)
|
||||
E $^ NULL (0,0)
|
||||
E a($) aa (1,2)(2,2)
|
||||
E a*(^a) aa (0,1)(0,1)
|
||||
E (..)*(...)* a (0,0)
|
||||
E (..)*(...)* abcd (0,4)(2,4)
|
||||
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
|
||||
E (ab)c|abc abc (0,3)(0,2)
|
||||
E a{0}b ab (1,2)
|
||||
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E a{9876543210} NULL BADBR
|
||||
E ((a|a)|a) a (0,1)(0,1)(0,1)
|
||||
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
|
||||
E a*(a.|aa) aaaa (0,4)(2,4)
|
||||
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
|
||||
E (a|b)?.* b (0,1)(0,1)
|
||||
E (a|b)c|a(b|c) ac (0,2)(0,1)
|
||||
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
|
||||
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
|
||||
E (a|b)*c|(a|ab)*c xc (1,2)
|
||||
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
|
||||
E a?(ab|ba)ab abab (0,4)(0,2)
|
||||
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
|
||||
E ab|abab abbabab (0,2)
|
||||
E aba|bab|bba baaabbbaba (5,8)
|
||||
E aba|bab baaabbbaba (6,9)
|
||||
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
|
||||
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
|
||||
E ab|a xabc (1,3)
|
||||
E ab|a xxabc (2,4)
|
||||
Ei (Ab|cD)* aBcD (0,4)(2,4)
|
||||
BE [^-] --a (2,3)
|
||||
BE [a-]* --a (0,3)
|
||||
BE [a-m-]* --amoma-- (0,4)
|
||||
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
|
||||
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
|
||||
{E [[:upper:]] A (0,1) [[<element>]] not supported
|
||||
E [[:lower:]]+ `az{ (1,3)
|
||||
E [[:upper:]]+ @AZ[ (1,3)
|
||||
# No collation in Go
|
||||
#BE [[-]] [[-]] (2,4)
|
||||
#BE [[.NIL.]] NULL ECOLLATE
|
||||
#BE [[=aleph=]] NULL ECOLLATE
|
||||
}
|
||||
BE$ \n \n (0,1)
|
||||
BEn$ \n \n (0,1)
|
||||
BE$ [^a] \n (0,1)
|
||||
BE$ \na \na (0,2)
|
||||
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
|
||||
BE xxx xxx (0,3)
|
||||
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
|
||||
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
|
||||
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
|
||||
E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
|
||||
E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
|
||||
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
|
||||
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
|
||||
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
|
||||
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
|
||||
BE$ .* \x01\x7f (0,2)
|
||||
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
|
||||
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
|
||||
E a*a*a*a*a*b aaaaaaaaab (0,10)
|
||||
BE ^ NULL (0,0)
|
||||
BE $ NULL (0,0)
|
||||
BE ^$ NULL (0,0)
|
||||
BE ^a$ a (0,1)
|
||||
BE abc abc (0,3)
|
||||
BE abc xabcy (1,4)
|
||||
BE abc ababc (2,5)
|
||||
BE ab*c abc (0,3)
|
||||
BE ab*bc abc (0,3)
|
||||
BE ab*bc abbc (0,4)
|
||||
BE ab*bc abbbbc (0,6)
|
||||
E ab+bc abbc (0,4)
|
||||
E ab+bc abbbbc (0,6)
|
||||
E ab?bc abbc (0,4)
|
||||
E ab?bc abc (0,3)
|
||||
E ab?c abc (0,3)
|
||||
BE ^abc$ abc (0,3)
|
||||
BE ^abc abcc (0,3)
|
||||
BE abc$ aabc (1,4)
|
||||
BE ^ abc (0,0)
|
||||
BE $ abc (3,3)
|
||||
BE a.c abc (0,3)
|
||||
BE a.c axc (0,3)
|
||||
BE a.*c axyzc (0,5)
|
||||
BE a[bc]d abd (0,3)
|
||||
BE a[b-d]e ace (0,3)
|
||||
BE a[b-d] aac (1,3)
|
||||
BE a[-b] a- (0,2)
|
||||
BE a[b-] a- (0,2)
|
||||
BE a] a] (0,2)
|
||||
BE a[]]b a]b (0,3)
|
||||
BE a[^bc]d aed (0,3)
|
||||
BE a[^-b]c adc (0,3)
|
||||
BE a[^]b]c adc (0,3)
|
||||
E ab|cd abc (0,2)
|
||||
E ab|cd abcd (0,2)
|
||||
E a\(b a(b (0,3)
|
||||
E a\(*b ab (0,2)
|
||||
E a\(*b a((b (0,4)
|
||||
E ((a)) abc (0,1)(0,1)(0,1)
|
||||
E (a)b(c) abc (0,3)(0,1)(2,3)
|
||||
E a+b+c aabbabc (4,7)
|
||||
E a* aaa (0,3)
|
||||
#E (a*)* - (0,0)(0,0)
|
||||
E (a*)* - (0,0)(?,?) RE2/Go
|
||||
E (a*)+ - (0,0)(0,0)
|
||||
#E (a*|b)* - (0,0)(0,0)
|
||||
E (a*|b)* - (0,0)(?,?) RE2/Go
|
||||
E (a+|b)* ab (0,2)(1,2)
|
||||
E (a+|b)+ ab (0,2)(1,2)
|
||||
E (a+|b)? ab (0,1)(0,1)
|
||||
BE [^ab]* cde (0,3)
|
||||
#E (^)* - (0,0)(0,0)
|
||||
E (^)* - (0,0)(?,?) RE2/Go
|
||||
BE a* NULL (0,0)
|
||||
E ([abc])*d abbbcd (0,6)(4,5)
|
||||
E ([abc])*bcd abcd (0,4)(0,1)
|
||||
E a|b|c|d|e e (0,1)
|
||||
E (a|b|c|d|e)f ef (0,2)(0,1)
|
||||
#E ((a*|b))* - (0,0)(0,0)(0,0)
|
||||
E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
|
||||
BE abcd*efg abcdefg (0,7)
|
||||
BE ab* xabyabbbz (1,3)
|
||||
BE ab* xayabbbz (1,2)
|
||||
E (ab|cd)e abcde (2,5)(2,4)
|
||||
BE [abhgefdc]ij hij (0,3)
|
||||
E (a|b)c*d abcd (1,4)(1,2)
|
||||
E (ab|ab*)bc abc (0,3)(0,1)
|
||||
E a([bc]*)c* abc (0,3)(1,3)
|
||||
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
|
||||
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
|
||||
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
|
||||
E a[bcd]*dcdcde adcdcde (0,7)
|
||||
E (ab|a)b*c abc (0,3)(0,2)
|
||||
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
|
||||
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
|
||||
E ^a(bc+|b[eh])g|.h$ abh (1,3)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
|
||||
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
|
||||
BE multiple words multiple words yeah (0,14)
|
||||
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
|
||||
BE abcd abcd (0,4)
|
||||
E a(bc)d abcd (0,4)(1,3)
|
||||
E a[-]?c ac (0,3)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
|
||||
E a+(b|c)*d+ aabcdd (0,6)(3,4)
|
||||
E ^.+$ vivi (0,4)
|
||||
E ^(.+)$ vivi (0,4)(0,4)
|
||||
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
|
||||
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
|
||||
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
|
||||
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
|
||||
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
|
||||
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
|
||||
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
|
||||
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
|
||||
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
|
||||
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
|
||||
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
|
||||
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
|
||||
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
|
||||
E (foo|(bar))!bas foo!bas (0,7)(0,3)
|
||||
E (foo|bar)!bas bar!bas (0,7)(0,3)
|
||||
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
|
||||
E (foo|bar)!bas foo!bas (0,7)(0,3)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
|
||||
E .*(/XXX).* /XXX (0,4)(0,4)
|
||||
E .*(\\XXX).* \XXX (0,4)(0,4)
|
||||
E \\XXX \XXX (0,4)
|
||||
E .*(/000).* /000 (0,4)(0,4)
|
||||
E .*(\\000).* \000 (0,4)(0,4)
|
||||
E \\000 \000 (0,4)
|
79
src/libregex/testdata/nullsubexpr.dat
vendored
@ -1,79 +0,0 @@
|
||||
NOTE null subexpression matches : 2002-06-06
|
||||
|
||||
E (a*)* a (0,1)(0,1)
|
||||
#E SAME x (0,0)(0,0)
|
||||
E SAME x (0,0)(?,?) RE2/Go
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a*)+ a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a+)* a (0,1)(0,1)
|
||||
E SAME x (0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a+)+ a (0,1)(0,1)
|
||||
E SAME x NOMATCH
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
|
||||
E ([a]*)* a (0,1)(0,1)
|
||||
#E SAME x (0,0)(0,0)
|
||||
E SAME x (0,0)(?,?) RE2/Go
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E ([a]*)+ a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E ([^b]*)* a (0,1)(0,1)
|
||||
#E SAME b (0,0)(0,0)
|
||||
E SAME b (0,0)(?,?) RE2/Go
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaab (0,6)(0,6)
|
||||
E ([ab]*)* a (0,1)(0,1)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME ababab (0,6)(0,6)
|
||||
E SAME bababa (0,6)(0,6)
|
||||
E SAME b (0,1)(0,1)
|
||||
E SAME bbbbbb (0,6)(0,6)
|
||||
E SAME aaaabcde (0,5)(0,5)
|
||||
E ([^a]*)* b (0,1)(0,1)
|
||||
E SAME bbbbbb (0,6)(0,6)
|
||||
#E SAME aaaaaa (0,0)(0,0)
|
||||
E SAME aaaaaa (0,0)(?,?) RE2/Go
|
||||
E ([^ab]*)* ccccxx (0,6)(0,6)
|
||||
#E SAME ababab (0,0)(0,0)
|
||||
E SAME ababab (0,0)(?,?) RE2/Go
|
||||
|
||||
E ((z)+|a)* zabcde (0,2)(1,2)
|
||||
|
||||
#{E a+? aaaaaa (0,1) no *? +? mimimal match ops
|
||||
#E (a) aaa (0,1)(0,1)
|
||||
#E (a*?) aaa (0,0)(0,0)
|
||||
#E (a)*? aaa (0,0)
|
||||
#E (a*?)*? aaa (0,0)
|
||||
#}
|
||||
|
||||
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
|
||||
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
|
||||
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
|
||||
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
|
||||
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
|
||||
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
|
||||
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
|
||||
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
|
||||
|
||||
#E (a*)*(x) x (0,1)(0,0)(0,1)
|
||||
E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
|
||||
E (a*)*(x) ax (0,2)(0,1)(1,2)
|
||||
E (a*)*(x) axa (0,2)(0,1)(1,2)
|
||||
|
||||
E (a*)+(x) x (0,1)(0,0)(0,1)
|
||||
E (a*)+(x) ax (0,2)(0,1)(1,2)
|
||||
E (a*)+(x) axa (0,2)(0,1)(1,2)
|
||||
|
||||
E (a*){2}(x) x (0,1)(0,0)(0,1)
|
||||
E (a*){2}(x) ax (0,2)(1,1)(1,2)
|
||||
E (a*){2}(x) axa (0,2)(1,1)(1,2)
|
163
src/libregex/testdata/repetition.dat
vendored
@ -1,163 +0,0 @@
|
||||
NOTE implicit vs. explicit repetitions : 2009-02-02
|
||||
|
||||
# Glenn Fowler <gsf@research.att.com>
|
||||
# conforming matches (column 4) must match one of the following BREs
|
||||
# NOMATCH
|
||||
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
|
||||
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
|
||||
# i.e., each 3-tuple has two identical elements and one (?,?)
|
||||
|
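For reference, each record in these .dat files is whitespace-separated: a flag column (`B`, `E`, `L`, with a leading `#` disabling a case and suffixes such as `RE2/Go` scoping it to particular engines), the pattern, the input text (`NULL` for the empty string), and the expected capture spans, where `(?,?)` marks a group that did not participate. A minimal sketch of decoding one span column such as `(0,2)(0,2)(?,?)`, in current Rust with a hypothetical helper name:

```rust
// Parse an expected-spans column like "(0,2)(0,2)(?,?)" into a list of
// optional (start, end) pairs; "(?,?)" means the group did not participate.
fn parse_spans(column: &str) -> Option<Vec<Option<(usize, usize)>>> {
    column
        .trim_end_matches(')')
        .split(")(")
        .map(|pair| pair.trim_start_matches('('))
        .map(|pair| {
            if pair == "?,?" {
                Some(None)
            } else {
                let (s, e) = pair.split_once(',')?;
                Some(Some((s.parse().ok()?, e.parse().ok()?)))
            }
        })
        .collect()
}

fn main() {
    assert_eq!(
        parse_spans("(0,2)(0,2)(?,?)"),
        Some(vec![Some((0, 2)), Some((0, 2)), None])
    );
}
```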
||||
E ((..)|(.)) NULL NOMATCH
|
||||
E ((..)|(.))((..)|(.)) NULL NOMATCH
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
|
||||
|
||||
E ((..)|(.)){1} NULL NOMATCH
|
||||
E ((..)|(.)){2} NULL NOMATCH
|
||||
E ((..)|(.)){3} NULL NOMATCH
|
||||
|
||||
E ((..)|(.))* NULL (0,0)
|
||||
|
||||
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
|
||||
E ((..)|(.))((..)|(.)) a NOMATCH
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
|
||||
|
||||
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
|
||||
E ((..)|(.)){2} a NOMATCH
|
||||
E ((..)|(.)){3} a NOMATCH
|
||||
|
||||
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
|
||||
|
||||
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
|
||||
|
||||
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
|
||||
E ((..)|(.)){3} aa NOMATCH
|
||||
|
||||
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
|
||||
|
||||
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
|
||||
|
||||
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
|
||||
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
|
||||
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
|
||||
|
||||
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
|
||||
|
||||
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
|
||||
|
||||
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
|
||||
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
|
||||
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
|
||||
|
||||
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
|
||||
|
||||
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
|
||||
|
||||
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
|
||||
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
|
||||
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
|
||||
|
||||
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
|
||||
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
|
||||
|
||||
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
|
||||
|
||||
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
|
||||
|
||||
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
|
||||
|
||||
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
|
||||
|
||||
# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
|
||||
# Linux/GLIBC gets the {8,} and {8,8} wrong.
|
||||
|
||||
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
|
||||
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
|
||||
|
||||
# These test a fixed bug in my regex-tdfa that did not keep the expanded
|
||||
# form properly grouped, so right association did the wrong thing with
|
||||
# these ambiguous patterns (crafted just to test my code when I became
|
||||
# suspicious of my implementation). The first subexpression should use
|
||||
# "ab" then "a" then "bcd".
|
||||
|
||||
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
|
||||
# results like (0,6)(4,5)(6,6).
|
||||
|
||||
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1)
|
||||
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1)
|
||||
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
|
||||
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1)
|
||||
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1)
|
||||
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
|
||||
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1)
|
||||
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1)
|
||||
|
||||
# The above worked on Linux/GLIBC but the following often fail.
|
||||
# They also trip up OS X / FreeBSD / NetBSD:
|
||||
|
||||
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
|
||||
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
|
||||
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
@ -1,582 +0,0 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// FIXME: Currently, the VM simulates an NFA. It would be nice to have another
|
||||
// VM that simulates a DFA.
|
||||
//
|
||||
// According to Russ Cox[1], a DFA performs better than an NFA, principally
|
||||
// because it reuses states previously computed by the machine *and* doesn't
|
||||
// keep track of capture groups. The drawback of a DFA (aside from its
|
||||
// complexity) is that it can't accurately return the locations of submatches.
|
||||
// The NFA *can* do that. (This is my understanding anyway.)
|
||||
//
|
||||
// Cox suggests that a DFA ought to be used to answer "does this match" and
|
||||
// "where does it match" questions. (In the latter, the starting position of
|
||||
// the match is computed by executing the regex backwards.) Cox also suggests
|
||||
// that a DFA should be run when asking "where are the submatches", which can
|
||||
// 1) quickly answer "no" if there's no match and 2) discover the substring
|
||||
// that matches, which means running the NFA on smaller input.
|
||||
//
|
||||
// Currently, the NFA simulation implemented below does some dirty tricks to
|
||||
// avoid tracking capture groups when they aren't needed (which only works
|
||||
// for 'is_match', not 'find'). This is a half-measure, but does provide some
|
||||
// perf improvement.
|
||||
//
|
||||
// AFAIK, the DFA/NFA approach is implemented in RE2/C++ but *not* in RE2/Go.
|
||||
//
|
||||
// [1] - http://swtch.com/~rsc/regex/regex3.html
|
||||
|
||||
pub use self::MatchKind::*;
|
||||
pub use self::StepState::*;
|
||||
|
||||
use std::cmp;
|
||||
use std::cmp::Ordering::{self, Less, Equal, Greater};
|
||||
use std::mem;
|
||||
use std::iter::repeat;
|
||||
use std::slice::SliceExt;
|
||||
use compile::{
|
||||
Program,
|
||||
Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd, EmptyWordBoundary,
|
||||
Save, Jump, Split,
|
||||
};
|
||||
use parse::{FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED};
|
||||
use unicode::regex::PERLW;
|
||||
|
||||
pub type CaptureLocs = Vec<Option<uint>>;
|
||||
|
||||
/// Indicates the type of match to be performed by the VM.
|
||||
#[derive(Copy)]
|
||||
pub enum MatchKind {
|
||||
/// Only checks if a match exists or not. Does not return location.
|
||||
Exists,
|
||||
/// Returns the start and end indices of the entire match in the input
|
||||
/// given.
|
||||
Location,
|
||||
/// Returns the start and end indices of each submatch in the input given.
|
||||
Submatches,
|
||||
}
|
||||
|
||||
/// Runs an NFA simulation on the compiled expression given on the search text
|
||||
/// `input`. The search begins at byte index `start` and ends at byte index
|
||||
/// `end`. (The range is specified here so that zero-width assertions will work
|
||||
/// correctly when searching for successive non-overlapping matches.)
|
||||
///
|
||||
/// The `which` parameter indicates what kind of capture information the caller
|
||||
/// wants. There are three choices: match existence only, the location of the
|
||||
/// entire match or the locations of the entire match in addition to the
|
||||
/// locations of each submatch.
|
||||
pub fn run<'r, 't>(which: MatchKind, prog: &'r Program, input: &'t str,
|
||||
start: uint, end: uint) -> CaptureLocs {
|
||||
Nfa {
|
||||
which: which,
|
||||
prog: prog,
|
||||
input: input,
|
||||
start: start,
|
||||
end: end,
|
||||
ic: 0,
|
||||
chars: CharReader::new(input),
|
||||
}.run()
|
||||
}
|
||||
|
||||
struct Nfa<'r, 't> {
|
||||
which: MatchKind,
|
||||
prog: &'r Program,
|
||||
input: &'t str,
|
||||
start: uint,
|
||||
end: uint,
|
||||
ic: uint,
|
||||
chars: CharReader<'t>,
|
||||
}
|
||||
|
||||
/// Indicates the next action to take after a single non-empty instruction
|
||||
/// is processed.
|
||||
#[derive(Copy)]
|
||||
pub enum StepState {
|
||||
/// This is returned if and only if a Match instruction is reached and
|
||||
/// we only care about the existence of a match. It instructs the VM to
|
||||
/// quit early.
|
||||
StepMatchEarlyReturn,
|
||||
/// Indicates that a match was found. Thus, the rest of the states in the
|
||||
/// *current* queue should be dropped (i.e., leftmost-first semantics).
|
||||
/// States in the "next" queue can still be processed.
|
||||
StepMatch,
|
||||
/// No match was found. Continue with the next state in the queue.
|
||||
StepContinue,
|
||||
}
|
||||
|
||||
impl<'r, 't> Nfa<'r, 't> {
|
||||
fn run(&mut self) -> CaptureLocs {
|
||||
let ncaps = match self.which {
|
||||
Exists => 0,
|
||||
Location => 1,
|
||||
Submatches => self.prog.num_captures(),
|
||||
};
|
||||
let mut matched = false;
|
||||
let ninsts = self.prog.insts.len();
|
||||
let mut clist = &mut Threads::new(self.which, ninsts, ncaps);
|
||||
let mut nlist = &mut Threads::new(self.which, ninsts, ncaps);
|
||||
|
||||
let mut groups: Vec<_> = repeat(None).take(ncaps * 2).collect();
|
||||
|
||||
// Determine if the expression starts with a '^' so we can avoid
|
||||
// simulating .*?
|
||||
// Make sure multi-line mode isn't enabled for it, otherwise we can't
|
||||
// drop the initial .*?
|
||||
let prefix_anchor =
|
||||
match self.prog.insts[1] {
|
||||
EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
self.ic = self.start;
|
||||
let mut next_ic = self.chars.set(self.start);
|
||||
while self.ic <= self.end {
|
||||
if clist.size == 0 {
|
||||
// We have a match and we're done exploring alternatives.
|
||||
// Time to quit.
|
||||
if matched {
|
||||
break
|
||||
}
|
||||
|
||||
// If there are no threads to try, then we'll have to start
|
||||
// over at the beginning of the regex.
|
||||
// BUT, if there's a literal prefix for the program, try to
|
||||
// jump ahead quickly. If it can't be found, then we can bail
|
||||
// out early.
|
||||
if self.prog.prefix.len() > 0 && clist.size == 0 {
|
||||
let needle = self.prog.prefix.as_bytes();
|
||||
let haystack = &self.input.as_bytes()[self.ic..];
|
||||
match find_prefix(needle, haystack) {
|
||||
None => break,
|
||||
Some(i) => {
|
||||
self.ic += i;
|
||||
next_ic = self.chars.set(self.ic);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This simulates a preceding '.*?' for every regex by adding
|
||||
// a state starting at the current position in the input for the
|
||||
// beginning of the program only if we don't already have a match.
|
||||
if clist.size == 0 || (!prefix_anchor && !matched) {
|
||||
self.add(clist, 0, groups.as_mut_slice())
|
||||
}
|
||||
|
||||
// Now we try to read the next character.
|
||||
// As a result, the 'step' method will look at the previous
|
||||
// character.
|
||||
self.ic = next_ic;
|
||||
next_ic = self.chars.advance();
|
||||
|
||||
for i in range(0, clist.size) {
|
||||
let pc = clist.pc(i);
|
||||
let step_state = self.step(groups.as_mut_slice(), nlist,
|
||||
clist.groups(i), pc);
|
||||
match step_state {
|
||||
StepMatchEarlyReturn => return vec![Some(0), Some(0)],
|
||||
StepMatch => { matched = true; break },
|
||||
StepContinue => {},
|
||||
}
|
||||
}
|
||||
mem::swap(&mut clist, &mut nlist);
|
||||
nlist.empty();
|
||||
}
|
||||
match self.which {
|
||||
Exists if matched => vec![Some(0), Some(0)],
|
||||
Exists => vec![None, None],
|
||||
Location | Submatches => groups,
|
||||
}
|
||||
}
|
||||
|
||||
fn step(&self, groups: &mut [Option<uint>], nlist: &mut Threads,
|
||||
caps: &mut [Option<uint>], pc: uint)
|
||||
-> StepState {
|
||||
match self.prog.insts[pc] {
|
||||
Match => {
|
||||
match self.which {
|
||||
Exists => {
|
||||
return StepMatchEarlyReturn
|
||||
}
|
||||
Location => {
|
||||
groups[0] = caps[0];
|
||||
groups[1] = caps[1];
|
||||
return StepMatch
|
||||
}
|
||||
Submatches => {
|
||||
for (slot, val) in groups.iter_mut().zip(caps.iter()) {
|
||||
*slot = *val;
|
||||
}
|
||||
return StepMatch
|
||||
}
|
||||
}
|
||||
}
|
||||
OneChar(c, flags) => {
|
||||
if self.char_eq(flags & FLAG_NOCASE > 0, self.chars.prev, c) {
|
||||
self.add(nlist, pc+1, caps);
|
||||
}
|
||||
}
|
||||
CharClass(ref ranges, flags) => {
|
||||
if self.chars.prev.is_some() {
|
||||
let c = self.chars.prev.unwrap();
|
||||
let negate = flags & FLAG_NEGATED > 0;
|
||||
let casei = flags & FLAG_NOCASE > 0;
|
||||
let found = ranges.as_slice();
|
||||
let found = found.binary_search_by(|&rc| class_cmp(casei, c, rc)).is_ok();
|
||||
if found ^ negate {
|
||||
self.add(nlist, pc+1, caps);
|
||||
}
|
||||
}
|
||||
}
|
||||
Any(flags) => {
|
||||
if flags & FLAG_DOTNL > 0
|
||||
|| !self.char_eq(false, self.chars.prev, '\n') {
|
||||
self.add(nlist, pc+1, caps)
|
||||
}
|
||||
}
|
||||
EmptyBegin(_) | EmptyEnd(_) | EmptyWordBoundary(_)
|
||||
| Save(_) | Jump(_) | Split(_, _) => {},
|
||||
}
|
||||
StepContinue
|
||||
}
|
||||
|
||||
fn add(&self, nlist: &mut Threads, pc: uint, groups: &mut [Option<uint>]) {
|
||||
if nlist.contains(pc) {
|
||||
return
|
||||
}
|
||||
// We have to add states to the threads list even if they're empty.
|
||||
// TL;DR - It prevents cycles.
|
||||
// If we didn't care about cycles, we'd *only* add threads that
|
||||
// correspond to non-jumping instructions (OneChar, Any, Match, etc.).
|
||||
// But, it's possible for valid regexs (like '(a*)*') to result in
|
||||
// a cycle in the instruction list. e.g., We'll keep chasing the Split
|
||||
// instructions forever.
|
||||
// So we add these instructions to our thread queue, but in the main
|
||||
// VM loop, we look for them but simply ignore them.
|
||||
// Adding them to the queue prevents them from being revisited so we
|
||||
// can avoid cycles (and the inevitable stack overflow).
|
||||
//
|
||||
// We make a minor optimization by indicating that the state is "empty"
|
||||
// so that its capture groups are not filled in.
|
||||
match self.prog.insts[pc] {
|
||||
EmptyBegin(flags) => {
|
||||
let multi = flags & FLAG_MULTI > 0;
|
||||
nlist.add(pc, groups, true);
|
||||
if self.chars.is_begin()
|
||||
|| (multi && self.char_is(self.chars.prev, '\n')) {
|
||||
self.add(nlist, pc + 1, groups)
|
||||
}
|
||||
}
|
||||
EmptyEnd(flags) => {
|
||||
let multi = flags & FLAG_MULTI > 0;
|
||||
nlist.add(pc, groups, true);
|
||||
if self.chars.is_end()
|
||||
|| (multi && self.char_is(self.chars.cur, '\n')) {
|
||||
self.add(nlist, pc + 1, groups)
|
||||
}
|
||||
}
|
||||
EmptyWordBoundary(flags) => {
|
||||
nlist.add(pc, groups, true);
|
||||
if self.chars.is_word_boundary() == !(flags & FLAG_NEGATED > 0) {
|
||||
self.add(nlist, pc + 1, groups)
|
||||
}
|
||||
}
|
||||
Save(slot) => {
|
||||
nlist.add(pc, groups, true);
|
||||
match self.which {
|
||||
Location if slot <= 1 => {
|
||||
let old = groups[slot];
|
||||
groups[slot] = Some(self.ic);
|
||||
self.add(nlist, pc + 1, groups);
|
||||
groups[slot] = old;
|
||||
}
|
||||
Submatches => {
|
||||
let old = groups[slot];
|
||||
groups[slot] = Some(self.ic);
|
||||
self.add(nlist, pc + 1, groups);
|
||||
groups[slot] = old;
|
||||
}
|
||||
Exists | Location => self.add(nlist, pc + 1, groups),
|
||||
}
|
||||
}
|
||||
Jump(to) => {
|
||||
nlist.add(pc, groups, true);
|
||||
self.add(nlist, to, groups)
|
||||
}
|
||||
Split(x, y) => {
|
||||
nlist.add(pc, groups, true);
|
||||
self.add(nlist, x, groups);
|
||||
self.add(nlist, y, groups);
|
||||
}
|
||||
Match | OneChar(_, _) | CharClass(_, _) | Any(_) => {
|
||||
nlist.add(pc, groups, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: For case insensitive comparisons, it uses the uppercase
|
||||
// character and tests for equality. IIUC, this does not generalize to
|
||||
// all of Unicode. I believe we need to check the entire fold for each
|
||||
// character. This will be easy to add if and when it gets added to Rust's
|
||||
// standard library.
|
||||
#[inline]
|
||||
fn char_eq(&self, casei: bool, textc: Option<char>, regc: char) -> bool {
|
||||
match textc {
|
||||
None => false,
|
||||
Some(textc) => {
|
||||
regc == textc
|
||||
|| (casei && regc.to_uppercase() == textc.to_uppercase())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn char_is(&self, textc: Option<char>, regc: char) -> bool {
|
||||
textc == Some(regc)
|
||||
}
|
||||
}
|
||||
|
||||
/// CharReader is responsible for maintaining a "previous" and a "current"
|
||||
/// character. This one-character lookahead is necessary for assertions that
|
||||
/// look one character before or after the current position.
|
||||
pub struct CharReader<'t> {
|
||||
/// The previous character read. It is None only when processing the first
|
||||
/// character of the input.
|
||||
pub prev: Option<char>,
|
||||
/// The current character.
|
||||
pub cur: Option<char>,
|
||||
input: &'t str,
|
||||
next: uint,
|
||||
}
|
||||
|
||||
impl<'t> CharReader<'t> {
|
||||
/// Returns a new CharReader that advances through the input given.
|
||||
/// Note that a CharReader has no knowledge of the range in which to search
|
||||
/// the input.
|
||||
pub fn new(input: &'t str) -> CharReader<'t> {
|
||||
CharReader {
|
||||
prev: None,
|
||||
cur: None,
|
||||
input: input,
|
||||
next: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the previous and current character given any arbitrary byte
|
||||
/// index (at a Unicode codepoint boundary).
|
||||
#[inline]
|
||||
pub fn set(&mut self, ic: uint) -> uint {
|
||||
self.prev = None;
|
||||
self.cur = None;
|
||||
self.next = 0;
|
||||
|
||||
if self.input.len() == 0 {
|
||||
return 1
|
||||
}
|
||||
if ic > 0 {
|
||||
let i = cmp::min(ic, self.input.len());
|
||||
let prev = self.input.char_range_at_reverse(i);
|
||||
self.prev = Some(prev.ch);
|
||||
}
|
||||
if ic < self.input.len() {
|
||||
let cur = self.input.char_range_at(ic);
|
||||
self.cur = Some(cur.ch);
|
||||
self.next = cur.next;
|
||||
self.next
|
||||
} else {
|
||||
self.input.len() + 1
|
||||
}
|
||||
}
|
||||
|
||||
/// Does the same as `set`, except it always advances to the next
|
||||
/// character in the input (and therefore does half as many UTF8 decodings).
|
||||
#[inline]
|
||||
pub fn advance(&mut self) -> uint {
|
||||
self.prev = self.cur;
|
||||
if self.next < self.input.len() {
|
||||
let cur = self.input.char_range_at(self.next);
|
||||
self.cur = Some(cur.ch);
|
||||
self.next = cur.next;
|
||||
} else {
|
||||
self.cur = None;
|
||||
self.next = self.input.len() + 1;
|
||||
}
|
||||
self.next
|
||||
}
|
||||
|
||||
/// Returns true if and only if this is the beginning of the input
|
||||
/// (ignoring the range of the input to search).
|
||||
#[inline]
|
||||
pub fn is_begin(&self) -> bool { self.prev.is_none() }
|
||||
|
||||
/// Returns true if and only if this is the end of the input
|
||||
/// (ignoring the range of the input to search).
|
||||
#[inline]
|
||||
pub fn is_end(&self) -> bool { self.cur.is_none() }
|
||||
|
||||
/// Returns true if and only if the current position is a word boundary.
|
||||
/// (Ignoring the range of the input to search.)
|
||||
pub fn is_word_boundary(&self) -> bool {
|
||||
if self.is_begin() {
|
||||
return is_word(self.cur)
|
||||
}
|
||||
if self.is_end() {
|
||||
return is_word(self.prev)
|
||||
}
|
||||
(is_word(self.cur) && !is_word(self.prev))
|
||||
|| (is_word(self.prev) && !is_word(self.cur))
|
||||
}
|
||||
}
|
||||
|
||||
struct Thread {
|
||||
pc: uint,
|
||||
groups: Vec<Option<uint>>,
|
||||
}
|
||||
|
||||
struct Threads {
|
||||
which: MatchKind,
|
||||
queue: Vec<Thread>,
|
||||
sparse: Vec<uint>,
|
||||
size: uint,
|
||||
}
|
||||
|
||||
impl Threads {
|
||||
// This is using a wicked neat trick to provide constant time lookup
|
||||
// for threads in the queue using a sparse set. A queue of threads is
|
||||
// allocated once with maximal size when the VM initializes and is reused
|
||||
// throughout execution. That is, there should be zero allocation during
|
||||
// the execution of a VM.
|
||||
//
|
||||
// See http://research.swtch.com/sparse for the deets.
|
||||
fn new(which: MatchKind, num_insts: uint, ncaps: uint) -> Threads {
|
||||
Threads {
|
||||
which: which,
|
||||
queue: range(0, num_insts).map(|_| {
|
||||
Thread { pc: 0, groups: repeat(None).take(ncaps * 2).collect() }
|
||||
}).collect(),
|
||||
sparse: repeat(0u).take(num_insts).collect(),
|
||||
size: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, pc: uint, groups: &[Option<uint>], empty: bool) {
|
||||
let t = &mut self.queue[self.size];
|
||||
t.pc = pc;
|
||||
match (empty, self.which) {
|
||||
(_, Exists) | (true, _) => {},
|
||||
(false, Location) => {
|
||||
t.groups[0] = groups[0];
|
||||
t.groups[1] = groups[1];
|
||||
}
|
||||
(false, Submatches) => {
|
||||
for (slot, val) in t.groups.iter_mut().zip(groups.iter()) {
|
||||
*slot = *val;
|
||||
}
|
||||
}
|
||||
}
|
||||
self.sparse[pc] = self.size;
|
||||
self.size += 1;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains(&self, pc: uint) -> bool {
|
||||
let s = self.sparse[pc];
|
||||
s < self.size && self.queue[s].pc == pc
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn empty(&mut self) {
|
||||
self.size = 0;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn pc(&self, i: uint) -> uint {
|
||||
self.queue[i].pc
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn groups<'r>(&'r mut self, i: uint) -> &'r mut [Option<uint>] {
|
||||
let q = &mut self.queue[i];
|
||||
q.groups.as_mut_slice()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the character is a word character, according to the
|
||||
/// (Unicode friendly) Perl character class '\w'.
|
||||
/// Note that this is only used for testing word boundaries. The actual '\w'
|
||||
/// is encoded as a CharClass instruction.
|
||||
pub fn is_word(c: Option<char>) -> bool {
|
||||
let c = match c {
|
||||
None => return false,
|
||||
Some(c) => c,
|
||||
};
|
||||
// Try the common ASCII case before invoking binary search.
|
||||
match c {
|
||||
'_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z' => true,
|
||||
_ => PERLW.binary_search_by(|&(start, end)| {
|
||||
if c >= start && c <= end {
|
||||
Equal
|
||||
} else if start > c {
|
||||
Greater
|
||||
} else {
|
||||
Less
|
||||
}
|
||||
}).is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a character and a single character class range, return an ordering
|
||||
/// indicating whether the character is less than the start of the range,
|
||||
/// in the range (inclusive) or greater than the end of the range.
|
||||
///
|
||||
/// If `casei` is `true`, then this ordering is computed case insensitively.
|
||||
///
|
||||
/// This function is meant to be used with a binary search.
|
||||
#[inline]
|
||||
fn class_cmp(casei: bool, mut textc: char,
|
||||
(mut start, mut end): (char, char)) -> Ordering {
|
||||
if casei {
|
||||
// FIXME: This is pretty ridiculous. All of this case conversion
|
||||
// can be moved outside this function:
|
||||
// 1) textc should be uppercased outside the bsearch.
|
||||
// 2) the character class itself should be uppercased either in the
|
||||
// parser or the compiler.
|
||||
// FIXME: This is too simplistic for correct Unicode support.
|
||||
// See also: char_eq
|
||||
textc = textc.to_uppercase();
|
||||
start = start.to_uppercase();
|
||||
end = end.to_uppercase();
|
||||
}
|
||||
if textc >= start && textc <= end {
|
||||
Equal
|
||||
} else if start > textc {
|
||||
Greater
|
||||
} else {
|
||||
Less
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the starting location of `needle` in `haystack`.
|
||||
/// If `needle` is not in `haystack`, then `None` is returned.
|
||||
///
|
||||
/// Note that this is using a naive substring algorithm.
|
||||
#[inline]
|
||||
pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option<uint> {
|
||||
let (hlen, nlen) = (haystack.len(), needle.len());
|
||||
if nlen > hlen || nlen == 0 {
|
||||
return None
|
||||
}
|
||||
for (offset, window) in haystack.windows(nlen).enumerate() {
|
||||
if window == needle {
|
||||
return Some(offset)
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
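The `Threads` queue in the VM removed above gets its constant-time `contains`/`add` from the sparse-set trick it cites (research.swtch.com/sparse). A self-contained sketch of that data structure in current Rust, illustrative only rather than the removed code:

```rust
/// Sparse-set membership over the integers 0..capacity, as described at
/// http://research.swtch.com/sparse: `insert`, `contains` and `clear` are
/// all cheap. (In C the sparse array may stay uninitialized; here it is
/// zero-filled for safety.)
struct SparseSet {
    dense: Vec<usize>,  // members in insertion order
    sparse: Vec<usize>, // sparse[v] = position of v in `dense`, if any
}

impl SparseSet {
    fn new(capacity: usize) -> SparseSet {
        SparseSet { dense: Vec::with_capacity(capacity), sparse: vec![0; capacity] }
    }

    fn contains(&self, v: usize) -> bool {
        let i = self.sparse[v];
        i < self.dense.len() && self.dense[i] == v
    }

    fn insert(&mut self, v: usize) {
        if !self.contains(v) {
            self.sparse[v] = self.dense.len();
            self.dense.push(v);
        }
    }

    fn clear(&mut self) {
        self.dense.clear(); // cheap reset between VM steps
    }
}

fn main() {
    let mut threads = SparseSet::new(8);
    threads.insert(3);
    threads.insert(5);
    assert!(threads.contains(3) && threads.contains(5) && !threads.contains(4));
    threads.clear();
    assert!(!threads.contains(3));
}
```

The pay-off is that the two thread lists can be reset on every input character without touching their storage, which is why the VM above allocates them once up front.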
@ -37,7 +37,6 @@ extern crate fmt_macros;
|
||||
extern crate getopts;
|
||||
extern crate graphviz;
|
||||
extern crate libc;
|
||||
extern crate regex;
|
||||
extern crate rustc_llvm;
|
||||
extern crate rustc_back;
|
||||
extern crate serialize;
|
||||
|
@ -15,8 +15,6 @@ use metadata::filesearch;
|
||||
use session::search_paths::PathKind;
|
||||
use util::nodemap::NodeMap;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use syntax::ast::NodeId;
|
||||
use syntax::codemap::Span;
|
||||
use syntax::diagnostic::{self, Emitter};
|
||||
@ -253,50 +251,54 @@ fn split_msg_into_multilines(msg: &str) -> Option<String> {
|
||||
!msg.contains("structure constructor specifies a structure of type") {
|
||||
return None
|
||||
}
|
||||
|
||||
let first = Regex::new(r"[( ]expected").unwrap();
|
||||
let second = Regex::new(r" found").unwrap();
|
||||
let third = Regex::new(
|
||||
r"\((values differ|lifetime|cyclic type of infinite size)").unwrap();
|
||||
let first = msg.match_indices("expected").filter(|s| {
|
||||
s.0 > 0 && (msg.char_at_reverse(s.0) == ' ' ||
|
||||
msg.char_at_reverse(s.0) == '(')
|
||||
}).map(|(a, b)| (a - 1, b));
|
||||
let second = msg.match_indices("found").filter(|s| {
|
||||
msg.char_at_reverse(s.0) == ' '
|
||||
}).map(|(a, b)| (a - 1, b));
|
||||
|
||||
let mut new_msg = String::new();
|
||||
let mut head = 0u;
|
||||
|
||||
// Insert `\n` before expected and found.
|
||||
for (pos1, pos2) in first.find_iter(msg).zip(
|
||||
second.find_iter(msg)) {
|
||||
for (pos1, pos2) in first.zip(second) {
|
||||
new_msg = new_msg +
|
||||
// A `(` may be preceded by a space and it should be trimmed
|
||||
msg[head..pos1.0].trim_right() + // prefix
|
||||
"\n" + // insert before first
|
||||
&msg[pos1.0..pos1.1] + // insert what first matched
|
||||
&msg[pos1.1..pos2.0] + // between matches
|
||||
"\n " + // insert before second
|
||||
// 123
|
||||
// `expected` is 3 char longer than `found`. To align the types, `found` gets
|
||||
// 3 spaces prepended.
|
||||
&msg[pos2.0..pos2.1]; // insert what second matched
|
||||
// A `(` may be preceded by a space and it should be trimmed
|
||||
msg[head..pos1.0].trim_right() + // prefix
|
||||
"\n" + // insert before first
|
||||
&msg[pos1.0..pos1.1] + // insert what first matched
|
||||
&msg[pos1.1..pos2.0] + // between matches
|
||||
"\n " + // insert before second
|
||||
// 123
|
||||
// `expected` is 3 char longer than `found`. To align the types,
|
||||
// `found` gets 3 spaces prepended.
|
||||
&msg[pos2.0..pos2.1]; // insert what second matched
|
||||
|
||||
head = pos2.1;
|
||||
}
|
||||
|
||||
let mut tail = &msg[head..];
|
||||
let third = tail.find_str("(values differ")
|
||||
.or(tail.find_str("(lifetime"))
|
||||
.or(tail.find_str("(cyclic type of infinite size"));
|
||||
// Insert `\n` before any remaining messages which match.
|
||||
for pos in third.find_iter(tail).take(1) {
|
||||
// The end of the message may just be wrapped in `()` without `expected`/`found`.
|
||||
// Push this also to a new line and add the final tail after.
|
||||
if let Some(pos) = third {
|
||||
// The end of the message may just be wrapped in `()` without
|
||||
// `expected`/`found`. Push this also to a new line and add the
|
||||
// final tail after.
|
||||
new_msg = new_msg +
|
||||
// `(` is usually preceded by a space and should be trimmed.
|
||||
tail[..pos.0].trim_right() + // prefix
|
||||
"\n" + // insert before paren
|
||||
&tail[pos.0..]; // append the tail
|
||||
// `(` is usually preceded by a space and should be trimmed.
|
||||
tail[..pos].trim_right() + // prefix
|
||||
"\n" + // insert before paren
|
||||
&tail[pos..]; // append the tail
|
||||
|
||||
tail = "";
|
||||
}
|
||||
|
||||
new_msg.push_str(tail);
|
||||
|
||||
return Some(new_msg)
|
||||
return Some(new_msg);
|
||||
}
|
||||
|
||||
pub fn build_session(sopts: config::Options,
|
||||
|
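The rewritten hunk above drops the three `Regex::new` calls in favour of plain string searches for ` expected` and ` found`. A rough equivalent of that idea in current Rust std, as a sketch with a hypothetical function name:

```rust
// Break a type-error message onto separate lines before "expected" and
// "found", padding "found" so the two types line up (sketch only).
fn split_expected_found(msg: &str) -> String {
    let mut out = String::with_capacity(msg.len() + 8);
    let mut head = 0;
    let expected = msg.match_indices(" expected").map(|(i, _)| i);
    let found = msg.match_indices(" found").map(|(i, _)| i);
    for (e, f) in expected.zip(found) {
        out.push_str(msg[head..e].trim_end()); // drop the space before "expected"
        out.push('\n');
        out.push_str(&msg[e + 1..f]); // "expected ..." up to the space before "found"
        out.push_str("\n   ");        // "expected" is 3 chars longer than "found"
        head = f + 1;
    }
    out.push_str(&msg[head..]);
    out
}

fn main() {
    let msg = "mismatched types: expected `u32`, found `&str`";
    println!("{}", split_expected_found(msg));
}
```

As in the hunk, `found` is indented by three spaces so the two types line up vertically.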
@ -38,7 +38,6 @@
|
||||
#![allow(unstable)]
|
||||
|
||||
extern crate getopts;
|
||||
extern crate regex;
|
||||
extern crate serialize;
|
||||
extern crate "serialize" as rustc_serialize;
|
||||
extern crate term;
|
||||
@ -53,7 +52,6 @@ use self::OutputLocation::*;
|
||||
|
||||
use stats::Stats;
|
||||
use getopts::{OptGroup, optflag, optopt};
|
||||
use regex::Regex;
|
||||
use serialize::Encodable;
|
||||
use term::Terminal;
|
||||
use term::color::{Color, RED, YELLOW, GREEN, CYAN};
|
||||
@ -279,7 +277,7 @@ pub enum ColorConfig {
|
||||
}
|
||||
|
||||
pub struct TestOpts {
|
||||
pub filter: Option<Regex>,
|
||||
pub filter: Option<String>,
|
||||
pub run_ignored: bool,
|
||||
pub run_tests: bool,
|
||||
pub run_benchmarks: bool,
|
||||
@ -365,11 +363,7 @@ pub fn parse_opts(args: &[String]) -> Option<OptRes> {
|
||||
if matches.opt_present("h") { usage(args[0].as_slice()); return None; }
|
||||
|
||||
let filter = if matches.free.len() > 0 {
|
||||
let s = matches.free[0].as_slice();
|
||||
match Regex::new(s) {
|
||||
Ok(re) => Some(re),
|
||||
Err(e) => return Some(Err(format!("could not parse /{}/: {:?}", s, e)))
|
||||
}
|
||||
Some(matches.free[0].clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@ -833,9 +827,10 @@ pub fn filter_tests(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> Vec<TestDescA
|
||||
// Remove tests that don't match the test filter
|
||||
filtered = match opts.filter {
|
||||
None => filtered,
|
||||
Some(ref re) => {
|
||||
filtered.into_iter()
|
||||
.filter(|test| re.is_match(test.desc.name.as_slice())).collect()
|
||||
Some(ref filter) => {
|
||||
filtered.into_iter().filter(|test| {
|
||||
test.desc.name.as_slice().contains(&filter[])
|
||||
}).collect()
|
||||
}
|
||||
};
|
||||
|
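The `filter_tests` hunk above swaps `re.is_match` for `contains`, so a filter now selects any test whose name contains it as a literal substring. A minimal sketch of the new check:

```rust
// Sketch of the new filtering rule shown above: a test runs when its name
// contains the filter string anywhere; regex syntax is no longer interpreted.
fn matches_filter(filter: Option<&str>, test_name: &str) -> bool {
    match filter {
        None => true,
        Some(f) => test_name.contains(f),
    }
}

fn main() {
    assert!(matches_filter(Some("abc"), "suite::abc_works"));
    assert!(matches_filter(None, "anything"));
    // A pattern that used to be a regex is now taken literally:
    assert!(!matches_filter(Some("a.*c"), "suite::abc_works"));
}
```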
||||
@ -1230,16 +1225,6 @@ mod tests {
|
||||
assert!(res == TrFailed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn first_free_arg_should_be_a_filter() {
|
||||
let args = vec!("progname".to_string(), "some_regex_filter".to_string());
|
||||
let opts = match parse_opts(args.as_slice()) {
|
||||
Some(Ok(o)) => o,
|
||||
_ => panic!("Malformed arg in first_free_arg_should_be_a_filter")
|
||||
};
|
||||
assert!(opts.filter.expect("should've found filter").is_match("some_regex_filter"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_ignored_flag() {
|
||||
let args = vec!("progname".to_string(),
|
||||
@ -1336,37 +1321,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn filter_tests_regex() {
|
||||
let mut opts = TestOpts::new();
|
||||
opts.filter = Some(::regex::Regex::new("a.*b.+c").unwrap());
|
||||
|
||||
let mut names = ["yes::abXc", "yes::aXXXbXXXXc",
|
||||
"no::XYZ", "no::abc"];
|
||||
names.sort();
|
||||
|
||||
fn test_fn() {}
|
||||
let tests = names.iter().map(|name| {
|
||||
TestDescAndFn {
|
||||
desc: TestDesc {
|
||||
name: DynTestName(name.to_string()),
|
||||
ignore: false,
|
||||
should_fail: ShouldFail::No,
|
||||
},
|
||||
testfn: DynTestFn(Thunk::new(test_fn))
|
||||
}
|
||||
}).collect();
|
||||
let filtered = filter_tests(&opts, tests);
|
||||
|
||||
let expected: Vec<&str> =
|
||||
names.iter().map(|&s| s).filter(|name| name.starts_with("yes")).collect();
|
||||
|
||||
assert_eq!(filtered.len(), expected.len());
|
||||
for (test, expected_name) in filtered.iter().zip(expected.iter()) {
|
||||
assert_eq!(test.desc.name.as_slice(), *expected_name);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_metricmap_compare() {
|
||||
let mut m1 = MetricMap::new();
|
||||
|
@ -13,7 +13,6 @@
|
||||
use std::io::BufferedReader;
|
||||
use std::iter;
|
||||
use std::iter::AdditiveIterator;
|
||||
use regex::Regex;
|
||||
|
||||
pub struct BookItem {
|
||||
pub title: String,
|
||||
@ -94,8 +93,6 @@ pub fn parse_summary<R: Reader>(input: R, src: &Path) -> Result<Book, Vec<String
|
||||
}
|
||||
}
|
||||
|
||||
let regex = r"(?P<indent>[\t ]*)\*[:space:]*\[(?P<title>.*)\]\((?P<path>.*)\)";
|
||||
let item_re = Regex::new(regex).unwrap();
|
||||
let mut top_items = vec!();
|
||||
let mut stack = vec!();
|
||||
let mut errors = vec!();
|
||||
@ -117,45 +114,51 @@ pub fn parse_summary<R: Reader>(input: R, src: &Path) -> Result<Book, Vec<String
|
||||
}
|
||||
};
|
||||
|
||||
item_re.captures(&line[]).map(|cap| {
|
||||
let given_path = cap.name("path");
|
||||
let title = cap.name("title").unwrap().to_string();
|
||||
let star_idx = match line.find_str("*") { Some(i) => i, None => continue };
|
||||
|
||||
let path_from_root = match src.join(given_path.unwrap()).path_relative_from(src) {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
errors.push(format!("paths in SUMMARY.md must be relative, \
|
||||
but path '{}' for section '{}' is not.",
|
||||
given_path.unwrap(), title));
|
||||
Path::new("")
|
||||
}
|
||||
};
|
||||
let path_to_root = Path::new(iter::repeat("../")
|
||||
.take(path_from_root.components().count() - 1)
|
||||
.collect::<String>());
|
||||
let item = BookItem {
|
||||
title: title,
|
||||
path: path_from_root,
|
||||
path_to_root: path_to_root,
|
||||
children: vec!(),
|
||||
};
|
||||
let level = cap.name("indent").unwrap().chars().map(|c| {
|
||||
match c {
|
||||
' ' => 1us,
|
||||
'\t' => 4,
|
||||
_ => unreachable!()
|
||||
}
|
||||
}).sum() / 4 + 1;
|
||||
let start_bracket = star_idx + line[star_idx..].find_str("[").unwrap();
|
||||
let end_bracket = start_bracket + line[start_bracket..].find_str("](").unwrap();
|
||||
let start_paren = end_bracket + 1;
|
||||
let end_paren = start_paren + line[start_paren..].find_str(")").unwrap();
|
||||
|
||||
if level > stack.len() + 1 {
|
||||
errors.push(format!("section '{}' is indented too deeply; \
|
||||
found {}, expected {} or less",
|
||||
item.title, level, stack.len() + 1));
|
||||
} else if level <= stack.len() {
|
||||
collapse(&mut stack, &mut top_items, level);
|
||||
let given_path = &line[start_paren + 1 .. end_paren];
|
||||
let title = line[start_bracket + 1..end_bracket].to_string();
|
||||
let indent = &line[..star_idx];
|
||||
|
||||
let path_from_root = match src.join(given_path).path_relative_from(src) {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
errors.push(format!("paths in SUMMARY.md must be relative, \
|
||||
but path '{}' for section '{}' is not.",
|
||||
given_path, title));
|
||||
Path::new("")
|
||||
}
|
||||
stack.push(item)
|
||||
});
|
||||
};
|
||||
let path_to_root = Path::new(iter::repeat("../")
|
||||
.take(path_from_root.components().count() - 1)
|
||||
.collect::<String>());
|
||||
let item = BookItem {
|
||||
title: title,
|
||||
path: path_from_root,
|
||||
path_to_root: path_to_root,
|
||||
children: vec!(),
|
||||
};
|
||||
let level = indent.chars().map(|c| {
|
||||
match c {
|
||||
' ' => 1us,
|
||||
'\t' => 4,
|
||||
_ => unreachable!()
|
||||
}
|
||||
}).sum() / 4 + 1;
|
||||
|
||||
if level > stack.len() + 1 {
|
||||
errors.push(format!("section '{}' is indented too deeply; \
|
||||
found {}, expected {} or less",
|
||||
item.title, level, stack.len() + 1));
|
||||
} else if level <= stack.len() {
|
||||
collapse(&mut stack, &mut top_items, level);
|
||||
}
|
||||
stack.push(item)
|
||||
}
|
||||
|
||||
if errors.is_empty() {
|
||||
|
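The `parse_summary` hunk above replaces the named capture groups with `find_str` index arithmetic over the `* [title](path)` line. A compact sketch of the same extraction in current Rust (hypothetical function, std only):

```rust
// Pull the indent level, title and path out of a SUMMARY.md line like
// "    * [Getting Started](getting-started.md)" using plain string searches.
fn parse_summary_line(line: &str) -> Option<(usize, &str, &str)> {
    let star = line.find('*')?;
    let open = star + line[star..].find('[')?;
    let close = open + line[open..].find("](")?;
    let end = close + line[close..].find(')')?;
    // Same rule as the hunk: spaces count one, tabs four, divided by four plus one.
    let indent = line[..star].chars().map(|c| if c == '\t' { 4 } else { 1 }).sum::<usize>() / 4 + 1;
    Some((indent, &line[open + 1..close], &line[close + 2..end]))
}

fn main() {
    let (level, title, path) =
        parse_summary_line("    * [Getting Started](getting-started.md)").unwrap();
    assert_eq!((level, title, path), (2, "Getting Started", "getting-started.md"));
}
```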
@ -22,8 +22,6 @@ use book::{Book, BookItem};
|
||||
use css;
|
||||
use javascript;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use rustdoc;
|
||||
|
||||
struct Build;
|
||||
@ -81,9 +79,6 @@ fn render(book: &Book, tgt: &Path) -> CliResult<()> {
|
||||
|
||||
let out_path = tgt.join(item.path.dirname());
|
||||
|
||||
let regex = r"\[(?P<title>[^]]*)\]\((?P<url_stem>[^)]*)\.(?P<ext>md|markdown)\)";
|
||||
let md_urls = Regex::new(regex).unwrap();
|
||||
|
||||
let src;
|
||||
if os::args().len() < 3 {
|
||||
src = os::getcwd().unwrap().clone();
|
||||
@ -94,7 +89,7 @@ fn render(book: &Book, tgt: &Path) -> CliResult<()> {
|
||||
let markdown_data = try!(File::open(&src.join(&item.path)).read_to_string());
|
||||
let preprocessed_path = tmp.path().join(item.path.filename().unwrap());
|
||||
{
|
||||
let urls = md_urls.replace_all(&markdown_data[], "[$title]($url_stem.html)");
|
||||
let urls = markdown_data.replace(".md)", ".html)");
|
||||
try!(File::create(&preprocessed_path)
|
||||
.write_str(&urls[]));
|
||||
}
|
||||
|
@ -11,8 +11,6 @@
|
||||
#![feature(slicing_syntax, box_syntax)]
|
||||
#![allow(unstable)]
|
||||
|
||||
extern crate regex;
|
||||
|
||||
extern crate rustdoc;
|
||||
|
||||
use std::os;
|
||||
|
@ -1,126 +0,0 @@
|
||||
// The Computer Language Benchmarks Game
|
||||
// http://benchmarksgame.alioth.debian.org/
|
||||
//
|
||||
// contributed by the Rust Project Developers
|
||||
|
||||
// Copyright (c) 2014 The Rust Project Developers
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in
|
||||
// the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
//
|
||||
// - Neither the name of "The Computer Language Benchmarks Game" nor
|
||||
// the name of "The Computer Language Shootout Benchmarks" nor the
|
||||
// names of its contributors may be used to endorse or promote
|
||||
// products derived from this software without specific prior
|
||||
// written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
// OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// ignore-stage1
|
||||
// ignore-cross-compile #12102
|
||||
|
||||
#![feature(box_syntax)]
|
||||
|
||||
extern crate regex;
|
||||
|
||||
use std::io;
|
||||
use regex::{NoExpand, Regex};
|
||||
use std::sync::{Arc, Future};
|
||||
|
||||
macro_rules! regex {
|
||||
($e:expr) => (Regex::new($e).unwrap())
|
||||
}
|
||||
|
||||
fn count_matches(seq: &str, variant: &Regex) -> int {
|
||||
let mut n = 0;
|
||||
for _ in variant.find_iter(seq) {
|
||||
n += 1;
|
||||
}
|
||||
n
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let mut rdr = if std::os::getenv("RUST_BENCH").is_some() {
|
||||
let fd = io::File::open(&Path::new("shootout-k-nucleotide.data"));
|
||||
box io::BufferedReader::new(fd) as Box<io::Reader>
|
||||
} else {
|
||||
box io::stdin() as Box<io::Reader>
|
||||
};
|
||||
let mut seq = rdr.read_to_string().unwrap();
|
||||
let ilen = seq.len();
|
||||
|
||||
seq = regex!(">[^\n]*\n|\n").replace_all(seq.as_slice(), NoExpand(""));
|
||||
let seq_arc = Arc::new(seq.clone()); // copy before it moves
|
||||
let clen = seq.len();
|
||||
|
||||
let mut seqlen = Future::spawn(move|| {
|
||||
let substs = vec![
|
||||
(regex!("B"), "(c|g|t)"),
|
||||
(regex!("D"), "(a|g|t)"),
|
||||
(regex!("H"), "(a|c|t)"),
|
||||
(regex!("K"), "(g|t)"),
|
||||
(regex!("M"), "(a|c)"),
|
||||
(regex!("N"), "(a|c|g|t)"),
|
||||
(regex!("R"), "(a|g)"),
|
||||
(regex!("S"), "(c|g)"),
|
||||
(regex!("V"), "(a|c|g)"),
|
||||
(regex!("W"), "(a|t)"),
|
||||
(regex!("Y"), "(c|t)"),
|
||||
];
|
||||
let mut seq = seq;
|
||||
for (re, replacement) in substs.into_iter() {
|
||||
seq = re.replace_all(seq.as_slice(), NoExpand(replacement));
|
||||
}
|
||||
seq.len()
|
||||
});
|
||||
|
||||
let variants = vec![
|
||||
regex!("agggtaaa|tttaccct"),
|
||||
regex!("[cgt]gggtaaa|tttaccc[acg]"),
|
||||
regex!("a[act]ggtaaa|tttacc[agt]t"),
|
||||
regex!("ag[act]gtaaa|tttac[agt]ct"),
|
||||
regex!("agg[act]taaa|ttta[agt]cct"),
|
||||
regex!("aggg[acg]aaa|ttt[cgt]ccct"),
|
||||
regex!("agggt[cgt]aa|tt[acg]accct"),
|
||||
regex!("agggta[cgt]a|t[acg]taccct"),
|
||||
regex!("agggtaa[cgt]|[acg]ttaccct"),
|
||||
];
|
||||
let (mut variant_strs, mut counts) = (vec!(), vec!());
|
||||
for variant in variants.into_iter() {
|
||||
let seq_arc_copy = seq_arc.clone();
|
||||
variant_strs.push(variant.to_string());
|
||||
counts.push(Future::spawn(move|| {
|
||||
count_matches(seq_arc_copy.as_slice(), &variant)
|
||||
}));
|
||||
}
|
||||
|
||||
for (i, variant) in variant_strs.iter().enumerate() {
|
||||
println!("{} {}", variant, counts[i].get());
|
||||
}
|
||||
println!("");
|
||||
println!("{}", ilen);
|
||||
println!("{}", clen);
|
||||
println!("{}", seqlen.get());
|
||||
}
|
@ -8,7 +8,7 @@
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// exec-env:RUST_LOG=rust-log-filter/f.o
|
||||
// exec-env:RUST_LOG=rust-log-filter/foo
|
||||
|
||||
#![allow(unknown_features)]
|
||||
#![feature(box_syntax)]
|
||||
@ -42,18 +42,14 @@ pub fn main() {
|
||||
let _t = Thread::spawn(move|| {
|
||||
log::set_logger(logger);
|
||||
|
||||
// our regex is "f.o"
|
||||
// ensure it is a regex, and isn't anchored
|
||||
info!("foo");
|
||||
info!("bar");
|
||||
info!("foo bar");
|
||||
info!("bar foo");
|
||||
info!("f1o");
|
||||
});
|
||||
|
||||
assert_eq!(rx.recv().unwrap().as_slice(), "foo");
|
||||
assert_eq!(rx.recv().unwrap().as_slice(), "foo bar");
|
||||
assert_eq!(rx.recv().unwrap().as_slice(), "bar foo");
|
||||
assert_eq!(rx.recv().unwrap().as_slice(), "f1o");
|
||||
assert!(rx.recv().is_err());
|
||||
}
|
||||
|
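The run-pass test above now passes the literal filter `foo` where it previously passed the regex `f.o`. The filtering itself happens in the log crate and is not part of this hunk, but the behavioural difference is easy to show with a plain substring check (hypothetical helper, not the crate's API):

```rust
// Minimal illustration: with the regex filter gone, the string after `/` in
// RUST_LOG is matched as a plain substring of the log message, so a pattern
// like "f.o" no longer works and the test had to switch to the literal "foo".
fn passes(filter: &str, message: &str) -> bool {
    message.contains(filter)
}

fn main() {
    // The old filter "f.o" was a regex and matched "foo"; as a substring it does not.
    assert!(!passes("f.o", "foo"));
    // The literal filter "foo" still lets these messages through.
    assert!(passes("foo", "foo"));
    assert!(passes("foo", "foo bar"));
    assert!(passes("foo", "bar foo"));
    // And plain "bar" is filtered out.
    assert!(!passes("foo", "bar"));
}
```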