libstdc++: std::basic_regex should treat '\0' as an ordinary char [PR84110]
When the input sequence contains a _CharT(0) character, the strchr call in _Scanner<_CharT>::_M_scan_normal() will search for '\0' and so return a pointer to the terminating null at the end of the string. This makes the scanner think it's found a special character. Because it doesn't match any of the actual special characters, we fall off the end of the function (or assert in debug mode).

We should check for a null character explicitly and either treat it as an ordinary character (for the ECMAScript grammar) or an error (for all others). I'm not 100% sure that's right, but it seems consistent with the POSIX RE rules where a '\0' means the end of the regex pattern or the end of the sequence being matched.

Signed-off-by: Jonathan Wakely <jwakely@redhat.com>

libstdc++-v3/ChangeLog:

	PR libstdc++/84110
	* include/bits/regex_error.h (regex_constants::_S_null): New error code for internal use.
	* include/bits/regex_scanner.tcc (_Scanner::_M_scan_normal()): Check for null character.
	* testsuite/28_regex/basic_regex/84110.cc: New test.
This commit is contained in:
parent
b59be1adba
commit
b701e1f8f6
@ -61,6 +61,7 @@ namespace regex_constants
|
||||
_S_error_badrepeat,
|
||||
_S_error_complexity,
|
||||
_S_error_stack,
|
||||
_S_null
|
||||
};
|
||||
|
||||
/** The expression contained an invalid collating element name. */
|
||||
|
@ -175,6 +175,16 @@ namespace __detail
|
||||
_M_state = _S_state_in_brace;
|
||||
_M_token = _S_token_interval_begin;
|
||||
}
|
||||
else if (__builtin_expect(__c == _CharT(0), false))
|
||||
{
|
||||
if (!_M_is_ecma())
|
||||
{
|
||||
__throw_regex_error(regex_constants::_S_null,
|
||||
"Unexpected null character in regular expression");
|
||||
}
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
else if (__c != ']' && __c != '}')
|
||||
{
|
||||
auto __it = _M_token_tbl;
|
||||
|
39
libstdc++-v3/testsuite/28_regex/basic_regex/84110.cc
Normal file
39
libstdc++-v3/testsuite/28_regex/basic_regex/84110.cc
Normal file
@ -0,0 +1,39 @@
|
||||
// { dg-do run { target c++11 } }
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
void test01()
|
||||
{
|
||||
const std::string s(1ul, '\0');
|
||||
std::regex re(s);
|
||||
VERIFY( std::regex_match(s, re) ); // PR libstdc++/84110
|
||||
|
||||
#if __cpp_exceptions
|
||||
using namespace std::regex_constants;
|
||||
for (auto syn : {basic, extended, awk, grep, egrep})
|
||||
{
|
||||
try
|
||||
{
|
||||
std::regex{s, syn}; // '\0' is not valid for other grammars
|
||||
VERIFY( false );
|
||||
}
|
||||
catch (const std::regex_error&)
|
||||
{
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void test02()
|
||||
{
|
||||
const std::string s("uh-\0h", 5);
|
||||
std::regex re(s);
|
||||
VERIFY( std::regex_match(s, re) );
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test01();
|
||||
test02();
|
||||
}
|
Loading…
Reference in New Issue
Block a user