libstdc++: Improve std::regex_error::what() strings

This replaces the vague "regex_error" for std::regex_error::what() with
a string that corresponds to the error_type enum passed to the
constructor. This allows us to remove many of the strings passed to
__throw_regex_error, because the default string is at least as good.
When a string argument to __throw_regex_error is kept it should add some
context-specific detail absent from the default string.

Also remove full stops (periods) from the end of those strings, to make
it easier to include them in logs and other output. I've left them
starting with an upper-case letter, which is consistent with strerror
output for (at least) Glibc, Solaris and BSD. I'm ambivalent about
whether that's the right choice.

This also adds the missing noreturn attribute to __throw_regex_error.

libstdc++-v3/ChangeLog:

	* include/bits/regex_compiler.tcc: Adjust all calls to
	__throw_regex_error.
	* include/bits/regex_error.h (__throw_regex_error): Add noreturn
	attribute.
	* include/bits/regex_scanner.tcc: Adjust all calls to
	__throw_regex_error.
	* src/c++11/regex.cc (desc): New helper function.
	(regex_error::regex_error(error_type)): Use desc to get a string
	corresponding to the error code.
This commit is contained in:
Jonathan Wakely 2021-12-16 13:39:09 +00:00
parent b1e701dc4a
commit 260a5334ee
4 changed files with 111 additions and 76 deletions

View File

@ -157,8 +157,7 @@ namespace __detail
auto __neg = _M_value[0] == 'n';
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren,
"Parenthesis is not closed.");
__throw_regex_error(regex_constants::error_paren);
auto __tmp = _M_pop();
__tmp._M_append(_M_nfa->_M_insert_accept());
_M_stack.push(
@ -180,8 +179,7 @@ namespace __detail
auto __init = [this, &__neg]()
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat,
"Nothing to repeat before a quantifier.");
__throw_regex_error(regex_constants::error_badrepeat);
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
};
if (_M_match_token(_ScannerT::_S_token_closure0))
@ -217,11 +215,9 @@ namespace __detail
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat,
"Nothing to repeat before a quantifier.");
__throw_regex_error(regex_constants::error_badrepeat);
if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace,
"Unexpected token in brace expression.");
__throw_regex_error(regex_constants::error_badbrace);
_StateSeqT __r(_M_pop());
_StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy());
long __min_rep = _M_cur_int_value(10);
@ -237,8 +233,7 @@ namespace __detail
__infi = true;
}
if (!_M_match_token(_ScannerT::_S_token_interval_end))
__throw_regex_error(regex_constants::error_brace,
"Unexpected end of brace expression.");
__throw_regex_error(regex_constants::error_brace);
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
@ -257,8 +252,7 @@ namespace __detail
else
{
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace,
"Invalid range in brace expression.");
__throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa->_M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
@ -325,8 +319,7 @@ namespace __detail
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren,
"Parenthesis is not closed.");
__throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_pop());
_M_stack.push(__r);
}
@ -335,8 +328,7 @@ namespace __detail
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin());
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren,
"Parenthesis is not closed.");
__throw_regex_error(regex_constants::error_paren);
__r._M_append(_M_pop());
__r._M_append(_M_nfa->_M_insert_subexpr_end());
_M_stack.push(__r);
@ -503,7 +495,8 @@ namespace __detail
{
// "\\w-" is invalid, start of range must be a single char.
__throw_regex_error(regex_constants::error_range,
"Invalid start of range in bracket expression.");
"Invalid start of '[x-x]' range in "
"regular expression");
}
else if (__last_char._M_is_char())
{
@ -521,7 +514,8 @@ namespace __detail
}
else
__throw_regex_error(regex_constants::error_range,
"Invalid end of range in bracket expression.");
"Invalid end of '[x-x]' range in "
"regular expression");
}
else if (_M_flags & regex_constants::ECMAScript)
{
@ -532,7 +526,8 @@ namespace __detail
}
else
__throw_regex_error(regex_constants::error_range,
"Invalid dash in bracket expression.");
"Invalid location of '-' within '[...]' in "
"POSIX regular expression");
}
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
{
@ -543,8 +538,8 @@ namespace __detail
}
else
__throw_regex_error(regex_constants::error_brack,
"Unexpected character in bracket expression.");
"Unexpected character within '[...]' in "
"regular expression");
return true;
}

View File

@ -133,7 +133,9 @@ namespace regex_constants
*/
class regex_error : public std::runtime_error
{
regex_constants::error_type _M_code;
using error_type = regex_constants::error_type;
error_type _M_code;
public:
/**
@ -142,7 +144,7 @@ namespace regex_constants
* @param __ecode the regex error code.
*/
explicit
regex_error(regex_constants::error_type __ecode);
regex_error(error_type __ecode);
virtual ~regex_error() throw();
@ -156,23 +158,30 @@ namespace regex_constants
{ return _M_code; }
private:
regex_error(regex_constants::error_type __ecode, const char* __what)
regex_error(error_type __ecode, const char* __what)
: std::runtime_error(__what), _M_code(__ecode)
{ }
friend void __throw_regex_error(regex_constants::error_type, const char*);
[[__noreturn__]]
friend void
__throw_regex_error(error_type __ecode __attribute__((__unused__)),
const char* __what __attribute__((__unused__)))
{ _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
};
///@} // group regex
/// @cond undocumented
[[__noreturn__]]
void
__throw_regex_error(regex_constants::error_type __ecode);
[[__noreturn__]]
inline void
__throw_regex_error(regex_constants::error_type __ecode
__attribute__((__unused__)),
const char* __what __attribute__((__unused__)))
{ _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
__throw_regex_error(regex_constants::error_type __ecode, const char* __what);
/// @endcond
///@} // group regex
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std

View File

@ -108,7 +108,7 @@ namespace __detail
if (_M_current == _M_end)
__throw_regex_error(
regex_constants::error_escape,
"Unexpected end of regex when escaping.");
"Invalid escape at end of regular expression");
if (!_M_is_basic()
|| (*_M_current != '('
@ -125,9 +125,7 @@ namespace __detail
if (_M_is_ecma() && *_M_current == '?')
{
if (++_M_current == _M_end)
__throw_regex_error(
regex_constants::error_paren,
"Unexpected end of regex when in an open parenthesis.");
__throw_regex_error(regex_constants::error_paren);
if (*_M_current == ':')
{
@ -147,9 +145,9 @@ namespace __detail
_M_value.assign(1, 'n');
}
else
__throw_regex_error(
regex_constants::error_paren,
"Invalid special open parenthesis.");
__throw_regex_error(regex_constants::error_paren,
"Invalid '(?...)' zero-width assertion "
"in regular expression");
}
else if (_M_flags & regex_constants::nosubs)
_M_token = _S_token_subexpr_no_group_begin;
@ -178,10 +176,7 @@ namespace __detail
else if (__builtin_expect(__c == _CharT(0), false))
{
if (!_M_is_ecma())
{
__throw_regex_error(regex_constants::_S_null,
"Unexpected null character in regular expression");
}
__throw_regex_error(regex_constants::_S_null);
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
}
@ -213,9 +208,7 @@ namespace __detail
_M_scan_in_bracket()
{
if (_M_current == _M_end)
__throw_regex_error(
regex_constants::error_brack,
"Unexpected end of regex when in bracket expression.");
__throw_regex_error(regex_constants::error_brack);
auto __c = *_M_current++;
@ -225,7 +218,8 @@ namespace __detail
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_brack,
"Unexpected character class open bracket.");
"Incomplete '[[' character class in "
"regular expression");
if (*_M_current == '.')
{
@ -250,7 +244,7 @@ namespace __detail
}
// In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
// literally. So "[]]" and "[^]]" are valid regexes. See the testcases
// `*/empty_range.cc`.
// `.../empty_range.cc`.
else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
{
_M_token = _S_token_bracket_end;
@ -275,9 +269,7 @@ namespace __detail
_M_scan_in_brace()
{
if (_M_current == _M_end)
__throw_regex_error(
regex_constants::error_brace,
"Unexpected end of regex when in brace expression.");
__throw_regex_error(regex_constants::error_brace);
auto __c = *_M_current++;
@ -301,8 +293,7 @@ namespace __detail
++_M_current;
}
else
__throw_regex_error(regex_constants::error_badbrace,
"Unexpected character in brace expression.");
__throw_regex_error(regex_constants::error_badbrace);
}
else if (__c == '}')
{
@ -310,8 +301,7 @@ namespace __detail
_M_token = _S_token_interval_end;
}
else
__throw_regex_error(regex_constants::error_badbrace,
"Unexpected character in brace expression.");
__throw_regex_error(regex_constants::error_badbrace);
}
template<typename _CharT>
@ -320,8 +310,7 @@ namespace __detail
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape,
"Unexpected end of regex when escaping.");
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
@ -355,22 +344,26 @@ namespace __detail
else if (__c == 'c')
{
if (_M_current == _M_end)
__throw_regex_error(
regex_constants::error_escape,
"Unexpected end of regex when reading control code.");
__throw_regex_error(regex_constants::error_escape,
"invalid '\\cX' control character in "
"regular expression");
_M_token = _S_token_ord_char;
_M_value.assign(1, *_M_current++);
}
else if (__c == 'x' || __c == 'u')
{
_M_value.erase();
for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++)
_M_value.clear();
const int __n = __c == 'x' ? 2 : 4;
for (int __i = 0; __i < __n; __i++)
{
if (_M_current == _M_end
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
__throw_regex_error(
regex_constants::error_escape,
"Unexpected end of regex when ascii character.");
__throw_regex_error(regex_constants::error_escape,
__n == 2
? "Invalid '\\xNN' control character in "
"regular expression"
: "Invalid '\\uNNNN' control character in "
"regular expression");
_M_value += *_M_current++;
}
_M_token = _S_token_hex_num;
@ -399,8 +392,7 @@ namespace __detail
_M_eat_escape_posix()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape,
"Unexpected end of regex when escaping.");
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current;
auto __pos = __builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
@ -425,8 +417,7 @@ namespace __detail
{
#ifdef __STRICT_ANSI__
// POSIX says it is undefined to escape ordinary characters
__throw_regex_error(regex_constants::error_escape,
"Unexpected escape character.");
__throw_regex_error(regex_constants::error_escape);
#else
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@ -466,8 +457,7 @@ namespace __detail
return;
}
else
__throw_regex_error(regex_constants::error_escape,
"Unexpected escape character.");
__throw_regex_error(regex_constants::error_escape);
}
// Eats a character class or throws an exception.
@ -485,12 +475,8 @@ namespace __detail
|| _M_current == _M_end // skip __ch
|| *_M_current++ != ']') // skip ']'
{
if (__ch == ':')
__throw_regex_error(regex_constants::error_ctype,
"Unexpected end of character class.");
else
__throw_regex_error(regex_constants::error_collate,
"Unexpected end of character class.");
__throw_regex_error(__ch == ':' ? regex_constants::error_ctype
: regex_constants::error_collate);
}
}

View File

@ -35,8 +35,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__attribute__((unused)))
{ _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode)); }
namespace
{
  // Return a human-readable description for the regex error code E.
  //
  // The result always points to a string literal, so it is never null
  // and needs no cleanup.  The regex_error(error_type) constructor
  // passes it to std::runtime_error, so it becomes the what() string
  // for exceptions constructed from just an error code.
  //
  // Messages start with an upper-case letter and have no trailing full
  // stop, so that they are easy to embed in logs and other output.
  const char*
  desc(regex_constants::error_type e)
  {
    using namespace regex_constants;
    switch (e)
      {
      case error_collate:
	return "Invalid collating element in regular expression";
      case error_ctype:
	return "Invalid character class in regular expression";
      case error_escape:
	return "Invalid escape in regular expression";
      case error_backref:
	return "Invalid back reference in regular expression";
      case error_brack:
	return "Mismatched '[' and ']' in regular expression";
      case error_paren:
	return "Mismatched '(' and ')' in regular expression";
      case error_brace:
	return "Mismatched '{' and '}' in regular expression";
      case error_badbrace:
	return "Invalid range in '{}' in regular expression";
      case error_range:
	return "Invalid character range in regular expression";
      case error_space:
	return "Insufficient memory to compile regular expression";
      case error_badrepeat:
	return "Invalid '?', '*', or '+' in regular expression";
      case error_complexity:
	return "Complexity of regex match exceeded implementation limits";
      case error_stack:
	return "Insufficient memory to determine regex match";
      case _S_null:
	return "Unexpected null character in regular expression";
      case _S_grammar:
	return "Conflicting regex grammar options";
      default:
	// Any value not listed above still gets a valid, generic message.
	return "regex error";
      }
  }
}
regex_error::regex_error(regex_constants::error_type __ecode)
: std::runtime_error("regex_error"), _M_code(__ecode)
: std::runtime_error(desc(__ecode)), _M_code(__ecode)
{ }
regex_error::~regex_error() throw() { }