libstdc++: Simplify std::basic_regex construction and assignment

Introduce a new _M_compile function which does the common work needed by
all constructors and assignment. Call that directly to avoid multiple
levels of constructor delegation or calls to basic_regex::assign
overloads.

For assignment, there is no need to construct a std::basic_string if we
already have a contiguous sequence of the correct character type, and no
need to construct a temporary basic_regex when assigning from an
existing basic_regex.

Also define the copy and move assignment operators as defaulted, which
does the right thing without constructing a temporary and swapping it.
Copying or moving the shared_ptr member cannot fail, so they can be
noexcept. The assign(const basic_regex&) and assign(basic_regex&&)
member can then be defined in terms of copy or move assignment.

The new _M_compile function takes pointer arguments, so the caller has
to convert arbitrary iterator ranges into a contiguous sequence of
characters. With that simplification, the __compile_nfa helpers are not
needed and can be removed.

This also fixes a bug where construction from a contiguous sequence with
the wrong character type would fail to compile, rather than converting
the elements to the regex character type.

Signed-off-by: Jonathan Wakely <jwakely@redhat.com>

libstdc++-v3/ChangeLog:

	* include/bits/regex.h (__detail::__is_contiguous_iter): Move
	here from <bits/regex_compiler.h>.
	(basic_regex::_M_compile): New function to compile an NFA from
	a regular expression string.
	(basic_regex::basic_regex): Use _M_compile instead of delegating
	to other constructors.
	(basic_regex::operator=(const basic_regex&)): Define as
	defaulted.
	(basic_regex::operator=(initializer_list<C>)): Use _M_compile.
	(basic_regex::assign(const basic_regex&)): Use copy assignment.
	(basic_regex::assign(basic_regex&&)): Use move assignment.
	(basic_regex::assign(const C*, flag_type)): Use _M_compile
	instead of constructing a temporary string.
	(basic_regex::assign(const C*, size_t, flag_type)): Likewise.
	(basic_regex::assign(const basic_string<C,T,A>&, flag_type)):
	Use _M_compile instead of constructing a temporary basic_regex.
	(basic_regex::assign(InputIter, InputIter, flag_type)): Avoid
	constructing a temporary string for contiguous iterators of the
	right value type.
	* include/bits/regex_compiler.h (__is_contiguous_iter): Move to
	<bits/regex.h>.
	(__enable_if_contiguous_iter, __disable_if_contiguous_iter)
	(__compile_nfa): Remove.
	* testsuite/28_regex/basic_regex/assign/exception_safety.cc: New
	test.
	* testsuite/28_regex/basic_regex/ctors/char/other.cc: New test.
This commit is contained in:
Jonathan Wakely 2021-09-29 13:48:02 +01:00
parent b34fa88bec
commit b59be1adba
4 changed files with 117 additions and 82 deletions

View File

@ -57,6 +57,16 @@ namespace __detail
template<typename, typename, typename, bool>
class _Executor;
template<typename _Tp>
struct __is_contiguous_iter : false_type { };
template<typename _Tp>
struct __is_contiguous_iter<_Tp*> : true_type { };
template<typename _Tp, typename _Cont>
struct __is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>>
: true_type { };
}
_GLIBCXX_BEGIN_NAMESPACE_CXX11
@ -438,8 +448,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
*/
explicit
basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript)
: basic_regex(__p, __p + char_traits<_Ch_type>::length(__p), __f)
{ }
{ _M_compile(__p, __p + _Rx_traits::length(__p), __f); }
/**
* @brief Constructs a basic regular expression from the sequence
@ -455,8 +464,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
*/
basic_regex(const _Ch_type* __p, std::size_t __len,
flag_type __f = ECMAScript)
: basic_regex(__p, __p + __len, __f)
{ }
{ _M_compile(__p, __p + __len, __f); }
/**
* @brief Copy-constructs a basic regular expression.
@ -486,8 +494,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
_Ch_alloc>& __s,
flag_type __f = ECMAScript)
: basic_regex(__s.data(), __s.data() + __s.size(), __f)
{ }
{ _M_compile(__s.data(), __s.data() + __s.size(), __f); }
/**
* @brief Constructs a basic regular expression from the range
@ -505,8 +512,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
template<typename _FwdIter>
basic_regex(_FwdIter __first, _FwdIter __last,
flag_type __f = ECMAScript)
: basic_regex(std::move(__first), std::move(__last), locale_type(), __f)
{ }
{ this->assign(__first, __last, __f); }
/**
* @brief Constructs a basic regular expression from an initializer list.
@ -517,8 +523,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
* @throws regex_error if @p __l is not a valid regular expression.
*/
basic_regex(initializer_list<_Ch_type> __l, flag_type __f = ECMAScript)
: basic_regex(__l.begin(), __l.end(), __f)
{ }
{ _M_compile(__l.begin(), __l.end(), __f); }
/**
* @brief Destroys a basic regular expression.
@ -530,15 +535,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
* @brief Assigns one regular expression to another.
*/
basic_regex&
operator=(const basic_regex& __rhs)
{ return this->assign(__rhs); }
operator=(const basic_regex&) = default;
/**
* @brief Move-assigns one regular expression to another.
*/
basic_regex&
operator=(basic_regex&& __rhs) noexcept
{ return this->assign(std::move(__rhs)); }
operator=(basic_regex&&) = default;
/**
* @brief Replaces a regular expression with a new one constructed from
@ -561,7 +564,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
*/
basic_regex&
operator=(initializer_list<_Ch_type> __l)
{ return this->assign(__l.begin(), __l.end()); }
{ return this->assign(__l); }
/**
* @brief Replaces a regular expression with a new one constructed from
@ -576,30 +579,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
// [7.8.3] assign
/**
* @brief the real assignment operator.
* @brief Assigns one regular expression to another.
*
* @param __rhs Another regular expression object.
*/
basic_regex&
assign(const basic_regex& __rhs)
{
basic_regex __tmp(__rhs);
this->swap(__tmp);
return *this;
}
assign(const basic_regex& __rhs) noexcept
{ return *this = __rhs; }
/**
* @brief The move-assignment operator.
* @brief Move-assigns one regular expression to another.
*
* @param __rhs Another regular expression object.
*/
basic_regex&
assign(basic_regex&& __rhs) noexcept
{
basic_regex __tmp(std::move(__rhs));
this->swap(__tmp);
return *this;
}
{ return *this = std::move(__rhs); }
/**
* @brief Assigns a new regular expression to a regex object from a
@ -616,7 +611,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
*/
basic_regex&
assign(const _Ch_type* __p, flag_type __flags = ECMAScript)
{ return this->assign(string_type(__p), __flags); }
{
_M_compile(__p, __p + _Rx_traits::length(__p), __flags);
return *this;
}
/**
* @brief Assigns a new regular expression to a regex object from a
@ -635,7 +633,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
// 3296. Inconsistent default argument for basic_regex<>::assign
basic_regex&
assign(const _Ch_type* __p, size_t __len, flag_type __flags = ECMAScript)
{ return this->assign(string_type(__p, __len), __flags); }
{
_M_compile(__p, __p + __len, __flags);
return *this;
}
/**
* @brief Assigns a new regular expression to a regex object from a
@ -653,8 +654,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
assign(const basic_string<_Ch_type, _Ch_traits, _Alloc>& __s,
flag_type __flags = ECMAScript)
{
return this->assign(basic_regex(__s.data(), __s.data() + __s.size(),
_M_loc, __flags));
_M_compile(__s.data(), __s.data() + __s.size(), __flags);
return *this;
}
/**
@ -674,7 +675,21 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
basic_regex&
assign(_InputIterator __first, _InputIterator __last,
flag_type __flags = ECMAScript)
{ return this->assign(string_type(__first, __last), __flags); }
{
#if __cplusplus >= 201703L
using _ValT = typename iterator_traits<_InputIterator>::value_type;
if constexpr (__detail::__is_contiguous_iter<_InputIterator>::value
&& is_same_v<_ValT, value_type>)
{
const auto __len = __last - __first;
const _Ch_type* __p = std::__to_address(__first);
_M_compile(__p, __p + __len, __flags);
}
else
#endif
this->assign(string_type(__first, __last), __flags);
return *this;
}
/**
* @brief Assigns a new regular expression to a regex object.
@ -689,7 +704,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
*/
basic_regex&
assign(initializer_list<_Ch_type> __l, flag_type __flags = ECMAScript)
{ return this->assign(__l.begin(), __l.end(), __flags); }
{
_M_compile(__l.begin(), __l.end(), __flags);
return *this;
}
// [7.8.4] const operations
/**
@ -757,13 +775,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
private:
typedef std::shared_ptr<const __detail::_NFA<_Rx_traits>> _AutomatonPtr;
template<typename _FwdIter>
basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc,
flag_type __f)
: _M_flags(__f), _M_loc(std::move(__loc)),
_M_automaton(__detail::__compile_nfa<_Rx_traits>(
std::move(__first), std::move(__last), _M_loc, _M_flags))
{ }
void
_M_compile(const _Ch_type* __first, const _Ch_type* __last,
flag_type __f)
{
__detail::_Compiler<_Rx_traits> __c(__first, __last, _M_loc, __f);
_M_automaton = __c._M_get_nfa();
_M_flags = __f;
}
template<typename _Bp, typename _Ap, typename _Cp, typename _Rp,
__detail::_RegexExecutorPolicy, bool>

View File

@ -152,47 +152,6 @@ namespace __detail
const _CtypeT& _M_ctype;
};
template<typename _Tp>
struct __is_contiguous_iter : is_pointer<_Tp>::type { };
template<typename _Tp, typename _Cont>
struct
__is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>>
: true_type { };
template<typename _Iter, typename _TraitsT>
using __enable_if_contiguous_iter
= __enable_if_t< __is_contiguous_iter<_Iter>::value,
std::shared_ptr<const _NFA<_TraitsT>> >;
template<typename _Iter, typename _TraitsT>
using __disable_if_contiguous_iter
= __enable_if_t< !__is_contiguous_iter<_Iter>::value,
std::shared_ptr<const _NFA<_TraitsT>> >;
template<typename _TraitsT, typename _FwdIter>
inline __enable_if_contiguous_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last,
const typename _TraitsT::locale_type& __loc,
regex_constants::syntax_option_type __flags)
{
size_t __len = __last - __first;
const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
using _Cmplr = _Compiler<_TraitsT>;
return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa();
}
template<typename _TraitsT, typename _FwdIter>
inline __disable_if_contiguous_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last,
const typename _TraitsT::locale_type& __loc,
regex_constants::syntax_option_type __flags)
{
const basic_string<typename _TraitsT::char_type> __str(__first, __last);
return __compile_nfa<_TraitsT>(__str.data(), __str.data() + __str.size(),
__loc, __flags);
}
// [28.13.14]
template<typename _TraitsT, bool __icase, bool __collate>
class _RegexTranslatorBase

View File

@ -0,0 +1,20 @@
// { dg-do run { target c++11 } }
#include <regex>
#include <testsuite_hooks.h>
int main()
{
const auto f = std::regex::ECMAScript|std::regex::icase;
std::regex re("abc", f);
try
{
re.assign("[", std::regex::extended);
VERIFY( false );
}
catch (const std::regex_error&)
{
// [re.regex.assign] "If an exception is thrown, *this is unchanged."
VERIFY( re.flags() == f );
VERIFY( std::regex_match("abc", re) );
}
}

View File

@ -0,0 +1,37 @@
// { dg-do run { target c++11 } }
#include <regex>
#include <testsuite_iterators.h>
#include <testsuite_hooks.h>
void
test01()
{
signed char s[] = { 'a', '.' };
std::regex re(s, s+2); // This used to fail up to GCC 11.2
// VERIFY( regex_match("an", re) );
std::wstring str = L"xx";
str[0] = '1';
str[1] = '2';
re.assign(str.begin(), str.end());
VERIFY( regex_match("12", re) );
}
void
test02()
{
int i[] = { 'a', '.', '[', 'x', 'y', 'z', ']' };
__gnu_test::forward_container<int> fwd(i);
std::regex re(fwd.begin(), fwd.end());
VERIFY( regex_match("any", re) );
__gnu_test::input_container<int> input(i);
re.assign(input.begin(), input.end(), std::regex::icase);
VERIFY( regex_match("ANY", re) );
}
int main()
{
test01();
test02();
}