libstdc++: Avoid unwanted allocations in filesystem::path

When using COW strings, accessing _M_pathname[0] and similar non-const
accessors can cause the string to "leak", meaning it reallocates itself
if it shares ownership with another string object.

This causes test failures for --enable-fully-dynamic-string builds:
/home/jwakely/src/gcc/libstdc++-v3/testsuite/experimental/filesystem/path/construct/90634.cc:62: void test01(): Assertion 'bytes_allocated == 0' failed.
FAIL: experimental/filesystem/path/construct/90634.cc execution test

This FAIL happens because the fully-dynamic move constructor results in
shared ownership, so for path(std::move(std::string("foo"))) the
_M_pathname member shares ownership with the temporary, and the
non-const accesses in _M_split_cmpts() cause a new copy of the string to
be allocated. This un-sharing is wasteful, and entirely unnecessary when
sharing ownership with an rvalue that is about to release its ownership
anyway. Even for lvalues, sharing ownership is not a problem and
reallocating a unique copy of the string is wasteful.

This removes non-const accesses of _M_pathname in the
path::_M_split_cmpts() members.

libstdc++-v3/ChangeLog:

	* src/c++17/fs_path.cc (path::_M_split_cmpts()): Remove
	micro-optimization for "/" path.
	* src/filesystem/path.cc (path::_M_split_cmpts()): Only access
	the contents of _M_pathname using const member functions.
This commit is contained in:
Jonathan Wakely 2021-11-30 16:18:23 +00:00
parent 1e625a44f6
commit e9089e4fa9
2 changed files with 17 additions and 19 deletions

View File

@ -1872,11 +1872,6 @@ path::_M_split_cmpts()
_M_cmpts.type(_Type::_Filename); _M_cmpts.type(_Type::_Filename);
return; return;
} }
if (_M_pathname.length() == 1 && _M_pathname[0] == preferred_separator)
{
_M_cmpts.type(_Type::_Root_dir);
return;
}
_Parser parser(_M_pathname); _Parser parser(_M_pathname);

View File

@ -337,15 +337,18 @@ path::_M_split_cmpts()
_M_type = _Type::_Multi; _M_type = _Type::_Multi;
_M_cmpts.clear(); _M_cmpts.clear();
if (_M_pathname.empty()) // Use const-reference to access _M_pathname, to avoid "leaking" COW string.
const auto& pathname = _M_pathname;
if (pathname.empty())
return; return;
{ {
// Approximate count of components, to reserve space in _M_cmpts vector: // Approximate count of components, to reserve space in _M_cmpts vector:
int count = 1; int count = 1;
bool saw_sep_last = _S_is_dir_sep(_M_pathname[0]); bool saw_sep_last = _S_is_dir_sep(pathname[0]);
bool saw_non_sep = !saw_sep_last; bool saw_non_sep = !saw_sep_last;
for (value_type c : _M_pathname) for (value_type c : pathname)
{ {
if (_S_is_dir_sep(c)) if (_S_is_dir_sep(c))
saw_sep_last = true; saw_sep_last = true;
@ -363,13 +366,13 @@ path::_M_split_cmpts()
} }
size_t pos = 0; size_t pos = 0;
const size_t len = _M_pathname.size(); const size_t len = pathname.size();
// look for root name or root directory // look for root name or root directory
if (_S_is_dir_sep(_M_pathname[0])) if (_S_is_dir_sep(pathname[0]))
{ {
// look for root name, such as "//" or "//foo" // look for root name, such as "//" or "//foo"
if (len > 1 && _M_pathname[1] == _M_pathname[0]) if (len > 1 && pathname[1] == pathname[0])
{ {
if (len == 2) if (len == 2)
{ {
@ -378,11 +381,11 @@ path::_M_split_cmpts()
return; return;
} }
if (!_S_is_dir_sep(_M_pathname[2])) if (!_S_is_dir_sep(pathname[2]))
{ {
// got root name, find its end // got root name, find its end
pos = 3; pos = 3;
while (pos < len && !_S_is_dir_sep(_M_pathname[pos])) while (pos < len && !_S_is_dir_sep(pathname[pos]))
++pos; ++pos;
if (pos == len) if (pos == len)
{ {
@ -409,7 +412,7 @@ path::_M_split_cmpts()
++pos; ++pos;
} }
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS #ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
else if (len > 1 && _M_pathname[1] == L':') else if (len > 1 && pathname[1] == L':')
{ {
// got disk designator // got disk designator
if (len == 2) if (len == 2)
@ -418,7 +421,7 @@ path::_M_split_cmpts()
return; return;
} }
_M_add_root_name(2); _M_add_root_name(2);
if (len > 2 && _S_is_dir_sep(_M_pathname[2])) if (len > 2 && _S_is_dir_sep(pathname[2]))
_M_add_root_dir(2); _M_add_root_dir(2);
pos = 2; pos = 2;
} }
@ -426,9 +429,9 @@ path::_M_split_cmpts()
else else
{ {
size_t n = 1; size_t n = 1;
for (; n < _M_pathname.size() && !_S_is_dir_sep(_M_pathname[n]); ++n) for (; n < pathname.size() && !_S_is_dir_sep(pathname[n]); ++n)
{ } { }
if (n == _M_pathname.size()) if (n == pathname.size())
{ {
_M_type = _Type::_Filename; _M_type = _Type::_Filename;
return; return;
@ -438,7 +441,7 @@ path::_M_split_cmpts()
size_t back = pos; size_t back = pos;
while (pos < len) while (pos < len)
{ {
if (_S_is_dir_sep(_M_pathname[pos])) if (_S_is_dir_sep(pathname[pos]))
{ {
if (back != pos) if (back != pos)
_M_add_filename(back, pos - back); _M_add_filename(back, pos - back);
@ -450,7 +453,7 @@ path::_M_split_cmpts()
if (back != pos) if (back != pos)
_M_add_filename(back, pos - back); _M_add_filename(back, pos - back);
else if (_S_is_dir_sep(_M_pathname.back())) else if (_S_is_dir_sep(pathname.back()))
{ {
// [path.itr]/8 // [path.itr]/8
// "Dot, if one or more trailing non-root slash characters are present." // "Dot, if one or more trailing non-root slash characters are present."