libstdc++: Improve generated man pages for libstdc++

The man pages generated by Doxygen show internal header files, not the
standard headers that users actually care about. The run_doxygen script
uses the doc/doxygen/stdheader.cc program to address that, but it
doesn't work. It only tries to fix headers with underscores in the
names, which doesn't work for <bits/align.h> or <bits/fstream.tcc>.  It
isn't prepared for strings like "bits/stl_set\&.h" that are produced
by Doxygen. It doesn't know about many headers that have been added
since it was written. And the run_doxygen script fails to use its output
correctly to modify the man pages. Additionally, run_doxygen doesn't
know about new nested namespaces like std::filesystem and std::ranges.

This change rewrites the stdheader.cc program to do a better job of
finding the right header. The run_doxygen script now uses the just-built
compiler to build stdheader.cc and actually uses its output. And the
script now knows about other nested namespaces.

The stdheader.cc program might be unnecessary if we consistently used
@headername tags in the Doxygen comments, but we don't (and probably
never will).

A problem that remains after this change is that all the free functions
defined in namespace std get dumped into a single man page for std(3),
without detailed descriptions. We don't even install that std(3) page,
but remove it before installation. That means only classes are
documented in man pages (including many internal ones that should not be
publicly documented such as _Deque_base and _Tuple_impl).

libstdc++-v3/ChangeLog:

	* doc/doxygen/stdheader.cc: Refactor. Use C++23. Add new
	headers.
	* scripts/run_doxygen: Fix post-processing of #include
	directives in man pages. Use new xg++ to compile helper program.
This commit is contained in:
Jonathan Wakely 2021-10-21 12:26:18 +01:00
parent 5a5d7c2c80
commit 394f60e6ed
2 changed files with 207 additions and 106 deletions

View File

@ -1,171 +1,256 @@
// This is a slow larval-stage kludge to help massage the generated man
// pages. It's used like this:
const char* const usage =
"\nTakes on stdin, whitespace-separated words of the form\n"
"\n"
" [bits/]stl_foo.h\n"
" [bits/]std_foo.h\n"
"\n"
"and writes on stdout the nearest matching standard header name.\n"
"\n"
"Takes no command-line arguments.\n"
"\n";
#include <cstdlib>
#include <string>
#include <string_view>
#include <map>
#include <set>
#include <algorithm>
#include <iterator>
#include <iostream>
typedef std::map<std::string, std::string> Map;
// This is a slow larval-stage kludge to help massage the generated man
// pages. It's used like this:
const std::string_view usage = R"(
Takes on stdin, whitespace-separated words of the form
Map headers;
[bits/]stl_foo.h
[bits/]std_foo.h
and writes on stdout the nearest matching standard header name.
Takes no command-line arguments.
)";
// List of standard headers
std::set<std::string_view> std_headers;
// Map of partial header filenames to standard headers.
std::map<std::string_view, std::string_view> headers;
void init_map()
{
// Enter the glamourous world of data entry!! Maintain these!
// Because the map_header function removes common prefixes and suffixes,
// a header "bits/st[dl]_foo.h" will automatically map to "foo" if that
// is a standard header, so we don't need to list those cases here.
headers["atomic_base.h"] = "atomic";
headers["atomic_lockfree_defines.h"] = "atomic";
headers["atomic_timed_wait.h"] = "atomic";
headers["atomic_wait.h"] = "atomic";
headers["algorithmfwd.h"] = "algorithm";
headers["algo.h"] = "algorithm";
headers["algobase.h"] = "algorithm";
headers["algorithm.h"] = "algorithm";
headers["ranges_algo.h"] = "algorithm";
headers["ranges_algobase.h"] = "algorithm";
headers["heap.h"] = "algorithm";
headers["bitset.h"] = "bitset";
headers["complex.h"] = "complex";
//headers["construct.h"] stl_construct.h entirely internal
headers["deque.h"] = "deque";
headers["deque.tcc"] = "deque";
headers["fstream.h"] = "fstream";
headers["fstream.tcc"] = "fstream";
headers["exception_ptr.h"] = "exception";
headers["nested_exception.h"] = "exception";
headers["fs_dir.h"] = "filesystem";
headers["fs_fwd.h"] = "filesystem";
headers["fs_ops.h"] = "filesystem";
headers["fs_path.h"] = "filesystem";
headers["binders.h"] = "functional";
headers["function.h"] = "functional";
headers["functional.h"] = "functional";
headers["iomanip.h"] = "iomanip";
headers["functional_hash.h"] = "functional";
headers["mofunc_impl.h"] = "functional";
headers["move_only_function.h"] = "functional";
headers["invoke.h"] = "functional";
headers["refwrap.h"] = "functional";
headers["quoted_string.h"] = "iomanip";
headers["ios_base.h"] = "ios";
headers["basic_ios.h"] = "ios";
headers["basic_ios.tcc"] = "ios";
headers["ios.h"] = "ios";
headers["iosfwd.h"] = "iosfwd";
headers["iostream.h"] = "iostream";
headers["istream.h"] = "istream";
headers["istream.tcc"] = "istream";
headers["iterator.h"] = "iterator";
headers["iterator_base_funcs.h"] = "iterator";
headers["iterator_base_types.h"] = "iterator";
headers["stream_iterator.h"] = "iterator";
headers["streambuf_iterator.h"] = "iterator";
headers["limits.h"] = "limits";
headers["list.h"] = "list";
headers["list.tcc"] = "list";
headers["iterator_concepts.h"] = "iterator";
headers["range_access.h"] = "iterator";
headers["codecvt.h"] = "locale";
headers["locale.h"] = "locale";
headers["c++locale.h"] = "locale";
headers["localefwd.h"] = "locale";
headers["ctype_base.h"] = "locale";
headers["locale_classes.h"] = "locale";
headers["locale_classes.tcc"] = "locale";
headers["locale_facets.h"] = "locale";
headers["locale_facets.tcc"] = "locale";
headers["map.h"] = "map";
headers["locale_facets_nonio.h"] = "locale";
headers["locale_facets_nonio.tcc"] = "locale";
headers["locale_conv.h"] = "locale";
headers["multimap.h"] = "map";
headers["memory.h"] = "memory";
headers["memoryfwd.h"] = "memory";
headers["align.h"] = "memory";
headers["alloc_traits.h"] = "memory";
headers["auto_ptr.h"] = "memory";
headers["construct.h"] = "memory";
headers["allocator.h"] = "memory";
headers["raw_storage_iter.h"] = "memory";
headers["tempbuf.h"] = "memory";
headers["uninitialized.h"] = "memory";
headers["numeric.h"] = "numeric";
headers["ostream.h"] = "ostream";
headers["ostream.tcc"] = "ostream";
headers["queue.h"] = "queue";
headers["set.h"] = "set";
headers["shared_ptr.h"] = "memory";
headers["shared_ptr_base.h"] = "memory";
headers["shared_ptr_atomic.h"] = "memory";
headers["unique_ptr.h"] = "memory";
headers["ranges_uninitialized.h"] = "memory";
headers["ptr_traits.h"] = "memory";
headers["uses_allocator.h"] = "memory";
headers["uses_allocator_args.h"] = "memory";
headers["unique_lock.h"] = "mutex";
headers["uniform_int_dist.h"] = "random";
headers["ranges_base.h"] = "ranges";
headers["ranges_util.h"] = "ranges";
headers["ranges_cmp.h"] = "functional";
headers["regex_automaton.h"] = "regex";
headers["regex_automaton.tcc"] = "regex";
headers["regex_compiler.h"] = "regex";
headers["regex_compiler.tcc"] = "regex";
headers["regex_constants.h"] = "regex";
headers["regex_error.h"] = "regex";
headers["regex_executor.h"] = "regex";
headers["regex_executor.tcc"] = "regex";
headers["regex_scanner.h"] = "regex";
headers["regex_scanner.tcc"] = "regex";
headers["semaphore_base.h"] = "semaphore";
headers["multiset.h"] = "set";
headers["sstream.h"] = "sstream";
headers["sstream.tcc"] = "sstream";
headers["stack.h"] = "stack";
headers["node_handle.h"] = "set";
headers["functexcept.h"] = "stdexcept";
headers["stdexcept.h"] = "stdexcept";
headers["streambuf.h"] = "streambuf";
headers["streambuf.tcc"] = "streambuf";
headers["string.h"] = "string";
headers["char_traits.h"] = "string";
headers["stringfwd.h"] = "string";
headers["postypes.h"] = "string";
headers["basic_string.h"] = "string";
headers["basic_string.tcc"] = "string";
headers["tree.h"] = "backward/tree.h";
headers["cow_string.h"] = "string";
headers["string_view.tcc"] = "string_view";
headers["this_thread_sleep.h"] = "thread";
headers["tree.h"] = "map";
headers["pair.h"] = "utility";
headers["utility.h"] = "utility";
headers["relops.h"] = "utility";
headers["gslice.h"] = "valarray";
headers["gslice_array.h"] = "valarray";
headers["indirect_array.h"] = "valarray";
headers["mask_array.h"] = "valarray";
headers["slice_array.h"] = "valarray";
headers["valarray.h"] = "valarray";
headers["valarray_after.h"] = "valarray";
headers["valarray_before.h"] = "valarray";
headers["valarray_array.h"] = "valarray";
headers["valarray_array.tcc"] = "valarray";
headers["valarray_meta.h"] = "valarray";
headers["bvector.h"] = "vector";
headers["vector.h"] = "vector";
headers["vector.tcc"] = "vector";
//headers["concurrence.h"] who knows
//headers["atomicity.h"] who knows
// C wrappers -- probably was an easier way to do these, but oh well
headers["cassert.h"] = "cassert";
headers["cctype.h"] = "cctype";
headers["cerrno.h"] = "cerrno";
headers["cfloat.h"] = "cfloat";
headers["climits.h"] = "climits";
headers["clocale.h"] = "clocale";
headers["cmath.h"] = "cmath";
headers["csetjmp.h"] = "csetjmp";
headers["csignal.h"] = "csignal";
headers["cstdarg.h"] = "cstdarg";
headers["cstddef.h"] = "cstddef";
headers["cstdio.h"] = "cstdio";
headers["cstdlib.h"] = "cstdlib";
headers["cstring.h"] = "cstring";
headers["ctime.h"] = "ctime";
headers["cwchar.h"] = "cwchar";
headers["cwctype.h"] = "cwctype";
headers["abs.h"] = "cstdlib";
headers["specfun.h"] = "cmath";
// This list is complete as of the October 2021 working draft.
std_headers = {
"algorithm", "any", "array", "atomic",
"barrier", "bit", "bitset",
"charconv", "chrono", "codecvt", "compare", "complex",
"concepts", "condition_variable", "coroutine",
"deque",
"exception", "execution",
"filesystem", "format", "forward_list", "fstream",
"functional", "future",
"initializer_list", "iomanip", "ios", "iosfwd",
"iostream", "istream", "iterator",
"latch", "limits", "list", "locale",
"map", "memory", "memory_resource", "mutex",
"new", "numbers", "numeric",
"optional", "ostream",
"queue",
"random", "ranges", "ratio", "regex",
"scoped_allocator", "semaphore", "set", "shared_mutex",
"source_location", "span", "spanstream", "sstream",
"stack", "stacktrace", "stdexcept", "stop_token",
"streambuf", "string", "string_view", "strstream",
"syncstream", "system_error",
"thread", "tuple", "typeindex", "typeinfo", "type_traits",
"unordered_map", "unordered_set", "utility",
"valarray", "variant", "vector", "version",
"cassert", "cctype", "cerrno", "cfenv", "cfloat",
"cinttypes", "climits", "clocale", "cmath", "csetjmp",
"csignal", "cstdarg", "cstddef", "cstdint", "cstdio",
"cstdlib", "cstring", "ctime", "cuchar", "cwchar",
"cwctype",
"assert.h", "ctype.h", "errno.h", "fenv.h", "float.h",
"inttypes.h", "limits.h", "locale.h", "math.h", "setjmp.h",
"signal.h", "stdarg.h", "stddef.h", "stdint.h", "stdio.h",
"stdlib.h", "string.h", "time.h", "uchar.h", "wchar.h",
"wctype.h",
};
// In case we missed any:
for (const auto& h : headers)
std_headers.insert(h.second);
}
void do_word (std::string const& longheader)
std::string_view map_header (std::string_view header)
{
std::string::size_type start = 0;
// if it doesn't contain a "." then it's already a std header
if (longheader.find(".") == std::string::npos)
if (!header.contains('.'))
{
std::cout << longheader << '\n';
return;
// make sure it's in the set:
std_headers.insert(header);
return header;
}
if (longheader.substr(start,5) == "bits/") start += 5;
if ((longheader.substr(start,4) == "stl_") ||
(longheader.substr(start,4) == "std_"))
{
start += 4;
}
for (std::string_view prefix : {"bits/", "stl_", "std_"})
if (header.starts_with(prefix))
header.remove_prefix(prefix.size());
// come on, gdb, find `p' already...
const char* p = longheader.substr(start).c_str();
Map::iterator word = headers.find(p);
if (word != headers.end())
std::cout << word->second << '\n';
else std::cout << "MAYBE_AN_ERROR_MESSAGE_HERE\n";
if (auto it = headers.find(header); it != headers.end())
return it->second;
for (std::string_view ext : {".h", ".tcc"})
if (header.ends_with(ext))
{
header.remove_suffix(ext.size());
break;
}
if (auto it = std_headers.find(header); it != std_headers.end())
return *it;
return {};
}
// Map one header name, as read from stdin, to the name that should be
// shown in the man page.
//
// Names without a '.' are passed to map_header as they are.  Names under
// "experimental/" or "tr1/" have that directory removed, the remainder is
// mapped, and the directory is put back in front of the result; when the
// remainder cannot be mapped the name is returned unchanged and nothing
// is printed.  For any other name that map_header cannot map, a
// diagnostic is written to std::cerr and the name is returned unchanged.
std::string map_header_or_complain (std::string header)
{
  // What gets handed to map_header.  For a name containing a '.' a view
  // of the parameter is enough, because map_header only stores the view
  // it is given when the name has no '.'.
  std::string_view name = header;

  if (!header.contains('.'))
    {
      // map_header inserts the view it receives into std_headers when the
      // name has no '.'.  A view of this function's by-value parameter
      // would dangle as soon as we return and every later lookup in that
      // set would read a destroyed string, so pass a view of a copy that
      // lives until the program exits instead.  std::set never relocates
      // its elements, so the view stays valid as more names are added.
      static std::set<std::string> interned;
      name = *interned.insert(header).first;
    }
  else
    {
      // For <experimental/xxx.h> and <tr1/xxx.h> try to map <xxx.h>
      // then add the directory back to it.
      for (std::string_view dir : {"experimental/", "tr1/"})
        if (header.starts_with(dir))
          {
            // The argument is a temporary, which is fine: for a dotted
            // name map_header returns a view of an entry in its tables
            // (or an empty view), never a view of the argument.
            auto h = map_header(header.substr(dir.size()));
            if (!h.empty())
              return std::string(dir) + std::string(h);
            // Unmapped names in these directories are passed through
            // without a diagnostic.
            return header;
          }
    }

  if (auto mapped = map_header(name); !mapped.empty())
    return std::string(mapped);

  std::cerr << "Could not map <" << header << "> to a standard header\n";
  return header;
}
int main (int argc, char**)
int main (int argc, char** argv)
{
if (argc > 1)
{
std::cerr << usage;
std::exit(0);
std::cerr << "Usage: " << argv[0] << '\n' << usage;
return 1;
}
init_map();
std::string w;
while (std::cin >> w)
do_word (w);
std::transform(std::istream_iterator<std::string>(std::cin), {},
std::ostream_iterator<std::string>(std::cout),
map_header_or_complain);
}

View File

@ -269,6 +269,8 @@ find . -name "* *" -print0 | xargs -0r rm # requires GNU tools
# man pages are for functions/types/other entities, not source files
# directly. who the heck would type "man foo.h" anyhow?
# FIXME: This also removes std.3 which is the only place that a lot of
# functions are documented. Should we keep it?
find . -name "[a-z]*" -a ! -name "std_*" -print | xargs rm
rm -f *.h.3 *.hpp.3 *config* *.cc.3 *.tcc.3 *_t.3
#rm ext_*.3 tr1_*.3 debug_*.3
@ -278,18 +280,23 @@ rm -f *.h.3 *.hpp.3 *config* *.cc.3 *.tcc.3 *_t.3
#find . -name "[a-z]*" -a ! -name "std_*" -print | xargs -i mv {} trash
#mv *.h.3 *config* *.cc.3 *.tcc.3 *_t.3 trash
gxx=$($builddir/scripts/testsuite_flags --build-cxx)
cppflags=$($builddir/scripts/testsuite_flags --build-includes)
cxxflags="-Og -g -std=gnu++23"
# Standardize the displayed header names. If anyone who knows perl cares
# enough to rewrite all this, feel free. This only gets run once a century,
# and I'm off getting coffee then anyhow, so I didn't care enough to make
# this super-fast.
g++ ${srcdir}/doc/doxygen/stdheader.cc -o ./stdheader
problematic=`egrep -l '#include <.*_.*>' [a-z]*.3`
$gxx $cppflags $cxxflags ${srcdir}/doc/doxygen/stdheader.cc -o ./stdheader || exit 1
# Doxygen outputs something like "\fC#include <unique_lock\&.h>\fP" and
# we want that internal header to be replaced with something like <mutex>.
problematic=`egrep -l '#include <.*h>' [a-z]*.3`
for f in $problematic; do
# this is also slow, but safe and easy to debug
oldh=`sed -n '/fC#include </s/.*<\(.*\)>.*/\1/p' $f`
newh=`echo $oldh | ./stdheader`
sed 's=${oldh}=${newh}=' $f > TEMP
mv TEMP $f
newh=`echo $oldh | sed 's/\\\\&\\././g' | ./stdheader`
sed "s=${oldh/\\/.}=${newh}=" $f > TEMP && mv TEMP $f
done
rm stdheader
@ -323,10 +330,14 @@ cp ${srcdir}/doc/doxygen/Intro.3 C++Intro.3
# Why didn't I do this at the start? Were rabid weasels eating my brain?
# Who the fsck would "man std_vector" when the class isn't named that?
# If no files match a glob, skip the for-loop:
shopt -s nullglob
# First, deal with nested namespaces.
for f in *chrono_*; do
newname=`echo $f | sed 's/chrono_/chrono::/'`
mv $f $newname
for ns in chrono filesystem ranges views literals; do
for f in std_${ns}_*; do
newname=`echo $f | sed "s/std_${ns}_/std::${ns}::/"`
mv $f $newname
done
done
for f in *__debug_*; do
newname=`echo $f | sed 's/__debug_/__debug::/'`
@ -413,11 +424,16 @@ for f in *_src_*; do
rm $f
done
# Remove all internal implementation details?
# rm std::_[A-Z]*.3 std::__detail*.3
shopt -u nullglob
# Also, for some reason, typedefs don't get their own man pages. Sigh.
for f in ios streambuf istream ostream iostream stringbuf \
istringstream ostringstream stringstream filebuf ifstream \
ofstream fstream string;
ofstream fstream string
do
echo ".so man3/std::basic_${f}.3" > std::${f}.3
echo ".so man3/std::basic_${f}.3" > std::w${f}.3