From c0ace69ec677d1f85f6a433c8fae2d4df6f75714 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Thu, 18 Oct 2018 20:57:25 +0100 Subject: [PATCH] PR libstdc++/87642 handle multibyte thousands separators from libc If a locale's THOUSANDS_SEP or MON_THOUSANDS_SEP string is not a single character we either need to narrow it to a single char or ignore it (and therefore disable digit grouping for that facet). PR libstdc++/87642 * config/locale/gnu/monetary_members.cc (moneypunct<char, true>::_M_initialize_moneypunct): Use __narrow_multibyte_chars to convert multibyte thousands separators to a single char. * config/locale/gnu/numeric_members.cc (numpunct<char>::_M_initialize_numpunct): Likewise. (__narrow_multibyte_chars): New function. From-SVN: r265286 --- libstdc++-v3/ChangeLog | 9 +++ .../config/locale/gnu/monetary_members.cc | 11 +++- .../config/locale/gnu/numeric_members.cc | 60 ++++++++++++++++++- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index cb9aac1a7c3..b26e1fcfd29 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,5 +1,14 @@ 2018-10-18 Jonathan Wakely + PR libstdc++/87642 + * config/locale/gnu/monetary_members.cc + (moneypunct<char, true>::_M_initialize_moneypunct): Use + __narrow_multibyte_chars to convert multibyte thousands separators + to a single char. + * config/locale/gnu/numeric_members.cc + (numpunct<char>::_M_initialize_numpunct): Likewise. + (__narrow_multibyte_chars): New function. + PR libstdc++/87641 * include/bits/valarray_array.h (__valarray_sum): Use first element to initialize accumulator instead of value-initializing it. 
diff --git a/libstdc++-v3/config/locale/gnu/monetary_members.cc b/libstdc++-v3/config/locale/gnu/monetary_members.cc index b3e7645385a..212c68dd501 100644 --- a/libstdc++-v3/config/locale/gnu/monetary_members.cc +++ b/libstdc++-v3/config/locale/gnu/monetary_members.cc @@ -207,6 +207,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } #endif + extern char __narrow_multibyte_chars(const char* s, __locale_t cloc); + template<> void moneypunct<char, true>::_M_initialize_moneypunct(__c_locale __cloc, @@ -241,8 +243,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Named locale. _M_data->_M_decimal_point = *(__nl_langinfo_l(__MON_DECIMAL_POINT, __cloc)); - _M_data->_M_thousands_sep = *(__nl_langinfo_l(__MON_THOUSANDS_SEP, - __cloc)); + const char* thousands_sep = __nl_langinfo_l(__MON_THOUSANDS_SEP, + __cloc); + if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0') + _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep, + __cloc); + else + _M_data->_M_thousands_sep = *thousands_sep; // Check for NULL, which implies no fractional digits. if (_M_data->_M_decimal_point == '\0') diff --git a/libstdc++-v3/config/locale/gnu/numeric_members.cc b/libstdc++-v3/config/locale/gnu/numeric_members.cc index 1ede8fadbd0..faa35777cf3 100644 --- a/libstdc++-v3/config/locale/gnu/numeric_members.cc +++ b/libstdc++-v3/config/locale/gnu/numeric_members.cc @@ -30,11 +30,62 @@ #include <locale> #include <bits/c++locale_internal.h> +#include <iconv.h> namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION + extern char __narrow_multibyte_chars(const char* s, __locale_t cloc); + +// This file might be compiled twice, but we only want to define this once. +#if ! 
_GLIBCXX_USE_CXX11_ABI + char + __narrow_multibyte_chars(const char* s, __locale_t cloc) + { + const char* codeset = __nl_langinfo_l(CODESET, cloc); + if (!strcmp(codeset, "UTF-8")) + { + // optimize for some known cases + if (!strcmp(s, "\u202F")) // NARROW NO-BREAK SPACE + return ' '; + if (!strcmp(s, "\u2019")) // RIGHT SINGLE QUOTATION MARK + return '\''; + if (!strcmp(s, "\u066C")) // ARABIC THOUSANDS SEPARATOR + return '\''; + } + + iconv_t cd = iconv_open("ASCII//TRANSLIT", codeset); + if (cd != (iconv_t)-1) + { + char c1; + size_t inbytesleft = strlen(s); + size_t outbytesleft = 1; + char* inbuf = const_cast<char*>(s); + char* outbuf = &c1; + size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + iconv_close(cd); + if (n != (size_t)-1) + { + cd = iconv_open(codeset, "ASCII"); + if (cd != (iconv_t)-1) + { + char c2; + inbuf = &c1; + inbytesleft = 1; + outbuf = &c2; + outbytesleft = 1; + n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + iconv_close(cd); + if (n != (size_t)-1) + return c2; + } + } + } + return '\0'; + } +#endif + template<> void numpunct<char>::_M_initialize_numpunct(__c_locale __cloc) @@ -63,8 +114,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Named locale. _M_data->_M_decimal_point = *(__nl_langinfo_l(DECIMAL_POINT, __cloc)); - _M_data->_M_thousands_sep = *(__nl_langinfo_l(THOUSANDS_SEP, - __cloc)); + const char* thousands_sep = __nl_langinfo_l(THOUSANDS_SEP, __cloc); + + if (thousands_sep[0] != '\0' && thousands_sep[1] != '\0') + _M_data->_M_thousands_sep = __narrow_multibyte_chars(thousands_sep, + __cloc); + else + _M_data->_M_thousands_sep = *thousands_sep; // Check for NULL, which implies no grouping. if (_M_data->_M_thousands_sep == '\0')