From 0fd1870171ae93d025808cd060ea6e9c47a31727 Mon Sep 17 00:00:00 2001 From: Paolo Carlini Date: Thu, 20 Nov 2003 19:24:03 +0000 Subject: [PATCH] re PR libstdc++/11602 (codecvt is slow.) 2003-11-20 Paolo Carlini PR libstdc++/11602 * config/locale/gnu/codecvt_members.cc (codecvt::do_out): Rewrite, using both wcsnrtombs and wcrtomb in a loop: the former is very fast, but stops if encounters a NUL. * testsuite/performance/wchar_t_out.cc: New, from the PR. From-SVN: r73769 --- libstdc++-v3/ChangeLog | 8 ++ .../config/locale/gnu/codecvt_members.cc | 95 ++++++++++--------- .../testsuite/performance/wchar_t_out.cc | 84 ++++++++++++++++ 3 files changed, 142 insertions(+), 45 deletions(-) create mode 100644 libstdc++-v3/testsuite/performance/wchar_t_out.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 330c0c49341..fad159db410 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,11 @@ +2003-11-20 Paolo Carlini + + PR libstdc++/11602 + * config/locale/gnu/codecvt_members.cc (codecvt::do_out): + Rewrite, using both wcsnrtombs and wcrtomb in a loop: the + former is very fast, but stops if encounters a NUL. + * testsuite/performance/wchar_t_out.cc: New, from the PR. + 2003-11-19 Paolo Carlini * config/locale/gnu/codecvt_members.cc (do_out): If diff --git a/libstdc++-v3/config/locale/gnu/codecvt_members.cc b/libstdc++-v3/config/locale/gnu/codecvt_members.cc index 54930fc0c4a..708941a9a35 100644 --- a/libstdc++-v3/config/locale/gnu/codecvt_members.cc +++ b/libstdc++-v3/config/locale/gnu/codecvt_members.cc @@ -48,56 +48,66 @@ namespace std extern_type*& __to_next) const { result __ret = ok; - // A temporary state must be used since the result of the last - // conversion may be thrown away. 
- state_type __tmp_state(__state); + state_type __tmp_state(__state); #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) __c_locale __old = __uselocale(_M_c_locale_codecvt); #endif - // The conversion must be done by calling wcrtomb in a loop rather - // than using wcsrtombs because wcsrtombs assumes that the input is - // zero-terminated. - - // Either we can upper bound the total number of external characters to - // something smaller than __to_end - __to or the conversion must be done - // using a temporary destination buffer since it is not possible to - // pass the size of the buffer to wcrtomb - if (MB_CUR_MAX * (__from_end - __from) - (__to_end - __to) <= 0) - while (__from < __from_end) - { - const size_t __conv = wcrtomb(__to, *__from, &__tmp_state); - if (__conv == static_cast<size_t>(-1)) - { - __ret = error; - break; - } - __state = __tmp_state; - __to += __conv; - __from++; - } - else + // wcsnrtombs is *very* fast but stops if encounters NUL characters: + // in case we fall back to wcrtomb and then continue, in a loop. + // NB: wcsnrtombs is a GNU extension + __from_next = __from; + __to_next = __to; + while (__from_next < __from_end && __to_next < __to_end + && __ret == ok) { - extern_type __buf[MB_LEN_MAX]; - while (__from < __from_end && __to < __to_end) + const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', + __from_end - __from_next); + if (!__from_chunk_end) + __from_chunk_end = __from_end; + + const intern_type* __tmp_from = __from_next; + const size_t __conv = wcsnrtombs(__to_next, &__from_next, + __from_chunk_end - __from_next, + __to_end - __to_next, &__state); + if (__conv == static_cast<size_t>(-1)) { - const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state); + // In case of error, in order to stop at the exact place we + // have to start again from the beginning with a series of + // wcrtomb. 
+ while (__tmp_from < __from_next) + __to_next += wcrtomb(__to_next, *__tmp_from++, &__tmp_state); + __state = __tmp_state; + __ret = error; + } + else if (__from_next && __from_next < __from_chunk_end) + { + __to_next += __conv; + __ret = partial; + } + else + { + __from_next = __from_chunk_end; + __to_next += __conv; + } + + if (__from_next < __from_end && __ret == ok) + { + extern_type __buf[MB_LEN_MAX]; + __tmp_state = __state; + const size_t __conv = wcrtomb(__buf, *__from_next, &__tmp_state); if (__conv == static_cast<size_t>(-1)) + __ret = error; + else if (__conv > static_cast<size_t>(__to_end - __to_next)) + __ret = partial; + else { - __ret = error; - break; + memcpy(__to_next, __buf, __conv); + __state = __tmp_state; + __to_next += __conv; + ++__from_next; } - else if (__conv > static_cast<size_t>(__to_end - __to)) - { - __ret = partial; - break; - } - - memcpy(__to, __buf, __conv); - __state = __tmp_state; - __to += __conv; - __from++; } } @@ -105,11 +115,6 @@ namespace std __uselocale(__old); #endif - if (__ret == ok && __from < __from_end) - __ret = partial; - - __from_next = __from; - __to_next = __to; return __ret; } diff --git a/libstdc++-v3/testsuite/performance/wchar_t_out.cc b/libstdc++-v3/testsuite/performance/wchar_t_out.cc new file mode 100644 index 00000000000..4e5106817d0 --- /dev/null +++ b/libstdc++-v3/testsuite/performance/wchar_t_out.cc @@ -0,0 +1,84 @@ +// Copyright (C) 2003 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, +// USA. + +// As a special exception, you may use this file as part of a free software +// library without restriction. Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License. + +#include +#include +#include +#include +#include +#include + +// libstdc++/11602 +int main(int argc, char** argv) +{ + using namespace std; + using namespace __gnu_test; + + time_counter time; + resource_counter resource; + const int iters = 300000; + + wchar_t wbuf[1024]; + char cbuf[1024]; + + wmemset(wbuf, L'a', 1024); + + // C (iconv) + iconv_t cd = iconv_open(nl_langinfo(CODESET), "WCHAR_T"); + start_counters(time, resource); + for (int i = 0; i < iters; ++i) + { + size_t inbytesleft = 1024 * sizeof(wchar_t); + size_t outbytesleft = 1024; + char* in = reinterpret_cast<char*>(wbuf); + char* out = cbuf; + iconv(cd, &in, &inbytesleft, &out, &outbytesleft); + } + stop_counters(time, resource); + iconv_close(cd); + report_performance(__FILE__, "C (iconv)", time, resource); + clear_counters(time, resource); + + // C++ (codecvt) + locale loc; + const codecvt<wchar_t, char, mbstate_t>& cvt = + use_facet<codecvt<wchar_t, char, mbstate_t> >(loc); + mbstate_t state; + memset(&state, 0, sizeof(state)); + start_counters(time, resource); + for (int i = 0; i < iters; ++i) + { + const wchar_t* from_next; + char* to_next; + cvt.out(state, wbuf, wbuf + 1024, from_next, + cbuf, cbuf + 1024, to_next); + } + stop_counters(time, resource); + 
report_performance(__FILE__, "C++ (codecvt)", time, resource); + + return 0; +}