diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index f6dcbdc472d..6beffdbb124 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,134 @@ +2007-09-11 Johannes Singler + Leonor Frias Moya + Felix Putze + Marius Elvert + Felix Bondarenko + Robert Geisberger + Robin Dapp + Benjamin Kosnik + + Add parallel mode. + * include/parallel: New. + * include/parallel/iterator.h: New. + * include/parallel/multiway_merge.h: New. + * include/parallel/parallel.h: New. + * include/parallel/algorithm: New. + * include/parallel/find_selectors.h: New. + * include/parallel/losertree.h: New. + * include/parallel/list_partition.h: New. + * include/parallel/types.h: New. + * include/parallel/for_each.h: New. + * include/parallel/multiseq_selection.h: New. + * include/parallel/workstealing.h: New. + * include/parallel/base.h: New. + * include/parallel/par_loop.h: New. + * include/parallel/numeric: New. + * include/parallel/features.h: New. + * include/parallel/quicksort.h: New. + * include/parallel/algorithmfwd.h: New. + * include/parallel/equally_split.h: New. + * include/parallel/compiletime_settings.h: New. + * include/parallel/for_each_selectors.h: New. + * include/parallel/basic_iterator.h: New. + * include/parallel/omp_loop_static.h: New. + * include/parallel/random_shuffle.h: New. + * include/parallel/balanced_quicksort.h: New. + * include/parallel/set_operations.h: New. + * include/parallel/tags.h: New. + * include/parallel/merge.h: New. + * include/parallel/tree.h: New. + * include/parallel/settings.h: New. + * include/parallel/unique_copy.h: New. + * include/parallel/multiway_mergesort.h: New. + * include/parallel/numericfwd.h: New. + * include/parallel/search.h: New. + * include/parallel/partition.h: New. + * include/parallel/compatibility.h: New. + * include/parallel/algobase.h: New. + * include/parallel/find.h: New. + * include/parallel/partial_sum.h: New. + * include/parallel/algo.h: New. + * include/parallel/omp_loop.h: New. 
+ * include/parallel/queue.h: New. + * include/parallel/timing.h: New. + * include/parallel/sort.h: New. + * include/parallel/checkers.h: New. + * include/parallel/random_number.h: New. + * include/bits/algorithmfwd.h: New. + + * acinclude.m4 (GLIBCXX_ENABLE_PARALLEL): New. + * configure.host: Add atomic_flags. + * configure.ac: Export ATOMIC_FLAGS, call GLIBCXX_ENABLE_PARALLEL. + * src/Makefile.am: Add parallel_list rules. + * include/Makefile.am: Add parallel files. + * testsuite/Makefile.am (check-parallel): Add. + (check-performance-parallel): Add. + * config.h.in: Regenerate. + * configure: Same. + * libsupc++/Makefile.in: Same. + * testsuite/Makefile.in: Same. + * Makefile.in: Same. + * libmath/Makefile.in: Same. + * include/Makefile.in: Same. + * src/Makefile.in: Same. + * po/Makefile.in: Same. + + * config/abi/pre/gnu.ver: Export parallel list bits. + + * docs/html/parallel_mode.html: New. + * docs/html/documentation.html: Add link. + * docs/doxygen/user.cfg.in: Adjust for new files and directory. + * docs/doxygen/doxygroups.cc: Adjust namespace markup. + + * include/debug/set.h: Adjust for _GLIBCXX_STD_D or _P change. + * include/debug/bitset: Same. + * include/debug/multiset.h: Same. + * include/debug/vector: Same. + * include/debug/map.h: Same. + * include/debug/deque: Same. + * include/debug/list: Same. + * include/debug/debug.h: Same. + * include/debug/multimap.h: Same. + * include/std/algorithm: Same. + * include/std/numeric: Same. + * include/std/bitset: Same. + * include/std/string: Same. + * include/ext/hash_map: Same. + * include/ext/hash_set: Same. + * include/bits/stl_list.h: Same. + * include/bits/stl_map.h: Same. + * include/bits/stl_algobase.h: Same. + * include/bits/stl_set.h: Same. + * include/bits/stl_multimap.h: Same. + * include/bits/stl_vector.h: Same. + * include/bits/stl_numeric.h: Same. + * include/bits/stl_deque.h: Same. + * include/bits/stl_multiset.h: Same. + * include/bits/char_traits.h: Same. + * include/bits/stl_algo.h: Same. 
+ * include/bits/c++config: Same. + * include/bits/vector.tcc: Same. + * include/bits/deque.tcc: Same. + * include/bits/stl_bvector.h: Same. + * include/bits/list.tcc: Same. + * src/list.cc: Same. + * src/parallel_list.cc: New. + + * testsuite/lib/libstdc++.exp (check_v3_target_parallel_mode): New. + * testsuite/lib/dg-options.exp (dg-require-parallel-mode): New. + * scripts/testsuite_flags.in (--cxxparallelflags): New. + * scripts/check_performance: Adjust. + * testsuite/25_algorithms/headers/parallel_algorithm.cc: New. + * testsuite/25_algorithms/headers/algorithm_parallel_mode.cc: New. + * testsuite/25_algorithms/headers/parallel_algorithm_mixed1.cc: New. + * testsuite/25_algorithms/headers/parallel_algorithm_mixed2.cc: New. + * testsuite/26_numerics/headers/numeric/parallel_numeric.cc: New. + * testsuite/26_numerics/headers/numeric/numeric_parallel_mode.cc: New. + * testsuite/26_numerics/headers/numeric/ + parallel_numeric_mixed1.cc: New. + * testsuite/26_numerics/headers/numeric/ + parallel_numeric_mixed2.cc: New. 
+ 2007-09-11 Daniel Jacobowitz * testsuite/lib/libstdc++.exp (libstdc++_init): Revert part of diff --git a/libstdc++-v3/Makefile.in b/libstdc++-v3/Makefile.in index ac125e8d6f2..c3d4cc592e0 100644 --- a/libstdc++-v3/Makefile.in +++ b/libstdc++-v3/Makefile.in @@ -103,6 +103,7 @@ AMTAR = @AMTAR@ AR = @AR@ AS = @AS@ ATOMICITY_SRCDIR = @ATOMICITY_SRCDIR@ +ATOMIC_FLAGS = @ATOMIC_FLAGS@ ATOMIC_WORD_SRCDIR = @ATOMIC_WORD_SRCDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -140,6 +141,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_PARALLEL_FALSE = @ENABLE_PARALLEL_FALSE@ +ENABLE_PARALLEL_TRUE = @ENABLE_PARALLEL_TRUE@ ENABLE_SYMVERS_DARWIN_FALSE = @ENABLE_SYMVERS_DARWIN_FALSE@ ENABLE_SYMVERS_DARWIN_TRUE = @ENABLE_SYMVERS_DARWIN_TRUE@ ENABLE_SYMVERS_FALSE = @ENABLE_SYMVERS_FALSE@ diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index d29b7932a3e..a6343edb4e3 100644 --- a/libstdc++-v3/acinclude.m4 +++ b/libstdc++-v3/acinclude.m4 @@ -1725,6 +1725,23 @@ AC_DEFUN([GLIBCXX_ENABLE_CONCEPT_CHECKS], [ fi ]) +dnl +dnl Check for parallel mode pre-requisites, including OpenMP support. +dnl +dnl + Usage: GLIBCXX_ENABLE_PARALLEL +dnl +AC_DEFUN([GLIBCXX_ENABLE_PARALLEL], [ + + enable_parallel=no; + if test -f "${glibcxx_builddir}/../libgomp/omp.h"; then + enable_parallel=yes; + fi + + AC_MSG_CHECKING([for parallel mode support]) + AC_MSG_RESULT([$enable_parallel]) + GLIBCXX_CONDITIONAL(ENABLE_PARALLEL, test $enable_parallel = yes) +]) + dnl dnl Check for which I/O library to use: stdio, or something specific. 
@@ -2072,7 +2089,7 @@ AC_DEFUN([GLIBCXX_ENABLE_PCH], [ GLIBCXX_CONDITIONAL(GLIBCXX_BUILD_PCH, test $enable_libstdcxx_pch = yes) if test $enable_libstdcxx_pch = yes; then - glibcxx_PCHFLAGS="-include bits/stdtr1c++.h" + glibcxx_PCHFLAGS="-include bits/stdc++.h" else glibcxx_PCHFLAGS="" fi diff --git a/libstdc++-v3/config.h.in b/libstdc++-v3/config.h.in index 647657c4f27..4a9ffb763cf 100644 --- a/libstdc++-v3/config.h.in +++ b/libstdc++-v3/config.h.in @@ -63,6 +63,9 @@ /* Define if EBADMSG exists. */ #undef HAVE_EBADMSG +/* Define if ECANCELED exists. */ +#undef HAVE_ECANCELED + /* Define to 1 if you have the header file. */ #undef HAVE_ENDIAN_H diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index 12d21f1c8ec..cc6252e91c7 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -770,6 +770,13 @@ GLIBCXX_3.4.10 { _ZNKSt4hashISsEclESs; _ZNKSt4hashISt10error_codeEclES0_; + # for parallel mode + _ZNSt9__cxx199815_List_node_base4hook*; + _ZNSt9__cxx199815_List_node_base4swap*; + _ZNSt9__cxx199815_List_node_base6unhookEv; + _ZNSt9__cxx199815_List_node_base7reverseEv; + _ZNSt9__cxx199815_List_node_base8transfer*; + } GLIBCXX_3.4.9; # Symbols in the support library (libsupc++) have their own tag. 
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure index 4b4a4443341..74d0aaea0a9 100755 --- a/libstdc++-v3/configure +++ b/libstdc++-v3/configure @@ -458,7 +458,7 @@ ac_includes_default="\ # include #endif" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS libtool_VERSION multi_basedir build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar glibcxx_builddir glibcxx_srcdir toplevel_srcdir CC ac_ct_CC EXEEXT OBJEXT CXX ac_ct_CXX CFLAGS CXXFLAGS LN_S AS ac_ct_AS AR ac_ct_AR RANLIB ac_ct_RANLIB MAINTAINER_MODE_TRUE MAINTAINER_MODE_FALSE MAINT CPP CPPFLAGS EGREP LIBTOOL SED FGREP GREP LD DUMPBIN ac_ct_DUMPBIN NM lt_ECHO LDFLAGS CXXCPP enable_shared enable_static GLIBCXX_HOSTED_TRUE GLIBCXX_HOSTED_FALSE GLIBCXX_BUILD_PCH_TRUE GLIBCXX_BUILD_PCH_FALSE glibcxx_PCHFLAGS CSTDIO_H BASIC_FILE_H BASIC_FILE_CC check_msgfmt glibcxx_MOFILES glibcxx_POFILES glibcxx_localedir USE_NLS CLOCALE_H CMESSAGES_H CCODECVT_CC CCOLLATE_CC CCTYPE_CC CMESSAGES_CC CMONEY_CC CNUMERIC_CC CTIME_H CTIME_CC CLOCALE_CC CLOCALE_INTERNAL_H ALLOCATOR_H ALLOCATOR_NAME C_INCLUDE_DIR GLIBCXX_C_HEADERS_C_TRUE GLIBCXX_C_HEADERS_C_FALSE GLIBCXX_C_HEADERS_C_STD_TRUE GLIBCXX_C_HEADERS_C_STD_FALSE GLIBCXX_C_HEADERS_C_GLOBAL_TRUE GLIBCXX_C_HEADERS_C_GLOBAL_FALSE GLIBCXX_C_HEADERS_COMPATIBILITY_TRUE GLIBCXX_C_HEADERS_COMPATIBILITY_FALSE GLIBCXX_C_HEADERS_EXTRA_TRUE GLIBCXX_C_HEADERS_EXTRA_FALSE DEBUG_FLAGS GLIBCXX_BUILD_DEBUG_TRUE 
GLIBCXX_BUILD_DEBUG_FALSE EXTRA_CXX_FLAGS glibcxx_thread_h WERROR SECTION_FLAGS SECTION_LDFLAGS OPT_LDFLAGS LIBMATHOBJS LIBICONV LTLIBICONV SYMVER_FILE port_specific_symbol_files ENABLE_SYMVERS_TRUE ENABLE_SYMVERS_FALSE ENABLE_SYMVERS_GNU_TRUE ENABLE_SYMVERS_GNU_FALSE ENABLE_SYMVERS_GNU_NAMESPACE_TRUE ENABLE_SYMVERS_GNU_NAMESPACE_FALSE ENABLE_SYMVERS_DARWIN_TRUE ENABLE_SYMVERS_DARWIN_FALSE ENABLE_VISIBILITY_TRUE ENABLE_VISIBILITY_FALSE GLIBCXX_LDBL_COMPAT_TRUE GLIBCXX_LDBL_COMPAT_FALSE baseline_dir ATOMICITY_SRCDIR ATOMIC_WORD_SRCDIR CPU_DEFINES_SRCDIR ABI_TWEAKS_SRCDIR OS_INC_SRCDIR ERROR_CONSTANTS_SRCDIR glibcxx_prefixdir gxx_include_dir glibcxx_toolexecdir glibcxx_toolexeclibdir GLIBCXX_INCLUDES TOPLEVEL_INCLUDES OPTIMIZE_CXXFLAGS WARN_FLAGS LIBSUPCXX_PICFLAGS LIBOBJS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS libtool_VERSION multi_basedir build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar glibcxx_builddir glibcxx_srcdir toplevel_srcdir CC ac_ct_CC EXEEXT OBJEXT CXX ac_ct_CXX CFLAGS CXXFLAGS LN_S AS ac_ct_AS AR ac_ct_AR RANLIB ac_ct_RANLIB MAINTAINER_MODE_TRUE MAINTAINER_MODE_FALSE MAINT CPP CPPFLAGS EGREP LIBTOOL SED FGREP GREP LD DUMPBIN ac_ct_DUMPBIN NM lt_ECHO LDFLAGS CXXCPP enable_shared enable_static GLIBCXX_HOSTED_TRUE GLIBCXX_HOSTED_FALSE GLIBCXX_BUILD_PCH_TRUE GLIBCXX_BUILD_PCH_FALSE glibcxx_PCHFLAGS CSTDIO_H BASIC_FILE_H BASIC_FILE_CC check_msgfmt 
glibcxx_MOFILES glibcxx_POFILES glibcxx_localedir USE_NLS CLOCALE_H CMESSAGES_H CCODECVT_CC CCOLLATE_CC CCTYPE_CC CMESSAGES_CC CMONEY_CC CNUMERIC_CC CTIME_H CTIME_CC CLOCALE_CC CLOCALE_INTERNAL_H ALLOCATOR_H ALLOCATOR_NAME C_INCLUDE_DIR GLIBCXX_C_HEADERS_C_TRUE GLIBCXX_C_HEADERS_C_FALSE GLIBCXX_C_HEADERS_C_STD_TRUE GLIBCXX_C_HEADERS_C_STD_FALSE GLIBCXX_C_HEADERS_C_GLOBAL_TRUE GLIBCXX_C_HEADERS_C_GLOBAL_FALSE GLIBCXX_C_HEADERS_COMPATIBILITY_TRUE GLIBCXX_C_HEADERS_COMPATIBILITY_FALSE GLIBCXX_C_HEADERS_EXTRA_TRUE GLIBCXX_C_HEADERS_EXTRA_FALSE DEBUG_FLAGS GLIBCXX_BUILD_DEBUG_TRUE GLIBCXX_BUILD_DEBUG_FALSE ENABLE_PARALLEL_TRUE ENABLE_PARALLEL_FALSE EXTRA_CXX_FLAGS glibcxx_thread_h WERROR SECTION_FLAGS SECTION_LDFLAGS OPT_LDFLAGS LIBMATHOBJS LIBICONV LTLIBICONV SYMVER_FILE port_specific_symbol_files ENABLE_SYMVERS_TRUE ENABLE_SYMVERS_FALSE ENABLE_SYMVERS_GNU_TRUE ENABLE_SYMVERS_GNU_FALSE ENABLE_SYMVERS_GNU_NAMESPACE_TRUE ENABLE_SYMVERS_GNU_NAMESPACE_FALSE ENABLE_SYMVERS_DARWIN_TRUE ENABLE_SYMVERS_DARWIN_FALSE ENABLE_VISIBILITY_TRUE ENABLE_VISIBILITY_FALSE GLIBCXX_LDBL_COMPAT_TRUE GLIBCXX_LDBL_COMPAT_FALSE baseline_dir ATOMICITY_SRCDIR ATOMIC_WORD_SRCDIR ATOMIC_FLAGS CPU_DEFINES_SRCDIR ABI_TWEAKS_SRCDIR OS_INC_SRCDIR ERROR_CONSTANTS_SRCDIR glibcxx_prefixdir gxx_include_dir glibcxx_toolexecdir glibcxx_toolexeclibdir GLIBCXX_INCLUDES TOPLEVEL_INCLUDES OPTIMIZE_CXXFLAGS WARN_FLAGS LIBSUPCXX_PICFLAGS LIBOBJS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. 
@@ -13949,7 +13949,7 @@ echo "${ECHO_T}$enable_libstdcxx_pch" >&6 if test $enable_libstdcxx_pch = yes; then - glibcxx_PCHFLAGS="-include bits/stdtr1c++.h" + glibcxx_PCHFLAGS="-include bits/stdc++.h" else glibcxx_PCHFLAGS="" fi @@ -16468,6 +16468,19 @@ fi; echo "${ECHO_T}$enable_libstdcxx_debug" >&6 + + + enable_parallel=no; + if test -f "${glibcxx_builddir}/../libgomp/omp.h"; then + enable_parallel=yes; + fi + + echo "$as_me:$LINENO: checking for parallel mode support" >&5 +echo $ECHO_N "checking for parallel mode support... $ECHO_C" >&6 + echo "$as_me:$LINENO: result: $enable_parallel" >&5 +echo "${ECHO_T}$enable_parallel" >&6 + + echo "$as_me:$LINENO: checking for extra compiler flags for building" >&5 echo $ECHO_N "checking for extra compiler flags for building... $ECHO_C" >&6 # Check whether --enable-cxx-flags or --disable-cxx-flags was given. @@ -17279,7 +17292,7 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu # Fake what AC_TRY_COMPILE does. XXX Look at redoing this new-style. cat > conftest.$ac_ext << EOF -#line 17282 "configure" +#line 17295 "configure" int main() { // NB: _Atomic_word not necessarily int. @@ -93897,6 +93910,7 @@ done # Propagate the target-specific source directories through the build chain. ATOMICITY_SRCDIR=config/${atomicity_dir} ATOMIC_WORD_SRCDIR=config/${atomic_word_dir} +ATOMIC_FLAGS=${atomic_flags} CPU_DEFINES_SRCDIR=config/${cpu_defines_dir} OS_INC_SRCDIR=config/${os_include_dir} ERROR_CONSTANTS_SRCDIR=config/${error_constants_dir} @@ -93909,6 +93923,7 @@ ABI_TWEAKS_SRCDIR=config/${abi_tweaks_dir} + # Determine cross-compile flags and AM_CONDITIONALs. 
#AC_SUBST(GLIBCXX_IS_NATIVE) #AM_CONDITIONAL(CANADIAN, test $CANADIAN = yes) @@ -94004,6 +94019,17 @@ fi +if test $enable_parallel = yes; then + ENABLE_PARALLEL_TRUE= + ENABLE_PARALLEL_FALSE='#' +else + ENABLE_PARALLEL_TRUE='#' + ENABLE_PARALLEL_FALSE= +fi + + + + if test $enable_symvers != no; then ENABLE_SYMVERS_TRUE= ENABLE_SYMVERS_FALSE='#' @@ -94429,6 +94455,13 @@ echo "$as_me: error: conditional \"GLIBCXX_BUILD_DEBUG\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi +if test -z "${ENABLE_PARALLEL_TRUE}" && test -z "${ENABLE_PARALLEL_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"ENABLE_PARALLEL\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"ENABLE_PARALLEL\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi if test -z "${ENABLE_SYMVERS_TRUE}" && test -z "${ENABLE_SYMVERS_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"ENABLE_SYMVERS\" was never defined. Usually this means the macro was only invoked conditionally." 
>&5 @@ -95474,6 +95507,8 @@ s,@GLIBCXX_C_HEADERS_EXTRA_FALSE@,$GLIBCXX_C_HEADERS_EXTRA_FALSE,;t t s,@DEBUG_FLAGS@,$DEBUG_FLAGS,;t t s,@GLIBCXX_BUILD_DEBUG_TRUE@,$GLIBCXX_BUILD_DEBUG_TRUE,;t t s,@GLIBCXX_BUILD_DEBUG_FALSE@,$GLIBCXX_BUILD_DEBUG_FALSE,;t t +s,@ENABLE_PARALLEL_TRUE@,$ENABLE_PARALLEL_TRUE,;t t +s,@ENABLE_PARALLEL_FALSE@,$ENABLE_PARALLEL_FALSE,;t t s,@EXTRA_CXX_FLAGS@,$EXTRA_CXX_FLAGS,;t t s,@glibcxx_thread_h@,$glibcxx_thread_h,;t t s,@WERROR@,$WERROR,;t t @@ -95500,6 +95535,7 @@ s,@GLIBCXX_LDBL_COMPAT_FALSE@,$GLIBCXX_LDBL_COMPAT_FALSE,;t t s,@baseline_dir@,$baseline_dir,;t t s,@ATOMICITY_SRCDIR@,$ATOMICITY_SRCDIR,;t t s,@ATOMIC_WORD_SRCDIR@,$ATOMIC_WORD_SRCDIR,;t t +s,@ATOMIC_FLAGS@,$ATOMIC_FLAGS,;t t s,@CPU_DEFINES_SRCDIR@,$CPU_DEFINES_SRCDIR,;t t s,@ABI_TWEAKS_SRCDIR@,$ABI_TWEAKS_SRCDIR,;t t s,@OS_INC_SRCDIR@,$OS_INC_SRCDIR,;t t diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac index a89e21df8a8..72471f15795 100644 --- a/libstdc++-v3/configure.ac +++ b/libstdc++-v3/configure.ac @@ -107,6 +107,7 @@ GLIBCXX_ENABLE_C99([yes]) GLIBCXX_ENABLE_CONCEPT_CHECKS([no]) GLIBCXX_ENABLE_DEBUG_FLAGS(["-g3 -O0"]) GLIBCXX_ENABLE_DEBUG([no]) +GLIBCXX_ENABLE_PARALLEL GLIBCXX_ENABLE_CXX_FLAGS GLIBCXX_ENABLE_FULLY_DYNAMIC_STRING([no]) @@ -327,12 +328,14 @@ GLIBCXX_CONFIGURE_TESTSUITE # Propagate the target-specific source directories through the build chain. 
ATOMICITY_SRCDIR=config/${atomicity_dir} ATOMIC_WORD_SRCDIR=config/${atomic_word_dir} +ATOMIC_FLAGS=${atomic_flags} CPU_DEFINES_SRCDIR=config/${cpu_defines_dir} OS_INC_SRCDIR=config/${os_include_dir} ERROR_CONSTANTS_SRCDIR=config/${error_constants_dir} ABI_TWEAKS_SRCDIR=config/${abi_tweaks_dir} AC_SUBST(ATOMICITY_SRCDIR) AC_SUBST(ATOMIC_WORD_SRCDIR) +AC_SUBST(ATOMIC_FLAGS) AC_SUBST(CPU_DEFINES_SRCDIR) AC_SUBST(ABI_TWEAKS_SRCDIR) AC_SUBST(OS_INC_SRCDIR) diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host index b916abacc7e..4031b4e1e25 100644 --- a/libstdc++-v3/configure.host +++ b/libstdc++-v3/configure.host @@ -39,6 +39,9 @@ # atomic_word_dir location of atomic_word.h # defaults to generic. # +# atomic_flags extra flags to pass to use atomic instructions +# defaults to nothing. +# # cpu_defines_dir location of cpu_defines.h # defaults to generic. # @@ -71,6 +74,7 @@ c_model=c_global c_compatibility=no atomic_word_dir=cpu/generic +atomic_flags="" atomicity_dir="cpu/generic" cpu_defines_dir="cpu/generic" try_cpu=generic @@ -130,7 +134,8 @@ case "${host_cpu}" in esac -# Set specific CPU overrides for atomic_word_dir. Most can just use generic. +# Set specific CPU overrides for atomic_word_dir and atomic_flags. +# Most can just use generic. # THIS TABLE IS SORTED. KEEP IT THAT WAY. case "${host_cpu}" in alpha*) @@ -142,11 +147,15 @@ case "${host_cpu}" in ia64) atomic_word_dir=cpu/ia64 ;; + i[567]86 | x86_64) + atomic_flags="-march=native" + ;; powerpc* | rs6000) atomic_word_dir=cpu/powerpc ;; sparc* | ultrasparc) atomic_word_dir=cpu/sparc + atomic_flags="-mcpu=v9" ;; esac diff --git a/libstdc++-v3/docs/doxygen/doxygroups.cc b/libstdc++-v3/docs/doxygen/doxygroups.cc index 9824754efc5..68d142fdbfb 100644 --- a/libstdc++-v3/docs/doxygen/doxygroups.cc +++ b/libstdc++-v3/docs/doxygen/doxygroups.cc @@ -45,7 +45,7 @@ * export. Used only when anonymous namespaces cannot be substituted. 
*/ /** @namespace __gnu_debug - * @brief GNU debug mode classes for public use. + * @brief GNU debug classes for public use. */ // // // // // // // // // // // // // // // // // // // // // // // // /** @addtogroup SGIextensions STL extensions from SGI diff --git a/libstdc++-v3/docs/doxygen/user.cfg.in b/libstdc++-v3/docs/doxygen/user.cfg.in index 9854f76a7a7..ab22db0635e 100644 --- a/libstdc++-v3/docs/doxygen/user.cfg.in +++ b/libstdc++-v3/docs/doxygen/user.cfg.in @@ -478,6 +478,7 @@ INPUT = @srcdir@/docs/doxygen/doxygroups.cc \ include/@host_alias@/bits \ include/bits \ include/debug \ + include/parallel \ include/ext \ include/tr1 \ include/tr1_impl \ @@ -562,6 +563,8 @@ INPUT = @srcdir@/docs/doxygen/doxygroups.cc \ include/ext/slist \ include/ext/pb_ds \ include/ext/pb_ds/detail \ + include/parallel/algorithm \ + include/parallel/numeric \ include/tr1/array \ include/tr1/ccomplex \ include/tr1/cctype \ diff --git a/libstdc++-v3/docs/html/documentation.html b/libstdc++-v3/docs/html/documentation.html index f211165cca7..2881caff57d 100644 --- a/libstdc++-v3/docs/html/documentation.html +++ b/libstdc++-v3/docs/html/documentation.html @@ -222,13 +222,15 @@ -
  • Extensions to the Standard Library +
  • Extensions diff --git a/libstdc++-v3/docs/html/parallel_mode.html b/libstdc++-v3/docs/html/parallel_mode.html new file mode 100644 index 00000000000..74db8ca3771 --- /dev/null +++ b/libstdc++-v3/docs/html/parallel_mode.html @@ -0,0 +1,457 @@ + + + + + + + + + + The libstdc++ parallel mode + + + + + +

    The libstdc++ parallel mode

    + +

    + The latest version of this document is always available at + + http://gcc.gnu.org/onlinedocs/libstdc++/parallel_mode.html. +

    + +

    + To the libstdc++-v3 homepage. +

    + + +
    +

    The libstdc++ parallel mode is an experimental parallel +implementation of many algorithms of the C++ Standard Library. +

    + +

    +Several of the standard algorithms, for instance +std::search, are made parallel using OpenMP +annotations. These parallel mode constructs can be invoked by +explicit source declaration or by compiling existing sources with a +specific compiler flag. +

    + +

    The libstdc++ parallel mode

    + +

    The libstdc++ parallel mode performs parallelization of algorithms, +function objects, classes, and functions in the C++ Standard.

    + +

    Using the libstdc++ parallel mode

    + +

    To use the libstdc++ parallel mode, compile your application with + the compiler flag -D_GLIBCXX_PARALLEL -fopenmp. This + will link in libgomp, the GNU OpenMP implementation, + whose presence is mandatory. In addition, hardware capable of atomic + operations is de rigueur. Actually activating these atomic + operations may require explicit compiler flags on some targets + (like sparc and x86), such as -march=i686, + -march=native or -mcpu=v9. +

    + +

    Note that the _GLIBCXX_PARALLEL define may change the + sizes and behavior of standard class templates such as + std::search, and therefore one can only link code + compiled with parallel mode and code compiled without parallel mode + if no instantiation of a container is passed between the two + translation units. Parallel mode functionality has distinct linkage, + and cannot be confused with normal mode symbols.

    + + +

    The following library components in the include +<numeric> are included in the parallel mode:

    +
      +
    • std::accumulate
    • +
    • std::adjacent_difference
    • +
    • std::inner_product
    • +
    • std::partial_sum
    • +
    + +

    The following library components in the include +<algorithm> are included in the parallel mode:

    +
      +
    • std::adjacent_find
    • +
    • std::count
    • +
    • std::count_if
    • +
    • std::equal
    • +
    • std::find
    • +
    • std::find_if
    • +
    • std::find_first_of
    • +
    • std::for_each
    • +
    • std::generate
    • +
    • std::generate_n
    • +
    • std::lexicographical_compare
    • +
    • std::mismatch
    • +
    • std::search
    • +
    • std::search_n
    • +
    • std::transform
    • +
    • std::replace
    • +
    • std::replace_if
    • +
    • std::max_element
    • +
    • std::merge
    • +
    • std::min_element
    • +
    • std::nth_element
    • +
    • std::partial_sort
    • +
    • std::partition
    • +
    • std::random_shuffle
    • +
    • std::set_union
    • +
    • std::set_intersection
    • +
    • std::set_symmetric_difference
    • +
    • std::set_difference
    • +
    • std::sort
    • +
    • std::stable_sort
    • +
    • std::unique_copy
    • +
    + + +

    Using the parallel algorithms without parallel mode

    + +

    When it is not feasible to recompile your entire application, or + only specific algorithms need to be parallel-aware, individual + parallel algorithms can be made available explicitly. These + parallel algorithms are functionally equivalent to the standard + drop-in algorithms used in parallel mode, but they are available in + a separate namespace as GNU extensions and may be used in programs + compiled with either release mode or with parallel mode. The + following table provides the names and headers of the parallel + algorithms: +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AlgorithmHeaderParallel algorithmParallel header
    std::accumulate<numeric>__gnu_parallel::accumulate<parallel/numeric>
    std::adjacent_difference<numeric>__gnu_parallel::adjacent_difference<parallel/numeric>
    std::inner_product<numeric>__gnu_parallel::inner_product<parallel/numeric>
    std::partial_sum<numeric>__gnu_parallel::partial_sum<parallel/numeric>
    std::adjacent_find<algorithm>__gnu_parallel::adjacent_find<parallel/algorithm>
    std::count<algorithm>__gnu_parallel::count<parallel/algorithm>
    std::count_if<algorithm>__gnu_parallel::count_if<parallel/algorithm>
    std::equal<algorithm>__gnu_parallel::equal<parallel/algorithm>
    std::find<algorithm>__gnu_parallel::find<parallel/algorithm>
    std::find_if<algorithm>__gnu_parallel::find_if<parallel/algorithm>
    std::find_first_of<algorithm>__gnu_parallel::find_first_of<parallel/algorithm>
    std::for_each<algorithm>__gnu_parallel::for_each<parallel/algorithm>
    std::generate<algorithm>__gnu_parallel::generate<parallel/algorithm>
    std::generate_n<algorithm>__gnu_parallel::generate_n<parallel/algorithm>
    std::lexicographical_compare<algorithm>__gnu_parallel::lexicographical_compare<parallel/algorithm>
    std::mismatch<algorithm>__gnu_parallel::mismatch<parallel/algorithm>
    std::search<algorithm>__gnu_parallel::search<parallel/algorithm>
    std::search_n<algorithm>__gnu_parallel::search_n<parallel/algorithm>
    std::transform<algorithm>__gnu_parallel::transform<parallel/algorithm>
    std::replace<algorithm>__gnu_parallel::replace<parallel/algorithm>
    std::replace_if<algorithm>__gnu_parallel::replace_if<parallel/algorithm>
    std::max_element<algorithm>__gnu_parallel::max_element<parallel/algorithm>
    std::merge<algorithm>__gnu_parallel::merge<parallel/algorithm>
    std::min_element<algorithm>__gnu_parallel::min_element<parallel/algorithm>
    std::nth_element<algorithm>__gnu_parallel::nth_element<parallel/algorithm>
    std::partial_sort<algorithm>__gnu_parallel::partial_sort<parallel/algorithm>
    std::partition<algorithm>__gnu_parallel::partition<parallel/algorithm>
    std::random_shuffle<algorithm>__gnu_parallel::random_shuffle<parallel/algorithm>
    std::set_union<algorithm>__gnu_parallel::set_union<parallel/algorithm>
    std::set_intersection<algorithm>__gnu_parallel::set_intersection<parallel/algorithm>
    std::set_symmetric_difference<algorithm>__gnu_parallel::set_symmetric_difference<parallel/algorithm>
    std::set_difference<algorithm>__gnu_parallel::set_difference<parallel/algorithm>
    std::sort<algorithm>__gnu_parallel::sort<parallel/algorithm>
    std::stable_sort<algorithm>__gnu_parallel::stable_sort<parallel/algorithm>
    std::unique_copy<algorithm>__gnu_parallel::unique_copy<parallel/algorithm>
    + + +

    Parallel mode semantics

    +

    Something about exception safety, interaction with threads, +etc. Goal is to have the usual constraints of the STL with respect to +exception safety and threads, but add in support for parallel +computing.

    + +

    Something about compile-time settings and configuration, ie using +__gnu_parallel::Settings. XXX Up in the air.

    + +

    Interface basics and relevant namespaces

    + +

    Two namespaces contain the parallel mode: +std::__parallel and __gnu_parallel. +

    + +

    One namespace contains versions of code that are explicitly sequential: +__gnu_serial. +

    + +

    Parallel implementations of the sequential standard components are +defined in namespace std::__parallel. For instance, +std::transform from <algorithm> has a parallel +counterpart in std::__parallel::transform from +<parallel/algorithm>. In addition, these parallel +implementations are injected into namespace +__gnu_parallel with using declarations. +

    + +

    Support and infrastructure is in namespace __gnu_parallel. +

    + +

    More information, and an organized index of types and functions +related to the parallel mode on a per-namespace basis, can be found in +the generated source documentation. +

    + +

    Testing

    + +

    Both the normal conformance and regression tests and the +supplemental performance tests work.

    + +

    To run the conformance and regression tests with the parallel mode +active,

    +make check-parallel + +

    The log and summary files for conformance testing are in the +testsuite/parallel directory.

    + +

    To run the performance tests with the parallel mode active,

    +make check-performance-parallel + +

    The result file for performance testing is in the +testsuite directory, in the file +libstdc++_performance.sum. In addition, the policy-based +containers have their own visualizations, which have more +software dependencies than the usual bare-bones text file, and can be +generated by using the make doc-performance rule in the +testsuite's Makefile.

    + +

    Return to the top of the page or + to the libstdc++ homepage. +

    + + + + +
    +

    +See license.html for copying conditions. +Comments and suggestions are welcome, and may be sent to +the libstdc++ mailing list. +

    + + + + diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index 53ede2bc3f5..3dd11b480e7 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -70,6 +70,7 @@ std_headers = \ bits_srcdir = ${glibcxx_srcdir}/include/bits bits_builddir = ./bits bits_headers = \ + ${bits_srcdir}/algorithmfwd.h \ ${bits_srcdir}/allocator.h \ ${bits_srcdir}/basic_ios.h \ ${bits_srcdir}/basic_ios.tcc \ @@ -730,6 +731,58 @@ debug_headers = \ ${debug_srcdir}/string \ ${debug_srcdir}/vector +# Parallel mode headers +parallel_srcdir = ${glibcxx_srcdir}/include/parallel +parallel_builddir = ./parallel +parallel_headers = \ + ${parallel_srcdir}/algorithm \ + ${parallel_srcdir}/algobase.h \ + ${parallel_srcdir}/algo.h \ + ${parallel_srcdir}/algorithm \ + ${parallel_srcdir}/algorithmfwd.h \ + ${parallel_srcdir}/balanced_quicksort.h \ + ${parallel_srcdir}/base.h \ + ${parallel_srcdir}/basic_iterator.h \ + ${parallel_srcdir}/checkers.h \ + ${parallel_srcdir}/compatibility.h \ + ${parallel_srcdir}/compiletime_settings.h \ + ${parallel_srcdir}/equally_split.h \ + ${parallel_srcdir}/features.h \ + ${parallel_srcdir}/find.h \ + ${parallel_srcdir}/find_selectors.h \ + ${parallel_srcdir}/for_each.h \ + ${parallel_srcdir}/for_each_selectors.h \ + ${parallel_srcdir}/iterator.h \ + ${parallel_srcdir}/list_partition.h \ + ${parallel_srcdir}/losertree.h \ + ${parallel_srcdir}/merge.h \ + ${parallel_srcdir}/multiseq_selection.h \ + ${parallel_srcdir}/multiway_merge.h \ + ${parallel_srcdir}/multiway_mergesort.h \ + ${parallel_srcdir}/numeric \ + ${parallel_srcdir}/numericfwd.h \ + ${parallel_srcdir}/omp_loop.h \ + ${parallel_srcdir}/omp_loop_static.h \ + ${parallel_srcdir}/parallel.h \ + ${parallel_srcdir}/par_loop.h \ + ${parallel_srcdir}/partial_sum.h \ + ${parallel_srcdir}/partition.h \ + ${parallel_srcdir}/queue.h \ + ${parallel_srcdir}/quicksort.h \ + ${parallel_srcdir}/random_number.h \ + ${parallel_srcdir}/random_shuffle.h \ + 
${parallel_srcdir}/search.h \ + ${parallel_srcdir}/set_operations.h \ + ${parallel_srcdir}/settings.h \ + ${parallel_srcdir}/sort.h \ + ${parallel_srcdir}/tags.h \ + ${parallel_srcdir}/timing.h \ + ${parallel_srcdir}/tree.h \ + ${parallel_srcdir}/types.h \ + ${parallel_srcdir}/unique_copy.h \ + ${parallel_srcdir}/workstealing.h + + # Some of the different "C" header models need extra files. # Some "C" header schemes require the "C" compatibility headers. # For --enable-cheaders=c_std @@ -791,7 +844,8 @@ pch1_output_anchor = ${host_builddir}/stdc++.h pch1_output_installdir = ${host_installdir}/stdc++.h.gch pch1a_output = ${pch1_output_builddir}/O0g.gch pch1b_output = ${pch1_output_builddir}/O2g.gch -pch1_output = ${pch1a_output} ${pch1b_output} +pch1c_output = ${pch1_output_builddir}/O2gp.gch +pch1_output = ${pch1a_output} ${pch1b_output} ${pch1c_output} pch2_source = ${glibcxx_srcdir}/include/precompiled/stdtr1c++.h pch2_output_builddir = ${host_builddir}/stdtr1c++.h.gch @@ -823,7 +877,7 @@ endif allstamped = \ stamp-std stamp-bits stamp-c_base stamp-c_base_extra \ stamp-c_compatibility stamp-backward stamp-ext stamp-pb \ - stamp-tr1 stamp-tr1-impl stamp-debug stamp-host + stamp-tr1 stamp-tr1-impl stamp-debug stamp-parallel stamp-host # List of all files that are created by explicit building, editing, or # catenation. @@ -980,6 +1034,15 @@ stamp-debug: ${debug_headers} fi ;\ $(STAMP) stamp-debug +stamp-parallel: ${parallel_headers} + @if [ ! -d "${parallel_builddir}" ]; then \ + mkdir -p ${parallel_builddir} ;\ + fi ;\ + if [ ! -f stamp-parallel ]; then \ + (cd ${parallel_builddir} && @LN_S@ $? . || true) ;\ + fi ;\ + $(STAMP) stamp-parallel + stamp-${host_alias}: @if [ ! 
-d ${host_builddir} ]; then \ mkdir -p ${host_builddir} ;\ @@ -1091,7 +1154,7 @@ ${host_builddir}/gthr-default.h: ${toplevel_srcdir}/gcc/${glibcxx_thread_h} \ -e 's,^#include "\(.*\)",#include ,g' \ < ${toplevel_srcdir}/gcc/${glibcxx_thread_h} > $@ -# Build two precompiled C++ includes, stdc++.h.gch/*.gch +# Build three precompiled C++ includes, stdc++.h.gch/*.gch ${pch1a_output}: ${allstamped} ${host_builddir}/c++config.h ${pch1_source} if [ ! -d "${pch1_output_builddir}" ]; then \ mkdir -p ${pch1_output_builddir}; \ @@ -1106,6 +1169,14 @@ ${pch1b_output}: ${allstamped} ${host_builddir}/c++config.h ${pch1_source} $(CXX) $(PCHFLAGS) $(AM_CPPFLAGS) -O2 -g ${pch1_source} -o $@ touch ${pch1_output_anchor} +${pch1c_output}: ${allstamped} ${host_builddir}/c++config.h ${pch1_source} + if [ ! -d "${pch1_output_builddir}" ]; then \ + mkdir -p ${pch1_output_builddir}; \ + fi; \ + CXX_PARALLEL_FLAGS="-fgomp -march=native"; + $(CXX) $(PCHFLAGS) $(AM_CPPFLAGS) -O2 -g $(CXX_PARALLEL_FLAGS) ${pch1_source} -o $@ + touch ${pch1_output_anchor} + # Build a precompiled TR1 include, stdtr1c++.h.gch/O2.gch ${pch2_output}: ${pch2_source} ${pch1_output} if [ ! 
-d "${pch2_output_builddir}" ]; then \ @@ -1217,6 +1288,9 @@ install-headers: $(mkinstalldirs) $(DESTDIR)${gxx_include_dir}/${debug_builddir} for file in ${debug_headers}; do \ $(INSTALL_DATA) $${file} $(DESTDIR)${gxx_include_dir}/${debug_builddir}; done + $(mkinstalldirs) $(DESTDIR)${gxx_include_dir}/${parallel_builddir} + for file in ${parallel_headers}; do \ + $(INSTALL_DATA) $${file} $(DESTDIR)${gxx_include_dir}/${parallel_builddir}; done $(mkinstalldirs) $(DESTDIR)${host_installdir} for file in ${host_headers} ${host_headers_extra} \ ${thread_host_headers}; do \ diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index 4f779a0de62..80a7c2409dd 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -71,6 +71,7 @@ AMTAR = @AMTAR@ AR = @AR@ AS = @AS@ ATOMICITY_SRCDIR = @ATOMICITY_SRCDIR@ +ATOMIC_FLAGS = @ATOMIC_FLAGS@ ATOMIC_WORD_SRCDIR = @ATOMIC_WORD_SRCDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -108,6 +109,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_PARALLEL_FALSE = @ENABLE_PARALLEL_FALSE@ +ENABLE_PARALLEL_TRUE = @ENABLE_PARALLEL_TRUE@ ENABLE_SYMVERS_DARWIN_FALSE = @ENABLE_SYMVERS_DARWIN_FALSE@ ENABLE_SYMVERS_DARWIN_TRUE = @ENABLE_SYMVERS_DARWIN_TRUE@ ENABLE_SYMVERS_FALSE = @ENABLE_SYMVERS_FALSE@ @@ -316,6 +319,7 @@ std_headers = \ bits_srcdir = ${glibcxx_srcdir}/include/bits bits_builddir = ./bits bits_headers = \ + ${bits_srcdir}/algorithmfwd.h \ ${bits_srcdir}/allocator.h \ ${bits_srcdir}/basic_ios.h \ ${bits_srcdir}/basic_ios.tcc \ @@ -965,6 +969,58 @@ debug_headers = \ ${debug_srcdir}/string \ ${debug_srcdir}/vector + +# Parallel mode headers +parallel_srcdir = ${glibcxx_srcdir}/include/parallel +parallel_builddir = ./parallel +parallel_headers = \ + ${parallel_srcdir}/algorithm \ + ${parallel_srcdir}/algobase.h \ + ${parallel_srcdir}/algo.h \ + ${parallel_srcdir}/algorithm \ + ${parallel_srcdir}/algorithmfwd.h \ + 
${parallel_srcdir}/balanced_quicksort.h \ + ${parallel_srcdir}/base.h \ + ${parallel_srcdir}/basic_iterator.h \ + ${parallel_srcdir}/checkers.h \ + ${parallel_srcdir}/compatibility.h \ + ${parallel_srcdir}/compiletime_settings.h \ + ${parallel_srcdir}/equally_split.h \ + ${parallel_srcdir}/features.h \ + ${parallel_srcdir}/find.h \ + ${parallel_srcdir}/find_selectors.h \ + ${parallel_srcdir}/for_each.h \ + ${parallel_srcdir}/for_each_selectors.h \ + ${parallel_srcdir}/iterator.h \ + ${parallel_srcdir}/list_partition.h \ + ${parallel_srcdir}/losertree.h \ + ${parallel_srcdir}/merge.h \ + ${parallel_srcdir}/multiseq_selection.h \ + ${parallel_srcdir}/multiway_merge.h \ + ${parallel_srcdir}/multiway_mergesort.h \ + ${parallel_srcdir}/numeric \ + ${parallel_srcdir}/numericfwd.h \ + ${parallel_srcdir}/omp_loop.h \ + ${parallel_srcdir}/omp_loop_static.h \ + ${parallel_srcdir}/parallel.h \ + ${parallel_srcdir}/par_loop.h \ + ${parallel_srcdir}/partial_sum.h \ + ${parallel_srcdir}/partition.h \ + ${parallel_srcdir}/queue.h \ + ${parallel_srcdir}/quicksort.h \ + ${parallel_srcdir}/random_number.h \ + ${parallel_srcdir}/random_shuffle.h \ + ${parallel_srcdir}/search.h \ + ${parallel_srcdir}/set_operations.h \ + ${parallel_srcdir}/settings.h \ + ${parallel_srcdir}/sort.h \ + ${parallel_srcdir}/tags.h \ + ${parallel_srcdir}/timing.h \ + ${parallel_srcdir}/tree.h \ + ${parallel_srcdir}/types.h \ + ${parallel_srcdir}/unique_copy.h \ + ${parallel_srcdir}/workstealing.h + @GLIBCXX_C_HEADERS_EXTRA_FALSE@c_base_headers_extra = # Some of the different "C" header models need extra files. 
@@ -1020,7 +1076,8 @@ pch1_output_anchor = ${host_builddir}/stdc++.h pch1_output_installdir = ${host_installdir}/stdc++.h.gch pch1a_output = ${pch1_output_builddir}/O0g.gch pch1b_output = ${pch1_output_builddir}/O2g.gch -pch1_output = ${pch1a_output} ${pch1b_output} +pch1c_output = ${pch1_output_builddir}/O2gp.gch +pch1_output = ${pch1a_output} ${pch1b_output} ${pch1c_output} pch2_source = ${glibcxx_srcdir}/include/precompiled/stdtr1c++.h pch2_output_builddir = ${host_builddir}/stdtr1c++.h.gch pch2_output_anchor = ${host_builddir}/stdtr1c++.h @@ -1047,7 +1104,7 @@ PCHFLAGS = -Winvalid-pch -Wno-deprecated -x c++-header $(CXXFLAGS) allstamped = \ stamp-std stamp-bits stamp-c_base stamp-c_base_extra \ stamp-c_compatibility stamp-backward stamp-ext stamp-pb \ - stamp-tr1 stamp-tr1-impl stamp-debug stamp-host + stamp-tr1 stamp-tr1-impl stamp-debug stamp-parallel stamp-host # List of all files that are created by explicit building, editing, or @@ -1374,6 +1431,15 @@ stamp-debug: ${debug_headers} fi ;\ $(STAMP) stamp-debug +stamp-parallel: ${parallel_headers} + @if [ ! -d "${parallel_builddir}" ]; then \ + mkdir -p ${parallel_builddir} ;\ + fi ;\ + if [ ! -f stamp-parallel ]; then \ + (cd ${parallel_builddir} && @LN_S@ $? . || true) ;\ + fi ;\ + $(STAMP) stamp-parallel + stamp-${host_alias}: @if [ ! -d ${host_builddir} ]; then \ mkdir -p ${host_builddir} ;\ @@ -1476,7 +1542,7 @@ ${host_builddir}/gthr-default.h: ${toplevel_srcdir}/gcc/${glibcxx_thread_h} \ -e 's,^#include "\(.*\)",#include ,g' \ < ${toplevel_srcdir}/gcc/${glibcxx_thread_h} > $@ -# Build two precompiled C++ includes, stdc++.h.gch/*.gch +# Build three precompiled C++ includes, stdc++.h.gch/*.gch ${pch1a_output}: ${allstamped} ${host_builddir}/c++config.h ${pch1_source} if [ ! 
-d "${pch1_output_builddir}" ]; then \ mkdir -p ${pch1_output_builddir}; \ @@ -1491,6 +1557,14 @@ ${pch1b_output}: ${allstamped} ${host_builddir}/c++config.h ${pch1_source} $(CXX) $(PCHFLAGS) $(AM_CPPFLAGS) -O2 -g ${pch1_source} -o $@ touch ${pch1_output_anchor} +${pch1c_output}: ${allstamped} ${host_builddir}/c++config.h ${pch1_source} + if [ ! -d "${pch1_output_builddir}" ]; then \ + mkdir -p ${pch1_output_builddir}; \ + fi; \ + CXX_PARALLEL_FLAGS="-fgomp -march=native"; + $(CXX) $(PCHFLAGS) $(AM_CPPFLAGS) -O2 -g $(CXX_PARALLEL_FLAGS) ${pch1_source} -o $@ + touch ${pch1_output_anchor} + # Build a precompiled TR1 include, stdtr1c++.h.gch/O2.gch ${pch2_output}: ${pch2_source} ${pch1_output} if [ ! -d "${pch2_output_builddir}" ]; then \ @@ -1599,6 +1673,9 @@ install-headers: $(mkinstalldirs) $(DESTDIR)${gxx_include_dir}/${debug_builddir} for file in ${debug_headers}; do \ $(INSTALL_DATA) $${file} $(DESTDIR)${gxx_include_dir}/${debug_builddir}; done + $(mkinstalldirs) $(DESTDIR)${gxx_include_dir}/${parallel_builddir} + for file in ${parallel_headers}; do \ + $(INSTALL_DATA) $${file} $(DESTDIR)${gxx_include_dir}/${parallel_builddir}; done $(mkinstalldirs) $(DESTDIR)${host_installdir} for file in ${host_headers} ${host_headers_extra} \ ${thread_host_headers}; do \ diff --git a/libstdc++-v3/include/bits/algorithmfwd.h b/libstdc++-v3/include/bits/algorithmfwd.h new file mode 100644 index 00000000000..1b2bf23ace6 --- /dev/null +++ b/libstdc++-v3/include/bits/algorithmfwd.h @@ -0,0 +1,597 @@ +// declarations -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. 
+ +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +/** @file bits/algorithmfwd.h + * This is an internal header file, included by other library headers. + * You should not attempt to use it directly. + */ + + +/* + adjacent_find + binary_search + copy + copy_backward + count + count_if + equal + equal_range + fill + fill_n + find + find_end + find_first_of + find_if + for_each + generate + generate_n + includes + inplace_merge + iter_swap + lexicographical_compare + lower_bound + make_heap + max + max_element + merge + min + min_element + mismatch + next_permutation + nth_element + partial_sort + partial_sort_copy + partition + pop_heap + prev_permutation + push_heap + random_shuffle + remove + remove_copy + remove_copy_if + remove_if + replace + replace_copy + replace_copy_if + replace_if + reverse + reverse_copy + rotate + rotate_copy + search + search_n + set_difference + set_intersection + set_symmetric_difference + set_union + sort + sort_heap + stable_partition + stable_sort + stable_sort + swap + swap_ranges + transform + unique + unique_copy + upper_bound +*/ + +#ifndef _GLIBCXX_ALGORITHMFWD_H +#define _GLIBCXX_ALGORITHMFWD_H 1 + +#pragma GCC system_header + +#include +#include +#include + +_GLIBCXX_BEGIN_NAMESPACE(std) + + // adjacent_find + + template + bool + binary_search(_FIter, _FIter, const _Tp&); + + template + bool + binary_search(_FIter, _FIter, const _Tp&, _Compare); + + template + _OIter + copy(_IIter, _IIter, _OIter); + + template + _BIter2 + copy_backward (_BIter1, _BIter1, _BIter2); + + // count + // 
count_if + + template + pair<_FIter, _FIter> + equal_range(_FIter, _FIter, const _Tp&); + + template + pair<_FIter, _FIter> + equal_range(_FIter, _FIter, const _Tp&, _Compare); + + template + void + fill(_FIter, _FIter, const _Tp&); + +/* + XXX NB: return type different from ISO C++. + template + void + fill_n(_OIter, _Size, const _Tp&); +*/ + + template + _OIter + fill_n(_OIter, _Size, const _Tp&); + + // find + // find_end + // find_first_of + // find_if + // for_each + // generate + // generate_n + + template + bool + includes(_IIter1, _IIter1, _IIter2, _IIter2); + + template + bool + includes(_IIter1, _IIter1, _IIter2, _IIter2, _Compare); + + template + void + inplace_merge(_BIter, _BIter, _BIter); + + template + void + inplace_merge(_BIter, _BIter, _BIter, _Compare); + + template + void + iter_swap(_FIter1, _FIter2); + + // Specializations for char and unsigned char. + inline bool + lexicographical_compare(const unsigned char*, const unsigned char*, + const unsigned char*, const unsigned char*); + + inline bool + lexicographical_compare(const char*, const char*, const char*, const char*); + + template + _FIter + lower_bound(_FIter, _FIter, const _Tp&); + + template + _FIter + lower_bound(_FIter, _FIter, const _Tp&, _Compare); + + template + void + make_heap(_RAIter, _RAIter); + + template + void + make_heap(_RAIter, _RAIter, _Compare); + + template + const _Tp& + max(const _Tp&, const _Tp&); + + template + const _Tp& + max(const _Tp&, const _Tp&, _Compare); + + // max_element + // merge + + template + const _Tp& + min(const _Tp&, const _Tp&); + + template + const _Tp& + min(const _Tp&, const _Tp&, _Compare); + + // min_element + // mismatch + + template + bool + next_permutation(_BIter, _BIter); + + template + bool + next_permutation(_BIter, _BIter, _Compare); + + // nth_element + // partial_sort + + template + _RAIter + partial_sort_copy(_IIter, _IIter, _RAIter, _RAIter); + + template + _RAIter + partial_sort_copy(_IIter, _IIter, _RAIter, _RAIter, _Compare); 
+ + template + void + pop_heap(_RAIter, _RAIter); + + template + void + pop_heap(_RAIter, _RAIter, _Compare); + + template + bool + prev_permutation(_BIter, _BIter); + + template + bool + prev_permutation(_BIter, _BIter, _Compare); + + template + void + push_heap(_RAIter, _RAIter); + + template + void + push_heap(_RAIter, _RAIter, _Compare); + + // random_shuffle + + template + _FIter + remove(_FIter, _FIter, const _Tp&); + + template + _FIter + remove_if(_FIter, _FIter, _Predicate); + + template + _OIter + remove_copy(_IIter, _IIter, _OIter, const _Tp&); + + template + _OIter + remove_copy_if(_IIter, _IIter, _OIter, _Predicate); + + // replace + + template + _OIter + replace_copy(_IIter, _IIter, _OIter, const _Tp&, const _Tp&); + + template + _OIter + replace_copy_if(_Iter, _Iter, _OIter, _Predicate, const _Tp&); + + // replace_if + + template + void + reverse(_BIter, _BIter); + + template + _OIter + reverse_copy(_BIter, _BIter, _OIter); + + template + void + rotate(_FIter, _FIter, _FIter); + + template + _OIter + rotate_copy (_FIter, _FIter, _FIter, _OIter); + + // search + // search_n + // set_difference + // set_intersection + // set_symmetric_difference + // set_union + + template + void + sort_heap(_RAIter, _RAIter); + + template + void + sort_heap(_RAIter, _RAIter, _Compare); + + template + _BIter + stable_partition(_BIter, _BIter, _Predicate); + + template + void + swap(_Tp&, _Tp& b); + + template + _FIter2 + swap_ranges(_FIter1 first1, _FIter1, _FIter2); + + // transform + + template + _FIter + unique(_FIter, _FIter); + + template + _FIter + unique(_FIter, _FIter, _BinaryPredicate); + + // unique_copy + + template + _FIter + upper_bound(_FIter, _FIter, const _Tp&); + + template + _FIter + upper_bound(_FIter, _FIter, const _Tp&, _Compare); + +_GLIBCXX_END_NAMESPACE + +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_P) + + template + _FIter + adjacent_find(_FIter, _FIter); + + template + _FIter + adjacent_find(_FIter, _FIter, _BinaryPredicate); + + 
template + typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const _Tp&); + + template + typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, _Predicate); + + template + bool + equal(_IIter1, _IIter1, _IIter2); + + template + bool + equal(_IIter1, _IIter1, _IIter2, _BinaryPredicate); + + template + _IIter + find(_IIter, _IIter, const _Tp&); + + template + _FIter1 + find_end(_FIter1, _FIter1, _FIter2, _FIter2); + + template + _FIter1 + find_end(_FIter1, _FIter1, _FIter2, _FIter2, _BinaryPredicate); + + template + _FIter1 + find_first_of(_FIter1, _FIter1, _FIter2, _FIter2); + + template + _FIter1 + find_first_of(_FIter1, _FIter1, _FIter2, _FIter2, _BinaryPredicate); + + template + _IIter + find_if(_IIter, _IIter, _Predicate); + + template + _Funct + for_each(_IIter, _IIter, _Funct); + + template + void + generate(_FIter, _FIter, _Generator); + + template + void + generate_n(_OIter, _Size, _Generator); + + template + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2); + + template + bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, _Compare); + + template + _FIter + max_element(_FIter, _FIter); + + template + _FIter + max_element(_FIter, _FIter, _Compare); + + template + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare); + + template + _FIter + min_element(_FIter, _FIter); + + template + _FIter + min_element(_FIter, _FIter, _Compare); + + template + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2); + + template + pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, _BinaryPredicate); + + template + void + nth_element(_RAIter, _RAIter, _RAIter); + + template + void + nth_element(_RAIter, _RAIter, _RAIter, _Compare); + + template + void + partial_sort(_RAIter, _RAIter, _RAIter); + + template + void + partial_sort(_RAIter, _RAIter, _RAIter, _Compare); + + template + _BIter + 
partition(_BIter, _BIter, _Predicate); + + template + void + random_shuffle(_RAIter, _RAIter); + + template + void + random_shuffle(_RAIter, _RAIter, _Generator&); + + template + void + replace(_FIter, _FIter, const _Tp&, const _Tp&); + + template + void + replace_if(_FIter, _FIter, _Predicate, const _Tp&); + + template + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2); + + template + _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, _BinaryPredicate); + + template + _FIter + search_n(_FIter, _FIter, _Size, const _Tp&); + + template + _FIter + search_n(_FIter, _FIter, _Size, const _Tp&, _BinaryPredicate); + + template + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare); + + template + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare); + + template + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, + _OIter, _Compare); + + template + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare); + + template + void + sort(_RAIter, _RAIter); + + template + void + sort(_RAIter, _RAIter, _Compare); + + template + void + stable_sort(_RAIter, _RAIter); + + template + void + stable_sort(_RAIter, _RAIter, _Compare); + + template + _OIter + transform(_IIter, _IIter, _OIter, _UnaryOperation); + + template + _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, _BinaryOperation); + + template + _OIter + unique_copy(_IIter, _IIter, _OIter); + + template + _OIter + unique_copy(_IIter, _IIter, _OIter, _BinaryPredicate); + +_GLIBCXX_END_NESTED_NAMESPACE + +#ifdef _GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL +# include +#endif + +#endif + diff --git 
a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config index a51901821ac..262834560d1 100644 --- a/libstdc++-v3/include/bits/c++config +++ b/libstdc++-v3/include/bits/c++config @@ -45,16 +45,7 @@ // The current version of the C++ library in compressed ISO date format. #define __GLIBCXX__ -// Macro used to indicate that the native "C" includes, when compiled -// as "C++", have declarations in namespace std and not the global -// namespace. Note, this is unrelated to possible "C" compatibility -// includes that inject C90/C99 names into the global namespace. -// XXX May not be necessary -#if __cplusplus == 199711L -# define _GLIBCXX_NAMESPACE_GLOBAL_INJECTION 1 -#endif - -// Macros for visibility. +// Macros for visibility support. #define _GLIBCXX_HAVE_ATTRIBUTE_VISIBILITY #if _GLIBCXX_HAVE_ATTRIBUTE_VISIBILITY @@ -63,36 +54,178 @@ # define _GLIBCXX_VISIBILITY(V) #endif -// Macros for controlling various namespace association schemes and modes. +// Guide to libstdc++ namespaces. +/* + namespace std + { + namespace __debug { } + namespace __parallel { } + namespace __norm { } // __normative, __shadow, __replaced + namespace __cxx1998 { } + + namespace tr1 { } + } + + namespace __gnu_cxx + { + namespace __debug { } + namespace __norm { } + } +*/ + +// Macros for activating various namespace association modes. // _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG -// _GLIBCXX_NAMESPACE_ASSOCIATION_VERSION -// _GLIBCXX_NAMESPACE_ASSOCIATION_CXX200x +// _GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL +// _GLIBCXX_NAMESPACE_ASSOCIATION_VERSION + #ifdef _GLIBCXX_DEBUG # define _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG 1 #endif +#ifdef _GLIBCXX_PARALLEL +# define _GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL 1 +#endif + #define _GLIBCXX_NAMESPACE_ASSOCIATION_VERSION -// Macros for namespace scope. +// Defined if any namespace association modes are active. 
+#if _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG \ + || _GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL \ + || _GLIBCXX_NAMESPACE_ASSOCIATION_VERSION +# define _GLIBCXX_USE_NAMESPACE_ASSOCIATION 1 +#endif + +// Macros for namespace scope. Either namespace std:: or __gnu_cxx::, +// or the name of some nested namespace within it. +// _GLIBCXX_STD +// _GLIBCXX_STD_D +// _GLIBCXX_STD_P +// _GLIBCXX_EXT +// _GLIBCXX_EXT_D +// _GLIBCXX_EXT_P + +// +// Macros for enclosing namespaces and possibly nested namespaces. // _GLIBCXX_BEGIN_NAMESPACE // _GLIBCXX_END_NAMESPACE // _GLIBCXX_BEGIN_NESTED_NAMESPACE // _GLIBCXX_END_NESTED_NAMESPACE -#if _GLIBCXX_NAMESPACE_ASSOCIATION_VERSION +// _GLIBCXX_BEGIN_POTENTIAL_NESTED_NAMESPACE +// _GLIBCXX_END_POTENTIAL_NESTED_NAMESPACE +#ifndef _GLIBCXX_USE_NAMESPACE_ASSOCIATION +# define _GLIBCXX_STD_D _GLIBCXX_STD +# define _GLIBCXX_STD_P _GLIBCXX_STD +# define _GLIBCXX_STD std +# define _GLIBCXX_EXT_D _GLIBCXX_EXT +# define _GLIBCXX_EXT_P _GLIBCXX_EXT +# define _GLIBCXX_EXT __gnu_cxx +# define _GLIBCXX_BEGIN_NESTED_NAMESPACE(X, Y) _GLIBCXX_BEGIN_NAMESPACE(X) +# define _GLIBCXX_END_NESTED_NAMESPACE _GLIBCXX_END_NAMESPACE +# define _GLIBCXX_BEGIN_NAMESPACE(X) namespace X _GLIBCXX_VISIBILITY(default) { +# define _GLIBCXX_END_NAMESPACE } +#else + +# if _GLIBCXX_NAMESPACE_ASSOCIATION_VERSION // && not anything else +# define _GLIBCXX_STD_D _GLIBCXX_STD +# define _GLIBCXX_STD_P _GLIBCXX_STD +# define _GLIBCXX_STD _6 +# define _GLIBCXX_EXT _6 +# define _GLIBCXX_BEGIN_NAMESPACE(X) _GLIBCXX_BEGIN_NESTED_NAMESPACE(X, _6) +# define _GLIBCXX_END_NAMESPACE _GLIBCXX_END_NESTED_NAMESPACE +# endif + +// debug +# if _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG && !_GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL +# define _GLIBCXX_STD_D __norm +# define _GLIBCXX_STD_P _GLIBCXX_STD +# define _GLIBCXX_STD __cxx1998 +# define _GLIBCXX_EXT_D __norm +# define _GLIBCXX_EXT_P _GLIBCXX_EXT +# define _GLIBCXX_EXT __cxx1998 +# define _GLIBCXX_BEGIN_NAMESPACE(X) namespace X 
_GLIBCXX_VISIBILITY(default) { +# define _GLIBCXX_END_NAMESPACE } +# define _GLIBCXX_EXTERN_TEMPLATE 0 +# endif + +// parallel +# if _GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL && !_GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG +# define _GLIBCXX_STD_D _GLIBCXX_STD +# define _GLIBCXX_STD_P __norm +# define _GLIBCXX_STD __cxx1998 +# define _GLIBCXX_EXT_D _GLIBCXX_EXT +# define _GLIBCXX_EXT_P __norm +# define _GLIBCXX_EXT __cxx1998 +# define _GLIBCXX_BEGIN_NAMESPACE(X) namespace X _GLIBCXX_VISIBILITY(default) { +# define _GLIBCXX_END_NAMESPACE } +# define _GLIBCXX_EXTERN_TEMPLATE 0 +# endif + +// debug + parallel +# if _GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL && _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG +# define _GLIBCXX_STD_D __norm +# define _GLIBCXX_STD_P __norm +# define _GLIBCXX_STD __cxx1998 +# define _GLIBCXX_EXT_D __norm +# define _GLIBCXX_EXT_P __norm +# define _GLIBCXX_EXT __gnu_cxx +# define _GLIBCXX_BEGIN_NAMESPACE(X) namespace X _GLIBCXX_VISIBILITY(default) { +# define _GLIBCXX_END_NAMESPACE } +# define _GLIBCXX_EXTERN_TEMPLATE 0 +# endif + +# if __NO_INLINE__ && !__GXX_WEAK__ +# warning currently using namepace associated mode which may fail \ + without inlining due to lack of weak symbols +# endif + # define _GLIBCXX_BEGIN_NESTED_NAMESPACE(X, Y) namespace X { namespace Y _GLIBCXX_VISIBILITY(default) { # define _GLIBCXX_END_NESTED_NAMESPACE } } -# define _GLIBCXX_BEGIN_NAMESPACE(X) _GLIBCXX_BEGIN_NESTED_NAMESPACE(X, _6) -# define _GLIBCXX_END_NAMESPACE _GLIBCXX_END_NESTED_NAMESPACE -#else -# define _GLIBCXX_BEGIN_NAMESPACE(X) namespace X _GLIBCXX_VISIBILITY(default) { -# define _GLIBCXX_END_NAMESPACE } -# if _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG -# define _GLIBCXX_BEGIN_NESTED_NAMESPACE(X, Y) namespace X { namespace Y _GLIBCXX_VISIBILITY(default) { -# define _GLIBCXX_END_NESTED_NAMESPACE } } -# else -# define _GLIBCXX_BEGIN_NESTED_NAMESPACE(X, Y) _GLIBCXX_BEGIN_NAMESPACE(X) -# define _GLIBCXX_END_NESTED_NAMESPACE _GLIBCXX_END_NAMESPACE -# endif +#endif + +// Namespace 
associations for debug mode. +#if _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG +namespace std +{ + namespace __norm { } + namespace __debug { } + namespace __cxx1998 { } + + using namespace __debug __attribute__ ((strong)); + using namespace __cxx1998 __attribute__ ((strong)); +} + +namespace __gnu_cxx +{ + namespace __norm { } + namespace __debug { } + namespace __cxx1998 { } + + using namespace __debug __attribute__ ((strong)); + using namespace __cxx1998 __attribute__ ((strong)); +} +#endif + +// Namespace associations for parallel mode. +#if _GLIBCXX_NAMESPACE_ASSOCIATION_PARALLEL +namespace std +{ + namespace __norm { } + namespace __parallel { } + namespace __cxx1998 { } + + using namespace __parallel __attribute__ ((strong)); + using namespace __cxx1998 __attribute__ ((strong)); +} + +namespace __gnu_cxx +{ + namespace __norm { } + namespace __parallel { } + namespace __cxx1998 { } + + using namespace __parallel __attribute__ ((strong)); + using namespace __cxx1998 __attribute__ ((strong)); +} #endif // Namespace associations for versioning mode. @@ -119,38 +252,6 @@ namespace std } #endif -// Namespace associations for debug mode. -#if _GLIBCXX_NAMESPACE_ASSOCIATION_DEBUG -namespace std -{ - namespace __norm { } - namespace __debug { } - using namespace __debug __attribute__ ((strong)); -} - -namespace __gnu_cxx -{ - namespace __norm { } - namespace __debug { } - using namespace __debug __attribute__ ((strong)); -} - -# define _GLIBCXX_STD __norm -# define _GLIBCXX_EXT __norm -# define _GLIBCXX_EXTERN_TEMPLATE 0 -# if __NO_INLINE__ && !__GXX_WEAK__ -# warning debug mode without inlining may fail due to lack of weak symbols -# endif -#else -#if _GLIBCXX_NAMESPACE_ASSOCIATION_VERSION -# define _GLIBCXX_STD _6 -# define _GLIBCXX_EXT _6 -#else -# define _GLIBCXX_STD std -# define _GLIBCXX_EXT __gnu_cxx -#endif -#endif - // Define if compatibility should be provided for -mlong-double-64. 
#undef _GLIBCXX_LONG_DOUBLE_COMPAT @@ -193,6 +294,14 @@ _GLIBCXX_END_NAMESPACE # define _GLIBCXX_WEAK_DEFINITION #endif +// Macro used to indicate that the native "C" includes, when compiled +// as "C++", have declarations in namespace std and not the global +// namespace. Note, this is unrelated to possible "C" compatibility +// includes that inject C90/C99 names into the global namespace. +#if __cplusplus == 199711L +# define _GLIBCXX_NAMESPACE_GLOBAL_INJECTION 1 +#endif + // The remainder of the prewritten config is automatic; all the // user hooks are listed above. diff --git a/libstdc++-v3/include/bits/char_traits.h b/libstdc++-v3/include/bits/char_traits.h index 250faf59204..daa13b7558c 100644 --- a/libstdc++-v3/include/bits/char_traits.h +++ b/libstdc++-v3/include/bits/char_traits.h @@ -43,7 +43,7 @@ #pragma GCC system_header -#include // For copy, fill_n +#include // std::copy, std::fill_n #include // For streampos #include // For EOF #include // For WEOF, wmemmove, wmemset, etc. @@ -194,6 +194,7 @@ _GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx) char_traits<_CharT>:: copy(char_type* __s1, const char_type* __s2, std::size_t __n) { + // NB: Inline std::copy so no recursive dependencies. std::copy(__s2, __s2 + __n, __s1); return __s1; } @@ -203,6 +204,7 @@ _GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx) char_traits<_CharT>:: assign(char_type* __s, std::size_t __n, char_type __a) { + // NB: Inline std::fill_n so no recursive dependencies. 
std::fill_n(__s, __n, __a); return __s; } diff --git a/libstdc++-v3/include/bits/deque.tcc b/libstdc++-v3/include/bits/deque.tcc index 3f53f20bd9b..b5cacc1e577 100644 --- a/libstdc++-v3/include/bits/deque.tcc +++ b/libstdc++-v3/include/bits/deque.tcc @@ -62,7 +62,7 @@ #ifndef _DEQUE_TCC #define _DEQUE_TCC 1 -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) template deque<_Tp, _Alloc>& diff --git a/libstdc++-v3/include/bits/list.tcc b/libstdc++-v3/include/bits/list.tcc index 6bde3b77d00..3fdc5bbac55 100644 --- a/libstdc++-v3/include/bits/list.tcc +++ b/libstdc++-v3/include/bits/list.tcc @@ -62,7 +62,7 @@ #ifndef _LIST_TCC #define _LIST_TCC 1 -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) template void diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h index 160dc2f8abc..d03ae139e56 100644 --- a/libstdc++-v3/include/bits/stl_algo.h +++ b/libstdc++-v3/include/bits/stl_algo.h @@ -62,9 +62,10 @@ #ifndef _STL_ALGO_H #define _STL_ALGO_H 1 +#include // for rand #include #include // for _Temporary_buffer -#include // for rand +#include #include // See concept_check.h for the __glibcxx_*_requires macros. @@ -138,28 +139,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return __b; } - /** - * @brief Apply a function to every element of a sequence. - * @param first An input iterator. - * @param last An input iterator. - * @param f A unary function object. - * @return @p f. - * - * Applies the function object @p f to each element in the range - * @p [first,last). @p f must not modify the order of the sequence. - * If @p f has a return value it is ignored. 
- */ - template - _Function - for_each(_InputIterator __first, _InputIterator __last, _Function __f) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_requires_valid_range(__first, __last); - for (; __first != __last; ++__first) - __f(*__first); - return __f; - } + // for_each /** * @if maint @@ -295,450 +275,18 @@ _GLIBCXX_BEGIN_NAMESPACE(std) } } - /** - * @brief Find the first occurrence of a value in a sequence. - * @param first An input iterator. - * @param last An input iterator. - * @param val The value to find. - * @return The first iterator @c i in the range @p [first,last) - * such that @c *i == @p val, or @p last if no such iterator exists. - */ - template - inline _InputIterator - find(_InputIterator __first, _InputIterator __last, - const _Tp& __val) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_InputIterator>::value_type, _Tp>) - __glibcxx_requires_valid_range(__first, __last); - return std::__find(__first, __last, __val, - std::__iterator_category(__first)); - } - - /** - * @brief Find the first element in a sequence for which a predicate is true. - * @param first An input iterator. - * @param last An input iterator. - * @param pred A predicate. - * @return The first iterator @c i in the range @p [first,last) - * such that @p pred(*i) is true, or @p last if no such iterator exists. 
- */ - template - inline _InputIterator - find_if(_InputIterator __first, _InputIterator __last, - _Predicate __pred) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate, - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - return std::__find_if(__first, __last, __pred, - std::__iterator_category(__first)); - } - - /** - * @brief Find element from a set in a sequence. - * @param first1 Start of range to search. - * @param last1 End of range to search. - * @param first2 Start of match candidates. - * @param last2 End of match candidates. - * @return The first iterator @c i in the range - * @p [first1,last1) such that @c *i == @p *(i2) such that i2 is an - * interator in [first2,last2), or @p last1 if no such iterator exists. - * - * Searches the range @p [first1,last1) for an element that is equal to - * some element in the range [first2,last2). If found, returns an iterator - * in the range [first1,last1), otherwise returns @p last1. - */ - template - _InputIterator - find_first_of(_InputIterator __first1, _InputIterator __last1, - _ForwardIterator __first2, _ForwardIterator __last2) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_InputIterator>::value_type, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first1, __last1); - __glibcxx_requires_valid_range(__first2, __last2); - - for (; __first1 != __last1; ++__first1) - for (_ForwardIterator __iter = __first2; __iter != __last2; ++__iter) - if (*__first1 == *__iter) - return __first1; - return __last1; - } - - /** - * @brief Find element from a set in a sequence using a predicate. 
- * @param first1 Start of range to search. - * @param last1 End of range to search. - * @param first2 Start of match candidates. - * @param last2 End of match candidates. - * @param comp Predicate to use. - * @return The first iterator @c i in the range - * @p [first1,last1) such that @c comp(*i, @p *(i2)) is true and i2 is an - * interator in [first2,last2), or @p last1 if no such iterator exists. - * - * Searches the range @p [first1,last1) for an element that is equal to - * some element in the range [first2,last2). If found, returns an iterator in - * the range [first1,last1), otherwise returns @p last1. - */ - template - _InputIterator - find_first_of(_InputIterator __first1, _InputIterator __last1, - _ForwardIterator __first2, _ForwardIterator __last2, - _BinaryPredicate __comp) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate, - typename iterator_traits<_InputIterator>::value_type, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first1, __last1); - __glibcxx_requires_valid_range(__first2, __last2); - - for (; __first1 != __last1; ++__first1) - for (_ForwardIterator __iter = __first2; __iter != __last2; ++__iter) - if (__comp(*__first1, *__iter)) - return __first1; - return __last1; - } - - /** - * @brief Find two adjacent values in a sequence that are equal. - * @param first A forward iterator. - * @param last A forward iterator. - * @return The first iterator @c i such that @c i and @c i+1 are both - * valid iterators in @p [first,last) and such that @c *i == @c *(i+1), - * or @p last if no such iterator exists. 
- */ - template - _ForwardIterator - adjacent_find(_ForwardIterator __first, _ForwardIterator __last) - { - // concept requirements - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>) - __glibcxx_function_requires(_EqualityComparableConcept< - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - if (__first == __last) - return __last; - _ForwardIterator __next = __first; - while(++__next != __last) - { - if (*__first == *__next) - return __first; - __first = __next; - } - return __last; - } - - /** - * @brief Find two adjacent values in a sequence using a predicate. - * @param first A forward iterator. - * @param last A forward iterator. - * @param binary_pred A binary predicate. - * @return The first iterator @c i such that @c i and @c i+1 are both - * valid iterators in @p [first,last) and such that - * @p binary_pred(*i,*(i+1)) is true, or @p last if no such iterator - * exists. - */ - template - _ForwardIterator - adjacent_find(_ForwardIterator __first, _ForwardIterator __last, - _BinaryPredicate __binary_pred) - { - // concept requirements - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate, - typename iterator_traits<_ForwardIterator>::value_type, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - if (__first == __last) - return __last; - _ForwardIterator __next = __first; - while(++__next != __last) - { - if (__binary_pred(*__first, *__next)) - return __first; - __first = __next; - } - return __last; - } - - /** - * @brief Count the number of copies of a value in a sequence. - * @param first An input iterator. - * @param last An input iterator. - * @param value The value to be counted. 
- * @return The number of iterators @c i in the range @p [first,last) - * for which @c *i == @p value - */ - template - typename iterator_traits<_InputIterator>::difference_type - count(_InputIterator __first, _InputIterator __last, const _Tp& __value) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_InputIterator>::value_type, _Tp>) - __glibcxx_requires_valid_range(__first, __last); - typename iterator_traits<_InputIterator>::difference_type __n = 0; - for (; __first != __last; ++__first) - if (*__first == __value) - ++__n; - return __n; - } - - /** - * @brief Count the elements of a sequence for which a predicate is true. - * @param first An input iterator. - * @param last An input iterator. - * @param pred A predicate. - * @return The number of iterators @c i in the range @p [first,last) - * for which @p pred(*i) is true. - */ - template - typename iterator_traits<_InputIterator>::difference_type - count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate, - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - typename iterator_traits<_InputIterator>::difference_type __n = 0; - for (; __first != __last; ++__first) - if (__pred(*__first)) - ++__n; - return __n; - } - - /** - * @brief Finds the places in ranges which don't match. - * @param first1 An input iterator. - * @param last1 An input iterator. - * @param first2 An input iterator. - * @return A pair of iterators pointing to the first mismatch. - * - * This compares the elements of two ranges using @c == and returns a pair - * of iterators. 
The first iterator points into the first range, the - * second iterator points into the second range, and the elements pointed - * to by the iterators are not equal. - */ - template - pair<_InputIterator1, _InputIterator2> - mismatch(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_InputIterator1>::value_type, - typename iterator_traits<_InputIterator2>::value_type>) - __glibcxx_requires_valid_range(__first1, __last1); - - while (__first1 != __last1 && *__first1 == *__first2) - { - ++__first1; - ++__first2; - } - return pair<_InputIterator1, _InputIterator2>(__first1, __first2); - } - - /** - * @brief Finds the places in ranges which don't match. - * @param first1 An input iterator. - * @param last1 An input iterator. - * @param first2 An input iterator. - * @param binary_pred A binary predicate @link s20_3_1_base functor@endlink. - * @return A pair of iterators pointing to the first mismatch. - * - * This compares the elements of two ranges using the binary_pred - * parameter, and returns a pair - * of iterators. The first iterator points into the first range, the - * second iterator points into the second range, and the elements pointed - * to by the iterators are not equal. 
- */ - template - pair<_InputIterator1, _InputIterator2> - mismatch(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _BinaryPredicate __binary_pred) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) - __glibcxx_requires_valid_range(__first1, __last1); - - while (__first1 != __last1 && bool(__binary_pred(*__first1, *__first2))) - { - ++__first1; - ++__first2; - } - return pair<_InputIterator1, _InputIterator2>(__first1, __first2); - } - - /** - * @brief Search a sequence for a matching sub-sequence. - * @param first1 A forward iterator. - * @param last1 A forward iterator. - * @param first2 A forward iterator. - * @param last2 A forward iterator. - * @return The first iterator @c i in the range - * @p [first1,last1-(last2-first2)) such that @c *(i+N) == @p *(first2+N) - * for each @c N in the range @p [0,last2-first2), or @p last1 if no - * such iterator exists. - * - * Searches the range @p [first1,last1) for a sub-sequence that compares - * equal value-by-value with the sequence given by @p [first2,last2) and - * returns an iterator to the first element of the sub-sequence, or - * @p last1 if the sub-sequence is not found. - * - * Because the sub-sequence must lie completely within the range - * @p [first1,last1) it must start at a position less than - * @p last1-(last2-first2) where @p last2-first2 is the length of the - * sub-sequence. 
- * This means that the returned iterator @c i will be in the range - * @p [first1,last1-(last2-first2)) - */ - template - _ForwardIterator1 - search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2) - { - // concept requirements - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator1>) - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator2>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_ForwardIterator1>::value_type, - typename iterator_traits<_ForwardIterator2>::value_type>) - __glibcxx_requires_valid_range(__first1, __last1); - __glibcxx_requires_valid_range(__first2, __last2); - - // Test for empty ranges - if (__first1 == __last1 || __first2 == __last2) - return __first1; - - // Test for a pattern of length 1. - _ForwardIterator2 __p1(__first2); - if (++__p1 == __last2) - return std::find(__first1, __last1, *__first2); - - // General case. - _ForwardIterator2 __p; - _ForwardIterator1 __current = __first1; - - for (;;) - { - __first1 = std::find(__first1, __last1, *__first2); - if (__first1 == __last1) - return __last1; - - __p = __p1; - __current = __first1; - if (++__current == __last1) - return __last1; - - while (*__current == *__p) - { - if (++__p == __last2) - return __first1; - if (++__current == __last1) - return __last1; - } - ++__first1; - } - return __first1; - } - - /** - * @brief Search a sequence for a matching sub-sequence using a predicate. - * @param first1 A forward iterator. - * @param last1 A forward iterator. - * @param first2 A forward iterator. - * @param last2 A forward iterator. - * @param predicate A binary predicate. - * @return The first iterator @c i in the range - * @p [first1,last1-(last2-first2)) such that - * @p predicate(*(i+N),*(first2+N)) is true for each @c N in the range - * @p [0,last2-first2), or @p last1 if no such iterator exists. 
- * - * Searches the range @p [first1,last1) for a sub-sequence that compares - * equal value-by-value with the sequence given by @p [first2,last2), - * using @p predicate to determine equality, and returns an iterator - * to the first element of the sub-sequence, or @p last1 if no such - * iterator exists. - * - * @see search(_ForwardIter1, _ForwardIter1, _ForwardIter2, _ForwardIter2) - */ - template - _ForwardIterator1 - search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, - _BinaryPredicate __predicate) - { - // concept requirements - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator1>) - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator2>) - __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate, - typename iterator_traits<_ForwardIterator1>::value_type, - typename iterator_traits<_ForwardIterator2>::value_type>) - __glibcxx_requires_valid_range(__first1, __last1); - __glibcxx_requires_valid_range(__first2, __last2); - - // Test for empty ranges - if (__first1 == __last1 || __first2 == __last2) - return __first1; - - // Test for a pattern of length 1. - _ForwardIterator2 __p1(__first2); - if (++__p1 == __last2) - { - while (__first1 != __last1 - && !bool(__predicate(*__first1, *__first2))) - ++__first1; - return __first1; - } - - // General case. 
- _ForwardIterator2 __p; - _ForwardIterator1 __current = __first1; - - for (;;) - { - while (__first1 != __last1 - && !bool(__predicate(*__first1, *__first2))) - ++__first1; - if (__first1 == __last1) - return __last1; - - __p = __p1; - __current = __first1; - if (++__current == __last1) - return __last1; - - while (__predicate(*__current, *__p)) - { - if (++__p == __last2) - return __first1; - if (++__current == __last1) - return __last1; - } - ++__first1; - } - return __first1; - } + // set_difference + // set_intersection + // set_symmetric_difference + // set_union + // for_each + // find + // find_if + // find_first_of + // adjacent_find + // count + // count_if + // search /** * @if maint @@ -753,7 +301,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) _Integer __count, const _Tp& __val, std::forward_iterator_tag) { - __first = std::find(__first, __last, __val); + __first = _GLIBCXX_STD_P::find(__first, __last, __val); while (__first != __last) { typename iterator_traits<_ForwardIterator>::difference_type @@ -769,7 +317,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return __first; if (__i == __last) return __last; - __first = std::find(++__i, __last, __val); + __first = _GLIBCXX_STD_P::find(++__i, __last, __val); } return __last; } @@ -826,37 +374,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) } } - /** - * @brief Search a sequence for a number of consecutive values. - * @param first A forward iterator. - * @param last A forward iterator. - * @param count The number of consecutive values. - * @param val The value to find. - * @return The first iterator @c i in the range @p [first,last-count) - * such that @c *(i+N) == @p val for each @c N in the range @p [0,count), - * or @p last if no such iterator exists. - * - * Searches the range @p [first,last) for @p count consecutive elements - * equal to @p val. 
- */ - template - _ForwardIterator - search_n(_ForwardIterator __first, _ForwardIterator __last, - _Integer __count, const _Tp& __val) - { - // concept requirements - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_ForwardIterator>::value_type, _Tp>) - __glibcxx_requires_valid_range(__first, __last); - - if (__count <= 0) - return __first; - if (__count == 1) - return std::find(__first, __last, __val); - return std::__search_n(__first, __last, __count, __val, - std::__iterator_category(__first)); - } + // search_n /** * @if maint @@ -953,46 +471,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) } } - /** - * @brief Search a sequence for a number of consecutive values using a - * predicate. - * @param first A forward iterator. - * @param last A forward iterator. - * @param count The number of consecutive values. - * @param val The value to find. - * @param binary_pred A binary predicate. - * @return The first iterator @c i in the range @p [first,last-count) - * such that @p binary_pred(*(i+N),val) is true for each @c N in the - * range @p [0,count), or @p last if no such iterator exists. - * - * Searches the range @p [first,last) for @p count consecutive elements - * for which the predicate returns true. 
- */ - template - _ForwardIterator - search_n(_ForwardIterator __first, _ForwardIterator __last, - _Integer __count, const _Tp& __val, - _BinaryPredicate __binary_pred) - { - // concept requirements - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate, - typename iterator_traits<_ForwardIterator>::value_type, _Tp>) - __glibcxx_requires_valid_range(__first, __last); - - if (__count <= 0) - return __first; - if (__count == 1) - { - while (__first != __last && !bool(__binary_pred(*__first, __val))) - ++__first; - return __first; - } - return std::__search_n(__first, __last, __count, __val, __binary_pred, - std::__iterator_category(__first)); - } - // find_end for forward iterators. template _ForwardIterator1 @@ -1008,7 +486,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) while (1) { _ForwardIterator1 __new_result - = std::search(__first1, __last1, __first2, __last2); + = _GLIBCXX_STD_P::search(__first1, __last1, __first2, __last2); if (__new_result == __last1) return __result; else @@ -1037,7 +515,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) while (1) { _ForwardIterator1 __new_result - = std::search(__first1, __last1, __first2, __last2, __comp); + = _GLIBCXX_STD_P::search(__first1, __last1, __first2, __last2, __comp); if (__new_result == __last1) return __result; else @@ -1070,8 +548,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) _RevIterator1 __rlast1(__first1); _RevIterator2 __rlast2(__first2); - _RevIterator1 __rresult = std::search(_RevIterator1(__last1), __rlast1, - _RevIterator2(__last2), __rlast2); + _RevIterator1 __rresult = _GLIBCXX_STD_P::search(_RevIterator1(__last1), __rlast1, _RevIterator2(__last2), __rlast2); if (__rresult == __rlast1) return __last1; @@ -1209,261 +686,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) __comp); } - /** - * @brief Perform an operation on a sequence. - * @param first An input iterator. - * @param last An input iterator. - * @param result An output iterator. 
- * @param unary_op A unary operator. - * @return An output iterator equal to @p result+(last-first). - * - * Applies the operator to each element in the input range and assigns - * the results to successive elements of the output sequence. - * Evaluates @p *(result+N)=unary_op(*(first+N)) for each @c N in the - * range @p [0,last-first). - * - * @p unary_op must not alter its argument. - */ - template - _OutputIterator - transform(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - // "the type returned by a _UnaryOperation" - __typeof__(__unary_op(*__first))>) - __glibcxx_requires_valid_range(__first, __last); - - for (; __first != __last; ++__first, ++__result) - *__result = __unary_op(*__first); - return __result; - } - - /** - * @brief Perform an operation on corresponding elements of two sequences. - * @param first1 An input iterator. - * @param last1 An input iterator. - * @param first2 An input iterator. - * @param result An output iterator. - * @param binary_op A binary operator. - * @return An output iterator equal to @p result+(last-first). - * - * Applies the operator to the corresponding elements in the two - * input ranges and assigns the results to successive elements of the - * output sequence. - * Evaluates @p *(result+N)=binary_op(*(first1+N),*(first2+N)) for each - * @c N in the range @p [0,last1-first1). - * - * @p binary_op must not alter either of its arguments. 
- */ - template - _OutputIterator - transform(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _OutputIterator __result, - _BinaryOperation __binary_op) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - // "the type returned by a _BinaryOperation" - __typeof__(__binary_op(*__first1,*__first2))>) - __glibcxx_requires_valid_range(__first1, __last1); - - for (; __first1 != __last1; ++__first1, ++__first2, ++__result) - *__result = __binary_op(*__first1, *__first2); - return __result; - } - - /** - * @brief Replace each occurrence of one value in a sequence with another - * value. - * @param first A forward iterator. - * @param last A forward iterator. - * @param old_value The value to be replaced. - * @param new_value The replacement value. - * @return replace() returns no value. - * - * For each iterator @c i in the range @p [first,last) if @c *i == - * @p old_value then the assignment @c *i = @p new_value is performed. - */ - template - void - replace(_ForwardIterator __first, _ForwardIterator __last, - const _Tp& __old_value, const _Tp& __new_value) - { - // concept requirements - __glibcxx_function_requires(_Mutable_ForwardIteratorConcept< - _ForwardIterator>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_ForwardIterator>::value_type, _Tp>) - __glibcxx_function_requires(_ConvertibleConcept<_Tp, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - for (; __first != __last; ++__first) - if (*__first == __old_value) - *__first = __new_value; - } - - /** - * @brief Replace each value in a sequence for which a predicate returns - * true with another value. - * @param first A forward iterator. - * @param last A forward iterator. - * @param pred A predicate. 
- * @param new_value The replacement value. - * @return replace_if() returns no value. - * - * For each iterator @c i in the range @p [first,last) if @p pred(*i) - * is true then the assignment @c *i = @p new_value is performed. - */ - template - void - replace_if(_ForwardIterator __first, _ForwardIterator __last, - _Predicate __pred, const _Tp& __new_value) - { - // concept requirements - __glibcxx_function_requires(_Mutable_ForwardIteratorConcept< - _ForwardIterator>) - __glibcxx_function_requires(_ConvertibleConcept<_Tp, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - for (; __first != __last; ++__first) - if (__pred(*__first)) - *__first = __new_value; - } - - /** - * @brief Copy a sequence, replacing each element of one value with another - * value. - * @param first An input iterator. - * @param last An input iterator. - * @param result An output iterator. - * @param old_value The value to be replaced. - * @param new_value The replacement value. - * @return The end of the output sequence, @p result+(last-first). - * - * Copies each element in the input range @p [first,last) to the - * output range @p [result,result+(last-first)) replacing elements - * equal to @p old_value with @p new_value. 
- */ - template - _OutputIterator - replace_copy(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, - const _Tp& __old_value, const _Tp& __new_value) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_function_requires(_EqualOpConcept< - typename iterator_traits<_InputIterator>::value_type, _Tp>) - __glibcxx_requires_valid_range(__first, __last); - - for (; __first != __last; ++__first, ++__result) - if (*__first == __old_value) - *__result = __new_value; - else - *__result = *__first; - return __result; - } - - /** - * @brief Copy a sequence, replacing each value for which a predicate - * returns true with another value. - * @param first An input iterator. - * @param last An input iterator. - * @param result An output iterator. - * @param pred A predicate. - * @param new_value The replacement value. - * @return The end of the output sequence, @p result+(last-first). - * - * Copies each element in the range @p [first,last) to the range - * @p [result,result+(last-first)) replacing elements for which - * @p pred returns true with @p new_value. 
- */ - template - _OutputIterator - replace_copy_if(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, - _Predicate __pred, const _Tp& __new_value) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate, - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - for (; __first != __last; ++__first, ++__result) - if (__pred(*__first)) - *__result = __new_value; - else - *__result = *__first; - return __result; - } - - /** - * @brief Assign the result of a function object to each value in a - * sequence. - * @param first A forward iterator. - * @param last A forward iterator. - * @param gen A function object taking no arguments. - * @return generate() returns no value. - * - * Performs the assignment @c *i = @p gen() for each @c i in the range - * @p [first,last). - */ - template - void - generate(_ForwardIterator __first, _ForwardIterator __last, - _Generator __gen) - { - // concept requirements - __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>) - __glibcxx_function_requires(_GeneratorConcept<_Generator, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - for (; __first != __last; ++__first) - *__first = __gen(); - } - - /** - * @brief Assign the result of a function object to each value in a - * sequence. - * @param first A forward iterator. - * @param n The length of the sequence. - * @param gen A function object taking no arguments. - * @return The end of the sequence, @p first+n - * - * Performs the assignment @c *i = @p gen() for each @c i in the range - * @p [first,first+n). 
- */ - template - _OutputIterator - generate_n(_OutputIterator __first, _Size __n, _Generator __gen) - { - // concept requirements - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - // "the type returned by a _Generator" - __typeof__(__gen())>) - - for (; __n > 0; --__n, ++__first) - *__first = __gen(); - return __first; - } /** * @brief Copy a sequence, removing elements of a given value. @@ -1565,11 +787,10 @@ _GLIBCXX_BEGIN_NAMESPACE(std) typename iterator_traits<_ForwardIterator>::value_type, _Tp>) __glibcxx_requires_valid_range(__first, __last); - __first = std::find(__first, __last, __value); + __first = _GLIBCXX_STD_P::find(__first, __last, __value); _ForwardIterator __i = __first; return __first == __last ? __first - : std::remove_copy(++__i, __last, - __first, __value); + : std::remove_copy(++__i, __last, __first, __value); } /** @@ -1600,7 +821,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) typename iterator_traits<_ForwardIterator>::value_type>) __glibcxx_requires_valid_range(__first, __last); - __first = std::find_if(__first, __last, __pred); + __first = _GLIBCXX_STD_P::find_if(__first, __last, __pred); _ForwardIterator __i = __first; return __first == __last ? __first : std::remove_copy_if(++__i, __last, @@ -1632,7 +853,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) __glibcxx_requires_valid_range(__first, __last); // Skip the beginning, if already unique. - __first = std::adjacent_find(__first, __last); + __first = _GLIBCXX_STD_P::adjacent_find(__first, __last); if (__first == __last) return __last; @@ -1673,7 +894,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) __glibcxx_requires_valid_range(__first, __last); // Skip the beginning, if already unique. 
- __first = std::adjacent_find(__first, __last, __binary_pred); + __first = _GLIBCXX_STD_P::adjacent_find(__first, __last, __binary_pred); if (__first == __last) return __last; @@ -1846,88 +1067,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return ++__result; } - /** - * @brief Copy a sequence, removing consecutive duplicate values. - * @param first An input iterator. - * @param last An input iterator. - * @param result An output iterator. - * @return An iterator designating the end of the resulting sequence. - * - * Copies each element in the range @p [first,last) to the range - * beginning at @p result, except that only the first element is copied - * from groups of consecutive elements that compare equal. - * unique_copy() is stable, so the relative order of elements that are - * copied is unchanged. - * - * @if maint - * _GLIBCXX_RESOLVE_LIB_DEFECTS - * DR 241. Does unique_copy() require CopyConstructible and Assignable? - * - * _GLIBCXX_RESOLVE_LIB_DEFECTS - * DR 538. 241 again: Does unique_copy() require CopyConstructible and - * Assignable? - * @endif - */ - template - inline _OutputIterator - unique_copy(_InputIterator __first, _InputIterator __last, - _OutputIterator __result) - { - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_function_requires(_EqualityComparableConcept< - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first == __last) - return __result; - return std::__unique_copy(__first, __last, __result, - std::__iterator_category(__first), - std::__iterator_category(__result)); - } - - /** - * @brief Copy a sequence, removing consecutive values using a predicate. - * @param first An input iterator. - * @param last An input iterator. - * @param result An output iterator. 
- * @param binary_pred A binary predicate. - * @return An iterator designating the end of the resulting sequence. - * - * Copies each element in the range @p [first,last) to the range - * beginning at @p result, except that only the first element is copied - * from groups of consecutive elements for which @p binary_pred returns - * true. - * unique_copy() is stable, so the relative order of elements that are - * copied is unchanged. - * - * @if maint - * _GLIBCXX_RESOLVE_LIB_DEFECTS - * DR 241. Does unique_copy() require CopyConstructible and Assignable? - * @endif - */ - template - inline _OutputIterator - unique_copy(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, - _BinaryPredicate __binary_pred) - { - // concept requirements -- predicates checked later - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - typename iterator_traits<_InputIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first == __last) - return __result; - return std::__unique_copy(__first, __last, __result, __binary_pred, - std::__iterator_category(__first), - std::__iterator_category(__result)); - } - /** * @if maint * This is an uglified reverse(_BidirectionalIterator, @@ -2265,60 +1404,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) std::copy(__middle, __last, __result)); } - /** - * @brief Randomly shuffle the elements of a sequence. - * @param first A forward iterator. - * @param last A forward iterator. - * @return Nothing. - * - * Reorder the elements in the range @p [first,last) using a random - * distribution, so that every possible ordering of the sequence is - * equally likely. 
- */ - template - inline void - random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last) - { - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first != __last) - for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i) - std::iter_swap(__i, __first + (std::rand() % ((__i - __first) + 1))); - } - - /** - * @brief Shuffle the elements of a sequence using a random number - * generator. - * @param first A forward iterator. - * @param last A forward iterator. - * @param rand The RNG functor or function. - * @return Nothing. - * - * Reorders the elements in the range @p [first,last) using @p rand to - * provide a random distribution. Calling @p rand(N) for a positive - * integer @p N should return a randomly chosen integer from the - * range [0,N). - */ - template - void - random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, - _RandomNumberGenerator& __rand) - { - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first == __last) - return; - for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i) - std::iter_swap(__i, __first + __rand((__i - __first) + 1)); - } - - /** * @if maint * This is a helper function... @@ -2382,36 +1467,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) } } - /** - * @brief Move elements for which a predicate is true to the beginning - * of a sequence. - * @param first A forward iterator. - * @param last A forward iterator. - * @param pred A predicate functor. - * @return An iterator @p middle such that @p pred(i) is true for each - * iterator @p i in the range @p [first,middle) and false for each @p i - * in the range @p [middle,last). - * - * @p pred must not modify its operand. 
@p partition() does not preserve - * the relative ordering of elements in each group, use - * @p stable_partition() if this is needed. - */ - template - inline _ForwardIterator - partition(_ForwardIterator __first, _ForwardIterator __last, - _Predicate __pred) - { - // concept requirements - __glibcxx_function_requires(_Mutable_ForwardIteratorConcept< - _ForwardIterator>) - __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate, - typename iterator_traits<_ForwardIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - return std::__partition(__first, __last, __pred, - std::__iterator_category(__first)); - } - + // partition /** * @if maint @@ -2583,80 +1639,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) std::__pop_heap(__first, __middle, __i, _ValueType(*__i), __comp); } - /** - * @brief Sort the smallest elements of a sequence. - * @param first An iterator. - * @param middle Another iterator. - * @param last Another iterator. - * @return Nothing. - * - * Sorts the smallest @p (middle-first) elements in the range - * @p [first,last) and moves them to the range @p [first,middle). The - * order of the remaining elements in the range @p [middle,last) is - * undefined. - * After the sort if @p i and @j are iterators in the range - * @p [first,middle) such that @i precedes @j and @k is an iterator in - * the range @p [middle,last) then @p *j<*i and @p *k<*i are both false. 
- */ - template - inline void - partial_sort(_RandomAccessIterator __first, - _RandomAccessIterator __middle, - _RandomAccessIterator __last) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>) - __glibcxx_requires_valid_range(__first, __middle); - __glibcxx_requires_valid_range(__middle, __last); - - std::__heap_select(__first, __middle, __last); - std::sort_heap(__first, __middle); - } - - /** - * @brief Sort the smallest elements of a sequence using a predicate - * for comparison. - * @param first An iterator. - * @param middle Another iterator. - * @param last Another iterator. - * @param comp A comparison functor. - * @return Nothing. - * - * Sorts the smallest @p (middle-first) elements in the range - * @p [first,last) and moves them to the range @p [first,middle). The - * order of the remaining elements in the range @p [middle,last) is - * undefined. - * After the sort if @p i and @j are iterators in the range - * @p [first,middle) such that @i precedes @j and @k is an iterator in - * the range @p [middle,last) then @p *comp(j,*i) and @p comp(*k,*i) - * are both false. 
- */ - template - inline void - partial_sort(_RandomAccessIterator __first, - _RandomAccessIterator __middle, - _RandomAccessIterator __last, - _Compare __comp) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, - _ValueType, _ValueType>) - __glibcxx_requires_valid_range(__first, __middle); - __glibcxx_requires_valid_range(__middle, __last); - - std::__heap_select(__first, __middle, __last, __comp); - std::sort_heap(__first, __middle, __comp); - } + // partial_sort /** * @brief Copy the smallest elements of a sequence. @@ -3033,7 +2016,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) { if (__depth_limit == 0) { - std::partial_sort(__first, __last, __last); + _GLIBCXX_STD_P::partial_sort(__first, __last, __last); return; } --__depth_limit; @@ -3069,7 +2052,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) { if (__depth_limit == 0) { - std::partial_sort(__first, __last, __last, __comp); + _GLIBCXX_STD_P::partial_sort(__first, __last, __last, __comp); return; } --__depth_limit; @@ -3103,76 +2086,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return __k; } - /** - * @brief Sort the elements of a sequence. - * @param first An iterator. - * @param last Another iterator. - * @return Nothing. - * - * Sorts the elements in the range @p [first,last) in ascending order, - * such that @p *(i+1)<*i is false for each iterator @p i in the range - * @p [first,last-1). - * - * The relative ordering of equivalent elements is not preserved, use - * @p stable_sort() if this is needed.
- */ - template - inline void - sort(_RandomAccessIterator __first, _RandomAccessIterator __last) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first != __last) - { - std::__introsort_loop(__first, __last, - std::__lg(__last - __first) * 2); - std::__final_insertion_sort(__first, __last); - } - } - - /** - * @brief Sort the elements of a sequence using a predicate for comparison. - * @param first An iterator. - * @param last Another iterator. - * @param comp A comparison functor. - * @return Nothing. - * - * Sorts the elements in the range @p [first,last) in ascending order, - * such that @p comp(*(i+1),*i) is false for every iterator @p i in the - * range @p [first,last-1). - * - * The relative ordering of equivalent elements is not preserved, use - * @p stable_sort() if this is needed. - */ - template - inline void - sort(_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, _ValueType, - _ValueType>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first != __last) - { - std::__introsort_loop(__first, __last, - std::__lg(__last - __first) * 2, __comp); - std::__final_insertion_sort(__first, __last, __comp); - } - } + // sort template void @@ -3246,81 +2160,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) std::__insertion_sort(__first, __last, __comp); } - /** - * @brief Sort a sequence just enough to find a particular position. - * @param first An iterator. 
- * @param nth Another iterator. - * @param last Another iterator. - * @return Nothing. - * - * Rearranges the elements in the range @p [first,last) so that @p *nth - * is the same element that would have been in that position had the - * whole sequence been sorted. - * whole sequence been sorted. The elements either side of @p *nth are - * not completely sorted, but for any iterator @i in the range - * @p [first,nth) and any iterator @j in the range @p [nth,last) it - * holds that @p *j<*i is false. - */ - template - inline void - nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, - _RandomAccessIterator __last) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>) - __glibcxx_requires_valid_range(__first, __nth); - __glibcxx_requires_valid_range(__nth, __last); - - if (__first == __last || __nth == __last) - return; - - std::__introselect(__first, __nth, __last, - std::__lg(__last - __first) * 2); - } - - /** - * @brief Sort a sequence just enough to find a particular position - * using a predicate for comparison. - * @param first An iterator. - * @param nth Another iterator. - * @param last Another iterator. - * @param comp A comparison functor. - * @return Nothing. - * - * Rearranges the elements in the range @p [first,last) so that @p *nth - * is the same element that would have been in that position had the - * whole sequence been sorted. The elements either side of @p *nth are - * not completely sorted, but for any iterator @i in the range - * @p [first,nth) and any iterator @j in the range @p [nth,last) it - * holds that @p comp(*j,*i) is false. 
- */ - template - inline void - nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, - _RandomAccessIterator __last, _Compare __comp) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, - _ValueType, _ValueType>) - __glibcxx_requires_valid_range(__first, __nth); - __glibcxx_requires_valid_range(__nth, __last); - - if (__first == __last || __nth == __last) - return; - - std::__introselect(__first, __nth, __last, - std::__lg(__last - __first) * 2, __comp); - } + // nth_element /** * @brief Finds the first position in which @a val could be inserted @@ -3328,8 +2168,9 @@ _GLIBCXX_BEGIN_NAMESPACE(std) * @param first An iterator. * @param last Another iterator. * @param val The search term. - * @return An iterator pointing to the first element "not less than" @a val, - * or end() if every element is less than @a val. + * @return An iterator pointing to the first element "not less + * than" @a val, or end() if every element is less than + * @a val. * @ingroup binarysearch */ template @@ -3701,124 +2542,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return __i != __last && !bool(__comp(__val, *__i)); } - /** - * @brief Merges two sorted ranges. - * @param first1 An iterator. - * @param first2 Another iterator. - * @param last1 Another iterator. - * @param last2 Another iterator. - * @param result An iterator pointing to the end of the merged range. - * @return An iterator pointing to the first element "not less than" @a val. - * - * Merges the ranges [first1,last1) and [first2,last2) into the sorted range - * [result, result + (last1-first1) + (last2-first2)). Both input ranges - * must be sorted, and the output range must not overlap with either of - * the input ranges. 
The sort is @e stable, that is, for equivalent - * elements in the two ranges, elements from the first range will always - * come before elements from the second. - */ - template - _OutputIterator - merge(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, - _OutputIterator __result) - { - typedef typename iterator_traits<_InputIterator1>::value_type - _ValueType1; - typedef typename iterator_traits<_InputIterator2>::value_type - _ValueType2; - - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - _ValueType1>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - _ValueType2>) - __glibcxx_function_requires(_LessThanOpConcept<_ValueType2, _ValueType1>) - __glibcxx_requires_sorted(__first1, __last1); - __glibcxx_requires_sorted(__first2, __last2); - - while (__first1 != __last1 && __first2 != __last2) - { - if (*__first2 < *__first1) - { - *__result = *__first2; - ++__first2; - } - else - { - *__result = *__first1; - ++__first1; - } - ++__result; - } - return std::copy(__first2, __last2, std::copy(__first1, __last1, - __result)); - } - - /** - * @brief Merges two sorted ranges. - * @param first1 An iterator. - * @param first2 Another iterator. - * @param last1 Another iterator. - * @param last2 Another iterator. - * @param result An iterator pointing to the end of the merged range. - * @param comp A functor to use for comparisons. - * @return An iterator pointing to the first element "not less than" @a val. - * - * Merges the ranges [first1,last1) and [first2,last2) into the sorted range - * [result, result + (last1-first1) + (last2-first2)). Both input ranges - * must be sorted, and the output range must not overlap with either of - * the input ranges. 
The sort is @e stable, that is, for equivalent - * elements in the two ranges, elements from the first range will always - * come before elements from the second. - * - * The comparison function should have the same effects on ordering as - * the function used for the initial sort. - */ - template - _OutputIterator - merge(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _InputIterator2 __last2, - _OutputIterator __result, _Compare __comp) - { - typedef typename iterator_traits<_InputIterator1>::value_type - _ValueType1; - typedef typename iterator_traits<_InputIterator2>::value_type - _ValueType2; - - // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - _ValueType1>) - __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, - _ValueType2>) - __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, - _ValueType2, _ValueType1>) - __glibcxx_requires_sorted_pred(__first1, __last1, __comp); - __glibcxx_requires_sorted_pred(__first2, __last2, __comp); - - while (__first1 != __last1 && __first2 != __last2) - { - if (__comp(*__first2, *__first1)) - { - *__result = *__first2; - ++__first2; - } - else - { - *__result = *__first1; - ++__first1; - } - ++__result; - } - return std::copy(__first2, __last2, std::copy(__first1, __last1, - __result)); - } + // merge /** * @if maint @@ -3952,7 +2676,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std) if (__len1 <= __len2 && __len1 <= __buffer_size) { _Pointer __buffer_end = std::copy(__first, __middle, __buffer); - std::merge(__buffer, __buffer_end, __middle, __last, __first); + _GLIBCXX_STD_P::merge(__buffer, __buffer_end, __middle, __last, + __first); } else if (__len2 <= __buffer_size) { @@ -3999,8 +2724,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std) * This is a helper function for the merge routines. 
* @endif */ - template + template void __merge_adaptive(_BidirectionalIterator __first, _BidirectionalIterator __middle, @@ -4012,7 +2737,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) if (__len1 <= __len2 && __len1 <= __buffer_size) { _Pointer __buffer_end = std::copy(__first, __middle, __buffer); - std::merge(__buffer, __buffer_end, __middle, __last, __first, __comp); + _GLIBCXX_STD_P::merge(__buffer, __buffer_end, __middle, __last, __first, __comp); } else if (__len2 <= __buffer_size) { @@ -4273,14 +2998,15 @@ _GLIBCXX_BEGIN_NAMESPACE(std) while (__last - __first >= __two_step) { - __result = std::merge(__first, __first + __step_size, + __result = _GLIBCXX_STD_P::merge(__first, __first + __step_size, __first + __step_size, __first + __two_step, __result); __first += __two_step; } __step_size = std::min(_Distance(__last - __first), __step_size); - std::merge(__first, __first + __step_size, __first + __step_size, __last, + _GLIBCXX_STD_P::merge(__first, __first + __step_size, + __first + __step_size, __last, __result); } @@ -4296,7 +3022,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) while (__last - __first >= __two_step) { - __result = std::merge(__first, __first + __step_size, + __result = _GLIBCXX_STD_P::merge(__first, __first + __step_size, __first + __step_size, __first + __two_step, __result, __comp); @@ -4304,10 +3030,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std) } __step_size = std::min(_Distance(__last - __first), __step_size); - std::merge(__first, __first + __step_size, - __first + __step_size, __last, - __result, - __comp); + _GLIBCXX_STD_P::merge(__first, __first + __step_size, + __first + __step_size, __last, __result, __comp); } template @@ -4493,89 +3217,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) __comp); } - /** - * @brief Sort the elements of a sequence, preserving the relative order - * of equivalent elements. - * @param first An iterator. - * @param last Another iterator. - * @return Nothing. 
- * - * Sorts the elements in the range @p [first,last) in ascending order, - * such that @p *(i+1)<*i is false for each iterator @p i in the range - * @p [first,last-1). - * - * The relative ordering of equivalent elements is preserved, so any two - * elements @p x and @p y in the range @p [first,last) such that - * @p x - inline void - stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - typedef typename iterator_traits<_RandomAccessIterator>::difference_type - _DistanceType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>) - __glibcxx_requires_valid_range(__first, __last); - - _Temporary_buffer<_RandomAccessIterator, _ValueType> __buf(__first, - __last); - if (__buf.begin() == 0) - std::__inplace_stable_sort(__first, __last); - else - std::__stable_sort_adaptive(__first, __last, __buf.begin(), - _DistanceType(__buf.size())); - } - - /** - * @brief Sort the elements of a sequence using a predicate for comparison, - * preserving the relative order of equivalent elements. - * @param first An iterator. - * @param last Another iterator. - * @param comp A comparison functor. - * @return Nothing. - * - * Sorts the elements in the range @p [first,last) in ascending order, - * such that @p comp(*(i+1),*i) is false for each iterator @p i in the - * range @p [first,last-1). - * - * The relative ordering of equivalent elements is preserved, so any two - * elements @p x and @p y in the range @p [first,last) such that - * @p comp(x,y) is false and @p comp(y,x) is false will have the same - * relative ordering after calling @p stable_sort(). 
- */ - template - inline void - stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) - { - typedef typename iterator_traits<_RandomAccessIterator>::value_type - _ValueType; - typedef typename iterator_traits<_RandomAccessIterator>::difference_type - _DistanceType; - - // concept requirements - __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< - _RandomAccessIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, - _ValueType, - _ValueType>) - __glibcxx_requires_valid_range(__first, __last); - - _Temporary_buffer<_RandomAccessIterator, _ValueType> __buf(__first, - __last); - if (__buf.begin() == 0) - std::__inplace_stable_sort(__first, __last, __comp); - else - std::__stable_sort_adaptive(__first, __last, __buf.begin(), - _DistanceType(__buf.size()), __comp); - } + // stable_sort // Set algorithms: includes, set_union, set_intersection, set_difference, // set_symmetric_difference. All of these algorithms have the precondition @@ -4678,6 +3320,1580 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return __first2 == __last2; } + // nth_element + // merge + // set_difference + // set_intersection + // set_union + // stable_sort + // set_symmetric_difference + // min_element + // max_element + + /** + * @brief Permute range into the next "dictionary" ordering. + * @param first Start of range. + * @param last End of range. + * @return False if wrapped to first permutation, true otherwise. + * + * Treats all permutations of the range as a set of "dictionary" sorted + * sequences. Permutes the current sequence into the next one of this set. + * Returns true if there are more sequences to generate. If the sequence + * is the largest of the set, the smallest is generated and false returned. 
+ */ + template<typename _BidirectionalIterator> + bool + next_permutation(_BidirectionalIterator __first, + _BidirectionalIterator __last) + { + // concept requirements + __glibcxx_function_requires(_BidirectionalIteratorConcept< + _BidirectionalIterator>) + __glibcxx_function_requires(_LessThanComparableConcept< + typename iterator_traits<_BidirectionalIterator>::value_type>) + __glibcxx_requires_valid_range(__first, __last); + + if (__first == __last) + return false; + _BidirectionalIterator __i = __first; + ++__i; + if (__i == __last) + return false; + __i = __last; + --__i; + + for(;;) + { + _BidirectionalIterator __ii = __i; + --__i; + if (*__i < *__ii) + { + _BidirectionalIterator __j = __last; + while (!(*__i < *--__j)) + {} + std::iter_swap(__i, __j); + std::reverse(__ii, __last); + return true; + } + if (__i == __first) + { + std::reverse(__first, __last); + return false; + } + } + } + + /** + * @brief Permute range into the next "dictionary" ordering using + * comparison functor. + * @param first Start of range. + * @param last End of range. + * @param comp A comparison functor. + * @return False if wrapped to first permutation, true otherwise. + * + * Treats all permutations of the range [first,last) as a set of + * "dictionary" sorted sequences ordered by @a comp. Permutes the current + * sequence into the next one of this set. Returns true if there are more + * sequences to generate. If the sequence is the largest of the set, the + * smallest is generated and false returned.
+ */ + template<typename _BidirectionalIterator, typename _Compare> + bool + next_permutation(_BidirectionalIterator __first, + _BidirectionalIterator __last, _Compare __comp) + { + // concept requirements + __glibcxx_function_requires(_BidirectionalIteratorConcept< + _BidirectionalIterator>) + __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, + typename iterator_traits<_BidirectionalIterator>::value_type, + typename iterator_traits<_BidirectionalIterator>::value_type>) + __glibcxx_requires_valid_range(__first, __last); + + if (__first == __last) + return false; + _BidirectionalIterator __i = __first; + ++__i; + if (__i == __last) + return false; + __i = __last; + --__i; + + for(;;) + { + _BidirectionalIterator __ii = __i; + --__i; + if (__comp(*__i, *__ii)) + { + _BidirectionalIterator __j = __last; + while (!bool(__comp(*__i, *--__j))) + {} + std::iter_swap(__i, __j); + std::reverse(__ii, __last); + return true; + } + if (__i == __first) + { + std::reverse(__first, __last); + return false; + } + } + } + + /** + * @brief Permute range into the previous "dictionary" ordering. + * @param first Start of range. + * @param last End of range. + * @return False if wrapped to last permutation, true otherwise. + * + * Treats all permutations of the range as a set of "dictionary" sorted + * sequences. Permutes the current sequence into the previous one of this + * set. Returns true if there are more sequences to generate. If the + * sequence is the smallest of the set, the largest is generated and false + * returned.
+ */ + template<typename _BidirectionalIterator> + bool + prev_permutation(_BidirectionalIterator __first, + _BidirectionalIterator __last) + { + // concept requirements + __glibcxx_function_requires(_BidirectionalIteratorConcept< + _BidirectionalIterator>) + __glibcxx_function_requires(_LessThanComparableConcept< + typename iterator_traits<_BidirectionalIterator>::value_type>) + __glibcxx_requires_valid_range(__first, __last); + + if (__first == __last) + return false; + _BidirectionalIterator __i = __first; + ++__i; + if (__i == __last) + return false; + __i = __last; + --__i; + + for(;;) + { + _BidirectionalIterator __ii = __i; + --__i; + if (*__ii < *__i) + { + _BidirectionalIterator __j = __last; + while (!(*--__j < *__i)) + {} + std::iter_swap(__i, __j); + std::reverse(__ii, __last); + return true; + } + if (__i == __first) + { + std::reverse(__first, __last); + return false; + } + } + } + + /** + * @brief Permute range into the previous "dictionary" ordering using + * comparison functor. + * @param first Start of range. + * @param last End of range. + * @param comp A comparison functor. + * @return False if wrapped to last permutation, true otherwise. + * + * Treats all permutations of the range [first,last) as a set of + * "dictionary" sorted sequences ordered by @a comp. Permutes the current + * sequence into the previous one of this set. Returns true if there are + * more sequences to generate. If the sequence is the smallest of the set, + * the largest is generated and false returned.
+ */ + template<typename _BidirectionalIterator, typename _Compare> + bool + prev_permutation(_BidirectionalIterator __first, + _BidirectionalIterator __last, _Compare __comp) + { + // concept requirements + __glibcxx_function_requires(_BidirectionalIteratorConcept< + _BidirectionalIterator>) + __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, + typename iterator_traits<_BidirectionalIterator>::value_type, + typename iterator_traits<_BidirectionalIterator>::value_type>) + __glibcxx_requires_valid_range(__first, __last); + + if (__first == __last) + return false; + _BidirectionalIterator __i = __first; + ++__i; + if (__i == __last) + return false; + __i = __last; + --__i; + + for(;;) + { + _BidirectionalIterator __ii = __i; + --__i; + if (__comp(*__ii, *__i)) + { + _BidirectionalIterator __j = __last; + while (!bool(__comp(*--__j, *__i))) + {} + std::iter_swap(__i, __j); + std::reverse(__ii, __last); + return true; + } + if (__i == __first) + { + std::reverse(__first, __last); + return false; + } + } + } + + // replace + // replace_if + + /** + * @brief Copy a sequence, replacing each element of one value with another + * value. + * @param first An input iterator. + * @param last An input iterator. + * @param result An output iterator. + * @param old_value The value to be replaced. + * @param new_value The replacement value. + * @return The end of the output sequence, @p result+(last-first). + * + * Copies each element in the input range @p [first,last) to the + * output range @p [result,result+(last-first)) replacing elements + * equal to @p old_value with @p new_value.
+ */ + template<typename _InputIterator, typename _OutputIterator, typename _Tp> + _OutputIterator + replace_copy(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, + const _Tp& __old_value, const _Tp& __new_value) + { + // concept requirements + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>) + __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, + typename iterator_traits<_InputIterator>::value_type>) + __glibcxx_function_requires(_EqualOpConcept< + typename iterator_traits<_InputIterator>::value_type, _Tp>) + __glibcxx_requires_valid_range(__first, __last); + + for (; __first != __last; ++__first, ++__result) + if (*__first == __old_value) + *__result = __new_value; + else + *__result = *__first; + return __result; + } + + /** + * @brief Copy a sequence, replacing each value for which a predicate + * returns true with another value. + * @param first An input iterator. + * @param last An input iterator. + * @param result An output iterator. + * @param pred A predicate. + * @param new_value The replacement value. + * @return The end of the output sequence, @p result+(last-first). + * + * Copies each element in the range @p [first,last) to the range + * @p [result,result+(last-first)) replacing elements for which + * @p pred returns true with @p new_value.
+  */
+  template<typename _InputIterator, typename _OutputIterator,
+           typename _Predicate, typename _Tp>
+    _OutputIterator
+    replace_copy_if(_InputIterator __first, _InputIterator __last,
+                    _OutputIterator __result,
+                    _Predicate __pred, const _Tp& __new_value)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator,
+            typename iterator_traits<_InputIterator>::value_type>)
+      __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate,
+            typename iterator_traits<_InputIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      for (; __first != __last; ++__first, ++__result)
+        if (__pred(*__first))
+          *__result = __new_value;
+        else
+          *__result = *__first;
+      return __result;
+    }
+
+_GLIBCXX_END_NAMESPACE
+
+_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_P)
+
+  /**
+   *  @brief Apply a function to every element of a sequence.
+   *  @param  first  An input iterator.
+   *  @param  last   An input iterator.
+   *  @param  f      A unary function object.
+   *  @return   @p f.
+   *
+   *  Applies the function object @p f to each element in the range
+   *  @p [first,last).  @p f must not modify the order of the sequence.
+   *  If @p f has a return value it is ignored.
+  */
+  template<typename _InputIterator, typename _Function>
+    _Function
+    for_each(_InputIterator __first, _InputIterator __last, _Function __f)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_requires_valid_range(__first, __last);
+      for (; __first != __last; ++__first)
+        __f(*__first);
+      return __f;
+    }
+
+  /**
+   *  @brief Find the first occurrence of a value in a sequence.
+   *  @param  first  An input iterator.
+   *  @param  last   An input iterator.
+   *  @param  val    The value to find.
+   *  @return   The first iterator @c i in the range @p [first,last)
+   *  such that @c *i == @p val, or @p last if no such iterator exists.
+  */
+  template<typename _InputIterator, typename _Tp>
+    inline _InputIterator
+    find(_InputIterator __first, _InputIterator __last,
+         const _Tp& __val)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_EqualOpConcept<
+                typename iterator_traits<_InputIterator>::value_type, _Tp>)
+      __glibcxx_requires_valid_range(__first, __last);
+      // Dispatch on the iterator category tag.
+      return std::__find(__first, __last, __val,
+                         std::__iterator_category(__first));
+    }
+
+  /**
+   *  @brief Find the first element in a sequence for which a
+   *         predicate is true.
+   *  @param  first  An input iterator.
+   *  @param  last   An input iterator.
+   *  @param  pred   A predicate.
+   *  @return   The first iterator @c i in the range @p [first,last)
+   *  such that @p pred(*i) is true, or @p last if no such iterator exists.
+  */
+  template<typename _InputIterator, typename _Predicate>
+    inline _InputIterator
+    find_if(_InputIterator __first, _InputIterator __last,
+            _Predicate __pred)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate,
+              typename iterator_traits<_InputIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+      // Dispatch on the iterator category tag.
+      return std::__find_if(__first, __last, __pred,
+                            std::__iterator_category(__first));
+    }
+
+  /**
+   *  @brief  Find element from a set in a sequence.
+   *  @param  first1  Start of range to search.
+   *  @param  last1   End of range to search.
+   *  @param  first2  Start of match candidates.
+   *  @param  last2   End of match candidates.
+   *  @return   The first iterator @c i in the range
+   *  @p [first1,last1) such that @c *i == @p *(i2) such that i2 is an
+   *  iterator in [first2,last2), or @p last1 if no such iterator exists.
+   *
+   *  Searches the range @p [first1,last1) for an element that is equal to
+   *  some element in the range [first2,last2).  If found, returns an iterator
+   *  in the range [first1,last1), otherwise returns @p last1.
+  */
+  template<typename _InputIterator, typename _ForwardIterator>
+    _InputIterator
+    find_first_of(_InputIterator __first1, _InputIterator __last1,
+                  _ForwardIterator __first2, _ForwardIterator __last2)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
+      __glibcxx_function_requires(_EqualOpConcept<
+            typename iterator_traits<_InputIterator>::value_type,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first1, __last1);
+      __glibcxx_requires_valid_range(__first2, __last2);
+
+      // For each element of the searched range, scan every candidate.
+      for (; __first1 != __last1; ++__first1)
+        for (_ForwardIterator __iter = __first2; __iter != __last2; ++__iter)
+          if (*__first1 == *__iter)
+            return __first1;
+      return __last1;
+    }
+
+  /**
+   *  @brief  Find element from a set in a sequence using a predicate.
+   *  @param  first1  Start of range to search.
+   *  @param  last1   End of range to search.
+   *  @param  first2  Start of match candidates.
+   *  @param  last2   End of match candidates.
+   *  @param  comp    Predicate to use.
+   *  @return   The first iterator @c i in the range
+   *  @p [first1,last1) such that @c comp(*i, @p *(i2)) is true and i2 is an
+   *  iterator in [first2,last2), or @p last1 if no such iterator exists.
+   *
+   *  Searches the range @p [first1,last1) for an element for which
+   *  @p comp is true with some element in the range [first2,last2).  If
+   *  found, returns an iterator in the range [first1,last1), otherwise
+   *  returns @p last1.
+  */
+  template<typename _InputIterator, typename _ForwardIterator,
+           typename _BinaryPredicate>
+    _InputIterator
+    find_first_of(_InputIterator __first1, _InputIterator __last1,
+                  _ForwardIterator __first2, _ForwardIterator __last2,
+                  _BinaryPredicate __comp)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
+      __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate,
+            typename iterator_traits<_InputIterator>::value_type,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first1, __last1);
+      __glibcxx_requires_valid_range(__first2, __last2);
+
+      // For each element of the searched range, scan every candidate.
+      for (; __first1 != __last1; ++__first1)
+        for (_ForwardIterator __iter = __first2; __iter != __last2; ++__iter)
+          if (__comp(*__first1, *__iter))
+            return __first1;
+      return __last1;
+    }
+
+  /**
+   *  @brief Find two adjacent values in a sequence that are equal.
+   *  @param  first  A forward iterator.
+   *  @param  last   A forward iterator.
+   *  @return   The first iterator @c i such that @c i and @c i+1 are both
+   *  valid iterators in @p [first,last) and such that @c *i == @c *(i+1),
+   *  or @p last if no such iterator exists.
+  */
+  template<typename _ForwardIterator>
+    _ForwardIterator
+    adjacent_find(_ForwardIterator __first, _ForwardIterator __last)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
+      __glibcxx_function_requires(_EqualityComparableConcept<
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+      if (__first == __last)
+        return __last;
+      // __first trails __next by exactly one position.
+      _ForwardIterator __next = __first;
+      while(++__next != __last)
+        {
+          if (*__first == *__next)
+            return __first;
+          __first = __next;
+        }
+      return __last;
+    }
+
+  /**
+   *  @brief Find two adjacent values in a sequence using a predicate.
+   *  @param  first         A forward iterator.
+   *  @param  last          A forward iterator.
+   *  @param  binary_pred   A binary predicate.
+   *  @return   The first iterator @c i such that @c i and @c i+1 are both
+   *  valid iterators in @p [first,last) and such that
+   *  @p binary_pred(*i,*(i+1)) is true, or @p last if no such iterator
+   *  exists.
+  */
+  template<typename _ForwardIterator, typename _BinaryPredicate>
+    _ForwardIterator
+    adjacent_find(_ForwardIterator __first, _ForwardIterator __last,
+                  _BinaryPredicate __binary_pred)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
+      __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate,
+            typename iterator_traits<_ForwardIterator>::value_type,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+      if (__first == __last)
+        return __last;
+      // __first trails __next by exactly one position.
+      _ForwardIterator __next = __first;
+      while(++__next != __last)
+        {
+          if (__binary_pred(*__first, *__next))
+            return __first;
+          __first = __next;
+        }
+      return __last;
+    }
+
+  /**
+   *  @brief Count the number of copies of a value in a sequence.
+   *  @param  first  An input iterator.
+   *  @param  last   An input iterator.
+   *  @param  value  The value to be counted.
+   *  @return   The number of iterators @c i in the range @p [first,last)
+   *  for which @c *i == @p value
+  */
+  template<typename _InputIterator, typename _Tp>
+    typename iterator_traits<_InputIterator>::difference_type
+    count(_InputIterator __first, _InputIterator __last, const _Tp& __value)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_EqualOpConcept<
+        typename iterator_traits<_InputIterator>::value_type, _Tp>)
+      __glibcxx_requires_valid_range(__first, __last);
+      typename iterator_traits<_InputIterator>::difference_type __n = 0;
+      for (; __first != __last; ++__first)
+        if (*__first == __value)
+          ++__n;
+      return __n;
+    }
+
+  /**
+   *  @brief Count the elements of a sequence for which a predicate is true.
+   *  @param  first  An input iterator.
+   *  @param  last   An input iterator.
+   *  @param  pred   A predicate.
+   *  @return   The number of iterators @c i in the range @p [first,last)
+   *  for which @p pred(*i) is true.
+  */
+  template<typename _InputIterator, typename _Predicate>
+    typename iterator_traits<_InputIterator>::difference_type
+    count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate,
+            typename iterator_traits<_InputIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+      typename iterator_traits<_InputIterator>::difference_type __n = 0;
+      for (; __first != __last; ++__first)
+        if (__pred(*__first))
+          ++__n;
+      return __n;
+    }
+
+  /**
+   *  @brief Search a sequence for a matching sub-sequence.
+   *  @param  first1  A forward iterator.
+   *  @param  last1   A forward iterator.
+   *  @param  first2  A forward iterator.
+   *  @param  last2   A forward iterator.
+   *  @return   The first iterator @c i in the range
+   *  @p [first1,last1-(last2-first2)) such that @c *(i+N) == @p *(first2+N)
+   *  for each @c N in the range @p [0,last2-first2), or @p last1 if no
+   *  such iterator exists.
+   *
+   *  Searches the range @p [first1,last1) for a sub-sequence that compares
+   *  equal value-by-value with the sequence given by @p [first2,last2) and
+   *  returns an iterator to the first element of the sub-sequence, or
+   *  @p last1 if the sub-sequence is not found.
+   *
+   *  Because the sub-sequence must lie completely within the range
+   *  @p [first1,last1) it must start at a position less than
+   *  @p last1-(last2-first2) where @p last2-first2 is the length of the
+   *  sub-sequence.
+   *  This means that the returned iterator @c i will be in the range
+   *  @p [first1,last1-(last2-first2))
+  */
+  template<typename _ForwardIterator1, typename _ForwardIterator2>
+    _ForwardIterator1
+    search(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
+           _ForwardIterator2 __first2, _ForwardIterator2 __last2)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator1>)
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator2>)
+      __glibcxx_function_requires(_EqualOpConcept<
+            typename iterator_traits<_ForwardIterator1>::value_type,
+            typename iterator_traits<_ForwardIterator2>::value_type>)
+      __glibcxx_requires_valid_range(__first1, __last1);
+      __glibcxx_requires_valid_range(__first2, __last2);
+
+      // Test for empty ranges
+      if (__first1 == __last1 || __first2 == __last2)
+        return __first1;
+
+      // Test for a pattern of length 1.
+      _ForwardIterator2 __p1(__first2);
+      if (++__p1 == __last2)
+        return _GLIBCXX_STD_P::find(__first1, __last1, *__first2);
+
+      // General case.
+      _ForwardIterator2 __p;
+      _ForwardIterator1 __current = __first1;
+
+      for (;;)
+        {
+          // Locate the next candidate start: an element equal to the
+          // first pattern element.
+          __first1 = _GLIBCXX_STD_P::find(__first1, __last1, *__first2);
+          if (__first1 == __last1)
+            return __last1;
+
+          // __p1 already points at the second pattern element.
+          __p = __p1;
+          __current = __first1;
+          if (++__current == __last1)
+            return __last1;
+
+          while (*__current == *__p)
+            {
+              if (++__p == __last2)
+                return __first1;
+              if (++__current == __last1)
+                return __last1;
+            }
+          ++__first1;
+        }
+      return __first1;
+    }
+
+  /**
+   *  @brief Search a sequence for a matching sub-sequence using a predicate.
+   *  @param  first1     A forward iterator.
+   *  @param  last1      A forward iterator.
+   *  @param  first2     A forward iterator.
+   *  @param  last2      A forward iterator.
+   *  @param  predicate  A binary predicate.
+   *  @return   The first iterator @c i in the range
+   *  @p [first1,last1-(last2-first2)) such that
+   *  @p predicate(*(i+N),*(first2+N)) is true for each @c N in the range
+   *  @p [0,last2-first2), or @p last1 if no such iterator exists.
+   *
+   *  Searches the range @p [first1,last1) for a sub-sequence that compares
+   *  equal value-by-value with the sequence given by @p [first2,last2),
+   *  using @p predicate to determine equality, and returns an iterator
+   *  to the first element of the sub-sequence, or @p last1 if no such
+   *  iterator exists.
+   *
+   *  @see search(_ForwardIter1, _ForwardIter1, _ForwardIter2, _ForwardIter2)
+  */
+  template<typename _ForwardIterator1, typename _ForwardIterator2,
+           typename _BinaryPredicate>
+    _ForwardIterator1
+    search(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
+           _ForwardIterator2 __first2, _ForwardIterator2 __last2,
+           _BinaryPredicate  __predicate)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator1>)
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator2>)
+      __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate,
+            typename iterator_traits<_ForwardIterator1>::value_type,
+            typename iterator_traits<_ForwardIterator2>::value_type>)
+      __glibcxx_requires_valid_range(__first1, __last1);
+      __glibcxx_requires_valid_range(__first2, __last2);
+
+      // Test for empty ranges
+      if (__first1 == __last1 || __first2 == __last2)
+        return __first1;
+
+      // Test for a pattern of length 1.
+      _ForwardIterator2 __p1(__first2);
+      if (++__p1 == __last2)
+        {
+          while (__first1 != __last1
+                 && !bool(__predicate(*__first1, *__first2)))
+            ++__first1;
+          return __first1;
+        }
+
+      // General case.
+      _ForwardIterator2 __p;
+      _ForwardIterator1 __current = __first1;
+
+      for (;;)
+        {
+          // Locate the next candidate start: an element matching the
+          // first pattern element under __predicate.
+          while (__first1 != __last1
+                 && !bool(__predicate(*__first1, *__first2)))
+            ++__first1;
+          if (__first1 == __last1)
+            return __last1;
+
+          // __p1 already points at the second pattern element.
+          __p = __p1;
+          __current = __first1;
+          if (++__current == __last1)
+            return __last1;
+
+          while (__predicate(*__current, *__p))
+            {
+              if (++__p == __last2)
+                return __first1;
+              if (++__current == __last1)
+                return __last1;
+            }
+          ++__first1;
+        }
+      return __first1;
+    }
+
+
+  /**
+   *  @brief Search a sequence for a number of consecutive values.
+   *  @param  first  A forward iterator.
+   *  @param  last   A forward iterator.
+   *  @param  count  The number of consecutive values.
+   *  @param  val    The value to find.
+   *  @return   The first iterator @c i in the range @p [first,last-count)
+   *  such that @c *(i+N) == @p val for each @c N in the range @p [0,count),
+   *  or @p last if no such iterator exists.
+   *
+   *  Searches the range @p [first,last) for @p count consecutive elements
+   *  equal to @p val.
+  */
+  template<typename _ForwardIterator, typename _Integer, typename _Tp>
+    _ForwardIterator
+    search_n(_ForwardIterator __first, _ForwardIterator __last,
+             _Integer __count, const _Tp& __val)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
+      __glibcxx_function_requires(_EqualOpConcept<
+        typename iterator_traits<_ForwardIterator>::value_type, _Tp>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      // A non-positive count returns __first without searching.
+      if (__count <= 0)
+        return __first;
+      if (__count == 1)
+        return _GLIBCXX_STD_P::find(__first, __last, __val);
+      return std::__search_n(__first, __last, __count, __val,
+                             std::__iterator_category(__first));
+    }
+
+
+  /**
+   *  @brief Search a sequence for a number of consecutive values using a
+   *         predicate.
+   *  @param  first        A forward iterator.
+   *  @param  last         A forward iterator.
+   *  @param  count        The number of consecutive values.
+   *  @param  val          The value to find.
+   *  @param  binary_pred  A binary predicate.
+   *  @return   The first iterator @c i in the range @p [first,last-count)
+   *  such that @p binary_pred(*(i+N),val) is true for each @c N in the
+   *  range @p [0,count), or @p last if no such iterator exists.
+   *
+   *  Searches the range @p [first,last) for @p count consecutive elements
+   *  for which the predicate returns true.
+  */
+  template<typename _ForwardIterator, typename _Integer, typename _Tp,
+           typename _BinaryPredicate>
+    _ForwardIterator
+    search_n(_ForwardIterator __first, _ForwardIterator __last,
+             _Integer __count, const _Tp& __val,
+             _BinaryPredicate __binary_pred)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
+      __glibcxx_function_requires(_BinaryPredicateConcept<_BinaryPredicate,
+            typename iterator_traits<_ForwardIterator>::value_type, _Tp>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      // A non-positive count returns __first without searching.
+      if (__count <= 0)
+        return __first;
+      if (__count == 1)
+        {
+          while (__first != __last && !bool(__binary_pred(*__first, __val)))
+            ++__first;
+          return __first;
+        }
+      return std::__search_n(__first, __last, __count, __val, __binary_pred,
+                             std::__iterator_category(__first));
+    }
+
+
+  /**
+   *  @brief Perform an operation on a sequence.
+   *  @param  first     An input iterator.
+   *  @param  last      An input iterator.
+   *  @param  result    An output iterator.
+   *  @param  unary_op  A unary operator.
+   *  @return   An output iterator equal to @p result+(last-first).
+   *
+   *  Applies the operator to each element in the input range and assigns
+   *  the results to successive elements of the output sequence.
+   *  Evaluates @p *(result+N)=unary_op(*(first+N)) for each @c N in the
+   *  range @p [0,last-first).
+   *
+   *  @p unary_op must not alter its argument.
+  */
+  template<typename _InputIterator, typename _OutputIterator,
+           typename _UnaryOperation>
+    _OutputIterator
+    transform(_InputIterator __first, _InputIterator __last,
+              _OutputIterator __result, _UnaryOperation __unary_op)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator,
+            // "the type returned by a _UnaryOperation"
+            __typeof__(__unary_op(*__first))>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      for (; __first != __last; ++__first, ++__result)
+        *__result = __unary_op(*__first);
+      return __result;
+    }
+
+  /**
+   *  @brief Perform an operation on corresponding elements of two sequences.
+   *  @param  first1     An input iterator.
+   *  @param  last1      An input iterator.
+   *  @param  first2     An input iterator.
+   *  @param  result     An output iterator.
+   *  @param  binary_op  A binary operator.
+   *  @return   An output iterator equal to @p result+(last1-first1).
+   *
+   *  Applies the operator to the corresponding elements in the two
+   *  input ranges and assigns the results to successive elements of the
+   *  output sequence.
+   *  Evaluates @p *(result+N)=binary_op(*(first1+N),*(first2+N)) for each
+   *  @c N in the range @p [0,last1-first1).
+   *
+   *  @p binary_op must not alter either of its arguments.
+  */
+  template<typename _InputIterator1, typename _InputIterator2,
+           typename _OutputIterator, typename _BinaryOperation>
+    _OutputIterator
+    transform(_InputIterator1 __first1, _InputIterator1 __last1,
+              _InputIterator2 __first2, _OutputIterator __result,
+              _BinaryOperation __binary_op)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
+      __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator,
+            // "the type returned by a _BinaryOperation"
+            __typeof__(__binary_op(*__first1,*__first2))>)
+      __glibcxx_requires_valid_range(__first1, __last1);
+
+      // Only the first range has an end iterator; __first2 is advanced in
+      // step with __first1 and is never compared against an end.
+      for (; __first1 != __last1; ++__first1, ++__first2, ++__result)
+        *__result = __binary_op(*__first1, *__first2);
+      return __result;
+    }
+
+  /**
+   *  @brief Replace each occurrence of one value in a sequence with another
+   *         value.
+   *  @param  first      A forward iterator.
+   *  @param  last       A forward iterator.
+   *  @param  old_value  The value to be replaced.
+   *  @param  new_value  The replacement value.
+   *  @return   replace() returns no value.
+   *
+   *  For each iterator @c i in the range @p [first,last) if @c *i ==
+   *  @p old_value then the assignment @c *i = @p new_value is performed.
+  */
+  template<typename _ForwardIterator, typename _Tp>
+    void
+    replace(_ForwardIterator __first, _ForwardIterator __last,
+            const _Tp& __old_value, const _Tp& __new_value)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
+                                  _ForwardIterator>)
+      __glibcxx_function_requires(_EqualOpConcept<
+            typename iterator_traits<_ForwardIterator>::value_type, _Tp>)
+      __glibcxx_function_requires(_ConvertibleConcept<_Tp,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      for (; __first != __last; ++__first)
+        if (*__first == __old_value)
+          *__first = __new_value;
+    }
+
+  /**
+   *  @brief Replace each value in a sequence for which a predicate returns
+   *         true with another value.
+   *  @param  first      A forward iterator.
+   *  @param  last       A forward iterator.
+   *  @param  pred       A predicate.
+   *  @param  new_value  The replacement value.
+   *  @return   replace_if() returns no value.
+   *
+   *  For each iterator @c i in the range @p [first,last) if @p pred(*i)
+   *  is true then the assignment @c *i = @p new_value is performed.
+  */
+  template<typename _ForwardIterator, typename _Predicate, typename _Tp>
+    void
+    replace_if(_ForwardIterator __first, _ForwardIterator __last,
+               _Predicate __pred, const _Tp& __new_value)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
+                                  _ForwardIterator>)
+      __glibcxx_function_requires(_ConvertibleConcept<_Tp,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      for (; __first != __last; ++__first)
+        if (__pred(*__first))
+          *__first = __new_value;
+    }
+
+  /**
+   *  @brief Assign the result of a function object to each value in a
+   *         sequence.
+   *  @param  first  A forward iterator.
+   *  @param  last   A forward iterator.
+   *  @param  gen    A function object taking no arguments.
+   *  @return   generate() returns no value.
+   *
+   *  Performs the assignment @c *i = @p gen() for each @c i in the range
+   *  @p [first,last).
+  */
+  template<typename _ForwardIterator, typename _Generator>
+    void
+    generate(_ForwardIterator __first, _ForwardIterator __last,
+             _Generator __gen)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
+      __glibcxx_function_requires(_GeneratorConcept<_Generator,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      for (; __first != __last; ++__first)
+        *__first = __gen();
+    }
+
+  /**
+   *  @brief Assign the result of a function object to each value in a
+   *         sequence.
+   *  @param  first  An output iterator.
+   *  @param  n      The length of the sequence.
+   *  @param  gen    A function object taking no arguments.
+   *  @return   The end of the sequence, @p first+n
+   *
+   *  Performs the assignment @c *i = @p gen() for each @c i in the range
+   *  @p [first,first+n).
+  */
+  template<typename _OutputIterator, typename _Size, typename _Generator>
+    _OutputIterator
+    generate_n(_OutputIterator __first, _Size __n, _Generator __gen)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator,
+            // "the type returned by a _Generator"
+            __typeof__(__gen())>)
+
+      // A non-positive __n performs no assignments.
+      for (; __n > 0; --__n, ++__first)
+        *__first = __gen();
+      return __first;
+    }
+
+
+  /**
+   *  @brief Copy a sequence, removing consecutive duplicate values.
+   *  @param  first   An input iterator.
+   *  @param  last    An input iterator.
+   *  @param  result  An output iterator.
+   *  @return   An iterator designating the end of the resulting sequence.
+   *
+   *  Copies each element in the range @p [first,last) to the range
+   *  beginning at @p result, except that only the first element is copied
+   *  from groups of consecutive elements that compare equal.
+   *  unique_copy() is stable, so the relative order of elements that are
+   *  copied is unchanged.
+   *
+   *  @if maint
+   *  _GLIBCXX_RESOLVE_LIB_DEFECTS
+   *  DR 241. Does unique_copy() require CopyConstructible and Assignable?
+   *
+   *  _GLIBCXX_RESOLVE_LIB_DEFECTS
+   *  DR 538.
241 again: Does unique_copy() require CopyConstructible and
+   *  Assignable?
+   *  @endif
+  */
+  template<typename _InputIterator, typename _OutputIterator>
+    inline _OutputIterator
+    unique_copy(_InputIterator __first, _InputIterator __last,
+                _OutputIterator __result)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator,
+            typename iterator_traits<_InputIterator>::value_type>)
+      __glibcxx_function_requires(_EqualityComparableConcept<
+            typename iterator_traits<_InputIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      if (__first == __last)
+        return __result;
+      // Dispatch on the category tags of both the input and the output
+      // iterator.
+      return std::__unique_copy(__first, __last, __result,
+                                std::__iterator_category(__first),
+                                std::__iterator_category(__result));
+    }
+
+  /**
+   *  @brief Copy a sequence, removing consecutive values using a predicate.
+   *  @param  first        An input iterator.
+   *  @param  last         An input iterator.
+   *  @param  result       An output iterator.
+   *  @param  binary_pred  A binary predicate.
+   *  @return   An iterator designating the end of the resulting sequence.
+   *
+   *  Copies each element in the range @p [first,last) to the range
+   *  beginning at @p result, except that only the first element is copied
+   *  from groups of consecutive elements for which @p binary_pred returns
+   *  true.
+   *  unique_copy() is stable, so the relative order of elements that are
+   *  copied is unchanged.
+   *
+   *  @if maint
+   *  _GLIBCXX_RESOLVE_LIB_DEFECTS
+   *  DR 241. Does unique_copy() require CopyConstructible and Assignable?
+   *  @endif
+  */
+  template<typename _InputIterator, typename _OutputIterator,
+           typename _BinaryPredicate>
+    inline _OutputIterator
+    unique_copy(_InputIterator __first, _InputIterator __last,
+                _OutputIterator __result,
+                _BinaryPredicate __binary_pred)
+    {
+      // concept requirements -- predicates checked later
+      __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
+      __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator,
+            typename iterator_traits<_InputIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      if (__first == __last)
+        return __result;
+      // Dispatch on the category tags of both the input and the output
+      // iterator.
+      return std::__unique_copy(__first, __last, __result, __binary_pred,
+                                std::__iterator_category(__first),
+                                std::__iterator_category(__result));
+    }
+
+
+  /**
+   *  @brief Randomly shuffle the elements of a sequence.
+   *  @param  first   A random access iterator.
+   *  @param  last    A random access iterator.
+   *  @return  Nothing.
+   *
+   *  Reorder the elements in the range @p [first,last) using a random
+   *  distribution, so that every possible ordering of the sequence is
+   *  equally likely.
+  */
+  template<typename _RandomAccessIterator>
+    inline void
+    random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept<
+            _RandomAccessIterator>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      // Swap each element with one at a position in [__first, __i],
+      // chosen with std::rand().
+      if (__first != __last)
+        for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
+          std::iter_swap(__i, __first + (std::rand() % ((__i - __first) + 1)));
+    }
+
+  /**
+   *  @brief Shuffle the elements of a sequence using a random number
+   *         generator.
+   *  @param  first   A random access iterator.
+   *  @param  last    A random access iterator.
+   *  @param  rand    The RNG functor or function.
+   *  @return  Nothing.
+   *
+   *  Reorders the elements in the range @p [first,last) using @p rand to
+   *  provide a random distribution. Calling @p rand(N) for a positive
+   *  integer @p N should return a randomly chosen integer from the
+   *  range [0,N).
+  */
+  template<typename _RandomAccessIterator, typename _RandomNumberGenerator>
+    void
+    random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last,
+                   _RandomNumberGenerator& __rand)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept<
+            _RandomAccessIterator>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      if (__first == __last)
+        return;
+      // Swap each element with the one at offset __rand(k) from __first,
+      // where k is the number of elements in [__first, __i].
+      for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
+        std::iter_swap(__i, __first + __rand((__i - __first) + 1));
+    }
+
+
+  /**
+   *  @brief Move elements for which a predicate is true to the beginning
+   *         of a sequence.
+   *  @param  first   A forward iterator.
+   *  @param  last    A forward iterator.
+   *  @param  pred    A predicate functor.
+   *  @return  An iterator @p middle such that @p pred(i) is true for each
+   *  iterator @p i in the range @p [first,middle) and false for each @p i
+   *  in the range @p [middle,last).
+   *
+   *  @p pred must not modify its operand. @p partition() does not preserve
+   *  the relative ordering of elements in each group, use
+   *  @p stable_partition() if this is needed.
+  */
+  template<typename _ForwardIterator, typename _Predicate>
+    inline _ForwardIterator
+    partition(_ForwardIterator __first, _ForwardIterator __last,
+              _Predicate   __pred)
+    {
+      // concept requirements
+      __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
+                                  _ForwardIterator>)
+      __glibcxx_function_requires(_UnaryPredicateConcept<_Predicate,
+            typename iterator_traits<_ForwardIterator>::value_type>)
+      __glibcxx_requires_valid_range(__first, __last);
+
+      // Dispatch on the iterator category tag.
+      return std::__partition(__first, __last, __pred,
+                              std::__iterator_category(__first));
+    }
+
+
+
+  /**
+   *  @brief Sort the smallest elements of a sequence.
+   *  @param  first   An iterator.
+   *  @param  middle  Another iterator.
+   *  @param  last    Another iterator.
+   *  @return  Nothing.
+   *
+   *  Sorts the smallest @p (middle-first) elements in the range
+   *  @p [first,last) and moves them to the range @p [first,middle). The
+   *  order of the remaining elements in the range @p [middle,last) is
+   *  undefined.
+   *  After the sort if @p i and @p j are iterators in the range
+   *  @p [first,middle) such that @p i precedes @p j and @p k is an iterator
+   *  in the range @p [middle,last) then @p *j<*i and @p *k<*i are both false.
+  */
+  template<typename _RandomAccessIterator>
+    inline void
+    partial_sort(_RandomAccessIterator __first,
+                 _RandomAccessIterator __middle,
+                 _RandomAccessIterator __last)
+    {
+      typedef typename iterator_traits<_RandomAccessIterator>::value_type
+        _ValueType;
+
+      // concept requirements
+      __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept<
+            _RandomAccessIterator>)
+      __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>)
+      __glibcxx_requires_valid_range(__first, __middle);
+      __glibcxx_requires_valid_range(__middle, __last);
+
+      // Two steps: __heap_select over the whole range, then sort_heap on
+      // [__first, __middle).
+      std::__heap_select(__first, __middle, __last);
+      std::sort_heap(__first, __middle);
+    }
+
+  /**
+   *  @brief Sort the smallest elements of a sequence using a predicate
+   *         for comparison.
+   *  @param  first   An iterator.
+   *  @param  middle  Another iterator.
+   *  @param  last    Another iterator.
+   *  @param  comp    A comparison functor.
+   *  @return  Nothing.
+   *
+   *  Sorts the smallest @p (middle-first) elements in the range
+   *  @p [first,last) and moves them to the range @p [first,middle). The
+   *  order of the remaining elements in the range @p [middle,last) is
+   *  undefined.
+   *  After the sort if @p i and @p j are iterators in the range
+   *  @p [first,middle) such that @p i precedes @p j and @p k is an iterator
+   *  in the range @p [middle,last) then @p comp(*j,*i) and @p comp(*k,*i)
+   *  are both false.
+  */
+  template<typename _RandomAccessIterator, typename _Compare>
+    inline void
+    partial_sort(_RandomAccessIterator __first,
+                 _RandomAccessIterator __middle,
+                 _RandomAccessIterator __last,
+                 _Compare __comp)
+    {
+      typedef typename iterator_traits<_RandomAccessIterator>::value_type
+        _ValueType;
+
+      // concept requirements
+      __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept<
+            _RandomAccessIterator>)
+      __glibcxx_function_requires(_BinaryPredicateConcept<_Compare,
+                                  _ValueType, _ValueType>)
+      __glibcxx_requires_valid_range(__first, __middle);
+      __glibcxx_requires_valid_range(__middle, __last);
+
+      // Two steps: __heap_select over the whole range, then sort_heap on
+      // [__first, __middle), both using __comp.
+      std::__heap_select(__first, __middle, __last, __comp);
+      std::sort_heap(__first, __middle, __comp);
+    }
+
+  /**
+   *  @brief Sort a sequence just enough to find a particular position.
+   *  @param  first   An iterator.
+   *  @param  nth     Another iterator.
+   *  @param  last    Another iterator.
+   *  @return  Nothing.
+   *
+   *  Rearranges the elements in the range @p [first,last) so that @p *nth
+   *  is the same element that would have been in that position had the
+   *  whole sequence been sorted. The elements either side of @p *nth are
+   *  not completely sorted, but for any iterator @p i in the range
+   *  @p [first,nth) and any iterator @p j in the range @p [nth,last) it
+   *  holds that @p *j<*i is false.
+ */ + template + inline void + nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last) + { + typedef typename iterator_traits<_RandomAccessIterator>::value_type + _ValueType; + + // concept requirements + __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< + _RandomAccessIterator>) + __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>) + __glibcxx_requires_valid_range(__first, __nth); + __glibcxx_requires_valid_range(__nth, __last); + + if (__first == __last || __nth == __last) + return; + + std::__introselect(__first, __nth, __last, + std::__lg(__last - __first) * 2); + } + + /** + * @brief Sort a sequence just enough to find a particular position + * using a predicate for comparison. + * @param first An iterator. + * @param nth Another iterator. + * @param last Another iterator. + * @param comp A comparison functor. + * @return Nothing. + * + * Rearranges the elements in the range @p [first,last) so that @p *nth + * is the same element that would have been in that position had the + * whole sequence been sorted. The elements either side of @p *nth are + * not completely sorted, but for any iterator @i in the range + * @p [first,nth) and any iterator @j in the range @p [nth,last) it + * holds that @p comp(*j,*i) is false. 
+ */ + template + inline void + nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last, _Compare __comp) + { + typedef typename iterator_traits<_RandomAccessIterator>::value_type + _ValueType; + + // concept requirements + __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< + _RandomAccessIterator>) + __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, + _ValueType, _ValueType>) + __glibcxx_requires_valid_range(__first, __nth); + __glibcxx_requires_valid_range(__nth, __last); + + if (__first == __last || __nth == __last) + return; + + std::__introselect(__first, __nth, __last, + std::__lg(__last - __first) * 2, __comp); + } + + + /** + * @brief Sort the elements of a sequence. + * @param first An iterator. + * @param last Another iterator. + * @return Nothing. + * + * Sorts the elements in the range @p [first,last) in ascending order, + * such that @p *(i+1)<*i is false for each iterator @p i in the range + * @p [first,last-1). + * + * The relative ordering of equivalent elements is not preserved, use + * @p stable_sort() if this is needed. + */ + template + inline void + sort(_RandomAccessIterator __first, _RandomAccessIterator __last) + { + typedef typename iterator_traits<_RandomAccessIterator>::value_type + _ValueType; + + // concept requirements + __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< + _RandomAccessIterator>) + __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>) + __glibcxx_requires_valid_range(__first, __last); + + if (__first != __last) + { + std::__introsort_loop(__first, __last, + std::__lg(__last - __first) * 2); + std::__final_insertion_sort(__first, __last); + } + } + + /** + * @brief Sort the elements of a sequence using a predicate for comparison. + * @param first An iterator. + * @param last Another iterator. + * @param comp A comparison functor. + * @return Nothing. 
+ * + * Sorts the elements in the range @p [first,last) in ascending order, + * such that @p comp(*(i+1),*i) is false for every iterator @p i in the + * range @p [first,last-1). + * + * The relative ordering of equivalent elements is not preserved, use + * @p stable_sort() if this is needed. + */ + template + inline void + sort(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) + { + typedef typename iterator_traits<_RandomAccessIterator>::value_type + _ValueType; + + // concept requirements + __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< + _RandomAccessIterator>) + __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, _ValueType, + _ValueType>) + __glibcxx_requires_valid_range(__first, __last); + + if (__first != __last) + { + std::__introsort_loop(__first, __last, + std::__lg(__last - __first) * 2, __comp); + std::__final_insertion_sort(__first, __last, __comp); + } + } + + /** + * @brief Merges two sorted ranges. + * @param first1 An iterator. + * @param first2 Another iterator. + * @param last1 Another iterator. + * @param last2 Another iterator. + * @param result An iterator pointing to the end of the merged range. + * @return An iterator pointing to the first element "not less + * than" @a val. + * + * Merges the ranges [first1,last1) and [first2,last2) into the sorted range + * [result, result + (last1-first1) + (last2-first2)). Both input ranges + * must be sorted, and the output range must not overlap with either of + * the input ranges. The sort is @e stable, that is, for equivalent + * elements in the two ranges, elements from the first range will always + * come before elements from the second. 
+ */ + template + _OutputIterator + merge(_InputIterator1 __first1, _InputIterator1 __last1, + _InputIterator2 __first2, _InputIterator2 __last2, + _OutputIterator __result) + { + typedef typename iterator_traits<_InputIterator1>::value_type + _ValueType1; + typedef typename iterator_traits<_InputIterator2>::value_type + _ValueType2; + + // concept requirements + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) + __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, + _ValueType1>) + __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, + _ValueType2>) + __glibcxx_function_requires(_LessThanOpConcept<_ValueType2, _ValueType1>) + __glibcxx_requires_sorted(__first1, __last1); + __glibcxx_requires_sorted(__first2, __last2); + + while (__first1 != __last1 && __first2 != __last2) + { + if (*__first2 < *__first1) + { + *__result = *__first2; + ++__first2; + } + else + { + *__result = *__first1; + ++__first1; + } + ++__result; + } + return std::copy(__first2, __last2, std::copy(__first1, __last1, + __result)); + } + + /** + * @brief Merges two sorted ranges. + * @param first1 An iterator. + * @param first2 Another iterator. + * @param last1 Another iterator. + * @param last2 Another iterator. + * @param result An iterator pointing to the end of the merged range. + * @param comp A functor to use for comparisons. + * @return An iterator pointing to the first element "not less + * than" @a val. + * + * Merges the ranges [first1,last1) and [first2,last2) into the sorted range + * [result, result + (last1-first1) + (last2-first2)). Both input ranges + * must be sorted, and the output range must not overlap with either of + * the input ranges. The sort is @e stable, that is, for equivalent + * elements in the two ranges, elements from the first range will always + * come before elements from the second. 
+ * + * The comparison function should have the same effects on ordering as + * the function used for the initial sort. + */ + template + _OutputIterator + merge(_InputIterator1 __first1, _InputIterator1 __last1, + _InputIterator2 __first2, _InputIterator2 __last2, + _OutputIterator __result, _Compare __comp) + { + typedef typename iterator_traits<_InputIterator1>::value_type + _ValueType1; + typedef typename iterator_traits<_InputIterator2>::value_type + _ValueType2; + + // concept requirements + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) + __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, + _ValueType1>) + __glibcxx_function_requires(_OutputIteratorConcept<_OutputIterator, + _ValueType2>) + __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, + _ValueType2, _ValueType1>) + __glibcxx_requires_sorted_pred(__first1, __last1, __comp); + __glibcxx_requires_sorted_pred(__first2, __last2, __comp); + + while (__first1 != __last1 && __first2 != __last2) + { + if (__comp(*__first2, *__first1)) + { + *__result = *__first2; + ++__first2; + } + else + { + *__result = *__first1; + ++__first1; + } + ++__result; + } + return std::copy(__first2, __last2, std::copy(__first1, __last1, + __result)); + } + + + /** + * @brief Sort the elements of a sequence, preserving the relative order + * of equivalent elements. + * @param first An iterator. + * @param last Another iterator. + * @return Nothing. + * + * Sorts the elements in the range @p [first,last) in ascending order, + * such that @p *(i+1)<*i is false for each iterator @p i in the range + * @p [first,last-1). 
+ * + * The relative ordering of equivalent elements is preserved, so any two + * elements @p x and @p y in the range @p [first,last) such that + * @p x + inline void + stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last) + { + typedef typename iterator_traits<_RandomAccessIterator>::value_type + _ValueType; + typedef typename iterator_traits<_RandomAccessIterator>::difference_type + _DistanceType; + + // concept requirements + __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< + _RandomAccessIterator>) + __glibcxx_function_requires(_LessThanComparableConcept<_ValueType>) + __glibcxx_requires_valid_range(__first, __last); + + _Temporary_buffer<_RandomAccessIterator, _ValueType> __buf(__first, + __last); + if (__buf.begin() == 0) + std::__inplace_stable_sort(__first, __last); + else + std::__stable_sort_adaptive(__first, __last, __buf.begin(), + _DistanceType(__buf.size())); + } + + /** + * @brief Sort the elements of a sequence using a predicate for comparison, + * preserving the relative order of equivalent elements. + * @param first An iterator. + * @param last Another iterator. + * @param comp A comparison functor. + * @return Nothing. + * + * Sorts the elements in the range @p [first,last) in ascending order, + * such that @p comp(*(i+1),*i) is false for each iterator @p i in the + * range @p [first,last-1). + * + * The relative ordering of equivalent elements is preserved, so any two + * elements @p x and @p y in the range @p [first,last) such that + * @p comp(x,y) is false and @p comp(y,x) is false will have the same + * relative ordering after calling @p stable_sort(). 
+ */ + template + inline void + stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) + { + typedef typename iterator_traits<_RandomAccessIterator>::value_type + _ValueType; + typedef typename iterator_traits<_RandomAccessIterator>::difference_type + _DistanceType; + + // concept requirements + __glibcxx_function_requires(_Mutable_RandomAccessIteratorConcept< + _RandomAccessIterator>) + __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, + _ValueType, + _ValueType>) + __glibcxx_requires_valid_range(__first, __last); + + _Temporary_buffer<_RandomAccessIterator, _ValueType> __buf(__first, + __last); + if (__buf.begin() == 0) + std::__inplace_stable_sort(__first, __last, __comp); + else + std::__stable_sort_adaptive(__first, __last, __buf.begin(), + _DistanceType(__buf.size()), __comp); + } + + /** * @brief Return the union of two sorted ranges. * @param first1 Start of first range. @@ -5167,12 +5383,10 @@ _GLIBCXX_BEGIN_NAMESPACE(std) ++__first1; ++__first2; } - return std::copy(__first2, __last2, std::copy(__first1, - __last1, __result)); + return std::copy(__first2, __last2, + std::copy(__first1, __last1, __result)); } - // min_element and max_element, with and without an explicitly supplied - // comparison function. /** * @brief Return the minimum element in a range. @@ -5281,230 +5495,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return __result; } - // next_permutation and prev_permutation, with and without an explicitly - // supplied comparison function. - - /** - * @brief Permute range into the next "dictionary" ordering. - * @param first Start of range. - * @param last End of range. - * @return False if wrapped to first permutation, true otherwise. - * - * Treats all permutations of the range as a set of "dictionary" sorted - * sequences. Permutes the current sequence into the next one of this set. - * Returns true if there are more sequences to generate. 
If the sequence - * is the largest of the set, the smallest is generated and false returned. - */ - template - bool - next_permutation(_BidirectionalIterator __first, - _BidirectionalIterator __last) - { - // concept requirements - __glibcxx_function_requires(_BidirectionalIteratorConcept< - _BidirectionalIterator>) - __glibcxx_function_requires(_LessThanComparableConcept< - typename iterator_traits<_BidirectionalIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first == __last) - return false; - _BidirectionalIterator __i = __first; - ++__i; - if (__i == __last) - return false; - __i = __last; - --__i; - - for(;;) - { - _BidirectionalIterator __ii = __i; - --__i; - if (*__i < *__ii) - { - _BidirectionalIterator __j = __last; - while (!(*__i < *--__j)) - {} - std::iter_swap(__i, __j); - std::reverse(__ii, __last); - return true; - } - if (__i == __first) - { - std::reverse(__first, __last); - return false; - } - } - } - - /** - * @brief Permute range into the next "dictionary" ordering using - * comparison functor. - * @param first Start of range. - * @param last End of range. - * @param comp - * @return False if wrapped to first permutation, true otherwise. - * - * Treats all permutations of the range [first,last) as a set of - * "dictionary" sorted sequences ordered by @a comp. Permutes the current - * sequence into the next one of this set. Returns true if there are more - * sequences to generate. If the sequence is the largest of the set, the - * smallest is generated and false returned. 
- */ - template - bool - next_permutation(_BidirectionalIterator __first, - _BidirectionalIterator __last, _Compare __comp) - { - // concept requirements - __glibcxx_function_requires(_BidirectionalIteratorConcept< - _BidirectionalIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, - typename iterator_traits<_BidirectionalIterator>::value_type, - typename iterator_traits<_BidirectionalIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first == __last) - return false; - _BidirectionalIterator __i = __first; - ++__i; - if (__i == __last) - return false; - __i = __last; - --__i; - - for(;;) - { - _BidirectionalIterator __ii = __i; - --__i; - if (__comp(*__i, *__ii)) - { - _BidirectionalIterator __j = __last; - while (!bool(__comp(*__i, *--__j))) - {} - std::iter_swap(__i, __j); - std::reverse(__ii, __last); - return true; - } - if (__i == __first) - { - std::reverse(__first, __last); - return false; - } - } - } - - /** - * @brief Permute range into the previous "dictionary" ordering. - * @param first Start of range. - * @param last End of range. - * @return False if wrapped to last permutation, true otherwise. - * - * Treats all permutations of the range as a set of "dictionary" sorted - * sequences. Permutes the current sequence into the previous one of this - * set. Returns true if there are more sequences to generate. If the - * sequence is the smallest of the set, the largest is generated and false - * returned. 
- */ - template - bool - prev_permutation(_BidirectionalIterator __first, - _BidirectionalIterator __last) - { - // concept requirements - __glibcxx_function_requires(_BidirectionalIteratorConcept< - _BidirectionalIterator>) - __glibcxx_function_requires(_LessThanComparableConcept< - typename iterator_traits<_BidirectionalIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first == __last) - return false; - _BidirectionalIterator __i = __first; - ++__i; - if (__i == __last) - return false; - __i = __last; - --__i; - - for(;;) - { - _BidirectionalIterator __ii = __i; - --__i; - if (*__ii < *__i) - { - _BidirectionalIterator __j = __last; - while (!(*--__j < *__i)) - {} - std::iter_swap(__i, __j); - std::reverse(__ii, __last); - return true; - } - if (__i == __first) - { - std::reverse(__first, __last); - return false; - } - } - } - - /** - * @brief Permute range into the previous "dictionary" ordering using - * comparison functor. - * @param first Start of range. - * @param last End of range. - * @param comp - * @return False if wrapped to last permutation, true otherwise. - * - * Treats all permutations of the range [first,last) as a set of - * "dictionary" sorted sequences ordered by @a comp. Permutes the current - * sequence into the previous one of this set. Returns true if there are - * more sequences to generate. If the sequence is the smallest of the set, - * the largest is generated and false returned. 
- */ - template - bool - prev_permutation(_BidirectionalIterator __first, - _BidirectionalIterator __last, _Compare __comp) - { - // concept requirements - __glibcxx_function_requires(_BidirectionalIteratorConcept< - _BidirectionalIterator>) - __glibcxx_function_requires(_BinaryPredicateConcept<_Compare, - typename iterator_traits<_BidirectionalIterator>::value_type, - typename iterator_traits<_BidirectionalIterator>::value_type>) - __glibcxx_requires_valid_range(__first, __last); - - if (__first == __last) - return false; - _BidirectionalIterator __i = __first; - ++__i; - if (__i == __last) - return false; - __i = __last; - --__i; - - for(;;) - { - _BidirectionalIterator __ii = __i; - --__i; - if (__comp(*__ii, *__i)) - { - _BidirectionalIterator __j = __last; - while (!bool(__comp(*--__j, *__i))) - {} - std::iter_swap(__i, __j); - std::reverse(__ii, __last); - return true; - } - if (__i == __first) - { - std::reverse(__first, __last); - return false; - } - } - } - -_GLIBCXX_END_NAMESPACE +_GLIBCXX_END_NESTED_NAMESPACE #endif /* _STL_ALGO_H */ diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h index 4146b21df5b..c6648b43b9d 100644 --- a/libstdc++-v3/include/bits/stl_algobase.h +++ b/libstdc++-v3/include/bits/stl_algobase.h @@ -65,11 +65,10 @@ #include #include #include -#include #include #include #include -#include +#include #include #include #include @@ -596,7 +595,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) __niter_base<_ForwardIterator>::__b(__last), __value); } - template struct __fill_n { @@ -678,7 +676,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) __value)); } - template struct __equal { @@ -719,6 +716,90 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return std::__equal<__simple>::equal(__first1, __last1, __first2); } + + template + struct __lc_rai + { + template + static _II1 + __newlast1(_II1, _II1 __last1, _II2, _II2) + { return __last1; } + + template + static bool + __cnd2(_II __first, _II __last) + { return __first != __last; } + }; + + 
template<> + struct __lc_rai + { + template + static _RAI1 + __newlast1(_RAI1 __first1, _RAI1 __last1, + _RAI2 __first2, _RAI2 __last2) + { + const typename iterator_traits<_RAI1>::difference_type + __diff1 = __last1 - __first1; + const typename iterator_traits<_RAI2>::difference_type + __diff2 = __last2 - __first2; + return __diff2 < __diff1 ? __first1 + __diff2 : __last1; + } + + template + static bool + __cnd2(_RAI, _RAI) + { return true; } + }; + + // XXX should these be enabled-if'd for signed/unsigned types instead? + inline bool + lexicographical_compare(const unsigned char* __first1, + const unsigned char* __last1, + const unsigned char* __first2, + const unsigned char* __last2) + { + __glibcxx_requires_valid_range(__first1, __last1); + __glibcxx_requires_valid_range(__first2, __last2); + + const size_t __len1 = __last1 - __first1; + const size_t __len2 = __last2 - __first2; + const int __result = __builtin_memcmp(__first1, __first2, + std::min(__len1, __len2)); + return __result != 0 ? 
__result < 0 : __len1 < __len2; + } + + inline bool + lexicographical_compare(const char* __first1, const char* __last1, + const char* __first2, const char* __last2) + { + __glibcxx_requires_valid_range(__first1, __last1); + __glibcxx_requires_valid_range(__first2, __last2); + + if (__gnu_cxx::__numeric_traits::__is_signed) + { + typedef const signed char* value_type; + value_type __f1 = reinterpret_cast(__first1); + value_type __l1 = reinterpret_cast(__last1); + value_type __f2 = reinterpret_cast(__first2); + value_type __l2 = reinterpret_cast(__last2); + return _GLIBCXX_STD_P::lexicographical_compare(__f1, __l1, __f2, __l2); + } + else + { + typedef const unsigned char* value_type; + value_type __f1 = reinterpret_cast(__first1); + value_type __l1 = reinterpret_cast(__last1); + value_type __f2 = reinterpret_cast(__first2); + value_type __l2 = reinterpret_cast(__last2); + return _GLIBCXX_STD_P::lexicographical_compare(__f1, __l1, __f2, __l2); + } + } + +_GLIBCXX_END_NAMESPACE + +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_P) + /** * @brief Tests a range for element-wise equality. * @param first1 An input iterator. @@ -752,24 +833,23 @@ _GLIBCXX_BEGIN_NAMESPACE(std) * @param first1 An input iterator. * @param last1 An input iterator. * @param first2 An input iterator. - * @param binary_pred A binary predicate @link s20_3_1_base functor@endlink. - * @return A boolean true or false. + * @param binary_pred A binary predicate @link s20_3_1_base + * functor@endlink. + * @return A boolean true or false. * * This compares the elements of two ranges using the binary_pred * parameter, and returns true or * false depending on whether all of the corresponding elements of the * ranges are equal. 
*/ - template + template inline bool - equal(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, - _BinaryPredicate __binary_pred) + equal(_IIter1 __first1, _IIter1 __last1, + _IIter2 __first2, _BinaryPredicate __binary_pred) { // concept requirements - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) - __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) + __glibcxx_function_requires(_InputIteratorConcept<_IIter1>) + __glibcxx_function_requires(_InputIteratorConcept<_IIter2>) __glibcxx_requires_valid_range(__first1, __last1); for (; __first1 != __last1; ++__first1, ++__first2) @@ -778,43 +858,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return true; } - - template - struct __lc_rai - { - template - static _II1 - __newlast1(_II1, _II1 __last1, _II2, _II2) - { return __last1; } - - template - static bool - __cnd2(_II __first, _II __last) - { return __first != __last; } - }; - - template<> - struct __lc_rai - { - template - static _RAI1 - __newlast1(_RAI1 __first1, _RAI1 __last1, - _RAI2 __first2, _RAI2 __last2) - { - const typename iterator_traits<_RAI1>::difference_type - __diff1 = __last1 - __first1; - const typename iterator_traits<_RAI2>::difference_type - __diff2 = __last2 - __first2; - return __diff2 < __diff1 ? __first1 + __diff2 : __last1; - } - - template - static bool - __cnd2(_RAI, _RAI) - { return true; } - }; - /** * @brief Performs "dictionary" comparison on ranges. * @param first1 An input iterator. 
@@ -831,7 +874,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) */ template bool - lexicographical_compare(_II1 __first1, _II1 __last1, + lexicographical_compare(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2) { typedef typename iterator_traits<_II1>::iterator_category _Category1; @@ -882,6 +925,7 @@ _GLIBCXX_BEGIN_NAMESPACE(std) { typedef typename iterator_traits<_II1>::iterator_category _Category1; typedef typename iterator_traits<_II2>::iterator_category _Category2; + typedef __lc_rai<_Category1, _Category2> __rai_type; // concept requirements __glibcxx_function_requires(_InputIteratorConcept<_II1>) @@ -889,12 +933,8 @@ _GLIBCXX_BEGIN_NAMESPACE(std) __glibcxx_requires_valid_range(__first1, __last1); __glibcxx_requires_valid_range(__first2, __last2); - __last1 = __lc_rai<_Category1, _Category2>::__newlast1(__first1, - __last1, - __first2, - __last2); - for (; __first1 != __last1 - && __lc_rai<_Category1, _Category2>::__cnd2(__first2, __last2); + __last1 = __rai_type::__newlast1(__first1, __last1, __first2, __last2); + for (; __first1 != __last1 && __rai_type::__cnd2(__first2, __last2); ++__first1, ++__first2) { if (__comp(*__first1, *__first2)) @@ -905,41 +945,82 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return __first1 == __last1 && __first2 != __last2; } - inline bool - lexicographical_compare(const unsigned char* __first1, - const unsigned char* __last1, - const unsigned char* __first2, - const unsigned char* __last2) - { - __glibcxx_requires_valid_range(__first1, __last1); - __glibcxx_requires_valid_range(__first2, __last2); - const size_t __len1 = __last1 - __first1; - const size_t __len2 = __last2 - __first2; - const int __result = __builtin_memcmp(__first1, __first2, - std::min(__len1, __len2)); - return __result != 0 ? __result < 0 : __len1 < __len2; - } + /** + * @brief Finds the places in ranges which don't match. + * @param first1 An input iterator. + * @param last1 An input iterator. + * @param first2 An input iterator. 
+ * @return A pair of iterators pointing to the first mismatch. + * + * This compares the elements of two ranges using @c == and returns a pair + * of iterators. The first iterator points into the first range, the + * second iterator points into the second range, and the elements pointed + * to by the iterators are not equal. + */ + template + pair<_InputIterator1, _InputIterator2> + mismatch(_InputIterator1 __first1, _InputIterator1 __last1, + _InputIterator2 __first2) + { + // concept requirements + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) + __glibcxx_function_requires(_EqualOpConcept< + typename iterator_traits<_InputIterator1>::value_type, + typename iterator_traits<_InputIterator2>::value_type>) + __glibcxx_requires_valid_range(__first1, __last1); - inline bool - lexicographical_compare(const char* __first1, const char* __last1, - const char* __first2, const char* __last2) - { - __glibcxx_requires_valid_range(__first1, __last1); - __glibcxx_requires_valid_range(__first2, __last2); + while (__first1 != __last1 && *__first1 == *__first2) + { + ++__first1; + ++__first2; + } + return pair<_InputIterator1, _InputIterator2>(__first1, __first2); + } - if (__gnu_cxx::__numeric_traits::__is_signed) - return std::lexicographical_compare((const signed char*) __first1, - (const signed char*) __last1, - (const signed char*) __first2, - (const signed char*) __last2); - else - return std::lexicographical_compare((const unsigned char*) __first1, - (const unsigned char*) __last1, - (const unsigned char*) __first2, - (const unsigned char*) __last2); - } + /** + * @brief Finds the places in ranges which don't match. + * @param first1 An input iterator. + * @param last1 An input iterator. + * @param first2 An input iterator. + * @param binary_pred A binary predicate @link s20_3_1_base + * functor@endlink. + * @return A pair of iterators pointing to the first mismatch. 
+ * + * This compares the elements of two ranges using the binary_pred + * parameter, and returns a pair + * of iterators. The first iterator points into the first range, the + * second iterator points into the second range, and the elements pointed + * to by the iterators are not equal. + */ + template + pair<_InputIterator1, _InputIterator2> + mismatch(_InputIterator1 __first1, _InputIterator1 __last1, + _InputIterator2 __first2, _BinaryPredicate __binary_pred) + { + // concept requirements + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>) + __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>) + __glibcxx_requires_valid_range(__first1, __last1); -_GLIBCXX_END_NAMESPACE + while (__first1 != __last1 && bool(__binary_pred(*__first1, *__first2))) + { + ++__first1; + ++__first2; + } + return pair<_InputIterator1, _InputIterator2>(__first1, __first2); + } + +_GLIBCXX_END_NESTED_NAMESPACE + +// NB: This file is included within many other C++ includes, as a way +// of getting the base algorithms. So, make sure that parallel bits +// come in too if requested. +#ifdef _GLIBCXX_PARALLEL +//# include +# include +#endif #endif diff --git a/libstdc++-v3/include/bits/stl_bvector.h b/libstdc++-v3/include/bits/stl_bvector.h index a792b744c1f..a2a86f26f17 100644 --- a/libstdc++-v3/include/bits/stl_bvector.h +++ b/libstdc++-v3/include/bits/stl_bvector.h @@ -62,7 +62,7 @@ #ifndef _STL_BVECTOR_H #define _STL_BVECTOR_H 1 -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) typedef unsigned long _Bit_type; enum { _S_word_bit = int(__CHAR_BIT__ * sizeof(_Bit_type)) }; @@ -432,7 +432,7 @@ _GLIBCXX_END_NESTED_NAMESPACE // Declare a partial specialization of vector. 
#include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @brief A specialization of vector for booleans which offers fixed time diff --git a/libstdc++-v3/include/bits/stl_deque.h b/libstdc++-v3/include/bits/stl_deque.h index 890afc58d41..a0553961a5b 100644 --- a/libstdc++-v3/include/bits/stl_deque.h +++ b/libstdc++-v3/include/bits/stl_deque.h @@ -66,7 +66,7 @@ #include #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @if maint diff --git a/libstdc++-v3/include/bits/stl_list.h b/libstdc++-v3/include/bits/stl_list.h index 44327dab4e6..dac02c8e616 100644 --- a/libstdc++-v3/include/bits/stl_list.h +++ b/libstdc++-v3/include/bits/stl_list.h @@ -64,7 +64,7 @@ #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) // Supporting structures are split into common and templated types; the // latter publicly inherits from the former in an effort to reduce code diff --git a/libstdc++-v3/include/bits/stl_map.h b/libstdc++-v3/include/bits/stl_map.h index 741b9e6c16f..e1429ef6956 100644 --- a/libstdc++-v3/include/bits/stl_map.h +++ b/libstdc++-v3/include/bits/stl_map.h @@ -65,7 +65,7 @@ #include #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @brief A standard container made up of (key,value) pairs, which can be diff --git a/libstdc++-v3/include/bits/stl_multimap.h b/libstdc++-v3/include/bits/stl_multimap.h index 67d81cbd7c7..5730854ee69 100644 --- a/libstdc++-v3/include/bits/stl_multimap.h +++ b/libstdc++-v3/include/bits/stl_multimap.h @@ -64,7 +64,7 @@ #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @brief A standard container made up of (key,value) pairs, which can be diff --git a/libstdc++-v3/include/bits/stl_multiset.h 
b/libstdc++-v3/include/bits/stl_multiset.h index 3f88e534893..dab8d6f47f3 100644 --- a/libstdc++-v3/include/bits/stl_multiset.h +++ b/libstdc++-v3/include/bits/stl_multiset.h @@ -64,7 +64,7 @@ #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @brief A standard container made up of elements, which can be retrieved diff --git a/libstdc++-v3/include/bits/stl_numeric.h b/libstdc++-v3/include/bits/stl_numeric.h index 8e5c69b8c04..3940e4aa970 100644 --- a/libstdc++-v3/include/bits/stl_numeric.h +++ b/libstdc++-v3/include/bits/stl_numeric.h @@ -65,7 +65,7 @@ #include #include -_GLIBCXX_BEGIN_NAMESPACE(std) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_P) /** * @brief Accumulate values in a range. @@ -336,6 +336,6 @@ _GLIBCXX_BEGIN_NAMESPACE(std) return ++__result; } -_GLIBCXX_END_NAMESPACE +_GLIBCXX_END_NESTED_NAMESPACE #endif /* _STL_NUMERIC_H */ diff --git a/libstdc++-v3/include/bits/stl_set.h b/libstdc++-v3/include/bits/stl_set.h index d2d8a6d47c4..3bddefc635d 100644 --- a/libstdc++-v3/include/bits/stl_set.h +++ b/libstdc++-v3/include/bits/stl_set.h @@ -64,7 +64,7 @@ #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @brief A standard container made up of unique keys, which can be diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h index a942a37b714..93db4138bdd 100644 --- a/libstdc++-v3/include/bits/stl_vector.h +++ b/libstdc++-v3/include/bits/stl_vector.h @@ -66,7 +66,7 @@ #include #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @if maint diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc index 09266a2a997..442447c27f1 100644 --- a/libstdc++-v3/include/bits/vector.tcc +++ b/libstdc++-v3/include/bits/vector.tcc @@ -62,7 +62,7 @@ #ifndef _VECTOR_TCC #define _VECTOR_TCC 1 
-_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) template void diff --git a/libstdc++-v3/include/debug/bitset b/libstdc++-v3/include/debug/bitset index 58d4e6b82d9..ba677122d74 100644 --- a/libstdc++-v3/include/debug/bitset +++ b/libstdc++-v3/include/debug/bitset @@ -45,10 +45,10 @@ namespace __debug { template class bitset - : public _GLIBCXX_STD::bitset<_Nb>, + : public _GLIBCXX_STD_D::bitset<_Nb>, public __gnu_debug::_Safe_sequence_base { - typedef _GLIBCXX_STD::bitset<_Nb> _Base; + typedef _GLIBCXX_STD_D::bitset<_Nb> _Base; typedef __gnu_debug::_Safe_sequence_base _Safe_base; public: diff --git a/libstdc++-v3/include/debug/debug.h b/libstdc++-v3/include/debug/debug.h index 95aa3d5b95e..d488f1587d8 100644 --- a/libstdc++-v3/include/debug/debug.h +++ b/libstdc++-v3/include/debug/debug.h @@ -113,6 +113,7 @@ namespace std #else # define _GLIBCXX_DEBUG_PEDASSERT(_Condition) #endif + # define _GLIBCXX_DEBUG_ONLY(_Statement) _Statement # define __glibcxx_requires_cond(_Cond,_Msg) _GLIBCXX_DEBUG_VERIFY(_Cond,_Msg) diff --git a/libstdc++-v3/include/debug/deque b/libstdc++-v3/include/debug/deque index 6e523b882d4..2bf4a19cf8b 100644 --- a/libstdc++-v3/include/debug/deque +++ b/libstdc++-v3/include/debug/deque @@ -45,10 +45,10 @@ namespace __debug { template > class deque - : public _GLIBCXX_STD::deque<_Tp, _Allocator>, + : public _GLIBCXX_STD_D::deque<_Tp, _Allocator>, public __gnu_debug::_Safe_sequence > { - typedef _GLIBCXX_STD::deque<_Tp, _Allocator> _Base; + typedef _GLIBCXX_STD_D::deque<_Tp, _Allocator> _Base; typedef __gnu_debug::_Safe_sequence _Safe_base; public: diff --git a/libstdc++-v3/include/debug/list b/libstdc++-v3/include/debug/list index 939fe4da48d..485df086551 100644 --- a/libstdc++-v3/include/debug/list +++ b/libstdc++-v3/include/debug/list @@ -73,10 +73,10 @@ namespace __debug { template > class list - : public _GLIBCXX_STD::list<_Tp, _Allocator>, + : public _GLIBCXX_STD_D::list<_Tp, 
_Allocator>, public __gnu_debug::_Safe_sequence > { - typedef _GLIBCXX_STD::list<_Tp, _Allocator> _Base; + typedef _GLIBCXX_STD_D::list<_Tp, _Allocator> _Base; typedef __gnu_debug::_Safe_sequence _Safe_base; public: diff --git a/libstdc++-v3/include/debug/map.h b/libstdc++-v3/include/debug/map.h index 96591ca15c8..d6195208664 100644 --- a/libstdc++-v3/include/debug/map.h +++ b/libstdc++-v3/include/debug/map.h @@ -46,10 +46,10 @@ namespace __debug template, typename _Allocator = std::allocator > > class map - : public _GLIBCXX_STD::map<_Key, _Tp, _Compare, _Allocator>, + : public _GLIBCXX_STD_D::map<_Key, _Tp, _Compare, _Allocator>, public __gnu_debug::_Safe_sequence > { - typedef _GLIBCXX_STD::map<_Key, _Tp, _Compare, _Allocator> _Base; + typedef _GLIBCXX_STD_D::map<_Key, _Tp, _Compare, _Allocator> _Base; typedef __gnu_debug::_Safe_sequence _Safe_base; public: diff --git a/libstdc++-v3/include/debug/multimap.h b/libstdc++-v3/include/debug/multimap.h index cbd6704520f..5e4962f9660 100644 --- a/libstdc++-v3/include/debug/multimap.h +++ b/libstdc++-v3/include/debug/multimap.h @@ -46,10 +46,10 @@ namespace __debug template, typename _Allocator = std::allocator > > class multimap - : public _GLIBCXX_STD::multimap<_Key, _Tp, _Compare, _Allocator>, + : public _GLIBCXX_STD_D::multimap<_Key, _Tp, _Compare, _Allocator>, public __gnu_debug::_Safe_sequence > { - typedef _GLIBCXX_STD::multimap<_Key, _Tp, _Compare, _Allocator> _Base; + typedef _GLIBCXX_STD_D::multimap<_Key, _Tp, _Compare, _Allocator> _Base; typedef __gnu_debug::_Safe_sequence _Safe_base; public: diff --git a/libstdc++-v3/include/debug/multiset.h b/libstdc++-v3/include/debug/multiset.h index a37099e1ac8..2ca534a6599 100644 --- a/libstdc++-v3/include/debug/multiset.h +++ b/libstdc++-v3/include/debug/multiset.h @@ -46,10 +46,10 @@ namespace __debug template, typename _Allocator = std::allocator<_Key> > class multiset - : public _GLIBCXX_STD::multiset<_Key, _Compare, _Allocator>, + : public 
_GLIBCXX_STD_D::multiset<_Key, _Compare, _Allocator>, public __gnu_debug::_Safe_sequence > { - typedef _GLIBCXX_STD::multiset<_Key, _Compare, _Allocator> _Base; + typedef _GLIBCXX_STD_D::multiset<_Key, _Compare, _Allocator> _Base; typedef __gnu_debug::_Safe_sequence _Safe_base; public: diff --git a/libstdc++-v3/include/debug/set.h b/libstdc++-v3/include/debug/set.h index 6c2ce9fd1fc..d40d319855b 100644 --- a/libstdc++-v3/include/debug/set.h +++ b/libstdc++-v3/include/debug/set.h @@ -46,10 +46,10 @@ namespace __debug template, typename _Allocator = std::allocator<_Key> > class set - : public _GLIBCXX_STD::set<_Key,_Compare,_Allocator>, + : public _GLIBCXX_STD_D::set<_Key,_Compare,_Allocator>, public __gnu_debug::_Safe_sequence > { - typedef _GLIBCXX_STD::set<_Key,_Compare,_Allocator> _Base; + typedef _GLIBCXX_STD_D::set<_Key,_Compare,_Allocator> _Base; typedef __gnu_debug::_Safe_sequence _Safe_base; public: diff --git a/libstdc++-v3/include/debug/vector b/libstdc++-v3/include/debug/vector index 33b8b638a77..1365525fef2 100644 --- a/libstdc++-v3/include/debug/vector +++ b/libstdc++-v3/include/debug/vector @@ -47,10 +47,10 @@ namespace __debug template > class vector - : public _GLIBCXX_STD::vector<_Tp, _Allocator>, + : public _GLIBCXX_STD_D::vector<_Tp, _Allocator>, public __gnu_debug::_Safe_sequence > { - typedef _GLIBCXX_STD::vector<_Tp, _Allocator> _Base; + typedef _GLIBCXX_STD_D::vector<_Tp, _Allocator> _Base; typedef __gnu_debug::_Safe_sequence _Safe_base; typedef typename _Base::const_iterator _Base_const_iterator; diff --git a/libstdc++-v3/include/ext/hash_map b/libstdc++-v3/include/ext/hash_map index b6855ebb3be..183bff556f4 100644 --- a/libstdc++-v3/include/ext/hash_map +++ b/libstdc++-v3/include/ext/hash_map @@ -65,7 +65,7 @@ #include #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(__gnu_cxx, _GLIBCXX_EXT) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(__gnu_cxx, _GLIBCXX_EXT_D) using std::equal_to; using std::allocator; diff --git a/libstdc++-v3/include/ext/hash_set 
b/libstdc++-v3/include/ext/hash_set index 668fe13bd2a..b796babd68b 100644 --- a/libstdc++-v3/include/ext/hash_set +++ b/libstdc++-v3/include/ext/hash_set @@ -65,7 +65,7 @@ #include #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(__gnu_cxx, _GLIBCXX_EXT) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(__gnu_cxx, _GLIBCXX_EXT_D) using std::equal_to; using std::allocator; diff --git a/libstdc++-v3/include/parallel/algo.h b/libstdc++-v3/include/parallel/algo.h new file mode 100644 index 00000000000..dcda79090b4 --- /dev/null +++ b/libstdc++-v3/include/parallel/algo.h @@ -0,0 +1,1585 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. 
+ +/** @file parallel/algo.h + * @brief Parallel STL function calls corresponding to the stl_algo.h header. + * + * The functions defined here mainly do case switches and + * call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one function call, + * are declared inline. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_ALGO_H +#define _GLIBCXX_PARALLEL_ALGO_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace std +{ +namespace __parallel +{ + // Sequential fallback + template + inline Function + for_each(InputIterator begin, InputIterator end, Function f, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::for_each(begin, end, f); + } + + // Sequential fallback for input iterator case + template + Function + for_each_switch(InputIterator begin, InputIterator end, Function f, IteratorTag, __gnu_parallel::parallelism parallelism_tag) + { + return for_each(begin, end, f, __gnu_parallel::sequential_tag()); + } + + // Parallel algorithm for random access iterators + template + Function + for_each_switch(RandomAccessIterator begin, RandomAccessIterator end, Function f, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::for_each_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) + { + bool dummy; + __gnu_parallel::for_each_selector functionality; + return __gnu_parallel::for_each_template_random_access(begin, end, f, functionality, __gnu_parallel::dummy_reduct(), true, dummy, -1, parallelism_tag); + } + else + return for_each(begin, end, f, __gnu_parallel::sequential_tag()); + } + + 
// Public interface + template + inline Function + for_each(Iterator begin, Iterator end, Function f, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced) + { + typedef std::iterator_traits iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + + return for_each_switch(begin, end, f, iterator_category(), parallelism_tag); + } + + + // Sequential fallback + template + inline InputIterator + find(InputIterator begin, InputIterator end, const T& val, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::find(begin, end, val); } + + // Sequential fallback for input iterator case + template + inline InputIterator + find_switch(InputIterator begin, InputIterator end, const T& val, IteratorTag) + { return _GLIBCXX_STD_P::find(begin, end, val); } + + // Parallel find for random access iterators + template + RandomAccessIterator + find_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& val, random_access_iterator_tag) + { + typedef typename iterator_traits::value_type value_type; + + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + binder2nd<__gnu_parallel::equal_to > comp(__gnu_parallel::equal_to(), val); + return __gnu_parallel::find_template(begin, end, begin, comp, __gnu_parallel::find_if_selector()).first; + } + else + return _GLIBCXX_STD_P::find(begin, end, val); + } + + // Public interface + template + inline InputIterator + find(InputIterator begin, InputIterator end, const T& val) + { + typedef std::iterator_traits iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return find_switch(begin, end, val, iterator_category()); + } + + // Sequential fallback + template + inline InputIterator + find_if(InputIterator begin, InputIterator end, Predicate pred, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::find_if(begin, end, pred); + } + + // Sequential fallback for input iterator case + template + inline InputIterator + 
find_if_switch(InputIterator begin, InputIterator end, Predicate pred, IteratorTag) + { + return _GLIBCXX_STD_P::find_if(begin, end, pred); + } + + // Parallel find_if for random access iterators + template + RandomAccessIterator + find_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + return __gnu_parallel::find_template(begin, end, begin, pred, __gnu_parallel::find_if_selector()).first; + else + return _GLIBCXX_STD_P::find_if(begin, end, pred); + } + + // Public interface + template + inline InputIterator + find_if (InputIterator begin, InputIterator end, Predicate pred) + { + typedef std::iterator_traits iterator_traits; + typedef typename iterator_traits::iterator_category iterator_category; + return find_if_switch(begin, end, pred, iterator_category()); + } + + // Sequential fallback + template + inline InputIterator + find_first_of(InputIterator begin1, InputIterator end1, ForwardIterator begin2, ForwardIterator end2, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::find_first_of(begin1, end1, begin2, end2); + } + + // Sequential fallback + template + inline InputIterator + find_first_of(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, + BinaryPredicate comp, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::find_first_of(begin1, end1, begin2, end2, comp); + } + + // Sequential fallback for input iterator type + template + inline InputIterator + find_first_of_switch(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, IteratorTag1, IteratorTag2) + { + return find_first_of(begin1, end1, begin2, end2, __gnu_parallel::sequential_tag()); + } + + // Parallel algorithm for random access iterators + template + inline RandomAccessIterator + find_first_of_switch(RandomAccessIterator begin1, RandomAccessIterator end1, + ForwardIterator begin2, ForwardIterator end2, 
BinaryPredicate comp, random_access_iterator_tag, IteratorTag) + { + return __gnu_parallel::find_template(begin1, end1, begin1, comp, __gnu_parallel::find_first_of_selector(begin2, end2)).first; + } + + // Sequential fallback for input iterator type + template + inline + InputIterator + find_first_of_switch(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, BinaryPredicate comp, IteratorTag1, IteratorTag2) + { + return find_first_of(begin1, end1, begin2, end2, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface + template + inline InputIterator + find_first_of(InputIterator begin1, InputIterator end1, + ForwardIterator begin2, ForwardIterator end2, BinaryPredicate comp) + { + typedef std::iterator_traits iteratori_traits; + typedef std::iterator_traits iteratorf_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratorf_traits::iterator_category iteratorf_category; + + return find_first_of_switch(begin1, end1, begin2, end2, comp, iteratori_category(), iteratorf_category()); + } + + // Public interface, insert default comparator + template + InputIterator + find_first_of(InputIterator begin1, InputIterator end1, ForwardIterator begin2, ForwardIterator end2) + { + typedef std::iterator_traits iteratori_traits; + typedef std::iterator_traits iteratorf_traits; + typedef typename iteratori_traits::value_type valuei_type; + typedef typename iteratorf_traits::value_type valuef_type; + + return find_first_of(begin1, end1, begin2, end2, __gnu_parallel::equal_to()); + } + + // Sequential fallback + template + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, + __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::unique_copy(begin1, end1, out); + } + + // Sequential fallback + template + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, + Predicate pred, 
__gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::unique_copy(begin1, end1, out, pred); + } + + // Sequential fallback for input iterator case + template + inline OutputIterator + unique_copy_switch(InputIterator begin, InputIterator last, OutputIterator out, + Predicate pred, IteratorTag1, IteratorTag2) + { + return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred); + } + + // Parallel unique_copy for random access iterators + template + RandomAccessOutputIterator + unique_copy_switch(RandomAccessIterator begin, RandomAccessIterator last, RandomAccessOutputIterator out, + Predicate pred, random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(last - begin) > __gnu_parallel::Settings::unique_copy_minimal_n)) + return __gnu_parallel::parallel_unique_copy(begin, last, out, pred); + else + return _GLIBCXX_STD_P::unique_copy(begin, last, out, pred); + } + + // Public interface + template + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out) + { + typedef std::iterator_traits iteratori_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratori_traits::value_type value_type; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return unique_copy_switch(begin1, end1, out, equal_to(), + iteratori_category(), iteratoro_category()); + } + + // Public interface + template + inline OutputIterator + unique_copy(InputIterator begin1, InputIterator end1, OutputIterator out, + Predicate pred) + { + typedef std::iterator_traits iteratori_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori_traits::iterator_category iteratori_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return unique_copy_switch(begin1, end1, out, pred, iteratori_category(), 
iteratoro_category()); + } + + // Sequential fallback + template + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, out); + } + + // Sequential fallback + template + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, out, pred); + } + + // Sequential fallback for input iterator case + template + inline OutputIterator + set_union_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator result, Predicate pred, IteratorTag1, + IteratorTag2, IteratorTag3) + { + return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, result, pred); + } + + // Parallel set_union for random access iterators + template + OutputRandomAccessIterator + set_union_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, + RandomAccessIterator2 end2, OutputRandomAccessIterator result, Predicate pred, + random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_union_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_union_minimal_n)) + return __gnu_parallel::parallel_set_union(begin1, end1, begin2, end2, result, pred); + else + return _GLIBCXX_STD_P::set_union(begin1, end1, begin2, end2, result, pred); + } + + // Public interface + template + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator out) + { + 
typedef std::iterator_traits iteratori1_traits; + typedef std::iterator_traits iteratori2_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori1_traits::iterator_category iteratori1_category; + typedef typename iteratori2_traits::iterator_category iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + typedef typename iteratori1_traits::value_type value1_type; + typedef typename iteratori2_traits::value_type value2_type; + + return set_union_switch(begin1, end1, begin2, end2, out, __gnu_parallel::less(), + iteratori1_category(), iteratori2_category(), iteratoro_category()); + } + + // Public interface + template + inline OutputIterator + set_union(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator out, Predicate pred) + { + typedef std::iterator_traits iteratori1_traits; + typedef std::iterator_traits iteratori2_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori1_traits::iterator_category iteratori1_category; + typedef typename iteratori2_traits::iterator_category iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return set_union_switch(begin1, end1, begin2, end2, out, pred, + iteratori1_category(), iteratori2_category(), iteratoro_category()); + } + + // Sequential fallback. + template + inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, out); + } + + // Sequential fallback. 
+ template + inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, out, pred); + } + + // Sequential fallback for input iterator case + template + inline OutputIterator + set_intersection_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator result, Predicate pred, IteratorTag1, + IteratorTag2, IteratorTag3) + { + return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, result, pred); + } + + // Parallel set_intersection for random access iterators; the size of either input range is checked against Settings::set_intersection_minimal_n (its own threshold, not set_union's), otherwise the sequential algorithm is used + template + OutputRandomAccessIterator + set_intersection_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, + RandomAccessIterator2 end2, OutputRandomAccessIterator result, Predicate pred, + random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_intersection_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_intersection_minimal_n)) + return __gnu_parallel::parallel_set_intersection(begin1, end1, begin2, end2, result, pred); + else + return _GLIBCXX_STD_P::set_intersection(begin1, end1, begin2, end2, result, pred); + } + + // Public interface + template + inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator out) + { + typedef std::iterator_traits iteratori1_traits; + typedef std::iterator_traits iteratori2_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori1_traits::iterator_category iteratori1_category; + typedef typename iteratori2_traits::iterator_category 
iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + typedef typename iteratori1_traits::value_type value1_type; + typedef typename iteratori2_traits::value_type value2_type; + + return set_intersection_switch(begin1, end1, begin2, end2, out, __gnu_parallel::less(), + iteratori1_category(), iteratori2_category(), iteratoro_category()); + } + + template + inline OutputIterator + set_intersection(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator out, Predicate pred) + { + typedef std::iterator_traits iteratori1_traits; + typedef std::iterator_traits iteratori2_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori1_traits::iterator_category iteratori1_category; + typedef typename iteratori2_traits::iterator_category iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return set_intersection_switch(begin1, end1, begin2, end2, out, pred, + iteratori1_category(), iteratori2_category(), iteratoro_category()); + } + + // Sequential fallback + template + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_symmetric_difference(begin1,end1, begin2, end2, out); + } + + // Sequential fallback + template + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, + InputIterator2 begin2, InputIterator2 end2, + OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, end2, out, pred); + } + + // Sequential fallback for input iterator case + template + inline OutputIterator + set_symmetric_difference_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator 
result, Predicate pred, IteratorTag1, IteratorTag2, IteratorTag3) + { + return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, end2, result, pred); + } + + // Parallel set_symmetric_difference for random access iterators + template + OutputRandomAccessIterator + set_symmetric_difference_switch(RandomAccessIterator1 begin1, + RandomAccessIterator1 end1, RandomAccessIterator2 begin2, + RandomAccessIterator2 end2, OutputRandomAccessIterator result, Predicate pred, + random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_symmetric_difference_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_symmetric_difference_minimal_n)) + return __gnu_parallel::parallel_set_symmetric_difference(begin1, end1, begin2, end2, result, pred); + else + return _GLIBCXX_STD_P::set_symmetric_difference(begin1, end1, begin2, end2, result, pred); + } + + // Public interface. + template + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator out) + { + typedef std::iterator_traits iteratori1_traits; + typedef std::iterator_traits iteratori2_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori1_traits::iterator_category iteratori1_category; + typedef typename iteratori2_traits::iterator_category iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + typedef typename iteratori1_traits::value_type value1_type; + typedef typename iteratori2_traits::value_type value2_type; + + return set_symmetric_difference_switch(begin1, end1, begin2, end2, out, __gnu_parallel::less(), + iteratori1_category(), iteratori2_category(), iteratoro_category()); + } + + // Public interface. 
+ template + inline OutputIterator + set_symmetric_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + InputIterator2 end2, OutputIterator out, Predicate pred) + { + typedef std::iterator_traits iteratori1_traits; + typedef std::iterator_traits iteratori2_traits; + typedef std::iterator_traits iteratoro_traits; + typedef typename iteratori1_traits::iterator_category iteratori1_category; + typedef typename iteratori2_traits::iterator_category iteratori2_category; + typedef typename iteratoro_traits::iterator_category iteratoro_category; + + return set_symmetric_difference_switch(begin1, end1, begin2, end2, out, pred, + iteratori1_category(), iteratori2_category(), iteratoro_category()); + } + + // Sequential fallback. + template + inline OutputIterator + set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator out, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_difference(begin1,end1, begin2, end2, out); + } + + // Sequential fallback. + template + inline OutputIterator + set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator out, Predicate pred, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::set_difference(begin1, end1, begin2, end2, out, pred); + } + + // Sequential fallback for input iterator case. 
+  template
+    inline OutputIterator
+    set_difference_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
+                          InputIterator2 end2, OutputIterator result, Predicate pred, IteratorTag1, IteratorTag2, IteratorTag3)
+    {
+      return _GLIBCXX_STD_P::set_difference(begin1, end1, begin2, end2, result, pred);
+    }
+
+  // Parallel set_difference for random access iterators
+  // Uses the parallel routine when either input range has at least
+  // Settings::set_difference_minimal_n elements; otherwise forwards to the
+  // _GLIBCXX_STD_P version.
+  template
+    OutputRandomAccessIterator
+    set_difference_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2,
+                          RandomAccessIterator2 end2, OutputRandomAccessIterator result, Predicate pred,
+                          random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::set_difference_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::set_difference_minimal_n))
+        return __gnu_parallel::parallel_set_difference(begin1, end1, begin2, end2, result, pred);
+      else
+        return _GLIBCXX_STD_P::set_difference(begin1, end1, begin2, end2, result, pred);
+    }
+
+  // Public interface
+  // Overload without a predicate: supplies __gnu_parallel::less and
+  // dispatches on the three iterator categories.
+  template
+    inline OutputIterator
+    set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator out)
+    {
+      typedef std::iterator_traits iteratori1_traits;
+      typedef std::iterator_traits iteratori2_traits;
+      typedef std::iterator_traits iteratoro_traits;
+      typedef typename iteratori1_traits::iterator_category iteratori1_category;
+      typedef typename iteratori2_traits::iterator_category iteratori2_category;
+      typedef typename iteratoro_traits::iterator_category iteratoro_category;
+      typedef typename iteratori1_traits::value_type value1_type;
+      typedef typename iteratori2_traits::value_type value2_type;
+
+      return set_difference_switch(begin1, end1, begin2, end2, out, __gnu_parallel::less(),
+                                   iteratori1_category(), iteratori2_category(), iteratoro_category());
+    }
+
+  // Public interface
+  // Overload taking a caller-supplied predicate.
+  template
+    inline OutputIterator
+    set_difference(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2,
+                   InputIterator2 end2, OutputIterator out, Predicate pred)
+    {
+      typedef std::iterator_traits iteratori1_traits;
+      typedef std::iterator_traits iteratori2_traits;
+      typedef std::iterator_traits iteratoro_traits;
+      typedef typename iteratori1_traits::iterator_category iteratori1_category;
+      typedef typename iteratori2_traits::iterator_category iteratori2_category;
+      typedef typename iteratoro_traits::iterator_category iteratoro_category;
+
+      return set_difference_switch(begin1, end1, begin2, end2, out, pred,
+                                   iteratori1_category(), iteratori2_category(), iteratoro_category());
+    }
+
+  // Sequential fallback
+  template
+    inline ForwardIterator
+    adjacent_find(ForwardIterator begin, ForwardIterator end, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::adjacent_find(begin, end);
+    }
+
+  // Sequential fallback
+  template
+    inline ForwardIterator
+    adjacent_find(ForwardIterator begin, ForwardIterator end, BinaryPredicate binary_pred, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::adjacent_find(begin, end, binary_pred);
+    }
+
+  // Parallel algorithm for random access iterators
+  // The search runs over [begin, end - 1); a hit at end - 1 means "no
+  // adjacent pair" and is translated back to end.  (Presumably the range is
+  // one short because adjacent_find_selector inspects an element together
+  // with its successor -- confirm against find_selectors.h.)
+  template
+    RandomAccessIterator
+    adjacent_find_switch(RandomAccessIterator begin, RandomAccessIterator end, random_access_iterator_tag)
+    {
+      typedef typename iterator_traits::value_type value_type;
+
+      if (_GLIBCXX_PARALLEL_CONDITION(true))
+        {
+          // An empty range has no adjacent pair; answering here also avoids
+          // forming end - 1 when begin == end.
+          if (begin == end)
+            return end;
+          RandomAccessIterator spot = __gnu_parallel::find_template(begin, end - 1, begin, equal_to(), __gnu_parallel::adjacent_find_selector()).first;
+          if (spot == (end - 1))
+            return end;
+          else
+            return spot;
+        }
+      else
+        return adjacent_find(begin, end, __gnu_parallel::sequential_tag());
+    }
+
+  // Sequential fallback for input iterator case
+  template
+    inline ForwardIterator
+    adjacent_find_switch(ForwardIterator begin, ForwardIterator end, IteratorTag)
+    {
+      return adjacent_find(begin, end, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface
+  template
+    inline ForwardIterator
+    adjacent_find(ForwardIterator begin, ForwardIterator end)
+    {
+      return adjacent_find_switch(begin, end, typename std::iterator_traits::iterator_category());
+    }
+
+  // Sequential fallback for input iterator case
+  template
+    inline ForwardIterator
+    adjacent_find_switch(ForwardIterator begin, ForwardIterator end, BinaryPredicate binary_pred, IteratorTag)
+    {
+      return adjacent_find(begin, end, binary_pred, __gnu_parallel::sequential_tag());
+    }
+
+  // Parallel algorithm for random access iterators
+  // Mirrors the predicate-less overload: the same adjacent_find_selector is
+  // driven over [begin, end - 1) and end - 1 is mapped back to end, so the
+  // selector is never applied at the last element of the range.
+  template
+    RandomAccessIterator
+    adjacent_find_switch(RandomAccessIterator begin, RandomAccessIterator end, BinaryPredicate binary_pred, random_access_iterator_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(true))
+        {
+          // An empty range has no adjacent pair; answering here also avoids
+          // forming end - 1 when begin == end.
+          if (begin == end)
+            return end;
+          RandomAccessIterator spot = __gnu_parallel::find_template(begin, end - 1, begin, binary_pred, __gnu_parallel::adjacent_find_selector()).first;
+          if (spot == (end - 1))
+            return end;
+          else
+            return spot;
+        }
+      else
+        return adjacent_find(begin, end, binary_pred, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface
+  template
+    inline ForwardIterator
+    adjacent_find(ForwardIterator begin, ForwardIterator end, BinaryPredicate binary_pred)
+    {
+      return adjacent_find_switch(begin, end, binary_pred, typename std::iterator_traits::iterator_category());
+    }
+
+  // Sequential fallback
+  template
+    inline typename iterator_traits::difference_type
+    count(InputIterator begin, InputIterator end, const T& value, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::count(begin, end, value);
+    }
+
+  // Parallel code for random access iterators
+  // Runs in parallel only when the range has at least
+  // Settings::count_minimal_n elements and is_parallel(parallelism_tag)
+  // holds; the per-element results are combined with std::plus into res.
+  template
+    typename iterator_traits::difference_type
+    count_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& value, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+    {
+      typedef typename iterator_traits::value_type value_type;
+      typedef typename iterator_traits::difference_type difference_type;
+
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::count_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+        {
+          difference_type res = 0;
+          __gnu_parallel::count_selector functionality;
+          __gnu_parallel::for_each_template_random_access(begin, end, value, functionality, std::plus<__gnu_parallel::sequence_index_t>(), res, res, -1, parallelism_tag);
+          return res;
+        }
+      else
+        return count(begin, end, value, __gnu_parallel::sequential_tag());
+    }
+
+  // Sequential fallback for input iterator case.
+  template
+    typename iterator_traits::difference_type
+    count_switch(InputIterator begin, InputIterator end, const T& value, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+    {
+      return count(begin, end, value, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface.
+  // parallelism_tag defaults to __gnu_parallel::parallel_unbalanced.
+  template
+    inline typename iterator_traits::difference_type
+    count(InputIterator begin, InputIterator end, const T& value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+    {
+      return count_switch(begin, end, value, typename std::iterator_traits::iterator_category(), parallelism_tag);
+    }
+
+  // Sequential fallback.
+  // Chosen by passing __gnu_parallel::sequential_tag; forwards unchanged to
+  // the _GLIBCXX_STD_P implementation.
+  template
+    inline typename iterator_traits::difference_type
+    count_if(InputIterator begin, InputIterator end, Predicate pred, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::count_if(begin, end, pred);
+    }
+
+  // Parallel count_if for random access iterators
+  // Runs in parallel only when the range has at least
+  // Settings::count_minimal_n elements (the same threshold count uses) and
+  // is_parallel(parallelism_tag) holds; per-element results are combined
+  // with std::plus into res.
+  template
+    typename iterator_traits::difference_type
+    count_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+    {
+      typedef typename iterator_traits::value_type value_type;
+      typedef typename iterator_traits::difference_type difference_type;
+
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::count_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+        {
+          difference_type res = 0;
+          __gnu_parallel::count_if_selector functionality;
+          __gnu_parallel::for_each_template_random_access(begin, end, pred, functionality, std::plus<__gnu_parallel::sequence_index_t>(), res, res, -1, parallelism_tag);
+          return res;
+        }
+      else
+        return count_if(begin, end, pred, __gnu_parallel::sequential_tag());
+    }
+
+  // Sequential fallback for input iterator case.
+  template
+    typename iterator_traits::difference_type
+    count_if_switch(InputIterator begin, InputIterator end, Predicate pred, IteratorTag, __gnu_parallel::parallelism)
+    {
+      return count_if(begin, end, pred, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface.
+  // parallelism_tag defaults to __gnu_parallel::parallel_unbalanced.
+  template
+    inline typename iterator_traits::difference_type
+    count_if(InputIterator begin, InputIterator end, Predicate pred, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced)
+    {
+      typedef iterator_traits traits_type;
+      typedef typename traits_type::iterator_category iterator_category;
+      return count_if_switch(begin, end, pred, iterator_category(), parallelism_tag);
+    }
+
+
+  // Sequential fallback.
+  // Chosen by passing __gnu_parallel::sequential_tag; forwards unchanged to
+  // the _GLIBCXX_STD_P implementation.
+  template
+    inline ForwardIterator1
+    search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::search(begin1, end1, begin2, end2);
+    }
+
+  // Parallel algorithm for random access iterator
+  // Selected when both iterator categories are random_access_iterator_tag;
+  // elements are compared with __gnu_parallel::equal_to.
+  template
+    RandomAccessIterator1
+    search_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator2 end2, random_access_iterator_tag, random_access_iterator_tag)
+    {
+      typedef std::iterator_traits iterator1_traits;
+      typedef typename iterator1_traits::value_type value1_type;
+      typedef std::iterator_traits iterator2_traits;
+      typedef typename iterator2_traits::value_type value2_type;
+
+      if (_GLIBCXX_PARALLEL_CONDITION(true))
+        return __gnu_parallel::search_template(begin1, end1, begin2, end2, __gnu_parallel::equal_to());
+      else
+        return search(begin1, end1, begin2, end2, __gnu_parallel::sequential_tag());
+    }
+
+  // Sequential fallback for input iterator case
+  template
+    inline ForwardIterator1
+    search_switch(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, IteratorTag1, IteratorTag2)
+    {
+      return search(begin1, end1, begin2, end2, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface.
+  // Dispatches on the categories of both iterator types.
+  template
+    inline ForwardIterator1
+    search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2)
+    {
+      typedef std::iterator_traits iterator1_traits;
+      typedef typename iterator1_traits::iterator_category iterator1_category;
+      typedef std::iterator_traits iterator2_traits;
+      typedef typename iterator2_traits::iterator_category iterator2_category;
+
+      return search_switch(begin1, end1, begin2, end2, iterator1_category(), iterator2_category());
+    }
+
+  // Sequential fallback.
+  // Predicate overload selected by __gnu_parallel::sequential_tag; forwards
+  // unchanged to the _GLIBCXX_STD_P implementation.
+  template
+    inline ForwardIterator1
+    search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, BinaryPredicate pred, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::search(begin1, end1, begin2, end2, pred);
+    }
+
+  // Parallel algorithm for random access iterator.
+  template
+    RandomAccessIterator1
+    search_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
+                  RandomAccessIterator2 begin2, RandomAccessIterator2 end2, BinaryPredicate pred, random_access_iterator_tag, random_access_iterator_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(true))
+        return __gnu_parallel::search_template(begin1, end1, begin2, end2, pred);
+      else
+        return search(begin1, end1, begin2, end2, pred, __gnu_parallel::sequential_tag());
+    }
+
+  // Sequential fallback for input iterator case
+  template
+    inline ForwardIterator1
+    search_switch(ForwardIterator1 begin1, ForwardIterator1 end1,
+                  ForwardIterator2 begin2, ForwardIterator2 end2, BinaryPredicate pred, IteratorTag1, IteratorTag2)
+    {
+      return search(begin1, end1, begin2, end2, pred, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface
+  // Dispatches on the categories of both iterator types.
+  template
+    inline ForwardIterator1
+    search(ForwardIterator1 begin1, ForwardIterator1 end1, ForwardIterator2 begin2, ForwardIterator2 end2, BinaryPredicate pred)
+    {
+      typedef std::iterator_traits iterator1_traits;
+      typedef typename iterator1_traits::iterator_category iterator1_category;
+      typedef std::iterator_traits iterator2_traits;
+      typedef typename iterator2_traits::iterator_category iterator2_category;
+      return search_switch(begin1, end1, begin2, end2, pred, iterator1_category(), iterator2_category());
+    }
+
+  // Sequential fallback
+  template
+    inline ForwardIterator
+    search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, __gnu_parallel::sequential_tag)
+    { return _GLIBCXX_STD_P::search_n(begin, end, count, val); }
+
+  // Sequential fallback
+  template
+    inline ForwardIterator
+    search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, BinaryPredicate binary_pred, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::search_n(begin, end, count, val, binary_pred);
+    }
+
+  // Public interface.
+  // Supplies __gnu_parallel::equal_to and forwards to the five-argument
+  // search_n.
+  template
+    inline ForwardIterator
+    search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val)
+    {
+      typedef typename iterator_traits::value_type value_type;
+      return search_n(begin, end, count, val, __gnu_parallel::equal_to());
+    }
+
+  // Parallel algorithm for random access iterators.
+  // Builds a pseudo_sequence from (val, count) and searches for it with
+  // search_template; the non-parallel branch calls std::__search_n.
+  template
+    RandomAccessIterator
+    search_n_switch(RandomAccessIterator begin, RandomAccessIterator end, Integer count, const T& val, BinaryPredicate binary_pred, random_access_iterator_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(true))
+        {
+          __gnu_parallel::pseudo_sequence ps(val, count);
+          return __gnu_parallel::search_template(begin, end, ps.begin(), ps.end(), binary_pred);
+        }
+      else
+        return std::__search_n(begin, end, count, val, binary_pred, random_access_iterator_tag());
+    }
+
+  // Sequential fallback for input iterator case.
+  // NOTE(review): __search_n is unqualified here but std::-qualified in the
+  // random access overload above -- confirm both resolve to the same helper.
+  template
+    inline ForwardIterator
+    search_n_switch(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, BinaryPredicate binary_pred, IteratorTag)
+    {
+      return __search_n(begin, end, count, val, binary_pred, IteratorTag());
+    }
+
+  // Public interface.
+  template
+    inline ForwardIterator
+    search_n(ForwardIterator begin, ForwardIterator end, Integer count, const T& val, BinaryPredicate binary_pred)
+    {
+      return search_n_switch(begin, end, count, val, binary_pred, typename std::iterator_traits::iterator_category());
+    }
+
+  // Sequential fallback.
+  // Unary transform selected by __gnu_parallel::sequential_tag; forwards
+  // unchanged to the _GLIBCXX_STD_P implementation.
+  template
+    inline OutputIterator
+    transform(InputIterator begin, InputIterator end, OutputIterator result, UnaryOperation unary_op, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::transform(begin, end, result, unary_op);
+    }
+
+  // Sequential fallback
+  // Binary transform selected by __gnu_parallel::sequential_tag.
+  template
+    inline OutputIterator
+    transform(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, OutputIterator result, BinaryOperation binary_op, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::transform(begin1, end1, begin2, result, binary_op);
+    }
+
+  // Parallel unary transform for random access iterators.
+  // Runs in parallel only when the range has at least
+  // Settings::transform_minimal_n elements and is_parallel(parallelism_tag)
+  // holds.  Input and output positions are paired up in an iterator_pair so
+  // that a single for_each_template_random_access pass walks both; the
+  // value returned is the selector's finish_iterator.
+  template
+    RandomAccessIterator3
+    transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end, RandomAccessIterator3 result, UnaryOperation unary_op, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::transform_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+        {
+          bool dummy = true;
+          typedef __gnu_parallel::iterator_pair ip;
+          ip begin_pair(begin, result), end_pair(end, result + (end - begin));
+          __gnu_parallel::transform1_selector functionality;
+          __gnu_parallel::for_each_template_random_access(begin_pair, end_pair, unary_op, functionality, __gnu_parallel::dummy_reduct(), dummy, dummy, -1, parallelism_tag);
+          return functionality.finish_iterator;
+        }
+      else
+        return transform(begin, end, result, unary_op, __gnu_parallel::sequential_tag());
+    }
+
+  // Sequential fallback for input iterator case.
+  template
+    inline RandomAccessIterator3
+    transform1_switch(RandomAccessIterator1 begin, RandomAccessIterator1 end, RandomAccessIterator3 result, UnaryOperation unary_op, IteratorTag1, IteratorTag2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      return _GLIBCXX_STD_P::transform(begin, end, result, unary_op);
+    }
+
+
+  // Parallel binary transform for random access iterators.
+  // Takes one random_access_iterator_tag per iterator (both inputs and the
+  // output).  The public binary transform() passes three category tags
+  // followed by the parallelism value, so an overload with fewer tag
+  // parameters cannot accept that call and the generic fallback below
+  // would always be chosen.
+  // Runs in parallel only when the first range has at least
+  // Settings::transform_minimal_n elements and is_parallel(parallelism_tag)
+  // holds; the three positions are bundled in an iterator_triple so a
+  // single for_each_template_random_access pass walks all of them.
+  template
+    RandomAccessIterator3
+    transform2_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator3 result, BinaryOperation binary_op, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      // The cast matches the size test used by transform1_switch.
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::transform_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+        {
+          bool dummy = true;
+          typedef __gnu_parallel::iterator_triple ip;
+          ip begin_triple(begin1, begin2, result), end_triple(end1, begin2 + (end1 - begin1), result + (end1 - begin1));
+          __gnu_parallel::transform2_selector functionality;
+          __gnu_parallel::for_each_template_random_access(begin_triple, end_triple, binary_op, functionality, __gnu_parallel::dummy_reduct(), dummy, dummy, -1, parallelism_tag);
+          return functionality.finish_iterator;
+        }
+      else
+        return transform(begin1, end1, begin2, result, binary_op, __gnu_parallel::sequential_tag());
+    }
+
+  // Sequential fallback for input iterator case.
+  template
+    inline RandomAccessIterator3
+    transform2_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator3 result, BinaryOperation binary_op, tag1, tag2, tag3, __gnu_parallel::parallelism)
+    {
+      return _GLIBCXX_STD_P::transform(begin1, end1, begin2, result, binary_op);
+    }
+
+  // Public interface.
+  // Unary transform: dispatches on the categories of the input and output
+  // iterators.  parallelism_tag defaults to
+  // __gnu_parallel::parallel_balanced.
+  template
+    inline OutputIterator
+    transform(InputIterator begin, InputIterator end, OutputIterator result,
+              UnaryOperation unary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      typedef std::iterator_traits iteratori_traits;
+      typedef std::iterator_traits iteratoro_traits;
+      typedef typename iteratori_traits::iterator_category iteratori_category;
+      typedef typename iteratoro_traits::iterator_category iteratoro_category;
+
+      return transform1_switch(begin, end, result, unary_op,
+                               iteratori_category(), iteratoro_category(), parallelism_tag);
+    }
+
+  // Public interface.
+  // Binary transform: passes three category tags (both inputs and the
+  // output) followed by parallelism_tag to transform2_switch.
+  template
+    inline OutputIterator
+    transform(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, OutputIterator result, BinaryOperation binary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      typedef std::iterator_traits iteratori1_traits;
+      typedef typename iteratori1_traits::iterator_category iteratori1_category;
+      typedef std::iterator_traits iteratori2_traits;
+      typedef typename iteratori2_traits::iterator_category iteratori2_category;
+      typedef std::iterator_traits iteratoro_traits;
+      typedef typename iteratoro_traits::iterator_category iteratoro_category;
+
+
+      return transform2_switch(begin1, end1, begin2, result, binary_op,
+                               iteratori1_category(), iteratori2_category(), iteratoro_category(), parallelism_tag);
+    }
+
+  // Sequential fallback
+  template
+    inline void
+    replace(ForwardIterator begin, ForwardIterator end, const T& old_value, const T& new_value, __gnu_parallel::sequential_tag)
+    { _GLIBCXX_STD_P::replace(begin, end, old_value, new_value); }
+
+  // Sequential fallback for input iterator case
+  template
+    void
+    replace_switch(ForwardIterator begin, ForwardIterator end, const T& old_value, const T& new_value, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+    { replace(begin, end, old_value, new_value, __gnu_parallel::sequential_tag()); }
+
+  // Parallel replace for
random access iterators
+  // NOTE: despite the heading, this overload has no parallel code path; it
+  // ignores parallelism_tag and forwards to the sequential replace.
+  template
+    void
+    replace_switch(RandomAccessIterator begin, RandomAccessIterator end, const T& old_value, const T& new_value, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+    { replace(begin, end, old_value, new_value, __gnu_parallel::sequential_tag()); }
+
+  // Public interface
+  // parallelism_tag defaults to __gnu_parallel::parallel_balanced.
+  template
+    inline void
+    replace(ForwardIterator begin, ForwardIterator end, const T& old_value, const T& new_value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      replace_switch(begin, end, old_value, new_value, typename std::iterator_traits::iterator_category(), parallelism_tag);
+    }
+
+
+  // Sequential fallback
+  template
+    inline void
+    replace_if(ForwardIterator begin, ForwardIterator end, Predicate pred, const T& new_value, __gnu_parallel::sequential_tag)
+    { _GLIBCXX_STD_P::replace_if(begin, end, pred, new_value); }
+
+  // Sequential fallback for input iterator case
+  template
+    void
+    replace_if_switch(ForwardIterator begin, ForwardIterator end, Predicate pred, const T& new_value, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+    { replace_if(begin, end, pred, new_value, __gnu_parallel::sequential_tag()); }
+
+  // Parallel algorithm for random access iterators.
+  // Runs in parallel only when the range has at least
+  // Settings::replace_minimal_n elements and is_parallel(parallelism_tag)
+  // holds; otherwise uses the sequential replace_if.
+  template
+    void
+    replace_if_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, const T& new_value, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::replace_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+        {
+          // dummy is passed uninitialized together with dummy_reduct();
+          // presumably it only receives an unused reduction result -- confirm.
+          bool dummy;
+          __gnu_parallel::replace_if_selector functionality(new_value);
+          __gnu_parallel::for_each_template_random_access(begin, end, pred, functionality, __gnu_parallel::dummy_reduct(), true, dummy, -1, parallelism_tag);
+        }
+      else
+        replace_if(begin, end, pred, new_value, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface.
+  // parallelism_tag defaults to __gnu_parallel::parallel_balanced.
+  template
+    inline void
+    replace_if(ForwardIterator begin, ForwardIterator end,
+               Predicate pred, const T& new_value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      typedef std::iterator_traits iterator_traits;
+      typedef typename iterator_traits::iterator_category iterator_category;
+
+      replace_if_switch(begin, end, pred, new_value, iterator_category(), parallelism_tag);
+    }
+
+  // Sequential fallback
+  template
+    inline void
+    generate(ForwardIterator begin, ForwardIterator end, Generator gen, __gnu_parallel::sequential_tag)
+    { _GLIBCXX_STD_P::generate(begin, end, gen); }
+
+  // Sequential fallback for input iterator case.
+  template
+    void
+    generate_switch(ForwardIterator begin, ForwardIterator end, Generator gen, IteratorTag, __gnu_parallel::parallelism parallelism_tag)
+    { generate(begin, end, gen, __gnu_parallel::sequential_tag()); }
+
+  // Parallel algorithm for random access iterators.
+  // Runs in parallel only when the range has at least
+  // Settings::generate_minimal_n elements and is_parallel(parallelism_tag)
+  // holds; otherwise uses the sequential generate.
+  template
+    void
+    generate_switch(RandomAccessIterator begin, RandomAccessIterator end,
+                    Generator gen, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::generate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+        {
+          // dummy is passed uninitialized together with dummy_reduct();
+          // presumably it only receives an unused reduction result -- confirm.
+          bool dummy;
+          __gnu_parallel::generate_selector functionality;
+          __gnu_parallel::for_each_template_random_access(begin, end, gen, functionality, __gnu_parallel::dummy_reduct(), true, dummy, -1, parallelism_tag);
+        }
+      else
+        generate(begin, end, gen, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface.
+  // parallelism_tag defaults to __gnu_parallel::parallel_balanced.
+  template
+    inline void
+    generate(ForwardIterator begin, ForwardIterator end,
+             Generator gen, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      typedef std::iterator_traits iterator_traits;
+      typedef typename iterator_traits::iterator_category iterator_category;
+      generate_switch(begin, end, gen, iterator_category(), parallelism_tag);
+    }
+
+
+  // Sequential fallback.
+  template
+    inline OutputIterator
+    generate_n(OutputIterator begin, Size n, Generator gen, __gnu_parallel::sequential_tag)
+    { return _GLIBCXX_STD_P::generate_n(begin, n, gen); }
+
+  // Sequential fallback for input iterator case.
+  template
+    OutputIterator
+    generate_n_switch(OutputIterator begin, Size n, Generator gen, IteratorTag, __gnu_parallel::parallelism)
+    { return generate_n(begin, n, gen, __gnu_parallel::sequential_tag()); }
+
+  // Parallel algorithm for random access iterators.
+  // NOTE: despite the heading, this overload has no parallel code path; it
+  // ignores parallelism_tag and forwards to the sequential generate_n.
+  template
+    RandomAccessIterator
+    generate_n_switch(RandomAccessIterator begin, Size n, Generator gen, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag)
+    { return generate_n(begin, n, gen, __gnu_parallel::sequential_tag()); }
+
+  // Public interface.
+  // parallelism_tag defaults to __gnu_parallel::parallel_balanced.
+  template
+    inline OutputIterator
+    generate_n(OutputIterator begin, Size n, Generator gen, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced)
+    {
+      typedef std::iterator_traits iterator_traits;
+      typedef typename iterator_traits::iterator_category iterator_category;
+      return generate_n_switch(begin, n, gen, iterator_category(), parallelism_tag);
+    }
+
+
+  // Sequential fallback.
+  template
+    inline void
+    random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, __gnu_parallel::sequential_tag)
+    { _GLIBCXX_STD_P::random_shuffle(begin, end); }
+
+  // Sequential fallback.
+  template
+    inline void
+    random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator& rand, __gnu_parallel::sequential_tag)
+    { _GLIBCXX_STD_P::random_shuffle(begin, end, rand); }
+
+
+  /** @brief Functor wrapper for std::rand(). */
+  // operator() returns rand() % limit.
+  template
+    struct c_rand_number
+    {
+      inline int operator()(int limit)
+      { return rand() % limit; }
+    };
+
+  // Fill in random number generator.
+  template
+    inline void
+    random_shuffle(RandomAccessIterator begin, RandomAccessIterator end)
+    {
+      c_rand_number<> r;
+      // Parallelization still possible.
+      random_shuffle(begin, end, r);
+    }
+
+  // Parallel algorithm for random access iterators.
+  // Returns immediately for an empty range.  Uses parallel_random_shuffle
+  // when the range has at least Settings::random_shuffle_minimal_n
+  // elements, otherwise __gnu_parallel::sequential_random_shuffle.
+  template
+    void
+    random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator& rand)
+    {
+      if (begin == end)
+        return;
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::random_shuffle_minimal_n))
+        __gnu_parallel::parallel_random_shuffle(begin, end, rand);
+      else
+        __gnu_parallel::sequential_random_shuffle(begin, end, rand);
+    }
+
+  // Sequential fallback.
+  template
+    inline ForwardIterator
+    partition(ForwardIterator begin, ForwardIterator end, Predicate pred, __gnu_parallel::sequential_tag)
+    { return _GLIBCXX_STD_P::partition(begin, end, pred); }
+
+  // Sequential fallback for input iterator case.
+  template
+    inline ForwardIterator
+    partition_switch(ForwardIterator begin, ForwardIterator end, Predicate pred, IteratorTag)
+    { return partition(begin, end, pred, __gnu_parallel::sequential_tag()); }
+
+  // Parallel algorithm for random access iterators.
+  // Used when the range has at least Settings::partition_minimal_n
+  // elements.  parallel_partition is called with get_max_threads() and
+  // returns an offset, which is converted back to an iterator.
+  template
+    RandomAccessIterator
+    partition_switch(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, random_access_iterator_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::partition_minimal_n))
+        {
+          typedef typename std::iterator_traits::difference_type difference_type;
+          difference_type middle = __gnu_parallel::parallel_partition(begin, end, pred, __gnu_parallel::get_max_threads());
+          return begin + middle;
+        }
+      else
+        return partition(begin, end, pred, __gnu_parallel::sequential_tag());
+    }
+
+  // Public interface.
+  template
+    inline ForwardIterator
+    partition(ForwardIterator begin, ForwardIterator end, Predicate pred)
+    {
+      return partition_switch(begin, end, pred, typename std::iterator_traits::iterator_category());
+    }
+
+  // Sequential fallback
+  template
+    inline void
+    sort(RandomAccessIterator begin, RandomAccessIterator end, __gnu_parallel::sequential_tag)
+    { _GLIBCXX_STD_P::sort(begin, end); }
+
+  // Sequential fallback
+  template
+    inline void
+    sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, __gnu_parallel::sequential_tag)
+    { _GLIBCXX_STD_P::sort(begin, end, comp); }
+
+  // Public interface, insert default comparator
+  template
+    inline void
+    sort(RandomAccessIterator begin, RandomAccessIterator end)
+    {
+      typedef iterator_traits traits_type;
+      typedef typename traits_type::value_type value_type;
+      sort(begin, end, std::less());
+    }
+
+  // Public interface with comparator.  Does nothing for an empty range;
+  // calls parallel_sort with a last argument of false when the range has at
+  // least Settings::sort_minimal_n elements, otherwise the sequential sort.
+  template
+    void
+    sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp)
+    {
+      typedef iterator_traits traits_type;
+      typedef typename traits_type::value_type value_type;
+
+      if (begin != end)
+        {
+          if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::sort_minimal_n))
+            __gnu_parallel::parallel_sort(begin, end, comp, false);
+          else
+            sort(begin, end, comp, __gnu_parallel::sequential_tag());
+        }
+    }
+
+  // Sequential fallback.
+  template
+    inline void
+    stable_sort(RandomAccessIterator begin, RandomAccessIterator end, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::stable_sort(begin, end);
+    }
+
+  // Sequential fallback.
+  template
+    inline void
+    stable_sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::stable_sort(begin, end, comp);
+    }
+
+  // Public interface, insert default comparator (std::less).
+  template
+    void
+    stable_sort(RandomAccessIterator begin, RandomAccessIterator end)
+    {
+      typedef iterator_traits traits_type;
+      typedef typename traits_type::value_type value_type;
+
+      stable_sort(begin, end, std::less());
+    }
+
+  // Parallel algorithm for random access iterators
+  // Same dispatch as sort above (including the sort_minimal_n threshold),
+  // but parallel_sort is called with a last argument of true.
+  template
+    void
+    stable_sort(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp)
+    {
+      if (begin != end)
+        {
+          if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::sort_minimal_n))
+            __gnu_parallel::parallel_sort(begin, end, comp, true);
+          else
+            stable_sort(begin, end, comp, __gnu_parallel::sequential_tag());
+        }
+    }
+
+  // Sequential fallback
+  template
+    inline OutputIterator
+    merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result,
+          __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result);
+    }
+
+  // Sequential fallback
+  template
+    inline OutputIterator
+    merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp,
+          __gnu_parallel::sequential_tag)
+    {
+      return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result, comp);
+    }
+
+  // Sequential fallback for input iterator case
+  template
+    inline OutputIterator
+    merge_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp, IteratorTag1, IteratorTag2, IteratorTag3)
+    {
+      return _GLIBCXX_STD_P::merge(begin1, end1, begin2, end2, result, comp);
+    }
+
+  // Parallel algorithm for random access iterators
+  // Uses parallel_merge_advance when either input range has at least
+  // Settings::merge_minimal_n elements.  The other branch calls
+  // __gnu_parallel::merge_advance, not the _GLIBCXX_STD_P merge; both are
+  // given the combined length of the two inputs.
+  template
+    OutputIterator
+    merge_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag)
+    {
+      if (_GLIBCXX_PARALLEL_CONDITION((static_cast<__gnu_parallel::sequence_index_t>(end1 - begin1) >= __gnu_parallel::Settings::merge_minimal_n || static_cast<__gnu_parallel::sequence_index_t>(end2 - begin2) >= __gnu_parallel::Settings::merge_minimal_n)))
+        return __gnu_parallel::parallel_merge_advance(begin1, end1, begin2, end2, result, (end1 - begin1) + (end2 - begin2), comp);
+      else
+        return __gnu_parallel::merge_advance(begin1, end1, begin2, end2, result, (end1 - begin1) + (end2 - begin2), comp);
+    }
+
+  // Public interface
+  // Dispatches on the categories of both input iterators and the output
+  // iterator.
+  template
+    inline OutputIterator
+    merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result, Comparator comp)
+    {
+      typedef typename iterator_traits::value_type value_type;
+
+      typedef std::iterator_traits iteratori1_traits;
+      typedef std::iterator_traits iteratori2_traits;
+      typedef std::iterator_traits iteratoro_traits;
+      typedef typename iteratori1_traits::iterator_category iteratori1_category;
+      typedef typename iteratori2_traits::iterator_category iteratori2_category;
+      typedef typename iteratoro_traits::iterator_category iteratoro_category;
+
+      return merge_switch(begin1, end1, begin2, end2, result, comp, iteratori1_category(), iteratori2_category(), iteratoro_category());
+    }
+
+
+  // Public interface,
insert default comparator + template + inline OutputIterator + merge(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, OutputIterator result) + { + typedef std::iterator_traits iterator1_traits; + typedef std::iterator_traits iterator2_traits; + typedef typename iterator1_traits::value_type value1_type; + typedef typename iterator2_traits::value_type value2_type; + + return merge(begin1, end1, begin2, end2, result, __gnu_parallel::less()); + } + + // Sequential fallback + template + inline void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::nth_element(begin, nth, end); } + + // Sequential fallback + template + void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, Comparator comp, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::nth_element(begin, nth, end, comp); } + + // Public interface + template + inline void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, Comparator comp) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::nth_element_minimal_n)) + __gnu_parallel::parallel_nth_element(begin, nth, end, comp); + else + nth_element(begin, nth, end, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template + void + nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end) + { + typedef typename iterator_traits::value_type value_type; + nth_element(begin, nth, end, std::less()); + } + + // Sequential fallback + template + void + partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end, _Compare comp, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::partial_sort(begin, middle, end, comp); } + + // Sequential fallback + template 
+ void + partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end, __gnu_parallel::sequential_tag) + { _GLIBCXX_STD_P::partial_sort(begin, middle, end); } + + // Public interface, parallel algorithm for random access iterators + template + void + partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end, _Compare comp) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::partial_sort_minimal_n)) + __gnu_parallel::parallel_partial_sort(begin, middle, end, comp); + else + partial_sort(begin, middle, end, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface, insert default comparator + template + void + partial_sort(_RandomAccessIterator begin, _RandomAccessIterator middle, _RandomAccessIterator end) + { + typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; + partial_sort(begin, middle, end, std::less()); + } + + // Sequential fallback + template + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::max_element(begin, end); } + + // Sequential fallback + template + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::max_element(begin, end, comp); } + + // Sequential fallback for input iterator case + template + ForwardIterator + max_element_switch(ForwardIterator begin, ForwardIterator end, Comparator comp, IteratorTag, __gnu_parallel::parallelism parallelism_tag) + { return max_element(begin, end, comp, __gnu_parallel::sequential_tag()); } + + // Public interface, insert default comparator + template + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced) + { + typedef typename 
iterator_traits::value_type value_type; + return max_element(begin, end, std::less(), parallelism_tag); + } + + template + RandomAccessIterator + max_element_switch(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::max_element_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) + { + RandomAccessIterator res(begin); // Passed twice below: as reduction seed and as result slot. + __gnu_parallel::identity_selector functionality; + __gnu_parallel::for_each_template_random_access(begin, end, __gnu_parallel::nothing(), functionality, __gnu_parallel::max_element_reduct(comp), res, res, -1, parallelism_tag); + return res; + } + else // Below the size threshold or not a parallel tag: sequential fallback, which must keep comp. + return max_element(begin, end, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface + template + inline ForwardIterator + max_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced) + { + return max_element_switch(begin, end, comp, typename std::iterator_traits::iterator_category(), parallelism_tag); + } + + // Sequential fallback + template + inline + ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::min_element(begin, end); } + + // Sequential fallback + template + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::min_element(begin, end, comp); } + + // Public interface + template + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced) + { + typedef typename iterator_traits::value_type value_type; + return min_element(begin, end, std::less(), parallelism_tag); + } + + // Sequential
fallback for input iterator case + template + ForwardIterator + min_element_switch(ForwardIterator begin, ForwardIterator end, Comparator comp, IteratorTag, __gnu_parallel::parallelism parallelism_tag) + { return min_element(begin, end, comp, __gnu_parallel::sequential_tag()); } + + // Parallel algorithm for random access iterators + template + RandomAccessIterator + min_element_switch(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::min_element_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) + { + RandomAccessIterator res(begin); // Passed twice below: as reduction seed and as result slot. + __gnu_parallel::identity_selector functionality; + __gnu_parallel::for_each_template_random_access(begin, end, __gnu_parallel::nothing(), functionality, __gnu_parallel::min_element_reduct(comp), res, res, -1, parallelism_tag); + return res; + } + else // Below the size threshold or not a parallel tag: sequential fallback, which must keep comp. + return min_element(begin, end, comp, __gnu_parallel::sequential_tag()); + } + + // Public interface + template + inline ForwardIterator + min_element(ForwardIterator begin, ForwardIterator end, Comparator comp, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced) + { + typedef iterator_traits traits_type; + typedef typename traits_type::iterator_category iterator_category; + return min_element_switch(begin, end, comp, iterator_category(), parallelism_tag); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_ALGORITHM_H */ + diff --git a/libstdc++-v3/include/parallel/algobase.h b/libstdc++-v3/include/parallel/algobase.h new file mode 100644 index 00000000000..0bd8b39afcc --- /dev/null +++ b/libstdc++-v3/include/parallel/algobase.h @@ -0,0 +1,256 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library.
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/algobase.h + * @brief Parallel STL function calls corresponding to the + * stl_algobase.h header. The functions defined here mainly do case + * switches and call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one + * function call, are declared inline. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_ALGOBASE_H +#define _GLIBCXX_PARALLEL_ALGOBASE_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace std +{ +namespace __parallel +{ + // Sequential fallback: forwards to _GLIBCXX_STD_P::equal. + template + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::equal(begin1, end1, begin2); + } + + // Sequential fallback: forwards to _GLIBCXX_STD_P::equal with the given predicate. + template + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::equal(begin1, end1, begin2, pred); } + + // Public interface: true iff mismatch() finds no differing position before end1. + template + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2) + { return mismatch(begin1, end1, begin2).first == end1; } + + // Public interface: as above, comparing elements with pred. + template + inline bool + equal(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred) + { return mismatch(begin1, end1, begin2, pred).first == end1; } + + // NB: lexicographical_compare requires mismatch.
+ + // Sequential fallback + template + inline pair + mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2); + } + + // Sequential fallback + template + inline pair + mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); } + + // Sequential fallback for input iterator case + template + inline pair + mismatch_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, Predicate pred, IteratorTag1, IteratorTag2) + { return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); } + + // Parallel mismatch for random access iterators + template + pair + mismatch_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Predicate pred, random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + RandomAccessIterator1 res_first = + __gnu_parallel::find_template(begin1, end1, begin2, pred, __gnu_parallel::mismatch_selector()).first; + return make_pair(res_first, begin2 + (res_first - begin1)); + } + else + return _GLIBCXX_STD_P::mismatch(begin1, end1, begin2, pred); + } + + // Public interface + template + inline pair + mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2) + { + typedef std::iterator_traits iterator1_traits; + typedef std::iterator_traits iterator2_traits; + typedef typename iterator1_traits::value_type value1_type; + typedef typename iterator2_traits::value_type value2_type; + typedef typename iterator1_traits::iterator_category iterator1_category; + typedef typename iterator2_traits::iterator_category iterator2_category; + + return mismatch_switch(begin1, end1, begin2, __gnu_parallel::equal_to(), iterator1_category(), iterator2_category()); + } + + // Public interface + template + inline pair + 
mismatch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, + Predicate pred) + { + typedef std::iterator_traits iterator1_traits; + typedef std::iterator_traits iterator2_traits; + typedef typename iterator1_traits::iterator_category iterator1_category; + typedef typename iterator2_traits::iterator_category iterator2_category; + + return mismatch_switch(begin1, end1, begin2, pred, iterator1_category(), iterator2_category()); + } + + // Sequential fallback + template + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, begin2, end2); + } + + // Sequential fallback + template + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, Predicate pred, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, begin2, end2, pred); + } + + // Sequential fallback for input iterator case + template + inline bool + lexicographical_compare_switch(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, Predicate pred, IteratorTag1, IteratorTag2) + { + return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, begin2, end2, pred); + } + + // Parallel lexicographical_compare for random access iterators + // Limitation: Both valuetypes must be the same + template + bool + lexicographical_compare_switch(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, RandomAccessIterator2 end2, Predicate pred, random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(true)) + { + typedef iterator_traits traits1_type; + typedef typename traits1_type::value_type value1_type; + + typedef iterator_traits traits2_type; + typedef typename traits2_type::value_type value2_type; + + typedef 
__gnu_parallel::equal_from_less equal_type; + + // mismatch() only bounds its first range, so pass the shorter sequence first. + if ((end1 - begin1) < (end2 - begin2)) + { + typedef pair pair_type; + pair_type mm = mismatch_switch(begin1, end1, begin2, equal_type(pred), random_access_iterator_tag(), random_access_iterator_tag()); + + // Less because shorter: sequence 1 is a proper prefix of sequence 2. + const bool lbs = mm.first == end1; + + // Less because differing elements less; never dereference end1 when no mismatch exists. + const bool lbdel = !lbs && pred(*mm.first, *mm.second); + + return lbs || lbdel; + } + else + { + typedef pair pair_type; + pair_type mm = mismatch_switch(begin2, end2, begin1, equal_type(pred), random_access_iterator_tag(), random_access_iterator_tag()); + + // A differing position exists inside sequence 2 (otherwise sequence 2 is a prefix: not less). + const bool lbs = mm.first != end2; + + // Less because differing element less; never dereference end2 when no mismatch exists. + const bool lbdel = lbs && pred(*mm.second, *mm.first); + + return lbs && lbdel; + } + } + else + return _GLIBCXX_STD_P::lexicographical_compare(begin1, end1, begin2, end2, pred); + } + + // Public interface + template + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2) + { + typedef iterator_traits traits1_type; + typedef typename traits1_type::value_type value1_type; + typedef typename traits1_type::iterator_category iterator1_category; + + typedef iterator_traits traits2_type; + typedef typename traits2_type::value_type value2_type; + typedef typename traits2_type::iterator_category iterator2_category; + typedef __gnu_parallel::less less_type; + + return lexicographical_compare_switch(begin1, end1, begin2, end2, less_type(), iterator1_category(), iterator2_category()); + } + + // Public interface + template + inline bool + lexicographical_compare(InputIterator1 begin1, InputIterator1 end1, InputIterator2 begin2, InputIterator2 end2, Predicate pred) + { + typedef iterator_traits traits1_type; + typedef typename traits1_type::iterator_category iterator1_category; + + typedef iterator_traits traits2_type; + typedef typename
traits2_type::iterator_category iterator2_category; + + return lexicographical_compare_switch(begin1, end1, begin2, end2, pred, iterator1_category(), iterator2_category()); + } +} // end namespace __parallel +} // end namespace std + +#endif /* _GLIBCXX_PARALLEL_ALGOBASE_H */ diff --git a/libstdc++-v3/include/parallel/algorithm b/libstdc++-v3/include/parallel/algorithm new file mode 100644 index 00000000000..0672e372eb9 --- /dev/null +++ b/libstdc++-v3/include/parallel/algorithm @@ -0,0 +1,45 @@ +// Algorithm extensions -*- C++ -*- + +// Copyright (C) 2007 +// Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +// As a special exception, you may use this file as part of a free software +// library without restriction. Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License.
+ +/** @file parallel/algorithm + * This file is a GNU extension to the Standard C++ Library. + */ + +#ifndef _PARALLEL_ALGORITHM +#define _PARALLEL_ALGORITHM 1 + +#pragma GCC system_header + +#include +#include +#include +#include + +#endif diff --git a/libstdc++-v3/include/parallel/algorithmfwd.h b/libstdc++-v3/include/parallel/algorithmfwd.h new file mode 100644 index 00000000000..319091904cd --- /dev/null +++ b/libstdc++-v3/include/parallel/algorithmfwd.h @@ -0,0 +1,719 @@ +// parallel extensions -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. 
+ +/** @file parallel/algorithmfwd.h + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +#ifndef _GLIBCXX_PARALLEL_ALGORITHMFWD_H +#define _GLIBCXX_PARALLEL_ALGORITHMFWD_H 1 + +#pragma GCC system_header + +#include +#include + +namespace std +{ +namespace __parallel +{ + template + inline _FIter + adjacent_find(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template + inline _FIter + adjacent_find(_FIter, _FIter, BinaryPredicate, __gnu_parallel::sequential_tag); + + template + inline _FIter + adjacent_find(_FIter, _FIter); + + template + inline _FIter + adjacent_find(_FIter, _FIter, BinaryPredicate); + + template + _RAIter + adjacent_find_switch(_RAIter, _RAIter, random_access_iterator_tag); + + template + inline _FIter + adjacent_find_switch(_FIter, _FIter, IteratorTag); + + template + inline _FIter + adjacent_find_switch(_FIter, _FIter, BinaryPredicate, IteratorTag); + + template + _RAIter + adjacent_find_switch(_RAIter, _RAIter, BinaryPredicate, random_access_iterator_tag); + + + template + inline typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const T& value, __gnu_parallel::sequential_tag); + + template + inline typename iterator_traits<_IIter>::difference_type + count(_IIter, _IIter, const T& value, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced); + + template + typename iterator_traits<_RAIter>::difference_type + count_switch(_RAIter, _RAIter, const T& value, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + typename iterator_traits<_IIter>::difference_type + count_switch(_IIter, _IIter, const T& value, IteratorTag, __gnu_parallel::parallelism); + + + template + inline typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline typename iterator_traits<_IIter>::difference_type + count_if(_IIter, _IIter, Predicate, __gnu_parallel::parallelism parallelism_tag = 
__gnu_parallel::parallel_unbalanced); + + template + typename iterator_traits<_RAIter>::difference_type + count_if_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + typename iterator_traits<_IIter>::difference_type + count_if_switch(_IIter, _IIter, Predicate, IteratorTag, __gnu_parallel::parallelism); + + // algobase.h + template + inline bool + equal(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag); + + template + inline bool + equal(_IIter1, _IIter1, _IIter2, Predicate, __gnu_parallel::sequential_tag); + + template + inline bool + equal(_IIter1, _IIter1, _IIter2); + + template + inline bool + equal(_IIter1, _IIter1, _IIter2, Predicate); + + template + inline _IIter + find(_IIter, _IIter, const T&, __gnu_parallel::sequential_tag); + + template + inline _IIter + find(_IIter, _IIter, const T& val); + + template + inline _IIter + find_switch(_IIter, _IIter, const T&, IteratorTag); + + template + _RAIter + find_switch(_RAIter, _RAIter, const T&, random_access_iterator_tag); + + template + inline _IIter + find_if(_IIter, _IIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline _IIter + find_if (_IIter, _IIter, Predicate); + + template + inline _IIter + find_if_switch(_IIter, _IIter, Predicate, IteratorTag); + + template + _RAIter + find_if_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag); + + template + inline _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, __gnu_parallel::sequential_tag); + + template + inline _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, BinaryPredicate, __gnu_parallel::sequential_tag); + + template + inline _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter, BinaryPredicate); + + template + _IIter + find_first_of(_IIter, _IIter, _FIter, _FIter); + + template + inline _IIter + find_first_of_switch(_IIter, _IIter, _FIter, _FIter, IteratorTag1, IteratorTag2); + + template + inline _RAIter + find_first_of_switch(_RAIter, _RAIter, 
_FIter, _FIter, BinaryPredicate, random_access_iterator_tag, IteratorTag); + + template + inline _IIter + find_first_of_switch(_IIter, _IIter, _FIter, _FIter, BinaryPredicate, IteratorTag1, IteratorTag2); + + + template + inline Function + for_each(_IIter, _IIter, Function f, __gnu_parallel::sequential_tag); + + template + inline Function + for_each(Iterator, Iterator, Function f, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + Function + for_each_switch(_IIter, _IIter, Function f, IteratorTag, __gnu_parallel::parallelism); + + template + Function + for_each_switch(_RAIter, _RAIter, Function f, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + inline void + generate(_FIter, _FIter, Generator, __gnu_parallel::sequential_tag); + + template + inline void + generate(_FIter, _FIter, Generator, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + void + generate_switch(_FIter, _FIter, Generator, IteratorTag, __gnu_parallel::parallelism); + + template + void + generate_switch(_RAIter, _RAIter, Generator, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + inline _OIter + generate_n(_OIter, Size, Generator, __gnu_parallel::sequential_tag); + + template + inline _OIter + generate_n(_OIter, Size, Generator, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + _OIter + generate_n_switch(_OIter, Size, Generator, IteratorTag, __gnu_parallel::parallelism); + + template + _RAIter + generate_n_switch(_RAIter, Size, Generator, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + inline bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, __gnu_parallel::sequential_tag); + + template + inline bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, Predicate, __gnu_parallel::sequential_tag); + + template + inline bool + lexicographical_compare(_IIter1, _IIter1, 
_IIter2, _IIter2); + + template + inline bool + lexicographical_compare(_IIter1, _IIter1, _IIter2, _IIter2, Predicate); + + template + inline bool + lexicographical_compare_switch(_IIter1, _IIter1, _IIter2, _IIter2, Predicate, IteratorTag1, IteratorTag2); + + template + bool + lexicographical_compare_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Predicate, random_access_iterator_tag, random_access_iterator_tag); + + // algo.h + template + inline pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, __gnu_parallel::sequential_tag); + + template + inline pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, Predicate, __gnu_parallel::sequential_tag); + + template + inline pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2); + + template + inline pair<_IIter1, _IIter2> + mismatch(_IIter1, _IIter1, _IIter2, Predicate); + + template + inline pair<_IIter1, _IIter2> + mismatch_switch(_IIter1, _IIter1, _IIter2, Predicate, IteratorTag1, IteratorTag2); + + template + pair<_RAIter1, _RAIter2> + mismatch_switch(_RAIter1, _RAIter1, _RAIter2, Predicate, random_access_iterator_tag, random_access_iterator_tag); + + template + inline _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, __gnu_parallel::sequential_tag); + + template + inline _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2); + + template + inline _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, BinaryPredicate, __gnu_parallel::sequential_tag); + + template + inline _FIter1 + search(_FIter1, _FIter1, _FIter2, _FIter2, BinaryPredicate); + + template + _RAIter1 + search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, random_access_iterator_tag, random_access_iterator_tag); + + template + inline _FIter1 + search_switch(_FIter1, _FIter1, _FIter2, _FIter2, IteratorTag1, IteratorTag2); + + template + _RAIter1 + search_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, BinaryPredicate , random_access_iterator_tag, random_access_iterator_tag); + + template + inline _FIter1 + search_switch(_FIter1, 
_FIter1, _FIter2, _FIter2, BinaryPredicate, IteratorTag1, IteratorTag2); + + template + inline _FIter + search_n(_FIter, _FIter, Integer, const T&, __gnu_parallel::sequential_tag); + + template + inline _FIter + search_n(_FIter, _FIter, Integer, const T&, BinaryPredicate, __gnu_parallel::sequential_tag); + + template + inline _FIter + search_n(_FIter, _FIter, Integer, const T& val); + + template + inline _FIter + search_n(_FIter, _FIter, Integer, const T&, BinaryPredicate); + + template + _RAIter + search_n_switch(_RAIter, _RAIter, Integer, const T&, BinaryPredicate, random_access_iterator_tag); + + template + inline _FIter + search_n_switch(_FIter, _FIter, Integer, const T&, BinaryPredicate, IteratorTag); + + + template + inline _OIter + transform(_IIter, _IIter, _OIter, UnaryOperation, __gnu_parallel::sequential_tag); + + template + inline _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, BinaryOperation binary_op, __gnu_parallel::sequential_tag); + + template + inline _OIter + transform(_IIter, _IIter, _OIter, UnaryOperation, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + inline _OIter + transform(_IIter1, _IIter1, _IIter2, _OIter, BinaryOperation binary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + _RAIter3 + transform1_switch(_RAIter1, _RAIter1, _RAIter3, UnaryOperation, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + inline _RAIter3 + transform1_switch(_RAIter1, _RAIter1, _RAIter3, UnaryOperation, IteratorTag1, IteratorTag2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + _RAIter3 + transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, BinaryOperation binary_op, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); 
+ + template + inline _RAIter3 + transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, BinaryOperation binary_op, tag1, tag2, tag3, __gnu_parallel::parallelism); + + template + inline void + replace(_FIter, _FIter, const T&, const T&, __gnu_parallel::sequential_tag); + + template + inline void + replace(_FIter, _FIter, const T&, const T&, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + void + replace_switch(_FIter, _FIter, const T&, const T&, IteratorTag, __gnu_parallel::parallelism); + + template + void + replace_switch(_RAIter, _RAIter, const T&, const T&, random_access_iterator_tag, __gnu_parallel::parallelism); + + + template + inline void + replace_if(_FIter, _FIter, Predicate, const T&, __gnu_parallel::sequential_tag); + + template + inline void + replace_if(_FIter, _FIter, Predicate, const T&, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + void + replace_if_switch(_FIter, _FIter, Predicate, const T&, IteratorTag, __gnu_parallel::parallelism); + + template + void + replace_if_switch(_RAIter, _RAIter, Predicate, const T&, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + inline _FIter + max_element(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template + inline _FIter + max_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag); + + template + inline _FIter + max_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + inline _FIter + max_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + _FIter + max_element_switch(_FIter, _FIter, _Compare, IteratorTag, __gnu_parallel::parallelism); + + template + _RAIter + max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + inline _OIter + merge(_IIter1, _IIter1, _IIter2, 
_IIter2, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, __gnu_parallel::sequential_tag); + + template + inline _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare); + + template + inline _OIter + merge(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + inline _OIter + merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, IteratorTag1, IteratorTag2, IteratorTag3); + + template + _OIter + merge_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, _Compare, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag); + + template + inline _FIter + min_element(_FIter, _FIter, __gnu_parallel::sequential_tag); + + template + inline _FIter + min_element(_FIter, _FIter, _Compare, __gnu_parallel::sequential_tag); + + template + inline _FIter + min_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + inline _FIter + min_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + _FIter + min_element_switch(_FIter, _FIter, _Compare, IteratorTag, __gnu_parallel::parallelism); + + template + _RAIter + min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, __gnu_parallel::parallelism); + + template + inline void + nth_element(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template + void + nth_element(_RAIter, _RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template + inline void + nth_element(_RAIter, _RAIter, _RAIter, _Compare); + + template + void + nth_element(_RAIter, _RAIter, _RAIter); + + template + void + partial_sort(_RAIter, _RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template + void + partial_sort(_RAIter, _RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template + void + partial_sort(_RAIter, _RAIter, 
_RAIter, _Compare); + + template + void + partial_sort(_RAIter, _RAIter, _RAIter); + + template + inline _FIter + partition(_FIter, _FIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline _FIter + partition(_FIter, _FIter, Predicate); + + template + inline _FIter + partition_switch(_FIter, _FIter, Predicate, IteratorTag); + + template + _RAIter + partition_switch(_RAIter, _RAIter, Predicate, random_access_iterator_tag); + + template + inline void + random_shuffle(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template + inline void + random_shuffle(_RAIter, _RAIter, RandomNumberGenerator& rand, __gnu_parallel::sequential_tag); + + template + inline void + random_shuffle(_RAIter, _RAIter); + + template + void + random_shuffle(_RAIter, _RAIter, RandomNumberGenerator& rand); + + template + inline _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + inline _OIter + set_union(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate); + + template + inline _OIter + set_union_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3); + + template + Output_RAIter + set_union_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag); + + template + inline _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_intersection(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + inline _OIter + set_intersection(_IIter1, 
_IIter1, _IIter2, _IIter2, _OIter, Predicate); + + template + inline _OIter + set_intersection_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3); + + template + Output_RAIter + set_intersection_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag); + + template + inline _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + inline _OIter + set_symmetric_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate); + + template + inline _OIter + set_symmetric_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3); + + template + Output_RAIter + set_symmetric_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag); + + + template + inline _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter); + + template + inline _OIter + set_difference(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate); + + template + inline _OIter + set_difference_switch(_IIter1, _IIter1, _IIter2, _IIter2, _OIter, Predicate, IteratorTag1, IteratorTag2, IteratorTag3); + + template + Output_RAIter + set_difference_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter2, Output_RAIter, Predicate, 
random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag); + + + template + inline void + sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template + inline void + sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template + inline void + sort(_RAIter, _RAIter); + + template + void + sort(_RAIter, _RAIter, _Compare); + + template + inline void + stable_sort(_RAIter, _RAIter, __gnu_parallel::sequential_tag); + + template + inline void + stable_sort(_RAIter, _RAIter, _Compare, __gnu_parallel::sequential_tag); + + template + void + stable_sort(_RAIter, _RAIter); + + template + void + stable_sort(_RAIter, _RAIter, _Compare); + + template + inline _OIter + unique_copy(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + unique_copy(_IIter, _IIter, _OIter, Predicate, __gnu_parallel::sequential_tag); + + template + inline _OIter + unique_copy(_IIter, _IIter, _OIter); + + template + inline _OIter + unique_copy(_IIter, _IIter, _OIter, Predicate); + + template + inline _OIter + unique_copy_switch(_IIter, _IIter, _OIter, Predicate, IteratorTag1, IteratorTag2); + + template + RandomAccess_OIter + unique_copy_switch(_RAIter, _RAIter, RandomAccess_OIter, Predicate, random_access_iterator_tag, random_access_iterator_tag); +} // end namespace __parallel +} // end namespace std + +// NB: cannot use _GLIBCXX_STD_P directly here, as it is both scoped +// (std::__norm) and unscoped (std::). 
+namespace __gnu_sequential +{ +#ifdef _GLIBCXX_PARALLEL + using std::__norm::partition; + using std::__norm::sort; + using std::__norm::stable_sort; + using std::__norm::random_shuffle; +#else + using std::partition; + using std::sort; + using std::stable_sort; + using std::random_shuffle; +#endif +} + +#endif diff --git a/libstdc++-v3/include/parallel/balanced_quicksort.h b/libstdc++-v3/include/parallel/balanced_quicksort.h new file mode 100644 index 00000000000..94b0e8cd6c6 --- /dev/null +++ b/libstdc++-v3/include/parallel/balanced_quicksort.h @@ -0,0 +1,451 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. 
+ +/** @file parallel/balanced_quicksort.h + * @brief Implementation of a dynamically load-balanced parallel quicksort. + * + * It works in-place and needs only logarithmic extra memory. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_BAL_QUICKSORT_H +#define _GLIBCXX_PARALLEL_BAL_QUICKSORT_H 1 + +#include +#include + +#include +#include +#include +#include +#include + +#if _GLIBCXX_ASSERTIONS +#include +#endif + +namespace __gnu_parallel +{ + /** @brief Information local to one thread in the parallel quicksort run. */ + template + struct QSBThreadLocal + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + + /** @brief Contiguous part of the sequence, described by an + iterator pair (begin, end). */ + typedef std::pair Piece; + + /** @brief Initial piece to work on. */ + Piece initial; + + /** @brief Work-stealing queue of pieces that still have to be sorted. */ + RestrictedBoundedConcurrentQueue leftover_parts; + + /** @brief Number of threads involved in this algorithm. */ + thread_index_t num_threads; + + /** @brief Pointer to a counter of elements left over to sort (shared by all threads). */ + volatile difference_type* elements_leftover; + + /** @brief The complete sequence to sort. */ + Piece global; + + /** @brief Constructor. + * @param queue_size Size of the work-stealing queue. */ + QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { } + }; + + /** @brief Initialize the thread local storage of the calling OpenMP thread (allocated with @c new into @c tls[omp_get_thread_num()]). + * @param tls Array of thread-local storages. + * @param queue_size Size of the work-stealing queue. */ + template + inline void + qsb_initialize(QSBThreadLocal** tls, int queue_size) + { + int iam = omp_get_thread_num(); + tls[iam] = new QSBThreadLocal(queue_size); + } + + + /** @brief Balanced quicksort divide step. + * @param begin Begin iterator of subsequence. + * @param end End iterator of subsequence. + * @param comp Comparator. 
+ * @param num_threads Number of threads that are allowed to work on + * this part. + * @pre @c (end-begin)>=1. @return Final position of the pivot, relative to @c begin. */ + template + inline typename std::iterator_traits::difference_type + qsb_divide(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, int num_threads) + { + _GLIBCXX_PARALLEL_ASSERT(num_threads > 0); + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + RandomAccessIterator pivot_pos = median_of_three_iterators(begin, begin + (end - begin) / 2, end - 1, comp); + +#if defined(_GLIBCXX_ASSERTIONS) + // Must be in between somewhere; the candidates are begin, begin + n / 2 and end - 1 (end itself is not dereferenceable). + difference_type n = end - begin; + + _GLIBCXX_PARALLEL_ASSERT((!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos)) + || (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos)) + || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos)) + || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos)) + || (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos)) + || (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos))); +#endif + + // Swap pivot value to end. + if (pivot_pos != (end - 1)) + std::swap(*pivot_pos, *(end - 1)); + pivot_pos = end - 1; + + __gnu_parallel::binder2nd pred(comp, *pivot_pos); + + // Divide, returning end - begin - 1 in the worst case. + difference_type split_pos = parallel_partition(begin, end - 1, pred, num_threads); + + // Swap back pivot to middle. + std::swap(*(begin + split_pos), *pivot_pos); + pivot_pos = begin + split_pos; + +#if _GLIBCXX_ASSERTIONS + RandomAccessIterator r; + for (r = begin; r != pivot_pos; r++) + _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos)); + for (; r != end; r++) + _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos)); +#endif + + return split_pos; + } + + /** @brief Quicksort conquer step. + * @param tls Array of thread-local storages. + * @param begin Begin iterator of subsequence. 
+ * @param end End iterator of subsequence. + * @param comp Comparator. + * @param iam Number of the thread processing this function. + * @param num_threads Number of threads that are allowed to work on this part. */ + template + inline void + qsb_conquer(QSBThreadLocal** tls, + RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, thread_index_t iam, thread_index_t num_threads) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + if (num_threads <= 1 || n < 2) // base case: thread iam sorts [begin, end) locally + { + tls[iam]->initial.first = begin; + tls[iam]->initial.second = end; + + qsb_local_sort_with_helping(tls, comp, iam); + + return; + } + + // Divide step: partition [begin, end) around a pivot using all num_threads threads. + difference_type split_pos = qsb_divide(begin, end, comp, num_threads); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin)); +#endif + + thread_index_t num_threads_leftside = std::max(1, std::min(num_threads - 1, split_pos * num_threads / n)); // proportional to the left part, but at least one thread per side + +#pragma omp atomic + *tls[iam]->elements_leftover -= (difference_type)1; // accounts for the pivot, which is excluded from both recursive calls + + // Conquer step: recurse on both sides in two nested parallel sections. +#pragma omp parallel sections num_threads(2) + { +#pragma omp section + qsb_conquer(tls, begin, begin + split_pos, comp, iam, num_threads_leftside); + // The pivot_pos is left in place, to ensure termination. +#pragma omp section + qsb_conquer(tls, begin + split_pos + 1, end, comp, + iam + num_threads_leftside, num_threads - num_threads_leftside); + } + } + + /** + * @brief Quicksort step doing load-balanced local sort. + * @param tls Array of thread-local storages. + * @param comp Comparator. + * @param iam Number of the thread processing this function. 
+ */ + template + inline void + qsb_local_sort_with_helping(QSBThreadLocal** tls, + Comparator& comp, int iam) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef std::pair Piece; + + QSBThreadLocal& tl = *tls[iam]; + + difference_type base_case_n = Settings::sort_qsb_base_case_maximal_n; + if (base_case_n < 2) + base_case_n = 2; + thread_index_t num_threads = tl.num_threads; + + // Every thread has its own random number generator. + random_number rng(iam + 1); + + Piece current = tl.initial; + + difference_type elements_done = 0; +#if _GLIBCXX_ASSERTIONS + difference_type total_elements_done = 0; +#endif + + for (;;) + { + // Invariant: current must be a valid (maybe empty) range. + RandomAccessIterator begin = current.first, end = current.second; + difference_type n = end - begin; + + if (n > base_case_n) + { + // Divide. + RandomAccessIterator pivot_pos = begin + rng(n); + + // Swap pivot_pos value to end. + if (pivot_pos != (end - 1)) + std::swap(*pivot_pos, *(end - 1)); + pivot_pos = end - 1; + + __gnu_parallel::binder2nd pred(comp, *pivot_pos); + + // Divide, leave pivot unchanged in last place. + RandomAccessIterator split_pos1, split_pos2; + split_pos1 = __gnu_sequential::partition(begin, end - 1, pred); + + // Left side: < pivot_pos; right side: >= pivot_pos. +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end); +#endif + // Swap pivot back to middle. + if (split_pos1 != pivot_pos) + std::swap(*split_pos1, *pivot_pos); + pivot_pos = split_pos1; + + // In case all elements are equal, split_pos1 == 0. + if ((split_pos1 + 1 - begin) < (n >> 7) + || (end - split_pos1) < (n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not stricly larger than the pivot. 
+ __gnu_parallel::unary_negate<__gnu_parallel::binder1st, value_type> pred(__gnu_parallel::binder1st(comp, *pivot_pos)); + + // Find other end of pivot-equal range. + split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred); + } + else + { + // Only skip the pivot. + split_pos2 = split_pos1 + 1; + } + + // Elements equal to pivot are done. + elements_done += (split_pos2 - split_pos1); +#if _GLIBCXX_ASSERTIONS + total_elements_done += (split_pos2 - split_pos1); +#endif + // Always push larger part onto stack. + if (((split_pos1 + 1) - begin) < (end - (split_pos2))) + { + // Right side larger. + if ((split_pos2) != end) + tl.leftover_parts.push_front(std::make_pair(split_pos2, end)); + + //current.first = begin; //already set anyway + current.second = split_pos1; + continue; + } + else + { + // Left side larger. + if (begin != split_pos1) + tl.leftover_parts.push_front(std::make_pair(begin, split_pos1)); + + current.first = split_pos2; + //current.second = end; //already set anyway + continue; + } + } + else + { + __gnu_sequential::sort(begin, end, comp); + elements_done += n; +#if _GLIBCXX_ASSERTIONS + total_elements_done += n; +#endif + + // Prefer own stack, small pieces. + if (tl.leftover_parts.pop_front(current)) + continue; + +#pragma omp atomic + *tl.elements_leftover -= elements_done; + elements_done = 0; + +#if _GLIBCXX_ASSERTIONS + double search_start = omp_get_wtime(); +#endif + + // Look for new work. + bool success = false; + while (*tl.elements_leftover > 0 && !success +#if _GLIBCXX_ASSERTIONS + // Possible dead-lock. + && (omp_get_wtime() < (search_start + 1.0)) +#endif + ) + { + thread_index_t victim; + victim = rng(num_threads); + + // Large pieces. 
+ success = (victim != iam) && tls[victim]->leftover_parts.pop_back(current); + if (!success) + yield(); +#if !defined(__ICC) && !defined(__ECC) +#pragma omp flush +#endif + } + +#if _GLIBCXX_ASSERTIONS + if (omp_get_wtime() >= (search_start + 1.0)) + { + sleep(1); + _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() < (search_start + 1.0)); + } +#endif + if (!success) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0); +#endif + return; + } + } + } + } + + /** @brief Top-level quicksort routine. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @param n Length of the sequence to sort. + * @param num_threads Number of threads that are allowed to work on + * this part. + */ + template + inline void + parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, + typename std::iterator_traits::difference_type n, int num_threads) + { + _GLIBCXX_CALL(end - begin) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef std::pair Piece; + + typedef QSBThreadLocal tls_type; + + if (n <= 1) + return; + + // At least one element per processor. + if (num_threads > n) + num_threads = static_cast(n); + + tls_type** tls = new tls_type*[num_threads]; + +#pragma omp parallel num_threads(num_threads) + // Initialize variables per processor. + qsb_initialize(tls, num_threads * (thread_index_t)(log2(n) + 1)); + + // There can never be more than ceil(log2(n)) ranges on the stack, because + // 1. Only one processor pushes onto the stack + // 2. The largest range has at most length n + // 3. 
Each range is larger than half of the range remaining + volatile difference_type elements_leftover = n; + for (int i = 0; i < num_threads; i++) + { + tls[i]->elements_leftover = &elements_leftover; + tls[i]->num_threads = num_threads; + tls[i]->global = std::make_pair(begin, end); + + // Just in case nothing is left to assign. + tls[i]->initial = std::make_pair(end, end); + } + + // Initial splitting, recursively. + int old_nested = omp_get_nested(); + omp_set_nested(true); + + // Main recursion call. + qsb_conquer(tls, begin, begin + n, comp, 0, num_threads); + + omp_set_nested(old_nested); + +#if _GLIBCXX_ASSERTIONS + // All stack must be empty. + Piece dummy; + for (int i = 1; i < num_threads; i++) + _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy)); +#endif + + for (int i = 0; i < num_threads; i++) + delete tls[i]; + delete[] tls; + } +} // namespace __gnu_parallel + +#endif diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h new file mode 100644 index 00000000000..117292ba44b --- /dev/null +++ b/libstdc++-v3/include/parallel/base.h @@ -0,0 +1,358 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. 
+ +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/base.h + * @brief Sequential helper functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_BASE_H +#define _GLIBCXX_PARALLEL_BASE_H 1 + +#include +#include +#include +#include +#include + +namespace __gnu_parallel +{ + // XXX remove std::duplicates from here if possible, + // XXX but keep minimal dependencies. + + /** @brief Calculates the rounded-down logarithm of @c n for base 2. + * @param n Argument. + * @return floor(log2(n)) for @c n >= 1; returns 0 for argument 0. + */ + template + inline Size + log2(Size n) + { + Size k; + for (k = 0; n > 1; n >>= 1) // n > 1 (rather than n != 1) so that n == 0 terminates and yields 0 + ++k; + return k; + } + + /** @brief Encode two integers into one __gnu_parallel::lcas_t. + * @param a First integer, to be encoded in the most-significant @c + * lcas_t_bits/2 bits. + * @param b Second integer, to be encoded in the least-significant + * @c lcas_t_bits/2 bits. + * @return __gnu_parallel::lcas_t value encoding @c a and @c b. + * @see decode2 + */ + inline lcas_t + encode2(int a, int b) //must all be non-negative, actually + { + return (((lcas_t)a) << (lcas_t_bits / 2)) | (((lcas_t)b) << 0); + } + + /** @brief Decode two integers from one __gnu_parallel::lcas_t. + * @param x __gnu_parallel::lcas_t to decode integers from. + * @param a First integer, to be decoded from the most-significant + * @c lcas_t_bits/2 bits of @c x. 
+ * @param b Second integer, to be decoded from the least-significant + * @c lcas_t_bits/2 bits of @c x. + * @see encode2 + */ + inline void + decode2(lcas_t x, int& a, int& b) + { + a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask); + b = (int)((x >> 0 ) & lcas_t_mask); + } + + /** @brief Constructs predicate for equality from strict weak + * ordering predicate. The comparator is held by reference. + */ + // XXX comparator at the end, as per others + template + class equal_from_less : public std::binary_function + { + private: + Comparator& comp; + + public: + equal_from_less(Comparator& _comp) : comp(_comp) { } + + bool operator()(const T1& a, const T2& b) + { + // FIXME: wrong in general (T1 != T2) + return !comp(a, b) && !comp(b, a); + } + }; + + + /** @brief Similar to std::equal_to, but allows two different types. */ + template + struct equal_to : std::binary_function + { + bool operator()(const T1& t1, const T2& t2) const + { return t1 == t2; } + }; + + /** @brief Similar to std::unary_negate, but giving the argument types explicitly. */ + template + class unary_negate + : public std::unary_function + { + protected: + _Predicate _M_pred; + + public: + explicit + unary_negate(const _Predicate& __x) : _M_pred(__x) { } + + bool + operator()(const argument_type& __x) + { return !_M_pred(__x); } + }; + + /** @brief Similar to std::binder1st, but giving the argument types explicitly. */ + template + class binder1st + : public std::unary_function + { + protected: + _Operation op; + first_argument_type value; + + public: + binder1st(const _Operation& __x, + const first_argument_type& __y) + : op(__x), value(__y) { } + + result_type + operator()(const second_argument_type& __x) + { return op(value, __x); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + result_type + operator()(second_argument_type& __x) const + { return op(value, __x); } + }; + + /** @brief Similar to std::binder2nd, but giving the argument types explicitly. 
*/ + template + class binder2nd + : public std::unary_function + { + protected: + _Operation op; + second_argument_type value; + + public: + binder2nd(const _Operation& __x, + const second_argument_type& __y) + : op(__x), value(__y) { } + + result_type + operator()(const first_argument_type& __x) const + { return op(__x, value); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + result_type + operator()(first_argument_type& __x) + { return op(__x, value); } + }; + + /** @brief Similar to std::less, but allows two different types. */ + template + struct less : std::binary_function + { + bool operator()(const T1& t1, const T2& t2) const + { return t1 < t2; } + }; + + + template + class pseudo_sequence; + + /** @brief Iterator associated with __gnu_parallel::pseudo_sequence. + * It features a subset of the usual random-access iterator functionality; every position refers to the same element, held by reference. + * @param T Sequence value type. + * @param difference_type Sequence difference type. + */ + template + class pseudo_sequence_iterator + { + public: + typedef _DifferenceTp difference_type; + + private: + typedef pseudo_sequence_iterator type; + + const T& val; + difference_type pos; + + public: + pseudo_sequence_iterator(const T& val, difference_type pos) + : val(val), pos(pos) { } + + // Pre-increment operator. + type& + operator++() + { + ++pos; + return *this; + } + + // Post-increment operator: returns a copy at the old position (the constructor needs both the value and the position). + const type + operator++(int) + { return type(val, pos++); } + + const T& + operator*() const + { return val; } + + const T& + operator[](difference_type) const + { return val; } + + bool + operator==(const type& i2) + { return pos == i2.pos; } + + bool + operator!=(const type& i2) + { return pos != i2.pos; } + + difference_type + operator-(const type& i2) + { return pos - i2.pos; } + }; + + /** @brief Sequence that conceptually consists of multiple copies of + the same element. + * The copies are not stored explicitly, of course. + * @param T Sequence value type. 
+ * @param difference_type Sequence difference type. + */ + template + class pseudo_sequence + { + typedef pseudo_sequence type; + + public: + typedef _DifferenceTp difference_type; + typedef pseudo_sequence_iterator iterator; //better cast down to uint64, than up to _DifferenceTp + + /** @brief Constructor. + * @param val Element of the sequence (stored by reference, not copied). + * @param count Number of (virtual) copies. + */ + pseudo_sequence(const T& val, difference_type count) + : val(val), count(count) { } + + /** @brief Begin iterator. */ + iterator + begin() const + { return iterator(val, 0); } + + /** @brief End iterator. */ + iterator + end() const + { return iterator(val, count); } + + private: + const T& val; + difference_type count; + }; + + /** @brief Functor that does nothing. Declared as a struct so that operator() is publicly callable. */ + template + struct void_functor + { + inline void + operator()(const _ValueTp& v) const { } + }; + + /** @brief Compute the median of three referenced elements, + according to @c comp. + * @param a First iterator. + * @param b Second iterator. + * @param c Third iterator. + * @param comp Comparator. @return The iterator (one of @c a, @c b, @c c) referencing the median element. + */ + template + RandomAccessIterator + median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b, + RandomAccessIterator c, Comparator& comp) + { + if (comp(*a, *b)) + if (comp(*b, *c)) + return b; + else + if (comp(*a, *c)) + return c; + else + return a; + else + { + // Just swap a and b. + if (comp(*a, *c)) + return a; + else + if (comp(*b, *c)) + return c; + else + return b; + } + } + + // Avoid the use of assert, because we're trying to keep the + // include out of the mix. (Same as debug mode). 
+ inline void + __replacement_assert(const char* __file, int __line, + const char* __function, const char* __condition) + { + std::printf("%s:%d: %s: Assertion '%s' failed.\n", __file, __line, + __function, __condition); + __builtin_abort(); + } + +#define _GLIBCXX_PARALLEL_ASSERT(_Condition) \ + do \ + { \ + if (!(_Condition)) \ + __gnu_parallel::__replacement_assert(__FILE__, __LINE__, \ + __PRETTY_FUNCTION__, #_Condition); \ + } while (false) + +} //namespace __gnu_parallel + +#endif + diff --git a/libstdc++-v3/include/parallel/basic_iterator.h b/libstdc++-v3/include/parallel/basic_iterator.h new file mode 100644 index 00000000000..4b891be80b2 --- /dev/null +++ b/libstdc++-v3/include/parallel/basic_iterator.h @@ -0,0 +1,48 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/basic_iterator.h + * @brief Includes the original header files concerned with iterators + * except for stream iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_BASIC_ITERATOR_H +#define _GLIBCXX_PARALLEL_BASIC_ITERATOR_H 1 + +#include +#include +#include +#include +#include + +#endif /* _GLIBCXX_PARALLEL_BASIC_ITERATOR_H */ diff --git a/libstdc++-v3/include/parallel/checkers.h b/libstdc++-v3/include/parallel/checkers.h new file mode 100644 index 00000000000..b34ee051927 --- /dev/null +++ b/libstdc++-v3/include/parallel/checkers.h @@ -0,0 +1,148 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. 
+ +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/checkers.h + * @brief Routines for checking the correctness of algorithm results. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_CHECKERS +#define _GLIBCXX_PARALLEL_CHECKERS 1 + +#include +#include +#include + +namespace __gnu_parallel +{ + /** + * @brief Check whether @c [begin, @c end) is sorted according to @c comp. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @return @c true if sorted, @c false otherwise. + */ + // XXX Comparator default template argument + template + bool + is_sorted(InputIterator begin, InputIterator end, Comparator comp = std::less::value_type>()) + { + if (begin == end) + return true; + + InputIterator current(begin), recent(begin); + + unsigned long long position = 1; + for (current++; current != end; current++) + { + if (comp(*current, *recent)) + { + printf("is_sorted: check failed before position %llu.\n", position); + return false; + } + recent = current; + position++; + } + + return true; + } + + /** + * @brief Check whether @c [begin, @c end) is sorted according to @c comp. + * Prints the position in case a misordered pair is found. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param first_failure The first failure is returned in this variable. 
+ * @param comp Comparator. + * @return @c true if sorted, @c false otherwise. + */ + // XXX Comparator default template argument + template + bool + is_sorted_failure(InputIterator begin, InputIterator end, InputIterator& first_failure, Comparator comp = std::less::value_type>()) + { + if (begin == end) + return true; + + InputIterator current(begin), recent(begin); + + unsigned long long position = 1; + for (current++; current != end; current++) + { + if (comp(*current, *recent)) + { + first_failure = current; + printf("is_sorted: check failed before position %llu.\n", position); + return false; + } + recent = current; + position++; + } + + first_failure = end; + return true; + } + + /** + * @brief Check whether @c [begin, @c end) is sorted according to @c comp. + * Prints all misordered pairs, including the surrounding elements (clamped to the sequence bounds). + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @return @c true if sorted, @c false otherwise. + */ + template + bool + // XXX Comparator default template argument + is_sorted_print_failures(InputIterator begin, InputIterator end, Comparator comp = std::less::value_type>()) + { + if (begin == end) + return true; + + InputIterator recent(begin); + bool ok = true; + + for (InputIterator pos(begin + 1); pos != end; pos++) + { + if (comp(*pos, *recent)) + { + printf("%ld: %d %d %d %d\n", pos - begin, *((pos - begin) >= 2 ? pos - 2 : begin), + *(pos - 1), *pos, *((pos + 1) != end ? pos + 1 : pos)); + ok = false; + } + recent = pos; + } + return ok; + } +} + +#endif diff --git a/libstdc++-v3/include/parallel/compatibility.h b/libstdc++-v3/include/parallel/compatibility.h new file mode 100644 index 00000000000..ec0c0531885 --- /dev/null +++ b/libstdc++-v3/include/parallel/compatibility.h @@ -0,0 +1,338 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/compatibility.h + * @brief Compatibility layer, mostly concerned with atomic operations. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. 
+ +#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H +#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1 + +#include +#include + +#if defined(__SUNPRO_CC) && defined(__sparc) +#include +#endif + +#if !defined(_WIN32) +#include +#endif + +#if defined(_MSC_VER) +#include +#include +#undef max +#undef min +#endif + +namespace __gnu_parallel +{ +#if defined(__ICC) + template + int32 faa32(int32* x, int32 inc) + { + asm volatile("lock xadd %0,%1" + : "=r" (inc), "=m" (*x) + : "0" (inc) + : "memory"); + return inc; + } +#if defined(__x86_64) + template + int64 faa64(int64* x, int64 inc) + { + asm volatile("lock xadd %0,%1" + : "=r" (inc), "=m" (*x) + : "0" (inc) + : "memory"); + return inc; + } +#endif +#endif + + // atomic functions only work on integers + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to a 32-bit signed integer. + * @param addend Value to add. + */ + inline int32 + fetch_and_add_32(volatile int32* ptr, int32 addend) + { +#if defined(__ICC) //x86 version + return _InterlockedExchangeAdd((void*)ptr, addend); +#elif defined(__ECC) //IA-64 version + return _InterlockedExchangeAdd((void*)ptr, addend); +#elif defined(__ICL) || defined(_MSC_VER) + return _InterlockedExchangeAdd(reinterpret_cast(ptr), addend); +#elif defined(__GNUC__) + return __sync_fetch_and_add(ptr, addend); +#elif defined(__SUNPRO_CC) && defined(__sparc) + volatile int32 before, after; + do + { + before = *ptr; + after = before + addend; + } while (atomic_cas_32((volatile unsigned int*)ptr, before, after) != before); + return before; +#else //fallback, slow +#pragma message("slow fetch_and_add_32") + int32 res; +#pragma omp critical + { + res = *ptr; + *(ptr) += addend; + } + return res; +#endif + } + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to a 64-bit signed integer. + * @param addend Value to add. 
+ */ + inline int64 + fetch_and_add_64(volatile int64* ptr, int64 addend) + { +#if defined(__ICC) && defined(__x86_64) //x86 version + return faa64((int64*)ptr, addend); +#elif defined(__ECC) //IA-64 version + return _InterlockedExchangeAdd64((void*)ptr, addend); +#elif defined(__ICL) || defined(_MSC_VER) +#ifndef _WIN64 + _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case + return 0; +#else + return _InterlockedExchangeAdd64(ptr, addend); +#endif +#elif defined(__GNUC__) && defined(__x86_64) + return __sync_fetch_and_add(ptr, addend); +#elif defined(__GNUC__) && defined(__i386) && \ + (defined(__i686) || defined(__pentium4) || defined(__athlon)) + return __sync_fetch_and_add(ptr, addend); +#elif defined(__SUNPRO_CC) && defined(__sparc) + volatile int64 before, after; + do + { + before = *ptr; + after = before + addend; + } while (atomic_cas_64((volatile unsigned long long*)ptr, before, after) != before); + return before; +#else //fallback, slow +#if defined(__GNUC__) && defined(__i386) + // XXX doesn't work with -march=native + //#warning "please compile with -march=i686 or better" +#endif +#pragma message("slow fetch_and_add_64") + int64 res; +#pragma omp critical + { + res = *ptr; + *(ptr) += addend; + } + return res; +#endif + } + + /** @brief Add a value to a variable, atomically. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to a signed integer. + * @param addend Value to add. 
+ */ + template + inline T + fetch_and_add(volatile T* ptr, T addend) + { + if (sizeof(T) == sizeof(int32)) + return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend); + else if (sizeof(T) == sizeof(int64)) + return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend); + else + _GLIBCXX_PARALLEL_ASSERT(false); + } + + +#if defined(__ICC) + + template + inline int32 + cas32(volatile int32* ptr, int32 old, int32 nw) + { + int32 before; + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(before) + : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old) + : "memory"); + return before; + } + +#if defined(__x86_64) + template + inline int64 + cas64(volatile int64 *ptr, int64 old, int64 nw) + { + int64 before; + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(before) + : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old) + : "memory"); + return before; + } +#endif + +#endif + + /** @brief Compare @c *ptr and @c comparand. If equal, let @c + * *ptr=replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to 32-bit signed integer. + * @param comparand Compare value. + * @param replacement Replacement value. 
+ */ + inline bool + compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement) + { +#if defined(__ICC) //x86 version + return _InterlockedCompareExchange((void*)ptr, replacement, comparand) == comparand; +#elif defined(__ECC) //IA-64 version + return _InterlockedCompareExchange((void*)ptr, replacement, comparand) == comparand; +#elif defined(__ICL) || defined(_MSC_VER) + return _InterlockedCompareExchange(reinterpret_cast(ptr), replacement, comparand) == comparand; +#elif defined(__GNUC__) + return __sync_bool_compare_and_swap(ptr, comparand, replacement); +#elif defined(__SUNPRO_CC) && defined(__sparc) + return atomic_cas_32((volatile unsigned int*)ptr, comparand, replacement) == comparand; +#else +#pragma message("slow compare_and_swap_32") + bool res = false; +#pragma omp critical + { + if (*ptr == comparand) + { + *ptr = replacement; + res = true; + } + } + return res; +#endif + } + + /** @brief Compare @c *ptr and @c comparand. If equal, let @c + * *ptr=replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to 64-bit signed integer. + * @param comparand Compare value. + * @param replacement Replacement value. 
+ */ + inline bool + compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement) + { +#if defined(__ICC) && defined(__x86_64) //x86 version + return cas64(ptr, comparand, replacement) == comparand; +#elif defined(__ECC) //IA-64 version + return _InterlockedCompareExchange64((void*)ptr, replacement, comparand) == comparand; +#elif defined(__ICL) || defined(_MSC_VER) +#ifndef _WIN64 + _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case + return 0; +#else + return _InterlockedCompareExchange64(ptr, replacement, comparand) == comparand; +#endif + +#elif defined(__GNUC__) && defined(__x86_64) + return __sync_bool_compare_and_swap(ptr, comparand, replacement); +#elif defined(__GNUC__) && defined(__i386) && \ + (defined(__i686) || defined(__pentium4) || defined(__athlon)) + return __sync_bool_compare_and_swap(ptr, comparand, replacement); +#elif defined(__SUNPRO_CC) && defined(__sparc) + return atomic_cas_64((volatile unsigned long long*)ptr, comparand, replacement) == comparand; +#else +#if defined(__GNUC__) && defined(__i386) + // XXX -march=native + //#warning "please compile with -march=i686 or better" +#endif +#pragma message("slow compare_and_swap_64") + bool res = false; +#pragma omp critical + { + if (*ptr == comparand) + { + *ptr = replacement; + res = true; + } + } + return res; +#endif + } + + /** @brief Compare @c *ptr and @c comparand. If equal, let @c + * *ptr=replacement and return @c true, return @c false otherwise. + * + * Implementation is heavily platform-dependent. + * @param ptr Pointer to signed integer. + * @param comparand Compare value. + * @param replacement Replacement value. 
*/ + template + inline bool + compare_and_swap(volatile T* ptr, T comparand, T replacement) + { + if (sizeof(T) == sizeof(int32)) + return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement); + else if (sizeof(T) == sizeof(int64)) + return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement); + else + _GLIBCXX_PARALLEL_ASSERT(false); + } + + /** @brief Yield the control to another thread, without waiting for + the end to the time slice. */ + inline void + yield() + { +#ifdef _WIN32 + Sleep(0); +#else + sched_yield(); +#endif + } +} // end namespace + +#endif diff --git a/libstdc++-v3/include/parallel/compiletime_settings.h b/libstdc++-v3/include/parallel/compiletime_settings.h new file mode 100644 index 00000000000..6278e44837a --- /dev/null +++ b/libstdc++-v3/include/parallel/compiletime_settings.h @@ -0,0 +1,76 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/compiletime_settings.h + * @brief Defines on options concerning debugging and performance, at + * compile-time. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#include + +/** @brief Determine verbosity level of the parallel mode. + * Level 1 prints a message each time when entering a parallel-mode function. */ +#define _GLIBCXX_VERBOSE_LEVEL 0 + +/** @def _GLIBCXX_CALL + * @brief Macro to produce log message when entering a function. + * @param n Input size. + * @see _GLIBCXX_VERBOSE_LEVEL */ +#if (_GLIBCXX_VERBOSE_LEVEL == 0) +#define _GLIBCXX_CALL(n) +#endif +#if (_GLIBCXX_VERBOSE_LEVEL == 1) +#define _GLIBCXX_CALL(n) printf(" %s:\niam = %d, n = %ld, num_threads = %d\n", __PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads()); +#endif + +/** @brief Use floating-point scaling instead of modulo for mapping + * random numbers to a range. This can be faster on certain CPUs. */ +#define _GLIBCXX_SCALE_DOWN_FPU 0 + +/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. + * Should be switched on only locally. */ +#define _GLIBCXX_ASSERTIONS 0 + +/** @brief Consider the size of the L1 cache for + * __gnu_parallel::parallel_random_shuffle(). */ +#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0 +/** @brief Consider the size of the TLB for + * __gnu_parallel::parallel_random_shuffle(). 
*/ +#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0 + +/** @brief First copy the data, sort it locally, and merge it back + * (0); or copy it back after everything is done (1). + * + * Recommendation: 0 */ +#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0 + diff --git a/libstdc++-v3/include/parallel/equally_split.h b/libstdc++-v3/include/parallel/equally_split.h new file mode 100644 index 00000000000..730875d0d52 --- /dev/null +++ b/libstdc++-v3/include/parallel/equally_split.h @@ -0,0 +1,68 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. 
+ +/** @file parallel/equally_split.h + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H +#define _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H 1 + +namespace __gnu_parallel +{ + /** @brief Function to split a sequence into parts of almost equal size. + * + * The resulting sequence s of length p+1 contains the splitting + * positions when splitting the range [0,n) into parts of almost + * equal size (plus minus 1). The first entry is 0, the last one + * n. The first n % p parts get one extra element; empty parts may result. + * @param n Number of elements + * @param p Number of parts; must be nonzero, since @c n is divided by it + * @param s Splitters + * @returns End of splitter sequence, i.e. @c s+p+1 */ + template + OutputIterator + equally_split(_DifferenceTp n, thread_index_t p, OutputIterator s) + { + typedef _DifferenceTp difference_type; + difference_type chunk_length = n / p, split = n % p, start = 0; + for (int i = 0; i < p; i++) + { + *s++ = start; + start += (difference_type(i) < split) ? (chunk_length + 1) : chunk_length; + } + *s++ = n; + return s; + } +} + +#endif diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h new file mode 100644 index 00000000000..83771480f69 --- /dev/null +++ b/libstdc++-v3/include/parallel/features.h @@ -0,0 +1,170 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/features.h + * @brief Defines controlling whether to include algorithm variants. + * + * Fewer variants reduce executable size and compile time. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_FEATURES_H +#define _GLIBCXX_PARALLEL_FEATURES_H 1 + +#ifndef _GLIBCXX_MERGESORT +/** @def _GLIBCXX_MERGESORT + * @brief Include parallel multi-way mergesort. + * @see __gnu_parallel::Settings::sort_algorithm */ +#define _GLIBCXX_MERGESORT 1 +#endif + +#ifndef _GLIBCXX_QUICKSORT +/** @def _GLIBCXX_QUICKSORT + * @brief Include parallel unbalanced quicksort. + * @see __gnu_parallel::Settings::sort_algorithm */ +#define _GLIBCXX_QUICKSORT 1 +#endif + +#ifndef _GLIBCXX_BAL_QUICKSORT +/** @def _GLIBCXX_BAL_QUICKSORT + * @brief Include parallel dynamically load-balanced quicksort. + * @see __gnu_parallel::Settings::sort_algorithm */ +#define _GLIBCXX_BAL_QUICKSORT 1 +#endif + +#ifndef _GLIBCXX_LOSER_TREE +/** @def _GLIBCXX_LOSER_TREE + * @brief Include guarded (sequences may run empty) loser tree, + * moving objects. 
+ * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE 1 +#endif + +#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT +/** @def _GLIBCXX_LOSER_TREE_EXPLICIT + * @brief Include standard loser tree, storing two flags for infimum + * and supremum. + * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE_EXPLICIT 0 +#endif + +#ifndef _GLIBCXX_LOSER_TREE_REFERENCE +/** @def _GLIBCXX_LOSER_TREE_REFERENCE + * @brief Include some loser tree variant. + * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE_REFERENCE 0 +#endif + +#ifndef _GLIBCXX_LOSER_TREE_POINTER +/** @def _GLIBCXX_LOSER_TREE_POINTER + * @brief Include some loser tree variant. + * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE_POINTER 0 +#endif + +#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED +/** @def _GLIBCXX_LOSER_TREE_UNGUARDED + * @brief Include unguarded (sequences must not run empty) loser + * tree, moving objects. + * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE_UNGUARDED 1 +#endif + +#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED +/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED + * @brief Include some loser tree variant. + * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 0 +#endif + +#ifndef _GLIBCXX_LOSER_TREE_COMBINED +/** @def _GLIBCXX_LOSER_TREE_COMBINED + * @brief Include some loser tree variant. + * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE_COMBINED 0 +#endif + +#ifndef _GLIBCXX_LOSER_TREE_SENTINEL +/** @def _GLIBCXX_LOSER_TREE_SENTINEL + * @brief Include some loser tree variant. + * @see __gnu_parallel::Settings multiway_merge_algorithm */ +#define _GLIBCXX_LOSER_TREE_SENTINEL 0 +#endif + + +#ifndef _GLIBCXX_FIND_GROWING_BLOCKS +/** @brief Include the growing blocks variant for std::find. 
+ * @see __gnu_parallel::Settings::find_distribution */ +#define _GLIBCXX_FIND_GROWING_BLOCKS 1 +#endif + +#ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS +/** @brief Include the equal-sized blocks variant for std::find. + * @see __gnu_parallel::Settings::find_distribution */ +#define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1 +#endif + +#ifndef _GLIBCXX_FIND_EQUAL_SPLIT +/** @def _GLIBCXX_FIND_EQUAL_SPLIT + * @brief Include the equal splitting variant for std::find. + * @see __gnu_parallel::Settings::find_distribution */ +#define _GLIBCXX_FIND_EQUAL_SPLIT 1 +#endif + + +#ifndef _GLIBCXX_TREE_INITIAL_SPLITTING +/** @def _GLIBCXX_TREE_INITIAL_SPLITTING + * @brief Include the initial splitting variant for + * _Rb_tree::insert_unique(InputIterator beg, InputIterator end). + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_INITIAL_SPLITTING 1 +#endif + +#ifndef _GLIBCXX_TREE_DYNAMIC_BALANCING +/** @def _GLIBCXX_TREE_DYNAMIC_BALANCING + * @brief Include the dynamic balancing variant for + * _Rb_tree::insert_unique(InputIterator beg, InputIterator end). + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_DYNAMIC_BALANCING 1 +#endif + +#ifndef _GLIBCXX_TREE_FULL_COPY +/** @def _GLIBCXX_TREE_FULL_COPY + * @brief In order to sort the input sequence of + * _Rb_tree::insert_unique(InputIterator beg, InputIterator end) a + * full copy of the input elements is done. + * @see __gnu_parallel::_Rb_tree */ +#define _GLIBCXX_TREE_FULL_COPY 1 +#endif + + +#endif diff --git a/libstdc++-v3/include/parallel/find.h b/libstdc++-v3/include/parallel/find.h new file mode 100644 index 00000000000..42f179fa6c7 --- /dev/null +++ b/libstdc++-v3/include/parallel/find.h @@ -0,0 +1,340 @@ +/// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/find.h + * @brief Parallel implementation base for std::find(), std::equal() + * and related functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze and Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_FIND_H +#define _GLIBCXX_PARALLEL_FIND_H 1 + +#include + +#include +#include +#include +#include + +namespace __gnu_parallel +{ + /** + * @brief Parallel std::find, switch for different algorithms. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. Must have same + * length as first sequence. 
+ * @param pred Find predicate. + * @param selector Functionality (e. g. std::find_if (), std::equal(),...) + * @return Place of finding in both sequences. + */ + template + std::pair + find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred, Selector selector) + { + switch (Settings::find_distribution) + { + case Settings::GROWING_BLOCKS: + return find_template(begin1, end1, begin2, pred, selector, growing_blocks_tag()); + case Settings::CONSTANT_SIZE_BLOCKS: + return find_template(begin1, end1, begin2, pred, selector, constant_size_blocks_tag()); + case Settings::EQUAL_SPLIT: + return find_template(begin1, end1, begin2, pred, selector, equal_split_tag()); + default: + _GLIBCXX_PARALLEL_ASSERT(false); + return std::make_pair(begin1, begin2); + } + } + +#if _GLIBCXX_FIND_EQUAL_SPLIT + + /** + * @brief Parallel std::find, equal splitting variant. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. Second sequence + * must have same length as first sequence. + * @param pred Find predicate. + * @param selector Functionality (e. g. std::find_if (), std::equal(),...) + * @return Place of finding in both sequences. + */ + template + std::pair + find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred, Selector selector, equal_split_tag) + { + _GLIBCXX_CALL(end1 - begin1) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename traits_type::value_type value_type; + + difference_type length = end1 - begin1; + + difference_type result = length; + + const thread_index_t num_threads = get_max_threads(); + + // XXX VLA error. 
+ difference_type borders[num_threads + 1]; + equally_split(length, num_threads, borders); + +#pragma omp parallel shared(result) num_threads(num_threads) + { + int iam = omp_get_thread_num(); + difference_type pos = borders[iam], limit = borders[iam + 1]; + + RandomAccessIterator1 i1 = begin1 + pos; + RandomAccessIterator2 i2 = begin2 + pos; + for (; pos < limit; pos++) + { +#pragma omp flush(result) + // Result has been set to something lower. + if (result < pos) + break; + + if (selector(i1, i2, pred)) + { +#pragma omp critical (result) + if (result > pos) + result = pos; + break; + } + i1++; + i2++; + } + } + return std::pair(begin1 + result, begin2 + result); + } + +#endif + +#if _GLIBCXX_FIND_GROWING_BLOCKS + + /** + * @brief Parallel std::find, growing block size variant. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. Second sequence + * must have same length as first sequence. + * @param pred Find predicate. + * @param selector Functionality (e. g. std::find_if (), std::equal(),...) + * @return Place of finding in both sequences. + * @see __gnu_parallel::Settings::find_sequential_search_size + * @see __gnu_parallel::Settings::find_initial_block_size + * @see __gnu_parallel::Settings::find_maximum_block_size + * @see __gnu_parallel::Settings::find_increasing_factor + * + * There are two main differences between the growing blocks and + * the constant-size blocks variants. + * 1. For GB, the block size grows; for CSB, the block size is fixed. + + * 2. For GB, the blocks are allocated dynamically; + * for CSB, the blocks are allocated in a predetermined manner, + * namely spatial round-robin. 
+ */ + template + std::pair + find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred, Selector selector, + growing_blocks_tag) + { + _GLIBCXX_CALL(end1 - begin1) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename traits_type::value_type value_type; + + difference_type length = end1 - begin1; + + difference_type sequential_search_size = std::min(length, Settings::find_sequential_search_size); + + // Try it sequentially first. + std::pair find_seq_result = + selector.sequential_algorithm(begin1, begin1 + sequential_search_size, begin2, pred); + + if (find_seq_result.first != (begin1 + sequential_search_size)) + return find_seq_result; + + // Index of beginning of next free block (after sequential find). + difference_type next_block_pos = sequential_search_size; + difference_type result = length; + const thread_index_t num_threads = get_max_threads(); + +#pragma omp parallel shared(result) num_threads(num_threads) + { + // Not within first k elements -> start parallel. + thread_index_t iam = omp_get_thread_num(); + + difference_type block_size = Settings::find_initial_block_size; + difference_type start = fetch_and_add(&next_block_pos, block_size); + + // Get new block, update pointer to next block. + difference_type stop = std::min(length, start + block_size); + + std::pair local_result; + + while (start < length) + { +#pragma omp flush(result) + // Get new value of result. + if (result < start) + { + // No chance to find first element. + break; + } + + local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred); + if (local_result.first != (begin1 + stop)) + { +#pragma omp critical(result) + if ((local_result.first - begin1) < result) + { + result = local_result.first - begin1; + + // Result cannot be in future blocks, stop algorithm. 
+ fetch_and_add(&next_block_pos, length); + } + } + + block_size = std::min(block_size * Settings::find_increasing_factor, Settings::find_maximum_block_size); + + // Get new block, update pointer to next block. + start = fetch_and_add(&next_block_pos, block_size); + stop = (length < (start + block_size)) ? length : (start + block_size); + } + } + + // Return iterator on found element. + return std::pair(begin1 + result, begin2 + result); + } + +#endif + +#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS + + /** + * @brief Parallel std::find, constant block size variant. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. Second sequence + * must have same length as first sequence. + * @param pred Find predicate. + * @param selector Functionality (e. g. std::find_if (), std::equal(),...) + * @return Place of finding in both sequences. + * @see __gnu_parallel::Settings::find_sequential_search_size + * @see __gnu_parallel::Settings::find_initial_block_size + * There are two main differences between the growing blocks and the + * constant-size blocks variants. + * 1. For GB, the block size grows; for CSB, the block size is fixed. + * 2. For GB, the blocks are allocated dynamically; for CSB, the + * blocks are allocated in a predetermined manner, namely spatial + * round-robin. + */ + template + std::pair + find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, + RandomAccessIterator2 begin2, Pred pred, Selector selector, + constant_size_blocks_tag) + { + _GLIBCXX_CALL(end1 - begin1) + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename traits_type::value_type value_type; + + difference_type length = end1 - begin1; + + difference_type sequential_search_size = std::min(length, Settings::find_sequential_search_size); + + // Try it sequentially first.
+ std::pair find_seq_result = + selector.sequential_algorithm(begin1, begin1 + sequential_search_size, begin2, pred); + + if (find_seq_result.first != (begin1 + sequential_search_size)) + return find_seq_result; + + difference_type result = length; + const thread_index_t num_threads = get_max_threads(); + + // Not within first sequential_search_size elements -> start parallel. +#pragma omp parallel shared(result) num_threads(num_threads) + { + thread_index_t iam = omp_get_thread_num(); + difference_type block_size = Settings::find_initial_block_size; + + difference_type start, stop; + + // First element of thread's current iteration. + difference_type iteration_start = sequential_search_size; + + // Where to work (initialization). + start = iteration_start + iam * block_size; + stop = std::min(length, start + block_size); + + std::pair local_result; + + while (start < length) + { + // Get new value of result. +#pragma omp flush(result) + // No chance to find first element. + if (result < start) + break; + + local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred); + if (local_result.first != (begin1 + stop)) + { +#pragma omp critical(result) + if ((local_result.first - begin1) < result) + result = local_result.first - begin1; + + // Will not find better value in its interval. + break; + } + + iteration_start += num_threads * block_size; + + // Where to work. + start = iteration_start + iam * block_size; + stop = std::min(length, start + block_size); + } + } + + // Return iterator on found element. + return std::pair(begin1 + result, begin2 + result); + } +#endif +} // end namespace + +#endif + diff --git a/libstdc++-v3/include/parallel/find_selectors.h b/libstdc++-v3/include/parallel/find_selectors.h new file mode 100644 index 00000000000..fddd8263c0d --- /dev/null +++ b/libstdc++-v3/include/parallel/find_selectors.h @@ -0,0 +1,184 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. 
+// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/find_selectors.h + * @brief Function objects representing different tasks to be plugged + * into the parallel find algorithm. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_FIND_FUNCTIONS_H +#define _GLIBCXX_PARALLEL_FIND_FUNCTIONS_H 1 + +#include +#include +#include + +namespace __gnu_parallel +{ + /** @brief Base class of all __gnu_parallel::find_template selectors. */ + struct generic_find_selector + { }; + + /** + * @brief Test predicate on a single element, used for std::find() + * and std::find_if (). 
+ */ + struct find_if_selector : public generic_find_selector + { + /** @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence (unused). + * @param pred Find predicate. + */ + template + inline bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { return pred(*i1); } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. + */ + template + inline std::pair + sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred) + { + return std::make_pair(find_if(begin1, end1, pred, sequential_tag()), begin2); + } + }; + + /** @brief Test predicate on two adjacent elements. */ + struct adjacent_find_selector : public generic_find_selector + { + /** @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence (unused). + * @param pred Find predicate. + */ + template + inline bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { + // Passed end iterator is one short. + return pred(*i1, *(i1 + 1)); + } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. + */ + template + inline std::pair + sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred) + { + // Passed end iterator is one short. 
+ RandomAccessIterator1 spot = adjacent_find(begin1, end1 + 1, pred, sequential_tag()); + if (spot == (end1 + 1)) + spot = end1; + return std::make_pair(spot, begin2); + } + }; + + /** @brief Test inverted predicate on a pair of corresponding elements. */ + struct mismatch_selector : public generic_find_selector + { + /** + * @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence. + * @param pred Find predicate. + */ + template + inline bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { return !pred(*i1, *i2); } + + /** + * @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. + */ + template + inline std::pair + sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred) + { + return mismatch(begin1, end1, begin2, pred, sequential_tag()); + } + }; + + + /** @brief Test predicate on several elements. */ + template + struct find_first_of_selector : public generic_find_selector + { + ForwardIterator begin; + ForwardIterator end; + + explicit find_first_of_selector(ForwardIterator begin, ForwardIterator end) + : begin(begin), end(end) { } + + /** @brief Test on one position. + * @param i1 Iterator on first sequence. + * @param i2 Iterator on second sequence (unused). + * @param pred Find predicate. */ + template + inline bool + operator()(RandomAccessIterator1 i1, RandomAccessIterator2 i2, Pred pred) + { + for (ForwardIterator pos_in_candidates = begin; pos_in_candidates != end; pos_in_candidates++) + if (pred(*i1, *pos_in_candidates)) + return true; + return false; + } + + /** @brief Corresponding sequential algorithm on a sequence. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence.
+ * @param begin2 Begin iterator of second sequence. + * @param pred Find predicate. */ + template + inline std::pair + sequential_algorithm(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred) + { + return std::make_pair(find_first_of(begin1, end1, begin, end, pred, sequential_tag()), begin2); + } + }; +} + +#endif diff --git a/libstdc++-v3/include/parallel/for_each.h b/libstdc++-v3/include/parallel/for_each.h new file mode 100644 index 00000000000..eb5e04e84f1 --- /dev/null +++ b/libstdc++-v3/include/parallel/for_each.h @@ -0,0 +1,83 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. 
This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/for_each.h + * @brief Main interface for embarrassingly parallel functions. + * + * The explicit implementations are in other header files, like + * workstealing.h, par_loop.h, omp_loop.h, and omp_loop_static.h. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_FOR_EACH_H +#define _GLIBCXX_PARALLEL_FOR_EACH_H 1 + +#include +#include +#include +#include + +namespace __gnu_parallel +{ + /** @brief Choose the desired algorithm by evaluating @c parallelism_tag. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param user_op A user-specified functor (comparator, predicate, + * associative operator,...) + * @param functionality functor to "process" an element with + * user_op (depends on desired functionality, e. g. accumulate, + * for_each,...) + * @param reduction Reduction functor. + * @param reduction_start Initial value for reduction. + * @param output Output iterator. + * @param bound Maximum number of elements processed.
+ * @param parallelism_tag Parallelization method */ + template + UserOp + for_each_template_random_access(InputIterator begin, InputIterator end, + UserOp user_op, Functionality& functionality, + Red reduction, Result reduction_start, + Result& output, + typename std::iterator_traits::difference_type bound, parallelism parallelism_tag) + { + if (parallelism_tag == parallel_unbalanced) + return for_each_template_random_access_ed(begin, end, user_op, functionality, reduction, reduction_start, output, bound); + else if (parallelism_tag == parallel_omp_loop) + return for_each_template_random_access_omp_loop(begin, end, user_op, functionality, reduction, reduction_start, output, bound); + else if (parallelism_tag == parallel_omp_loop_static) + return for_each_template_random_access_omp_loop(begin, end, user_op, functionality, reduction, reduction_start, output, bound); + else //e. g. parallel_balanced + return for_each_template_random_access_workstealing(begin, end, user_op, functionality, reduction, reduction_start, output, bound); + } +} + +#endif diff --git a/libstdc++-v3/include/parallel/for_each_selectors.h b/libstdc++-v3/include/parallel/for_each_selectors.h new file mode 100644 index 00000000000..f1d0abf255b --- /dev/null +++ b/libstdc++-v3/include/parallel/for_each_selectors.h @@ -0,0 +1,343 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/for_each_selectors.h + * @brief Functors representing different tasks to be plugged into the + * generic parallelization methods for embarrassingly parallel functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H +#define _GLIBCXX_PARALLEL_FOR_EACH_SELECTORS_H 1 + +#include + +namespace __gnu_parallel +{ + + /** @brief Generic selector for embarrassingly parallel functions. */ + template + struct generic_for_each_selector + { + /** @brief Iterator on last element processed; needed for some + * algorithms (e. g. std::transform()). + */ + It finish_iterator; + }; + + + /** @brief std::for_each() selector. */ + template + struct for_each_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + template + inline bool operator()(Op& o, It i) + { + o(*i); + return true; + } + }; + + /** @brief std::generate() selector. */ + template + struct generate_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param o Operator. 
+ * @param i Iterator referencing object. */ + template + inline bool operator()(Op& o, It i) + { + *i = o(); + return true; + } + }; + + /** @brief std::fill() selector. */ + template + struct fill_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param v Current value. + * @param i Iterator referencing object. */ + template + inline bool operator()(Val& v, It i) + { + *i = v; + return true; + } + }; + + /** @brief std::transform() selector, one input sequence variant. */ + template + struct transform1_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + template + inline bool operator()(Op& o, It i) + { + *i.second = o(*i.first); + return true; + } + }; + + /** @brief std::transform() selector, two input sequences variant. */ + template + struct transform2_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + template + inline bool operator()(Op& o, It i) + { + *i.third = o(*i.first, *i.second); + return true; + } + }; + + /** @brief std::replace() selector. */ + template + struct replace_selector : public generic_for_each_selector + { + /** @brief Value to replace with. */ + const T& new_val; + + /** @brief Constructor. + * @param new_val Value to replace with. */ + explicit replace_selector(const T &new_val) : new_val(new_val) {} + + /** @brief Functor execution. + * @param v Value to be replaced. + * @param i Iterator referencing object. */ + inline bool operator()(T& v, It i) + { + if (*i == v) + *i = new_val; + return true; + } + }; + + /** @brief std::replace_if() selector. */ + template + struct replace_if_selector : public generic_for_each_selector + { + /** @brief Value to replace with. */ + const T& new_val; + + /** @brief Constructor. + * @param new_val Value to replace with.
*/ + explicit replace_if_selector(const T &new_val) : new_val(new_val) { } + + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. */ + inline bool operator()(Op& o, It i) + { + if (o(*i)) + *i = new_val; + return true; + } + }; + + /** @brief std::count() selector. */ + template + struct count_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param v Current value. + * @param i Iterator referencing object. + * @return 1 if count, 0 if does not count. */ + template + inline Diff operator()(Val& v, It i) + { return (v == *i) ? 1 : 0; } + }; + + /** @brief std::count_if () selector. */ + template + struct count_if_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param o Operator. + * @param i Iterator referencing object. + * @return 1 if count, 0 if does not count. */ + template + inline Diff operator()(Op& o, It i) + { return (o(*i)) ? 1 : 0; } + }; + + /** @brief std::accumulate() selector. */ + template + struct accumulate_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param o Operator (unused). + * @param i Iterator referencing object. + * @return The current value. */ + template + inline typename std::iterator_traits::value_type operator()(Op o, It i) + { return *i; } + }; + + /** @brief std::inner_product() selector. */ + template + struct inner_product_selector : public generic_for_each_selector + { + /** @brief Begin iterator of first sequence. */ + It begin1_iterator; + + /** @brief Begin iterator of second sequence. */ + It2 begin2_iterator; + + /** @brief Constructor. + * @param b1 Begin iterator of first sequence. + * @param b2 Begin iterator of second sequence. */ + explicit inner_product_selector(It b1, It2 b2) : begin1_iterator(b1), begin2_iterator(b2) { } + + /** @brief Functor execution. + * @param mult Multiplication functor. + * @param current Iterator referencing object. 
+ * @return Inner product elemental result. */ + template + inline T operator()(Op mult, It current) + { + typename std::iterator_traits::difference_type position = current - begin1_iterator; + return mult(*current, *(begin2_iterator + position)); + } + }; + + /** @brief Selector that just returns the passed iterator. */ + template + struct identity_selector : public generic_for_each_selector + { + /** @brief Functor execution. + * @param o Operator (unused). + * @param i Iterator referencing object. + * @return Passed iterator. */ + template + inline It operator()(Op o, It i) + { return i; } + }; + + /** @brief Selector that stores the difference between two adjacent + * elements. + */ + template + struct adjacent_difference_selector : public generic_for_each_selector + { + template + inline bool operator()(Op& o, It i) + { + typename It::first_type go_back_one = i.first; + --go_back_one; + *i.second = o(*i.first, *go_back_one); + return true; + } + }; + + // XXX move into type_traits? + /** @brief Functor doing nothing + * + * For some reduction tasks (this is not a function object, but is + * passed as selector dummy parameter). + */ + struct nothing + { + /** @brief Functor execution. + * @param i Iterator referencing object. */ + template + inline void operator()(It i) + { } + }; + + /** @brief Reduction function doing nothing. */ + struct dummy_reduct + { + inline bool operator()(bool /*x*/, bool /*y*/) const + { return true; } + }; + + /** @brief Reduction for finding the minimum element, using a comparator. */ + template + struct min_element_reduct + { + Comp& comp; + + explicit min_element_reduct(Comp &c) : comp(c) + { } + + inline It operator()(It x, It y) + { + if (comp(*x, *y)) + return x; + else + return y; + } + }; + + /** @brief Reduction for finding the maximum element, using a comparator.
*/ + template + struct max_element_reduct + { + Comp& comp; + + explicit max_element_reduct(Comp& c) : comp(c) + { } + + inline It operator()(It x, It y) + { + if (comp(*x, *y)) + return y; + else + return x; + } + }; + + /** @brief General reduction, using a binary operator. */ + template + struct accumulate_binop_reduct + { + BinOp& binop; + + explicit accumulate_binop_reduct(BinOp& b) : binop(b) {} + + template + inline T operator()(T x, T y) { return binop(x, y); } + }; +} + +#endif diff --git a/libstdc++-v3/include/parallel/iterator.h b/libstdc++-v3/include/parallel/iterator.h new file mode 100644 index 00000000000..26b7f331a82 --- /dev/null +++ b/libstdc++-v3/include/parallel/iterator.h @@ -0,0 +1,203 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. 
This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/iterator.h + * @brief Helper iterator classes for the std::transform() functions. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_ITERATOR_H +#define _GLIBCXX_PARALLEL_ITERATOR_H 1 + +#include +#include + +namespace __gnu_parallel +{ + /** @brief A pair of iterators. The usual iterator operations are + * applied to both child iterators. + */ + template + class iterator_pair : public std::pair + { + private: + typedef iterator_pair type; + typedef std::pair base_type; + + public: + typedef IteratorCategory iterator_category; + typedef void value_type; + + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef type* pointer; + typedef type& reference; + + iterator_pair() { } + + iterator_pair(const Iterator1& first, const Iterator2& second) + : base_type(first, second) { } + + // Pre-increment operator. + type& + operator++() + { + ++base_type::first; + ++base_type::second; + return *this; + } + + // Post-increment operator. + const type + operator++(int) + { return type(base_type::first++, base_type::second++); } + + // Pre-decrement operator. + type& + operator--() + { + --base_type::first; + --base_type::second; + return *this; + } + + // Post-decrement operator. + const type + operator--(int) + { return type(base_type::first--, base_type::second--); } + + // Type conversion. 
+ operator Iterator2() const + { return base_type::second; } + + type& + operator=(const type& other) + { + base_type::first = other.first; + base_type::second = other.second; + return *this; + } + + type + operator+(difference_type delta) const + { return type(base_type::first + delta, base_type::second + delta); } + + difference_type + operator-(const type& other) const + { return base_type::first - other.first; } + }; + + + /** @brief A triple of iterators. The usual iterator operations are + applied to all three child iterators. + */ + template + class iterator_triple + { + private: + typedef iterator_triple type; + + public: + typedef IteratorCategory iterator_category; + typedef void value_type; + typedef typename Iterator1::difference_type difference_type; + typedef type* pointer; + typedef type& reference; + + Iterator1 first; + Iterator2 second; + Iterator3 third; + + iterator_triple() { } + + iterator_triple(const Iterator1& _first, const Iterator2& _second, + const Iterator3& _third) + { + first = _first; + second = _second; + third = _third; + } + + // Pre-increment operator. + type& + operator++() + { + ++first; + ++second; + ++third; + return *this; + } + + // Post-increment operator. + const type + operator++(int) + { return type(first++, second++, third++); } + + // Pre-decrement operator. + type& + operator--() + { + --first; + --second; + --third; + return *this; + } + + // Post-decrement operator. + const type + operator--(int) + { return type(first--, second--, third--); } + + // Type conversion. 
+ operator Iterator3() const + { return third; } + + type& + operator=(const type& other) + { + first = other.first; + second = other.second; + third = other.third; + return *this; + } + + type + operator+(difference_type delta) const + { return type(first + delta, second + delta, third + delta); } + + difference_type + operator-(const type& other) const + { return first - other.first; } + }; +} + +#endif diff --git a/libstdc++-v3/include/parallel/list_partition.h b/libstdc++-v3/include/parallel/list_partition.h new file mode 100644 index 00000000000..320b1b8286a --- /dev/null +++ b/libstdc++-v3/include/parallel/list_partition.h @@ -0,0 +1,181 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. 
This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/list_partition.h + * @brief Functionality to split sequence referenced by only input + * iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Leonor Frias Moya and Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_LIST_PARTITION_H +#define _GLIBCXX_PARALLEL_LIST_PARTITION_H 1 + +#include +#include + +namespace __gnu_parallel +{ + /** @brief Shrinks and doubles the ranges. + * @param os_starts Start positions worked on (oversampled). + * @param count_to_two Counts up to 2. + * @param range_length Current length of a chunk. + * @param make_twice Whether the @c os_starts is allowed to be + * grown or not + */ + template + void + shrink_and_double(std::vector& os_starts, size_t& count_to_two, size_t& range_length, const bool make_twice) + { + ++count_to_two; + if (not make_twice or count_to_two < 2) + { + shrink(os_starts, count_to_two, range_length); + } + else + { + os_starts.resize((os_starts.size() - 1) * 2 + 1); + count_to_two = 0; + } + } + + /** @brief Combines two ranges into one and thus halves the number of ranges. + * @param os_starts Start positions worked on (oversampled). + * @param count_to_two Counts up to 2. + * @param range_length Current length of a chunk. */ + template + void + shrink(std::vector& os_starts, size_t& count_to_two, + size_t& range_length) + { + for (typename std::vector::size_type i = 0; i <= (os_starts.size() / 2); ++i) + { + os_starts[i] = os_starts[i * 2]; + } + range_length *= 2; + } + + /** @brief Splits a sequence given by input iterators into parts of + * almost equal size + * + * The function needs only one pass over the sequence. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param starts Start iterators for the resulting parts, dimension + * @c num_parts+1. 
For convenience, @c starts @c [num_parts] + * contains the end iterator of the sequence. + * @param lengths Length of the resulting parts. + * @param num_parts Number of parts to split the sequence into. + * @param f Functor to be applied to each element by traversing it + * @param oversampling Oversampling factor. If 0, then the + * partitions will differ in at most @f$ \sqrt{\mathrm{end} - + * \mathrm{begin}} @f$ elements. Otherwise, the ratio between the + * longest and the shortest part is bounded by @f$ + * 1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$. + * @return Length of the whole sequence. + */ + template + size_t + list_partition(const InputIterator begin, const InputIterator end, + InputIterator* starts, size_t* lengths, const int num_parts, + FunctorType& f, int oversampling = 0) + { + bool make_twice = false; + + // According to the oversampling factor, the resizing algorithm is chosen. + if (oversampling == 0) + { + make_twice = true; + oversampling = 1; + } + + std::vector os_starts(2 * oversampling * num_parts + 1); + + os_starts[0]= begin; + InputIterator prev = begin, it = begin; + size_t dist_limit = 0, dist = 0; + size_t cur = 1, next = 1; + size_t range_length = 1; + size_t count_to_two = 0; + while (it != end){ + cur = next; + for (; cur < os_starts.size() and it != end; ++cur) + { + for (dist_limit += range_length; dist < dist_limit and it != end; ++dist) + { + f(it); + ++it; + } + os_starts[cur] = it; + } + + // Must compare for end and not cur < os_starts.size() , because + // cur could be == os_starts.size() as well + if (it == end) + break; + + shrink_and_double(os_starts, count_to_two, range_length, make_twice); + next = os_starts.size()/2 + 1; + } + + // Calculation of the parts (one must be extracted from current + // because the partition beginning at end, consists only of + // itself). 
+ size_t size_part = (cur - 1) / num_parts; + int size_greater = static_cast((cur - 1) % num_parts); + starts[0] = os_starts[0]; + + size_t index = 0; + + // Smallest partitions. + for (int i = 1; i < (num_parts + 1 - size_greater); ++i) + { + lengths[i-1] = size_part * range_length; + index += size_part; + starts[i] = os_starts[index]; + } + + // Biggest partitions. + for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i) + { + lengths[i-1] = (size_part+1) * range_length; + index += (size_part+1); + starts[i] = os_starts[index]; + } + + // Correction of the end size (the end iteration has not finished). + lengths[num_parts - 1] -= (dist_limit - dist); + + return dist; + } +} + +#endif diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h new file mode 100644 index 00000000000..1823282c9d3 --- /dev/null +++ b/libstdc++-v3/include/parallel/losertree.h @@ -0,0 +1,1077 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/losertree.h + * @brief Many generic loser tree variants. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_LOSERTREE_H +#define _GLIBCXX_PARALLEL_LOSERTREE_H + +#include + +#include +#include +#include + +namespace __gnu_parallel +{ + +#if _GLIBCXX_LOSER_TREE_EXPLICIT + + /** @brief Guarded loser tree, copying the whole element into the + * tree structure. + * + * Guarding is done explicitly through two flags per element, inf + * and sup This is a quite slow variant. + */ + template > + class LoserTreeExplicit + { + private: + struct Loser + { + // The relevant element. + T key; + + // Is this an infimum or supremum element? + bool inf, sup; + + // Number of the sequence the element comes from. + int source; + }; + + unsigned int size, offset; + Loser* losers; + Comparator comp; + + public: + inline LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less()) : comp(_comp) + { + size = _size; + offset = size; + losers = new Loser[size]; + for (unsigned int l = 0; l < size; l++) + { + //losers[l].key = ... 
stays unset + losers[l].inf = true; + losers[l].sup = false; + //losers[l].source = -1; //sentinel + } + } + + inline ~LoserTreeExplicit() + { delete[] losers; } + + inline void + print() { } + + inline int + get_min_source() + { return losers[0].source; } + + inline void + insert_start(T key, int source, bool sup) + { + bool inf = false; + for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) + { + if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && comp(losers[pos].key, key)) || losers[pos].inf || sup) + { + // The other one is smaller. + std::swap(losers[pos].key, key); + std::swap(losers[pos].inf, inf); + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + } + } + + losers[0].key = key; + losers[0].inf = inf; + losers[0].sup = sup; + losers[0].source = source; + } + + inline void + init() { } + + inline void + delete_min_insert(T key, bool sup) + { + bool inf = false; + int source = losers[0].source; + for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && comp(losers[pos].key, key)) + || losers[pos].inf || sup) + { + // The other one is smaller. + std::swap(losers[pos].key, key); + std::swap(losers[pos].inf, inf); + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + } + } + + losers[0].key = key; + losers[0].inf = inf; + losers[0].sup = sup; + losers[0].source = source; + } + + inline void + insert_start_stable(T key, int source, bool sup) + { + bool inf = false; + for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) + { + if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && + ((comp(losers[pos].key, key)) || + (!comp(key, losers[pos].key) && losers[pos].source < source))) + || losers[pos].inf || sup) + { + // Take next key. 
+ std::swap(losers[pos].key, key); + std::swap(losers[pos].inf, inf); + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + } + } + + losers[0].key = key; + losers[0].inf = inf; + losers[0].sup = sup; + losers[0].source = source; + } + + inline void + init_stable() { } + + inline void + delete_min_insert_stable(T key, bool sup) + { + bool inf = false; + int source = losers[0].source; + for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) + { + if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup + && ((comp(losers[pos].key, key)) || + (!comp(key, losers[pos].key) && losers[pos].source < source))) + || losers[pos].inf || sup) + { + std::swap(losers[pos].key, key); + std::swap(losers[pos].inf, inf); + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + } + } + + losers[0].key = key; + losers[0].inf = inf; + losers[0].sup = sup; + losers[0].source = source; + } + }; + +#endif + +#if _GLIBCXX_LOSER_TREE + + /** @brief Guarded loser tree, either copying the whole element into + * the tree structure, or looking up the element via the index. + * + * Guarding is done explicitly through one flag sup per element, + * inf is not needed due to a better initialization routine. This + * is a well-performing variant. + */ + template > + class LoserTree + { + private: + struct Loser + { + bool sup; + int source; + T key; + }; + + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; + + public: + inline LoserTree(unsigned int _k, Comparator _comp = std::less()) + : comp(_comp) + { + ik = _k; + + // Next greater power of 2. 
+ k = 1 << (log2(ik - 1) + 1); + offset = k; + losers = new Loser[k * 2]; + for (unsigned int i = ik - 1; i < k; i++) + losers[i + k].sup = true; + } + + inline ~LoserTree() + { delete[] losers; } + + void + print() + { + for (unsigned int i = 0; i < (k * 2); i++) + printf("%d %d from %d, %d\n", i, losers[i].key, losers[i].source, losers[i].sup); + } + + inline int + get_min_source() + { return losers[0].source; } + + inline void + insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + losers[pos].sup = sup; + losers[pos].source = source; + losers[pos].key = key; + } + + unsigned int + init_winner (unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup || + (!losers[left].sup && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { losers[0] = losers[init_winner(1)]; } + + // Do not pass const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool sup) + { + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) + { + // The other one is smaller. 
+ std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } + + inline void + insert_start_stable(const T& key, int source, bool sup) + { return insert_start(key, source, sup); } + + unsigned int + init_winner_stable (unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if ( losers[right].sup || + (!losers[left].sup && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init_stable() + { losers[0] = losers[init_winner_stable(1)]; } + + // Do not pass const reference since key will be used as local variable. + inline void + delete_min_insert_stable(T key, bool sup) + { + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ( (sup && (!losers[pos].sup || losers[pos].source < source)) || + (!sup && !losers[pos].sup && + ((comp(losers[pos].key, key)) || + (!comp(key, losers[pos].key) && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } + }; + +#endif + +#if _GLIBCXX_LOSER_TREE_REFERENCE + + /** @brief Guarded loser tree, either copying the whole element into + * the tree structure, or looking up the element via the index. + * + * Guarding is done explicitly through one flag sup per element, + * inf is not needed due to a better initialization routine. This + * is a well-performing variant. 
+ */ + template > + class LoserTreeReference + { +#undef COPY +#ifdef COPY +#define KEY(i) losers[i].key +#define KEY_SOURCE(i) key +#else +#define KEY(i) keys[losers[i].source] +#define KEY_SOURCE(i) keys[i] +#endif + private: + struct Loser + { + bool sup; + int source; +#ifdef COPY + T key; +#endif + }; + + unsigned int ik, k, offset; + Loser* losers; +#ifndef COPY + T* keys; +#endif + Comparator comp; + + public: + inline LoserTreeReference(unsigned int _k, Comparator _comp = std::less()) : comp(_comp) + { + ik = _k; + + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + losers = new Loser[k * 2]; +#ifndef COPY + keys = new T[ik]; +#endif + for (unsigned int i = ik - 1; i < k; i++) + losers[i + k].sup = true; + } + + inline ~LoserTreeReference() + { + delete[] losers; +#ifndef COPY + delete[] keys; +#endif + } + + void + print() + { + for (unsigned int i = 0; i < (k * 2); i++) + printf("%d %d from %d, %d\n", i, KEY(i), losers[i].source, losers[i].sup); + } + + inline int + get_min_source() + { return losers[0].source; } + + inline void + insert_start(T key, int source, bool sup) + { + unsigned int pos = k + source; + + losers[pos].sup = sup; + losers[pos].source = source; + KEY(pos) = key; + } + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if ( losers[right].sup || + (!losers[left].sup && !comp(KEY(right), KEY(left)))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; + } + + inline void + delete_min_insert(T key, bool sup) + { + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. 
+ if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); +#ifdef COPY + std::swap(KEY(pos), KEY_SOURCE(source)); +#endif + } + } + + losers[0].sup = sup; + losers[0].source = source; +#ifdef COPY + KEY(0) = KEY_SOURCE(source); +#endif + } + + inline void + insert_start_stable(T key, int source, bool sup) + { return insert_start(key, source, sup); } + + unsigned int + init_winner_stable(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup && !comp(KEY(right), KEY(left)))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init_stable() + { losers[0] = losers[init_winner_stable(1)]; } + + inline void + delete_min_insert_stable(T key, bool sup) + { + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ( (sup && (!losers[pos].sup || losers[pos].source < source)) || + (!sup && !losers[pos].sup && + ((comp(KEY(pos), KEY_SOURCE(source))) || + (!comp(KEY_SOURCE(source), KEY(pos)) && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); +#ifdef COPY + std::swap(KEY(pos), KEY_SOURCE(source)); +#endif + } + } + + losers[0].sup = sup; + losers[0].source = source; +#ifdef COPY + KEY(0) = KEY_SOURCE(source); +#endif + } + }; +#undef KEY +#undef KEY_SOURCE + +#endif + +#if _GLIBCXX_LOSER_TREE_POINTER + + /** @brief Guarded loser tree, either copying the whole element into + the tree structure, or looking up the element via the index. 
+ * Guarding is done explicitly through one flag sup per element, + * inf is not needed due to a better initialization routine. + * This is a well-performing variant. + */ + template > + class LoserTreePointer + { + private: + struct Loser + { + bool sup; + int source; + const T* keyp; + }; + + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; + + public: + inline LoserTreePointer(unsigned int _k, Comparator _comp = std::less()) : comp(_comp) + { + ik = _k; + + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + losers = new Loser[k * 2]; + for (unsigned int i = ik - 1; i < k; i++) + losers[i + k].sup = true; + } + + inline ~LoserTreePointer() + { delete[] losers; } + + void + print() + { + for (unsigned int i = 0; i < (k * 2); i++) + printf("%d %d from %d, %d\n", i, losers[i].keyp, losers[i].source, losers[i].sup); + } + + inline int + get_min_source() + { return losers[0].source; } + + inline void + insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + losers[pos].sup = sup; + losers[pos].source = source; + losers[pos].keyp = &key; + } + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if ( losers[right].sup || + (!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { losers[0] = losers[init_winner(1)]; } + + inline void delete_min_insert(const T& key, bool sup) + { + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. 
+ if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } + + inline void + insert_start_stable(const T& key, int source, bool sup) + { return insert_start(key, source, sup); } + + unsigned int + init_winner_stable (unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init_stable() + { losers[0] = losers[init_winner_stable(1)]; } + + inline void + delete_min_insert_stable(const T& key, bool sup) + { + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ( (sup && (!losers[pos].sup || losers[pos].source < source)) || + (!sup && !losers[pos].sup && + ((comp(*losers[pos].keyp, *keyp)) || + (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } + }; + +#endif + +#if _GLIBCXX_LOSER_TREE_UNGUARDED + + /** @brief Unguarded loser tree, copying the whole element into the + * tree structure. + * + * No guarding is done, therefore not a single input sequence must + * run empty. This is a very fast variant. 
+ */ + template > + class LoserTreeUnguarded + { + private: + struct Loser + { + int source; + T key; + }; + + unsigned int ik, k, offset; + unsigned int* mapping; + Loser* losers; + Comparator comp; + + void + map(unsigned int root, unsigned int begin, unsigned int end) + { + if (begin + 1 == end) + mapping[begin] = root; + else + { + // Next greater or equal power of 2. + unsigned int left = 1 << (log2(end - begin - 1)); + map(root * 2, begin, begin + left); + map(root * 2 + 1, begin + left, end); + } + } + + public: + inline LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less()) : comp(_comp) + { + ik = _k; + // Next greater or equal power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + losers = new Loser[k + ik]; + mapping = new unsigned int[ik]; + map(1, 0, ik); + } + + inline ~LoserTreeUnguarded() + { + delete[] losers; + delete[] mapping; + } + + void + print() + { + for (unsigned int i = 0; i < k + ik; i++) + printf("%d %d from %d\n", i, losers[i].key, losers[i].source); + } + + inline int + get_min_source() + { return losers[0].source; } + + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = mapping[source]; + losers[pos].source = source; + losers[pos].key = key; + } + + unsigned int + init_winner(unsigned int root, unsigned int begin, unsigned int end) + { + if (begin + 1 == end) + return mapping[begin]; + else + { + // Next greater or equal power of 2. + unsigned int division = 1 << (log2(end - begin - 1)); + unsigned int left = init_winner(2 * root, begin, begin + division); + unsigned int right = init_winner(2 * root + 1, begin + division, end); + if (!comp(losers[right].key, losers[left].key)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. 
+ losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { losers[0] = losers[init_winner(1, 0, ik)]; } + + // Do not pass const reference since key will be used as local variable. + inline void + delete_min_insert(const T& key, bool) + { + losers[0].key = key; + T& keyr = losers[0].key; + int& source = losers[0].source; + for (int pos = mapping[source] / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(losers[pos].key, keyr)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, keyr); + } + } + } + + inline void + insert_start_stable(const T& key, int source, bool) + { return insert_start(key, source, false); } + + inline void + init_stable() + { init(); } + + inline void + delete_min_insert_stable(const T& key, bool) + { + losers[0].key = key; + T& keyr = losers[0].key; + int& source = losers[0].source; + for (int pos = mapping[source] / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(losers[pos].key, keyr) + || (!comp(keyr, losers[pos].key) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, keyr); + } + } + } + }; + +#endif + +#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED + + /** @brief Unguarded loser tree, keeping only pointers to the + * elements in the tree structure. + * + * No guarding is done, therefore not a single input sequence must + * run empty. This is a very fast variant. + */ + template > + class LoserTreePointerUnguarded + { + private: + struct Loser + { + int source; + const T* keyp; + }; + + unsigned int ik, k, offset; + unsigned int* mapping; + Loser* losers; + Comparator comp; + + void map(unsigned int root, unsigned int begin, unsigned int end) + { + if (begin + 1 == end) + mapping[begin] = root; + else + { + // Next greater or equal power of 2. 
+ unsigned int left = 1 << (log2(end - begin - 1)); + map(root * 2, begin, begin + left); + map(root * 2 + 1, begin + left, end); + } + } + + public: + inline LoserTreePointerUnguarded(unsigned int _k, Comparator _comp = std::less()) : comp(_comp) + { + ik = _k; + + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + losers = new Loser[k + ik]; + mapping = new unsigned int[ik]; + map(1, 0, ik); + } + + inline ~LoserTreePointerUnguarded() + { + delete[] losers; + delete[] mapping; + } + + void + print() + { + for (unsigned int i = 0; i < k + ik; i++) + printf("%d %d from %d\n", i, *losers[i].keyp, losers[i].source); + } + + inline int + get_min_source() + { return losers[0].source; } + + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = mapping[source]; + losers[pos].source = source; + losers[pos].keyp = &key; + } + + unsigned int + init_winner(unsigned int root, unsigned int begin, unsigned int end) + { + if (begin + 1 == end) + return mapping[begin]; + else + { + // Next greater or equal power of 2. + unsigned int division = 1 << (log2(end - begin - 1)); + unsigned int left = init_winner(2 * root, begin, begin + division); + unsigned int right = init_winner(2 * root + 1, begin + division, end); + if (!comp(*losers[right].keyp, *losers[left].keyp)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1, 0, ik)]; + } + + inline void + delete_min_insert(const T& key, bool) + { + const T* keyp = &key; + int& source = losers[0].source; + for (int pos = mapping[source] / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(*losers[pos].keyp, *keyp)) + { + // The other one is smaller. 
+ std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].keyp = keyp; + } + + inline void + insert_start_stable(const T& key, int source, bool) + { return insert_start(key, source, false); } + + inline void + init_stable() + { init(); } + + inline void + delete_min_insert_stable(const T& key, bool) + { + int& source = losers[0].source; + const T* keyp = &key; + for (int pos = mapping[source] / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(*losers[pos].keyp, *keyp) + || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + losers[0].keyp = keyp; + } + }; +#endif +} + +#endif diff --git a/libstdc++-v3/include/parallel/merge.h b/libstdc++-v3/include/parallel/merge.h new file mode 100644 index 00000000000..0bf29497f53 --- /dev/null +++ b/libstdc++-v3/include/parallel/merge.h @@ -0,0 +1,230 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/merge.h + * @brief Parallel implementation of std::merge(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MERGE_H +#define _GLIBCXX_PARALLEL_MERGE_H 1 + +#include +#include + +namespace __gnu_parallel +{ + /** @brief Merge routine being able to merge only the @c max_length + * smallest elements. + * + * The @c begin iterators are advanced accordingly, they might not + * reach @c end, in contrast to the usual variant. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param target Target begin iterator. + * @param max_length Maximum number of elements to merge. + * @param comp Comparator. + * @return Output end iterator. 
*/ + template + OutputIterator + merge_advance_usual(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator2& begin2, RandomAccessIterator2 end2, OutputIterator target, _DifferenceTp max_length, Comparator comp) + { + typedef _DifferenceTp difference_type; + while (begin1 != end1 && begin2 != end2 && max_length > 0) + { + // array1[i1] < array0[i0] + if (comp(*begin2, *begin1)) + *target++ = *begin2++; + else + *target++ = *begin1++; + max_length--; + } + + if (begin1 != end1) + { + target = std::copy(begin1, begin1 + max_length, target); + begin1 += max_length; + } + else + { + target = std::copy(begin2, begin2 + max_length, target); + begin2 += max_length; + } + return target; + } + + /** @brief Merge routine being able to merge only the @c max_length + * smallest elements. + * + * The @c begin iterators are advanced accordingly, they might not + * reach @c end, in contrast to the usual variant. + * Specially designed code should allow the compiler to generate + * conditional moves instead of branches. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param target Target begin iterator. + * @param max_length Maximum number of elements to merge. + * @param comp Comparator. + * @return Output end iterator. 
*/ + template + OutputIterator + merge_advance_movc(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator2& begin2, RandomAccessIterator2 end2, OutputIterator target, _DifferenceTp max_length, Comparator comp) + { + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits::value_type value_type1; + typedef typename std::iterator_traits::value_type value_type2; + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(max_length >= 0); +#endif + /* Merge while both inputs still have elements and the budget is not used up. */ + while (begin1 != end1 && begin2 != end2 && max_length > 0) + { + value_type1 element1; + value_type2 element2; + RandomAccessIterator1 next1; + RandomAccessIterator2 next2; + + next1 = begin1 + 1; + next2 = begin2 + 1; + element1 = *begin1; + element2 = *begin2; + + if (comp(element2, element1)) /* head of sequence 2 is strictly smaller; on ties sequence 1 goes first */ + { + element1 = element2; + begin2 = next2; + } + else + { + begin1 = next1; + } + + *target = element1; + + target++; + max_length--; + } + if (begin1 != end1) /* copy what is left of the budget from sequence 1 if it still has elements, otherwise from sequence 2. NOTE(review): nothing here checks that max_length elements remain in that sequence; presumably guaranteed by the caller. */ + { + target = std::copy(begin1, begin1 + max_length, target); + begin1 += max_length; + } + else + { + target = std::copy(begin2, begin2 + max_length, target); + begin2 += max_length; + } + return target; + } + + /** @brief Merge routine being able to merge only the @c max_length + * smallest elements. + * + * The @c begin iterators are advanced accordingly; they might not + * reach @c end, in contrast to the usual variant. + * Forwards to merge_advance_movc, the conditional-move variant. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param target Target begin iterator. + * @param max_length Maximum number of elements to merge. + * @param comp Comparator. + * @return Output end iterator.
*/ + template + inline OutputIterator + merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator2& begin2, RandomAccessIterator2 end2, OutputIterator target, _DifferenceTp max_length, Comparator comp) + { + _GLIBCXX_CALL(max_length) + + return merge_advance_movc(begin1, end1, begin2, end2, target, max_length, comp); + } + + /** @brief Sequential fallback of the merge routine, used when the + iterators of the two input sequences are of different types. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param target Target begin iterator. + * @param max_length Maximum number of elements to merge. + * @param comp Comparator. + * @return Output end iterator. */ + template + inline RandomAccessIterator3 + parallel_merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, + RandomAccessIterator2& begin2, RandomAccessIterator2 end2, //different iterators, parallel implementation not available + RandomAccessIterator3 target, + typename std::iterator_traits::difference_type max_length, Comparator comp) + { + return merge_advance(begin1, end1, begin2, end2, target, max_length, comp); + } + + /** @brief Parallel merge routine being able to merge only the @c + * max_length smallest elements. + * + * The @c begin iterators are advanced accordingly; they might not + * reach @c end, in contrast to the usual variant. + * The work is delegated to parallel_multiway_merge. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param target Target begin iterator. + * @param max_length Maximum number of elements to merge. + * @param comp Comparator. + * @return Output end iterator.
+ */ + template + inline RandomAccessIterator3 + parallel_merge_advance(RandomAccessIterator1& begin1, RandomAccessIterator1 end1, RandomAccessIterator1& begin2, RandomAccessIterator1 end2, RandomAccessIterator3 target, typename std::iterator_traits::difference_type max_length, Comparator comp) + { + typedef typename std::iterator_traits::value_type + value_type; + typedef typename std::iterator_traits::difference_type + difference_type1 /* == difference_type2 */; + typedef typename std::iterator_traits::difference_type + difference_type3; + /* Pack both inputs as a two-element array of (begin, end) pairs, the form parallel_multiway_merge is called with. */ + std::pair seqs[2] = { std::make_pair(begin1, end1), std::make_pair(begin2, end2) }; + RandomAccessIterator3 target_end = parallel_multiway_merge(seqs, seqs + 2, target, comp, max_length, true, false); + + return target_end; + } +} //namespace __gnu_parallel + +#endif diff --git a/libstdc++-v3/include/parallel/multiseq_selection.h b/libstdc++-v3/include/parallel/multiseq_selection.h new file mode 100644 index 00000000000..5b34173cff2 --- /dev/null +++ b/libstdc++-v3/include/parallel/multiseq_selection.h @@ -0,0 +1,608 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA.
+ +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/multiseq_selection.h + * @brief Functions to find elements of a certain global rank in + * multiple sorted sequences. Also serves for splitting such + * sequence sets. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H +#define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1 + +#include +#include + +#include + +#include + +namespace __gnu_parallel +{ + /** @brief Compare a pair of types lexicographically, ascending: by @c first through the wrapped comparator, then by @c second through operator<. The comparator is held by reference. */ + template + class lexicographic : public std::binary_function, std::pair, bool> + { + private: + Comparator& comp; + + public: + lexicographic(Comparator& _comp) : comp(_comp) { } + + // XXX const + inline bool + operator()(const std::pair& p1, const std::pair& p2) const + { + if (comp(p1.first, p2.first)) + return true; + + if (comp(p2.first, p1.first)) + return false; + + // Firsts are equivalent under comp, so second decides. + return p1.second < p2.second; + } + }; + + /** @brief Compare a pair of types lexicographically, descending.
*/ + template + class lexicographic_reverse : public std::binary_function + { + private: + Comparator& comp; + + public: + lexicographic_reverse(Comparator& _comp) : comp(_comp) { } + + inline bool + operator()(const std::pair& p1, const std::pair& p2) const + { + if (comp(p2.first, p1.first)) + return true; + + if (comp(p1.first, p2.first)) + return false; + + // Firsts are equivalent under comp, so second decides (reversed). + return p2.second < p1.second; + } + }; + + /** + * @brief Splits several sorted sequences at a certain global rank, + * resulting in a splitting point for each sequence. + * The sequences are passed via a sequence of random-access + * iterator pairs; none of the sequences may be empty. If there + * are several equal elements across the split, the ones on the + * left side will be chosen from sequences with smaller number. + * @param begin_seqs Begin of the sequence of iterator pairs. + * @param end_seqs End of the sequence of iterator pairs. + * @param rank The global rank to partition at. If it equals the total number of elements, every offset is set to the end of its sequence and the function returns at once. + * @param begin_offsets A random-access sequence begin where the + * result will be stored in. Each element of the sequence is an + * iterator that points to the first element on the greater part of + * the respective sequence. + * @param comp The ordering functor, defaults to std::less. + */ + template + void + multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank, + RankIterator begin_offsets, + Comparator comp = std::less< + typename std::iterator_traits::value_type::first_type>::value_type>()) // std::less + { + _GLIBCXX_CALL(end_seqs - begin_seqs) + + typedef typename std::iterator_traits::value_type::first_type It; + typedef typename std::iterator_traits::difference_type difference_type; + typedef typename std::iterator_traits::value_type T; + + lexicographic lcomp(comp); + lexicographic_reverse lrcomp(comp); + + // Number of sequences, number of elements in total (possibly + // including padding).
+ difference_type m = std::distance(begin_seqs, end_seqs), N = 0, nmax, n, r; + + for (int i = 0; i < m; i++) + N += std::distance(begin_seqs[i].first, begin_seqs[i].second); + + if (rank == N) + { + for (int i = 0; i < m; i++) + begin_offsets[i] = begin_seqs[i].second; // Very end. + return; // Split lies behind the last element: done. Falling through would trip the rank < N assertion below or overwrite these offsets. + } + + _GLIBCXX_PARALLEL_ASSERT(m != 0 && N != 0 && rank >= 0 && rank < N); + + difference_type* ns = new difference_type[m]; + difference_type* a = new difference_type[m]; + difference_type* b = new difference_type[m]; + difference_type l; + + ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second); + nmax = ns[0]; + for (int i = 0; i < m; i++) + { + ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second); + nmax = std::max(nmax, ns[i]); + } + + r = log2(nmax) + 1; + + // Pad all lists to this length, at least as long as any ns[i], + // equality iff nmax = 2^k - 1. + l = (1ULL << r) - 1; + + // From now on, including padding. + N = l * m; + + for (int i = 0; i < m; i++) + { + a[i] = 0; + b[i] = l; + } + n = l / 2; + + // Invariants: + // 0 <= a[i] <= ns[i], 0 <= b[i] <= l + +#define S(i) (begin_seqs[i].first) + + // Initial partition. + std::vector > sample; + + for (int i = 0; i < m; i++) + if (n < ns[i]) //sequence long enough + sample.push_back(std::make_pair(S(i)[n], i)); + __gnu_sequential::sort(sample.begin(), sample.end(), lcomp); + + for (int i = 0; i < m; i++) //conceptual infinity + if (n >= ns[i]) //sequence too short, conceptual infinity + sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i)); + + difference_type localrank = rank * m / N ; + + int j; + for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++) + a[sample[j].second] += n + 1; + for (; j < m; j++) + b[sample[j].second] -= n + 1; + + // Further refinement. + while (n > 0) + { + n /= 2; + + int lmax_seq = -1; // to avoid warning + const T* lmax = NULL; // impossible to avoid the warning?
+ for (int i = 0; i < m; i++) + { + if (a[i] > 0) + { + if (!lmax) + { + lmax = &(S(i)[a[i] - 1]); + lmax_seq = i; + } + else + { + // Max, favor rear sequences. + if (!comp(S(i)[a[i] - 1], *lmax)) + { + lmax = &(S(i)[a[i] - 1]); + lmax_seq = i; + } + } + } + } + + int i; + for (i = 0; i < m; i++) + { + difference_type middle = (b[i] + a[i]) / 2; + if (lmax && middle < ns[i] && + lcomp(std::make_pair(S(i)[middle], i), std::make_pair(*lmax, lmax_seq))) + a[i] = std::min(a[i] + n + 1, ns[i]); + else + b[i] -= n + 1; + } + + difference_type leftsize = 0, total = 0; + for (int i = 0; i < m; i++) + { + leftsize += a[i] / (n + 1); + total += l / (n + 1); + } + + difference_type skew = static_cast(static_cast(total) * rank / N - leftsize); + + if (skew > 0) + { + // Move to the left, find smallest. + std::priority_queue, std::vector >, lexicographic_reverse > pq(lrcomp); + + for (int i = 0; i < m; i++) + if (b[i] < ns[i]) + pq.push(std::make_pair(S(i)[b[i]], i)); + + for (; skew != 0 && !pq.empty(); skew--) + { + int source = pq.top().second; + pq.pop(); + + a[source] = std::min(a[source] + n + 1, ns[source]); + b[source] += n + 1; + + if (b[source] < ns[source]) + pq.push(std::make_pair(S(source)[b[source]], source)); + } + } + else if (skew < 0) + { + // Move to the right, find greatest. + std::priority_queue, std::vector >, lexicographic > pq(lcomp); + + for (int i = 0; i < m; i++) + if (a[i] > 0) + pq.push(std::make_pair(S(i)[a[i] - 1], i)); + + for (; skew != 0; skew++) + { + int source = pq.top().second; + pq.pop(); + + a[source] -= n + 1; + b[source] -= n + 1; + + if (a[source] > 0) + pq.push(std::make_pair(S(source)[a[source] - 1], source)); + } + } + } + + // Postconditions: + // a[i] == b[i] in most cases, except when a[i] has been clamped + // because of having reached the boundary + + // Now return the result, calculate the offset. + + // Compare the keys on both edges of the border. + + // Maximum of left edge, minimum of right edge.
+ bool maxleftset = false, minrightset = false; + T maxleft, minright; // Impossible to avoid the warning? NOTE(review): both are computed below but never read again in this function. + for (int i = 0; i < m; i++) + { + if (a[i] > 0) + { + if (!maxleftset) + { + maxleft = S(i)[a[i] - 1]; + maxleftset = true; + } + else + { + // Max, favor rear sequences. + if (!comp(S(i)[a[i] - 1], maxleft)) + maxleft = S(i)[a[i] - 1]; + } + } + if (b[i] < ns[i]) + { + if (!minrightset) + { + minright = S(i)[b[i]]; + minrightset = true; + } + else + { + // Min, favor fore sequences. + if (comp(S(i)[b[i]], minright)) + minright = S(i)[b[i]]; + } + } + } + + + for (int i = 0; i < m; i++) + begin_offsets[i] = S(i) + a[i]; + + delete[] ns; + delete[] a; + delete[] b; + } + + + + /** + * @brief Selects the element at a certain global rank from several + * sorted sequences. + * + * The sequences are passed via a sequence of random-access + * iterator pairs; none of the sequences may be empty. + * @param begin_seqs Begin of the sequence of iterator pairs. + * @param end_seqs End of the sequence of iterator pairs. + * @param rank The global rank to partition at. A std::exception is thrown if there is no data or the rank is out of bounds. + * @param offset The rank of the selected element in the global + * subsequence of elements equal to the selected element. If the + * selected element is unique, this number is 0. + * @param comp The ordering functor, defaults to std::less. + */ + template + T + multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank, + RankType& offset, Comparator comp = std::less()) + { + _GLIBCXX_CALL(end_seqs - begin_seqs) + + typedef typename std::iterator_traits::value_type::first_type It; + typedef typename std::iterator_traits::difference_type difference_type; + + lexicographic lcomp(comp); + lexicographic_reverse lrcomp(comp); + + // Number of sequences, number of elements in total (possibly + // including padding).
+ difference_type m = std::distance(begin_seqs, end_seqs); + difference_type N = 0; + difference_type nmax, n, r; + + for (int i = 0; i < m; i++) + N += std::distance(begin_seqs[i].first, begin_seqs[i].second); + + if (m == 0 || N == 0 || rank < 0 || rank >= N) + { + // Result undefined when there is no data or rank is outside bounds. + throw std::exception(); + } + + + difference_type* ns = new difference_type[m]; + difference_type* a = new difference_type[m]; + difference_type* b = new difference_type[m]; + difference_type l; + + ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second); + nmax = ns[0]; + for (int i = 0; i < m; i++) + { + ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second); + nmax = std::max(nmax, ns[i]); + } + + r = log2(nmax) + 1; + + // Pad all lists to this length, at least as long as any ns[i], + // equality iff nmax = 2^k - 1 + l = pow2(r) - 1; + + // From now on, including padding. + N = l * m; + + for (int i = 0; i < m; i++) + { + a[i] = 0; + b[i] = l; + } + n = l / 2; + + // Invariants: + // 0 <= a[i] <= ns[i], 0 <= b[i] <= l + +#define S(i) (begin_seqs[i].first) + + // Initial partition. + std::vector > sample; + + for (int i = 0; i < m; i++) + if (n < ns[i]) + sample.push_back(std::make_pair(S(i)[n], i)); + __gnu_sequential::sort(sample.begin(), sample.end(), lcomp, sequential_tag()); + + // Conceptual infinity. + for (int i = 0; i < m; i++) + if (n >= ns[i]) + sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i)); + + difference_type localrank = rank * m / N ; + + int j; + for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++) + a[sample[j].second] += n + 1; + for (; j < m; j++) + b[sample[j].second] -= n + 1; + + // Further refinement.
+ while (n > 0) + { + n /= 2; + + const T* lmax = NULL; + for (int i = 0; i < m; i++) + { + if (a[i] > 0) + { + if (!lmax) + { + lmax = &(S(i)[a[i] - 1]); + } + else + { + if (comp(*lmax, S(i)[a[i] - 1])) //max + lmax = &(S(i)[a[i] - 1]); + } + } + } + + int i; + for (i = 0; i < m; i++) + { + difference_type middle = (b[i] + a[i]) / 2; + if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax)) + a[i] = std::min(a[i] + n + 1, ns[i]); + else + b[i] -= n + 1; + } + + difference_type leftsize = 0, total = 0; + for (int i = 0; i < m; i++) + { + leftsize += a[i] / (n + 1); + total += l / (n + 1); + } + + difference_type skew = (unsigned long long)total * rank / N - leftsize; + + if (skew > 0) + { + // Move to the left, find smallest. + std::priority_queue, std::vector >, lexicographic_reverse > pq(lrcomp); + + for (int i = 0; i < m; i++) + if (b[i] < ns[i]) + pq.push(std::make_pair(S(i)[b[i]], i)); + + for (; skew != 0 && !pq.empty(); skew--) + { + int source = pq.top().second; + pq.pop(); + + a[source] = std::min(a[source] + n + 1, ns[source]); + b[source] += n + 1; + + if (b[source] < ns[source]) + pq.push(std::make_pair(S(source)[b[source]], source)); + } + } + else if (skew < 0) + { + // Move to the right, find greatest. + std::priority_queue, std::vector >, lexicographic > pq(lcomp); + + for (int i = 0; i < m; i++) + if (a[i] > 0) + pq.push(std::make_pair(S(i)[a[i] - 1], i)); + + for (; skew != 0; skew++) + { + int source = pq.top().second; + pq.pop(); + + a[source] -= n + 1; + b[source] -= n + 1; + + if (a[source] > 0) + pq.push(std::make_pair(S(source)[a[source] - 1], source)); + } + } + } + + // Postconditions: + // a[i] == b[i] in most cases, except when a[i] has been clamped + // because of having reached the boundary + + // Now return the result, calculate the offset. + + // Compare the keys on both edges of the border. + + // Maximum of left edge, minimum of right edge. + bool maxleftset = false, minrightset = false; + + // Impossible to avoid the warning?
+ T maxleft, minright; + for (int i = 0; i < m; i++) + { + if (a[i] > 0) + { + if (!maxleftset) + { + maxleft = S(i)[a[i] - 1]; + maxleftset = true; + } + else + { + // Max. + if (comp(maxleft, S(i)[a[i] - 1])) + maxleft = S(i)[a[i] - 1]; + } + } + if (b[i] < ns[i]) + { + if (!minrightset) + { + minright = S(i)[b[i]]; + minrightset = true; + } + else + { + // Min. + if (comp(S(i)[b[i]], minright)) + minright = S(i)[b[i]]; + } + } + } + + // Minright is the splitter, in any case. + + if (!maxleftset || comp(minright, maxleft)) + { + // Good luck, everything is split unambiguously. + offset = 0; + } + else + { + // We have to calculate an offset. + offset = 0; + + for (int i = 0; i < m; i++) + { + difference_type lb = std::lower_bound(S(i), S(i) + ns[i], minright, + comp) - S(i); + offset += a[i] - lb; + } + } + + delete[] ns; + delete[] a; + delete[] b; + + return minright; + } +} + +#undef S + +#endif + diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h new file mode 100644 index 00000000000..cdafacbd7a8 --- /dev/null +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -0,0 +1,1639 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING.
If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/multiway_merge.h + * @brief Implementation of sequential and parallel multiway merge. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H +#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H + +#include + +#include +#include +#include +#include +#include +#include +#if _GLIBCXX_ASSERTIONS +#include +#endif + +/** @brief Length of a sequence described by a pair of iterators. */ +#define LENGTH(s) ((s).second - (s).first) + +// XXX need iterator typedefs +namespace __gnu_parallel +{ + template + class guarded_iterator; + + template + inline bool + operator<(guarded_iterator& bi1, + guarded_iterator& bi2); + + template + inline bool + operator<=(guarded_iterator& bi1, + guarded_iterator& bi2); + + /** @brief Iterator wrapper supporting an implicit supremum at the end + of the sequence, dominating all comparisons. + * Deriving from RandomAccessIterator is not possible since + * RandomAccessIterator need not be a class. A wrapper whose current position equals its end position acts as the supremum. + */ + template + class guarded_iterator + { + private: + /** @brief Current iterator position. */ + RandomAccessIterator current; + + /** @brief End iterator of the sequence. */ + RandomAccessIterator end; + + /** @brief Comparator.
*/ + Comparator& comp; + + public: + /** @brief Constructor. Sets iterator to beginning of sequence. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator provided for associated overloaded + * compare operators; held by reference. */ + inline guarded_iterator(RandomAccessIterator begin, + RandomAccessIterator end, Comparator& comp) + : current(begin), end(end), comp(comp) + { } + + /** @brief Pre-increment operator. No check against the end is made. + * @return This. */ + inline guarded_iterator& + operator++() + { + ++current; + return *this; + } + + /** @brief Dereference operator. + * @return Referenced element, returned by value. */ + inline typename std::iterator_traits::value_type + operator*() + { return *current; } + + /** @brief Convert to wrapped iterator. + * @return Wrapped iterator (the current position). */ + inline operator RandomAccessIterator() + { return current; } + + friend bool + operator< (guarded_iterator& bi1, guarded_iterator& bi2); + + friend bool + operator<= (guarded_iterator& bi1, guarded_iterator& bi2); + }; + + /** @brief Compare two elements referenced by guarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less. Two exhausted iterators also yield @c true. */ + template + inline bool + operator<(guarded_iterator& bi1, + guarded_iterator& bi2) + { + if (bi1.current == bi1.end) //bi1 is sup + return bi2.current == bi2.end; //true only if bi2 is sup as well + if (bi2.current == bi2.end) //only bi2 is sup + return true; + return (bi1.comp)(*bi1, *bi2); //normal compare + } + + /** @brief Compare two elements referenced by guarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less equal.
*/ + template + inline bool + operator<=(guarded_iterator& bi1, + guarded_iterator& bi2) + { + if (bi2.current == bi2.end) //bi2 is sup + return bi1.current != bi1.end; //true unless bi1 is sup as well + if (bi1.current == bi1.end) //only bi1 is sup + return false; + return !(bi1.comp)(*bi2, *bi1); //normal compare + } + + template + class unguarded_iterator; + + template + inline bool + operator<(unguarded_iterator& bi1, + unguarded_iterator& bi2); + + template + inline bool + operator<=(unguarded_iterator& bi1, + unguarded_iterator& bi2); + /** @brief Iterator wrapper without end check: the compare operators always dereference both positions, so the caller has to stop before a sequence runs out. */ + template + class unguarded_iterator + { + private: + /** @brief Current iterator position. Held by value: the constructor receives @c begin by value, so a reference member would dangle as soon as the constructor returns. */ + RandomAccessIterator current; + /** @brief Comparator. A plain reference; @c mutable is not permitted on reference members. */ + Comparator& comp; + + public: + /** @brief Constructor. Sets iterator to beginning of sequence. + * @param begin Begin iterator of sequence. + * @param end Unused, only for compatibility. + * @param comp Comparator used by the associated overloaded compare operators. */ + inline unguarded_iterator(RandomAccessIterator begin, + RandomAccessIterator end, Comparator& comp) + : current(begin), comp(comp) + { } + + /** @brief Pre-increment operator. + * @return This. */ + inline unguarded_iterator& + operator++() + { + current++; + return *this; + } + + /** @brief Dereference operator. + * @return Referenced element, returned by value. */ + inline typename std::iterator_traits::value_type + operator*() + { return *current; } + + /** @brief Convert to wrapped iterator. + * @return Wrapped iterator (the current position). */ + inline + operator RandomAccessIterator() + { return current; } + + friend bool + operator< (unguarded_iterator& bi1, unguarded_iterator& bi2); + + friend bool + operator<= (unguarded_iterator& bi1, unguarded_iterator& bi2); + }; + + /** @brief Compare two elements referenced by unguarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less. */ + template + inline bool + operator<(unguarded_iterator& bi1, + unguarded_iterator& bi2) + { + // Normal compare.
+ return (bi1.comp)(*bi1, *bi2); + } + + /** @brief Compare two elements referenced by unguarded iterators. + * @param bi1 First iterator. + * @param bi2 Second iterator. + * @return @c True if less equal. */ + template + inline bool + operator<=(unguarded_iterator& bi1, + unguarded_iterator& bi2) + { + // Normal compare. + return !(bi1.comp)(*bi2, *bi1); + } + + /** @brief Prepare a set of sequences to be merged without an (end) guard. Returns -1 if an empty sequence is found, otherwise the number of elements that lie at or behind the split points and will be left over. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param comp Comparator. + * @param min_sequence Output: index of the first empty sequence if there is one, otherwise of the sequence whose last element is smallest. + * @param stable If true, sequences 0..min_sequence are split with upper_bound instead of lower_bound. + * @pre (seqs_end - seqs_begin > 0) */ + template + typename std::iterator_traits::value_type::first_type>::difference_type + prepare_unguarded(RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, Comparator comp, + int& min_sequence, bool stable) + { + _GLIBCXX_CALL(seqs_end - seqs_begin) + + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + + if ((*seqs_begin).first == (*seqs_begin).second) + { + // Empty sequence found, it's the first one. + min_sequence = 0; + return -1; + } + + // Last element in sequence. + value_type min = *((*seqs_begin).second - 1); + min_sequence = 0; + for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++) + { + if ((*s).first == (*s).second) + { + // Empty sequence found. + min_sequence = static_cast(s - seqs_begin); + return -1; + } + + // Last element in sequence.
+ const value_type& v = *((*s).second - 1); + if (comp(v, min)) //strictly smaller + { + min = v; + min_sequence = static_cast(s - seqs_begin); + } + } + + difference_type overhang_size = 0; + + int s = 0; + for (s = 0; s <= min_sequence; s++) + { + RandomAccessIterator1 split; + if (stable) + split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second, + min, comp); + else + split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, + min, comp); + + overhang_size += seqs_begin[s].second - split; + } + + for (; s < (seqs_end - seqs_begin); s++) + { + RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp); + overhang_size += seqs_begin[s].second - split; + } + + // So many elements will be left over afterwards. + return overhang_size; + } + + /** @brief Prepare a set of sequences to be merged with an (end) guard (sentinel). The largest last element is written to the end position of every sequence. NOTE(review): presumably each sequence has a writable slot at its end position and at least one sequence is non-empty; confirm with callers. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param comp Comparator. */ + template + typename std::iterator_traits::value_type::first_type>::difference_type + prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + Comparator comp) + { + _GLIBCXX_CALL(seqs_end - seqs_begin) + + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + typedef typename std::iterator_traits::difference_type + difference_type; + + // Largest last element over all non-empty sequences. + value_type max; + bool max_found = false; + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) + { + if ((*s).first == (*s).second) + continue; + + // Last element in sequence. + value_type& v = *((*s).second - 1); + + // Strictly greater.
+ if (!max_found || comp(max, v)) + max = v; + max_found = true; + } + + difference_type overhang_size = 0; + + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) + { + RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp); + overhang_size += (*s).second - split; + + // Set sentinel. + *((*s).second) = max; + } + + // So many elements will be left over afterwards. + return overhang_size; + } + + /** @brief Highly efficient 3-way merging procedure. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator of output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Unused, stable anyway. + * @return End iterator of output sequence. */ + template class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator> + RandomAccessIterator3 + multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) + { + _GLIBCXX_CALL(length); + + typedef _DifferenceTp difference_type; + + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + + if (length == 0) + return target; + + iterator + seq0(seqs_begin[0].first, seqs_begin[0].second, comp), + seq1(seqs_begin[1].first, seqs_begin[1].second, comp), + seq2(seqs_begin[2].first, seqs_begin[2].second, comp); + + if (seq0 <= seq1) + { + if (seq1 <= seq2) + goto s012; + else + if (seq2 < seq0) + goto s201; + else + goto s021; + } + else + { + if (seq1 <= seq2) + { + if (seq0 <= seq2) + goto s102; + else + goto s120; + } + else + goto s210; + } + +#define Merge3Case(a,b,c,c0,c1) \ + s ## a ## b ## c : \ + *target = *seq ## a; \ + ++target; \ +
 length--; \ + ++seq ## a; \ + if (length == 0) goto finish; \ + if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \ + if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \ + goto s ## b ## c ## a; + + Merge3Case(0, 1, 2, <=, <=); + Merge3Case(1, 2, 0, <=, < ); + Merge3Case(2, 0, 1, < , < ); + Merge3Case(1, 0, 2, < , <=); + Merge3Case(0, 2, 1, <=, <=); + Merge3Case(2, 1, 0, < , < ); + +#undef Merge3Case + + finish: + ; + + seqs_begin[0].first = seq0; + seqs_begin[1].first = seq1; + seqs_begin[2].first = seq2; + + return target; + } + /** @brief 3-way merge in two phases: multiway_merge_3_variant merges as far as prepare_unguarded allows, then merge_advance merges the remaining overhang from the two sequences other than @c min_seq. */ + template + RandomAccessIterator3 + multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) + { + _GLIBCXX_CALL(length); + + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + + int min_seq; + RandomAccessIterator3 target_end; + + // Stable anyway. + difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true); + + difference_type total_length = 0; + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) + total_length += LENGTH(*s); + + if (overhang != -1) + { + difference_type unguarded_length = std::min(length, total_length - overhang); + target_end = multiway_merge_3_variant + (seqs_begin, seqs_end, target, comp, unguarded_length, stable); + overhang = length - unguarded_length; + } + else + { + // Empty sequence found. + overhang = length; + target_end = target; + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); +#endif + + switch (min_seq) + { + case 0: + // Iterators will be advanced accordingly.
+ target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second, + seqs_begin[2].first, seqs_begin[2].second, + target_end, overhang, comp); + break; + case 1: + target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, + seqs_begin[2].first, seqs_begin[2].second, + target_end, overhang, comp); + break; + case 2: + target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, + seqs_begin[1].first, seqs_begin[1].second, + target_end, overhang, comp); + break; + default: + _GLIBCXX_PARALLEL_ASSERT(false); + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); +#endif + + return target_end; + } + + /** @brief Highly efficient 4-way merging procedure. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator of output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Unused, stable anyway. + * @return End iterator of output sequence.
+   */
+  template<template<typename RAI, typename C> class iterator,
+           typename RandomAccessIteratorIterator,
+           typename RandomAccessIterator3,
+           typename _DifferenceTp,
+           typename Comparator>
+    RandomAccessIterator3
+    multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
+                             RandomAccessIteratorIterator seqs_end,
+                             RandomAccessIterator3 target,
+                             Comparator comp, _DifferenceTp length,
+                             bool stable)
+    {
+      _GLIBCXX_CALL(length);
+      typedef _DifferenceTp difference_type;
+
+      typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+        ::value_type::first_type
+        RandomAccessIterator1;
+      typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+        value_type;
+
+      iterator<RandomAccessIterator1, Comparator>
+        seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
+        seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
+        seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
+        seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
+
+      // The current order of the four heads is encoded in the program
+      // counter: label sABCD is reached when seqA is the head to emit
+      // next, followed by seqB, seqC, seqD.
+      //
+      // Decision(a,b,c,d): a, b, c are already ordered; insert d.  d is
+      // always sequence 3 here, so the strict `<' lets the
+      // lower-numbered sequence win ties.
+#define Decision(a,b,c,d) {                                     \
+        if (seq ## d < seq ## a) goto s ## d ## a ## b ## c;    \
+        if (seq ## d < seq ## b) goto s ## a ## d ## b ## c;    \
+        if (seq ## d < seq ## c) goto s ## a ## b ## d ## c;    \
+        goto s ## a ## b ## c ## d;  }
+
+      // Order the first three heads, then place the fourth.
+      if (seq0 <= seq1)
+        {
+          if (seq1 <= seq2)
+            Decision(0,1,2,3)
+          else
+            if (seq2 < seq0)
+              Decision(2,0,1,3)
+            else
+              Decision(0,2,1,3)
+        }
+      else
+        {
+          if (seq1 <= seq2)
+            {
+              if (seq0 <= seq2)
+                Decision(1,0,2,3)
+              else
+                Decision(1,2,0,3)
+            }
+          else
+            Decision(2,1,0,3)
+        }
+
+      // Merge4Case(a,b,c,d,c0,c1,c2): emit the head of seqA, advance
+      // it, then re-insert seqA among b, c, d and jump to the label
+      // for the resulting order.  c0/c1/c2 are the comparison
+      // operators used against b/c/d; in the table below they are
+      // `<=' where a is the lower-numbered sequence and `<' otherwise.
+#define Merge4Case(a,b,c,d,c0,c1,c2)                            \
+      s ## a ## b ## c ## d:                                    \
+      if (length == 0) goto finish;                             \
+      *target = *seq ## a;                                      \
+      ++target;                                                 \
+      length--;                                                 \
+      ++seq ## a;                                               \
+      if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;     \
+      if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;     \
+      if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;     \
+      goto s ## b ## c ## d ## a;
+
+      Merge4Case(0, 1, 2, 3, <=, <=, <=);
+      Merge4Case(0, 1, 3, 2, <=, <=, <=);
+      Merge4Case(0, 2, 1, 3, <=, <=, <=);
+      Merge4Case(0, 2, 3, 1, <=, <=, <=);
+      Merge4Case(0, 3, 1, 2, <=, <=, <=);
+      Merge4Case(0, 3, 2, 1, <=, <=, <=);
+      Merge4Case(1, 0, 2, 3, < , <=, <=);
+      Merge4Case(1, 0, 3, 2, < , <=, <=);
+      Merge4Case(1, 2, 0, 3, <=, < , <=);
+      Merge4Case(1, 2, 3, 0, <=, <=, < );
+      Merge4Case(1, 3, 0, 2, <=, < , <=);
+      Merge4Case(1, 3, 2, 0, <=, <=, < );
+      Merge4Case(2, 0, 1, 3, < , < , <=);
+      Merge4Case(2, 0, 3, 1, < , <=, < );
+      Merge4Case(2, 1, 0, 3, < , < , <=);
+      Merge4Case(2, 1, 3, 0, < , <=, < );
+      Merge4Case(2, 3, 0, 1, <=, < , < );
+      Merge4Case(2, 3, 1, 0, <=, < , < );
+      Merge4Case(3, 0, 1, 2, < , < , < );
+      Merge4Case(3, 0, 2, 1, < , < , < );
+      Merge4Case(3, 1, 0, 2, < , < , < );
+      Merge4Case(3, 1, 2, 0, < , < , < );
+      Merge4Case(3, 2, 0, 1, < , < , < );
+      Merge4Case(3, 2, 1, 0, < , < , < );
+
+#undef Merge4Case
+#undef Decision
+
+    finish:
+      ;
+
+      // Write the advanced positions back to the caller's pairs.
+      seqs_begin[0].first = seq0;
+      seqs_begin[1].first = seq1;
+      seqs_begin[2].first = seq2;
+      seqs_begin[3].first = seq3;
+
+      return target;
+    }
+
+  /** @brief 4-way merge in two phases: an unguarded phase followed by
+   *  a guarded 3-way merge of the remaining elements.
+   *
+   *  prepare_unguarded() supplies the overhang and the index @c min_seq
+   *  of the sequence left out of the second phase; a result of -1
+   *  means an empty sequence was found, in which case the first phase
+   *  is skipped.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Passed on to the merge variants.
+   *  @return End iterator of output sequence.
+   */
+  template<typename RandomAccessIteratorIterator,
+           typename RandomAccessIterator3,
+           typename _DifferenceTp,
+           typename Comparator>
+    RandomAccessIterator3
+    multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
+                              RandomAccessIteratorIterator seqs_end,
+                              RandomAccessIterator3 target,
+                              Comparator comp, _DifferenceTp length,
+                              bool stable)
+    {
+      _GLIBCXX_CALL(length);
+      typedef _DifferenceTp difference_type;
+
+      typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+        ::value_type::first_type
+        RandomAccessIterator1;
+      typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+        value_type;
+
+      int min_seq;
+      RandomAccessIterator3 target_end;
+
+      // Stable anyway.
+      difference_type overhang =
+        prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
+
+      difference_type total_length = 0;
+      for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
+        total_length += LENGTH(*s);
+
+      if (overhang != -1)
+        {
+          difference_type unguarded_length =
+            std::min(length, total_length - overhang);
+          target_end = multiway_merge_4_variant<unguarded_iterator>
+            (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+          // From here on, overhang is what is still left to merge.
+          overhang = length - unguarded_length;
+        }
+      else
+        {
+          // Empty sequence found.
+          overhang = length;
+          target_end = target;
+        }
+
+#if _GLIBCXX_ASSERTIONS
+      _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
+      _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+      std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
+        one_missing(seqs_begin, seqs_end);
+      one_missing.erase(one_missing.begin() + min_seq); // Remove.
+
+      target_end = multiway_merge_3_variant<guarded_iterator>
+        (one_missing.begin(), one_missing.end(), target_end, comp,
+         overhang, stable);
+
+      // Insert back again.
+      one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
+      // Write back modified iterators.
+      copy(one_missing.begin(), one_missing.end(), seqs_begin);
+
+#if _GLIBCXX_ASSERTIONS
+      _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
+      _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+#endif
+
+      return target_end;
+    }
+
+  /** @brief Basic multi-way merging procedure.
+   *
+   *  The head elements are kept in a sorted array, new heads are
+   *  inserted linearly.
+   *  @param seqs_begin Begin iterator of iterator pair input sequence.
+   *  The @c first members are advanced past the merged elements.
+   *  @param seqs_end End iterator of iterator pair input sequence.
+   *  @param target Begin iterator of output sequence.
+   *  @param comp Comparator.
+   *  @param length Maximum length to merge.
+   *  @param stable Stable merging incurs a performance penalty.
+   *  @return End iterator of output sequence.
+   */
+  template<typename RandomAccessIteratorIterator,
+           typename RandomAccessIterator3,
+           typename _DifferenceTp,
+           typename Comparator>
+    RandomAccessIterator3
+    multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
+                          RandomAccessIteratorIterator seqs_end,
+                          RandomAccessIterator3 target,
+                          Comparator comp, _DifferenceTp length,
+                          bool stable)
+    {
+      _GLIBCXX_CALL(length)
+
+      typedef _DifferenceTp difference_type;
+      typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+        ::value_type::first_type
+        RandomAccessIterator1;
+      typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+        value_type;
+
+      // k: number of input sequences; nrp: number of remaining
+      // (non-exhausted) pieces.
+      int k = static_cast<int>(seqs_end - seqs_begin), nrp;
+
+      // pl[i] is the current head value of piece i in sorted order,
+      // source[i] the index of the sequence it came from.
+      value_type* pl = new value_type[k];
+      int* source = new int[k];
+
+#define POS(i) seqs_begin[(i)].first
+#define STOPS(i) seqs_begin[(i)].second
+
+      // Write entries into queue, skipping empty sequences.
+      nrp = 0;
+      for (int pi = 0; pi < k; pi++)
+        {
+          if (STOPS(pi) != POS(pi))
+            {
+              pl[nrp] = *(POS(pi));
+              source[nrp] = pi;
+              nrp++;
+            }
+        }
+
+      // Bubble-sort the heads; the stable variant breaks ties by
+      // sequence index.
+      if (stable)
+        {
+          for (int pass = 0; pass < nrp - 1; pass++)
+            for (int pi = nrp - 1; pi > pass; pi--)
+              if (comp(pl[pi], pl[pi - 1]) ||
+                  (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
+                {
+                  std::swap(pl[pi - 1], pl[pi]);
+                  std::swap(source[pi - 1], source[pi]);
+                }
+        }
+      else
+        {
+          for (int pass = 0; pass < nrp - 1; pass++)
+            for (int pi = nrp - 1; pi > pass; pi--)
+              if (comp(pl[pi], pl[pi - 1]))
+                {
+                  std::swap(pl[pi - 1], pl[pi]);
+                  std::swap(source[pi - 1], source[pi]);
+                }
+        }
+
+      // Iterate.
+      if (stable)
+        {
+          int j;
+          while (nrp > 0 && length > 0)
+            {
+              // With a single piece left there is no source[1] to look
+              // at; both branches behave identically in that case.
+              if (nrp == 1 || source[0] < source[1])
+                {
+                  // pl[0] <= pl[1]
+                  while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
+                    {
+                      *target = pl[0];
+                      ++target;
+                      ++POS(source[0]);
+                      length--;
+                      if (POS(source[0]) == STOPS(source[0]))
+                        {
+                          // Move everything to the left.
+                          for (int s = 0; s < nrp - 1; s++)
+                            {
+                              pl[s] = pl[s + 1];
+                              source[s] = source[s + 1];
+                            }
+                          nrp--;
+                          break;
+                        }
+                      else
+                        pl[0] = *(POS(source[0]));
+                    }
+                }
+              else
+                {
+                  // pl[0] < pl[1]
+                  while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
+                    {
+                      *target = pl[0];
+                      ++target;
+                      ++POS(source[0]);
+                      length--;
+                      if (POS(source[0]) == STOPS(source[0]))
+                        {
+                          for (int s = 0; s < nrp - 1; s++)
+                            {
+                              pl[s] = pl[s + 1];
+                              source[s] = source[s + 1];
+                            }
+                          nrp--;
+                          break;
+                        }
+                      else
+                        pl[0] = *(POS(source[0]));
+                    }
+                }
+
+              // Sink down.
+              j = 1;
+              while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
+                                   (!comp(pl[j - 1], pl[j])
+                                    && (source[j] < source[j - 1]))))
+                {
+                  std::swap(pl[j - 1], pl[j]);
+                  std::swap(source[j - 1], source[j]);
+                  j++;
+                }
+            }
+        }
+      else
+        {
+          int j;
+          while (nrp > 0 && length > 0)
+            {
+              // pl[0] <= pl[1]
+              // The length limit must also hold when only one piece is
+              // left, hence the parentheses around the `||'.
+              while ((nrp == 1 || !comp(pl[1], pl[0])) && length > 0)
+                {
+                  *target = pl[0];
+                  ++target;
+                  ++POS(source[0]);
+                  length--;
+                  if (POS(source[0]) == STOPS(source[0]))
+                    {
+                      for (int s = 0; s < (nrp - 1); s++)
+                        {
+                          pl[s] = pl[s + 1];
+                          source[s] = source[s + 1];
+                        }
+                      nrp--;
+                      break;
+                    }
+                  else
+                    pl[0] = *(POS(source[0]));
+                }
+
+              // Sink down.
+              j = 1;
+              while ((j < nrp) && comp(pl[j], pl[j - 1]))
+                {
+                  std::swap(pl[j - 1], pl[j]);
+                  std::swap(source[j - 1], source[j]);
+                  j++;
+                }
+            }
+        }
+
+      delete[] pl;
+      delete[] source;
+
+      return target;
+    }
+
+  /** @brief Multi-way merging procedure for a high branching factor,
+   * guarded case.
+   *
+   * The head elements are kept in a loser tree.
+   * @param seqs_begin Begin iterator of iterator pair input sequence.
+   * @param seqs_end End iterator of iterator pair input sequence.
+   * @param target Begin iterator of output sequence.
+   * @param comp Comparator.
+   * @param length Maximum length to merge.
+   * @param stable Stable merging incurs a performance penalty.
+   * @return End iterator of output sequence.
+ */ + template + RandomAccessIterator3 + multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + + int k = static_cast(seqs_end - seqs_begin); + + LT lt(k, comp); + + difference_type total_length = 0; + + for (int t = 0; t < k; t++) + { + if (stable) + { + if (seqs_begin[t].first == seqs_begin[t].second) + lt.insert_start_stable(value_type(), t, true); + else + lt.insert_start_stable(*seqs_begin[t].first, t, false); + } + else + { + if (seqs_begin[t].first == seqs_begin[t].second) + lt.insert_start(value_type(), t, true); + else + lt.insert_start(*seqs_begin[t].first, t, false); + } + + total_length += LENGTH(seqs_begin[t]); + } + + if (stable) + lt.init_stable(); + else + lt.init(); + + total_length = std::min(total_length, length); + + int source; + + if (stable) + { + for (difference_type i = 0; i < total_length; i++) + { + // Take out. + source = lt.get_min_source(); + + *(target++) = *(seqs_begin[source].first++); + + // Feed. + if (seqs_begin[source].first == seqs_begin[source].second) + lt.delete_min_insert_stable(value_type(), true); + else + // Replace from same source. + lt.delete_min_insert_stable(*seqs_begin[source].first, false); + + } + } + else + { + for (difference_type i = 0; i < total_length; i++) + { + //take out + source = lt.get_min_source(); + + *(target++) = *(seqs_begin[source].first++); + + // Feed. + if (seqs_begin[source].first == seqs_begin[source].second) + lt.delete_min_insert(value_type(), true); + else + // Replace from same source. 
+ lt.delete_min_insert(*seqs_begin[source].first, false); + } + } + + return target; + } + + /** @brief Multi-way merging procedure for a high branching factor, + * unguarded case. + * + * The head elements are kept in a loser tree. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Stable merging incurs a performance penalty. + * @return End iterator of output sequence. + * @pre No input will run out of elements during the merge. + */ + template + RandomAccessIterator3 + multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) + { + _GLIBCXX_CALL(length) + typedef _DifferenceTp difference_type; + + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + + int k = seqs_end - seqs_begin; + + LT lt(k, comp); + + difference_type total_length = 0; + + for (int t = 0; t < k; t++) + { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second); +#endif + if (stable) + lt.insert_start_stable(*seqs_begin[t].first, t, false); + else + lt.insert_start(*seqs_begin[t].first, t, false); + + total_length += LENGTH(seqs_begin[t]); + } + + if (stable) + lt.init_stable(); + else + lt.init(); + + // Do not go past end. + length = std::min(total_length, length); + + int source; + +#if _GLIBCXX_ASSERTIONS + difference_type i = 0; +#endif + + if (stable) + { + RandomAccessIterator3 target_end = target + length; + while (target < target_end) + { + // Take out. 
+ source = lt.get_min_source(); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1))); +#endif + + *(target++) = *(seqs_begin[source].first++); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1)); + i++; +#endif + // Feed. + // Replace from same source. + lt.delete_min_insert_stable(*seqs_begin[source].first, false); + + } + } + else + { + RandomAccessIterator3 target_end = target + length; + while (target < target_end) + { + // Take out. + source = lt.get_min_source(); + +#if _GLIBCXX_ASSERTIONS + if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1))) + printf(" %i %i %i\n", length, i, source); + _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1))); +#endif + + *(target++) = *(seqs_begin[source].first++); + +#if _GLIBCXX_ASSERTIONS + if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1))) + printf(" %i %i %i\n", length, i, source); + _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)); + i++; +#endif + // Feed. + // Replace from same source. 
+ lt.delete_min_insert(*seqs_begin[source].first, false); + } + } + + return target; + } + + template + struct loser_tree_traits + { + typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT; + }; + + + /*#define NO_POINTER(T) \ + template \ + struct loser_tree_traits \ + { \ + typedef LoserTreePointer LT; \ + };*/ + // + // NO_POINTER(unsigned char) + // NO_POINTER(char) + // NO_POINTER(unsigned short) + // NO_POINTER(short) + // NO_POINTER(unsigned int) + // NO_POINTER(int) + // NO_POINTER(unsigned long) + // NO_POINTER(long) + // NO_POINTER(unsigned long long) + // NO_POINTER(long long) + // + // #undef NO_POINTER + + template + struct loser_tree_traits_unguarded + { + typedef LoserTreeUnguarded<_ValueTp, Comparator> LT; + }; + + /*#define NO_POINTER_UNGUARDED(T) \ + template \ + struct loser_tree_traits_unguarded \ + { \ + typedef LoserTreePointerUnguarded LT; \ + };*/ + // + // NO_POINTER_UNGUARDED(unsigned char) + // NO_POINTER_UNGUARDED(char) + // NO_POINTER_UNGUARDED(unsigned short) + // NO_POINTER_UNGUARDED(short) + // NO_POINTER_UNGUARDED(unsigned int) + // NO_POINTER_UNGUARDED(int) + // NO_POINTER_UNGUARDED(unsigned long) + // NO_POINTER_UNGUARDED(long) + // NO_POINTER_UNGUARDED(unsigned long long) + // NO_POINTER_UNGUARDED(long long) + // + // #undef NO_POINTER_UNGUARDED + + template + RandomAccessIterator3 + multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + + int min_seq; + RandomAccessIterator3 target_end; + difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, + comp, min_seq, stable); + + difference_type total_length = 0; + for (RandomAccessIteratorIterator s = 
seqs_begin; s != seqs_end; s++) + total_length += LENGTH(*s); + + if (overhang != -1) + { + difference_type unguarded_length = std::min(length, total_length - overhang); + target_end = multiway_merge_loser_tree_unguarded + ::LT> + (seqs_begin, seqs_end, target, comp, unguarded_length, stable); + overhang = length - unguarded_length; + } + else + { + // Empty sequence found. + overhang = length; + target_end = target; + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); +#endif + + target_end = multiway_merge_loser_tree + ::LT> + (seqs_begin, seqs_end, target_end, comp, overhang, stable); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); +#endif + + return target_end; + } + + template + RandomAccessIterator3 + multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef std::iterator_traits traits_type; + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + + RandomAccessIterator3 target_end; + difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp); + + difference_type total_length = 0; + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) + { + total_length += LENGTH(*s); + + // Sentinel spot. 
+ (*s).second++; + } + + difference_type unguarded_length = std::min(length, total_length - overhang); + target_end = multiway_merge_loser_tree_unguarded + ::LT> + (seqs_begin, seqs_end, target, comp, unguarded_length, stable); + overhang = length - unguarded_length; + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); +#endif + + // Copy rest stable. + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++) + { + // Restore. + (*s).second--; + difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s)); + target_end = std::copy((*s).first, (*s).first + local_length, target_end); + (*s).first += local_length; + overhang -= local_length; + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(overhang == 0); + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); +#endif + + return target_end; + } + + /** @brief Sequential multi-way merging switch. + * + * The decision if based on the branching factor and runtime settings. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Stable merging incurs a performance penalty. + * @param sentinel The sequences have a sentinel element. + * @return End iterator of output sequence. 
*/ + template + RandomAccessIterator3 + multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + +#if _GLIBCXX_ASSERTIONS + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) + _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp)); +#endif + + RandomAccessIterator3 return_target = target; + int k = static_cast(seqs_end - seqs_begin); + + Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm; + + if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL) + mwma = Settings::LOSER_TREE_COMBINED; + + switch (k) + { + case 0: + break; + case 1: + return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target); + seqs_begin[0].first += length; + break; + case 2: + return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp); + break; + case 3: + switch (mwma) + { + case Settings::LOSER_TREE_COMBINED: + return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable); + break; + case Settings::LOSER_TREE_SENTINEL: + return_target = multiway_merge_3_variant(seqs_begin, seqs_end, target, comp, length, stable); + break; + default: + return_target = multiway_merge_3_variant(seqs_begin, seqs_end, target, comp, length, stable); + break; + } + break; + case 4: + switch (mwma) + { + case Settings::LOSER_TREE_COMBINED: + return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable); + break; + case Settings::LOSER_TREE_SENTINEL: + return_target = multiway_merge_4_variant(seqs_begin, seqs_end, target, 
comp, length, stable); + break; + default: + return_target = multiway_merge_4_variant(seqs_begin, seqs_end, target, comp, length, stable); + break; + } + break; + default: + { + switch (mwma) + { + case Settings::BUBBLE: + return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable); + break; +#if _GLIBCXX_LOSER_TREE_EXPLICIT + case Settings::LOSER_TREE_EXPLICIT: + return_target = multiway_merge_loser_tree >(seqs_begin, seqs_end, target, comp, length, stable); + break; +#endif +#if _GLIBCXX_LOSER_TREE + case Settings::LOSER_TREE: + return_target = multiway_merge_loser_tree >(seqs_begin, seqs_end, target, comp, length, stable); + break; +#endif +#if _GLIBCXX_LOSER_TREE_COMBINED + case Settings::LOSER_TREE_COMBINED: + return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable); + break; +#endif +#if _GLIBCXX_LOSER_TREE_SENTINEL + case Settings::LOSER_TREE_SENTINEL: + return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable); + break; +#endif + default: + // multiway_merge algorithm not implemented. + _GLIBCXX_PARALLEL_ASSERT(0); + break; + } + } + } +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); +#endif + + return return_target; + } + + /** @brief Parallel multi-way merge routine. + * + * The decision if based on the branching factor and runtime settings. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Stable merging incurs a performance penalty. + * @param sentinel Ignored. + * @return End iterator of output sequence. 
+ */ + template + RandomAccessIterator3 + parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel) + { + _GLIBCXX_CALL(length) + + typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + +#if _GLIBCXX_ASSERTIONS + for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++) + _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp)); +#endif + + // k sequences. + int k = static_cast(seqs_end - seqs_begin); + + difference_type total_length = 0; + for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++) + total_length += LENGTH(*raii); + + _GLIBCXX_CALL(total_length) + + if (total_length == 0 || k == 0) + return target; + + thread_index_t num_threads = static_cast(std::min(static_cast(get_max_threads()), total_length)); + + Timing* t = new Timing[num_threads]; + + for (int pr = 0; pr < num_threads; pr++) + t[pr].tic(); + + bool tight = (total_length == length); + + // Thread t will have to merge pieces[iam][0..k - 1] + std::vector >* pieces = new std::vector >[num_threads]; + for (int s = 0; s < num_threads; s++) + pieces[s].resize(k); + + difference_type num_samples = Settings::merge_oversampling * num_threads; + + if (Settings::multiway_merge_splitting == Settings::SAMPLING) + { + value_type* samples = new value_type[k * num_samples]; + // Sample. 
+ for (int s = 0; s < k; s++) + for (int i = 0; (difference_type)i < num_samples; i++) + { + difference_type sample_index = static_cast(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length)); + samples[s * num_samples + i] = seqs_begin[s].first[sample_index]; + } + + if (stable) + __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp); + else + __gnu_sequential::sort(samples, samples + (num_samples * k), comp); + + for (int slab = 0; slab < num_threads; slab++) + // For each slab / processor. + for (int seq = 0; seq < k; seq++) + { + // For each sequence. + if (slab > 0) + pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first; + else + { + // Absolute beginning. + pieces[slab][seq].first = 0; + } + if ((slab + 1) < num_threads) + pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first; + else + pieces[slab][seq].second = LENGTH(seqs_begin[seq]); //absolute ending + } + delete[] samples; + } + else + { + // (Settings::multiway_merge_splitting == Settings::EXACT). + std::vector* offsets = new std::vector[num_threads]; + std::vector > se(k); + + copy(seqs_begin, seqs_end, se.begin()); + + difference_type borders[num_threads + 1]; + equally_split(length, num_threads, borders); + + for (int s = 0; s < (num_threads - 1); s++) + { + offsets[s].resize(k); + multiseq_partition(se.begin(), se.end(), borders[s + 1], + offsets[s].begin(), comp); + + // Last one also needed and available. + if (!tight) + { + offsets[num_threads - 1].resize(k); + multiseq_partition(se.begin(), se.end(), (difference_type)length, + offsets[num_threads - 1].begin(), comp); + } + } + + + for (int slab = 0; slab < num_threads; slab++) + { + // For each slab / processor. 
+ for (int seq = 0; seq < k; seq++) + { + // For each sequence. + if (slab == 0) + { + // Absolute beginning. + pieces[slab][seq].first = 0; + } + else + pieces[slab][seq].first = pieces[slab - 1][seq].second; + if (!tight || slab < (num_threads - 1)) + pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first; + else + { + // slab == num_threads - 1 + pieces[slab][seq].second = LENGTH(seqs_begin[seq]); + } + } + } + delete[] offsets; + } + + for (int pr = 0; pr < num_threads; pr++) + t[pr].tic(); + +# pragma omp parallel num_threads(num_threads) + { + thread_index_t iam = omp_get_thread_num(); + + t[iam].tic(); + + difference_type target_position = 0; + + for (int c = 0; c < k; c++) + target_position += pieces[iam][c].first; + + if (k > 2) + { + std::pair* chunks = new std::pair[k]; + + difference_type local_length = 0; + for (int s = 0; s < k; s++) + { + chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second); + local_length += LENGTH(chunks[s]); + } + + multiway_merge(chunks, chunks + k, target + target_position, comp, + std::min(local_length, length - target_position), + stable, false, sequential_tag()); + + delete[] chunks; + } + else if (k == 2) + { + RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first; + merge_advance(begin0, + seqs_begin[0].first + pieces[iam][0].second, + begin1, + seqs_begin[1].first + pieces[iam][1].second, + target + target_position, + (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first), + comp); + } + + t[iam].tic(); + + } + + for (int pr = 0; pr < num_threads; pr++) + t[pr].tic(); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); +#endif + + // Update ends of sequences. 
+ for (int s = 0; s < k; s++) + seqs_begin[s].first += pieces[num_threads - 1][s].second; + + delete[] pieces; + + for (int pr = 0; pr < num_threads; pr++) + t[pr].tic(); + for (int pr = 0; pr < num_threads; pr++) + t[pr].print(); + delete[] t; + + return target + length; + } + + /** + * @brief Multi-way merging front-end. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Stable merging incurs a performance penalty. + * @return End iterator of output sequence. + */ + template + RandomAccessIterator3 + multiway_merge(RandomAccessIteratorPairIterator seqs_begin, + RandomAccessIteratorPairIterator seqs_end, + RandomAccessIterator3 target, Comparator comp, + _DifferenceTp length, bool stable) + { + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + if (seqs_begin == seqs_end) + return target; + + RandomAccessIterator3 target_end; + if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) + target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false); + else + target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag()); + + return target_end; + } + + /** @brief Multi-way merging front-end. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Stable merging incurs a performance penalty. + * @return End iterator of output sequence. 
+ * @pre For each @c i, @c seqs_begin[i].second must be the end + * marker of the sequence, but also reference the one more sentinel + * element. */ + template + RandomAccessIterator3 + multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin, + RandomAccessIteratorPairIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, + _DifferenceTp length, + bool stable) + { + typedef _DifferenceTp difference_type; + + if (seqs_begin == seqs_end) + return target; + + _GLIBCXX_CALL(seqs_end - seqs_begin) + + if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) + return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits::difference_type)length, stable, true); + else + return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag()); + } +} + +#endif diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h new file mode 100644 index 00000000000..7f0f3c06922 --- /dev/null +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -0,0 +1,413 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. 
If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/multiway_mergesort.h + * @brief Parallel multiway merge sort. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MERGESORT_H +#define _GLIBCXX_PARALLEL_MERGESORT_H 1 + +#include + +#include +#include +#include +#include +#include + +namespace __gnu_parallel +{ + + /** @brief Subsequence description, given by a begin and an end position. */ + template + struct Piece + { + typedef _DifferenceTp difference_type; + + /** @brief Begin of subsequence (position within a sorted chunk). */ + difference_type begin; + + /** @brief End of subsequence; end - begin is the length of the piece. */ + difference_type end; + }; + + /** @brief Data accessed by all threads. + * + * PMWMS = parallel multiway mergesort */ + template + struct PMWMSSortingData + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + /** @brief Input begin. */ + RandomAccessIterator source; + + /** @brief Start indices, per thread; starts[t + 1] - starts[t] is the + * length of the chunk of thread t. */ + difference_type* starts; + + /** @brief Temporary arrays for each thread. + * + * Indirection allows using the temporary storage in different + * ways, without code duplication.
+ * @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */ + value_type** temporaries; + +#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST + /** @brief Storage in which to sort. */ + RandomAccessIterator* sorting_places; + + /** @brief Storage into which to merge. */ + value_type** merging_places; +#else + /** @brief Storage in which to sort. */ + value_type** sorting_places; + + /** @brief Storage into which to merge. */ + RandomAccessIterator* merging_places; +#endif + /** @brief Samples. */ + value_type* samples; + + /** @brief Offsets to add to the found positions. */ + difference_type* offsets; + + /** @brief Pieces of data to merge @c [thread][sequence] */ + std::vector >* pieces; + }; + + /** @brief Thread local data for PMWMS. */ + template + struct PMWMSSorterPU + { + /** @brief Total number of threads involved. */ + thread_index_t num_threads; + /** @brief Number of owning thread. */ + thread_index_t iam; + /** @brief Stable sorting desired. */ + bool stable; + /** @brief Pointer to global data. */ + PMWMSSortingData* sd; + }; + + /** + * @brief Select samples from a sequence. + * @param d Pointer to thread-local data. Result will be placed in + * @c d->sd->samples, in the slots + * [d->iam * num_samples, (d->iam + 1) * num_samples). + * @param num_samples Output parameter: set to the number of samples + * selected, i. e. Settings::sort_mwms_oversampling * d->num_threads - 1. + * + * The sample positions inside this thread's chunk are computed by + * equally_split(); the values are read through @c d->sd->source. + */ + template + inline void + determine_samples(PMWMSSorterPU* d, + _DifferenceTp& num_samples) + { + typedef _DifferenceTp difference_type; + + PMWMSSortingData* sd = d->sd; + + num_samples = Settings::sort_mwms_oversampling * d->num_threads - 1; + + difference_type es[num_samples + 2]; + equally_split(sd->starts[d->iam + 1] - sd->starts[d->iam], num_samples + 1, es); + + for (difference_type i = 0; i < num_samples; i++) + sd->samples[d->iam * num_samples + i] = sd->source[sd->starts[d->iam] + es[i + 1]]; + } + + /** @brief PMWMS code executed by each thread. + * @param d Pointer to thread-local data. + * @param comp Comparator.
+ */ + template + inline void + parallel_sort_mwms_pu(PMWMSSorterPU* d, + Comparator& comp) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + Timing t; + + t.tic(); + + PMWMSSortingData* sd = d->sd; + thread_index_t iam = d->iam; + + // Length of this thread's chunk, before merging. + difference_type length_local = sd->starts[iam + 1] - sd->starts[iam]; + +#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST + typedef RandomAccessIterator SortingPlacesIterator; + + // Sort in input storage. + sd->sorting_places[iam] = sd->source + sd->starts[iam]; +#else + typedef value_type* SortingPlacesIterator; + + // Sort in temporary storage, leave space for sentinel. + sd->sorting_places[iam] = sd->temporaries[iam] = static_cast(::operator new(sizeof(value_type) *(length_local + 1))); + + // Copy there. + std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, sd->sorting_places[iam]); +#endif + + // Sort locally. + if (d->stable) + __gnu_sequential::stable_sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp); + else + __gnu_sequential::sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp)); +#endif + + // Invariant: locally sorted subsequence in sd->sorting_places[iam], + // sd->sorting_places[iam] + length_local. + t.tic("local sort"); + + if (Settings::sort_splitting == Settings::SAMPLING) + { + difference_type num_samples; + determine_samples(d, num_samples); + +#pragma omp barrier + + t.tic("sample/wait"); + +#pragma omp single + __gnu_sequential::sort(sd->samples, sd->samples + (num_samples * d->num_threads), comp); + +#pragma omp barrier + + for (int s = 0; s < d->num_threads; s++) + { + // For each sequence. 
+ if (num_samples * iam > 0) + sd->pieces[iam][s].begin = std::lower_bound(sd->sorting_places[s], + sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s], + sd->samples[num_samples * iam], + comp) + - sd->sorting_places[s]; + else + // Absolute beginning. + sd->pieces[iam][s].begin = 0; + + if ((num_samples * (iam + 1)) < (num_samples * d->num_threads)) + sd->pieces[iam][s].end = std::lower_bound(sd->sorting_places[s], + sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s], sd->samples[num_samples * (iam + 1)], comp) + - sd->sorting_places[s]; + else + // Absolute end. + sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; + } + + } + else if (Settings::sort_splitting == Settings::EXACT) + { +#pragma omp barrier + + t.tic("wait"); + + std::vector > seqs(d->num_threads); + for (int s = 0; s < d->num_threads; s++) + seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]); + + std::vector offsets(d->num_threads); + + // If not last thread. + if (iam < d->num_threads - 1) + multiseq_partition(seqs.begin(), seqs.end(), sd->starts[iam + 1], offsets.begin(), comp); + + for (int seq = 0; seq < d->num_threads; seq++) + { + // For each sequence. + if (iam < (d->num_threads - 1)) + sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; + else + // Absolute end of this sequence. + sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq]; + } + +#pragma omp barrier + + for (int seq = 0; seq < d->num_threads; seq++) + { + // For each sequence. + if (iam > 0) + sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end; + else + // Absolute beginning. + sd->pieces[iam][seq].begin = 0; + } + } + + t.tic("split"); + + // Offset from target begin, length after merging. 
+ difference_type offset = 0, length_am = 0; + for (int s = 0; s < d->num_threads; s++) + { + length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin; + offset += sd->pieces[iam][s].begin; + } + +#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST + // Merge to temporary storage, uninitialized creation not possible + // since there is no multiway_merge calling the placement new + // instead of the assignment operator. + sd->merging_places[iam] = sd->temporaries[iam] = new value_type[length_am]; +#else + // Merge directly to target. + sd->merging_places[iam] = sd->source + offset; +#endif + std::vector > seqs(d->num_threads); + + for (int s = 0; s < d->num_threads; s++) + { + seqs[s] = std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, sd->sorting_places[s] + sd->pieces[iam][s].end); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(is_sorted(seqs[s].first, seqs[s].second, comp)); +#endif + } + + multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, d->stable, false, sequential_tag()); + + t.tic("merge"); + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->merging_places[iam], sd->merging_places[iam] + length_am, comp)); +#endif + +# pragma omp barrier + +#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST + // Write back. + std::copy(sd->merging_places[iam], sd->merging_places[iam] + length_am, + sd->source + offset); +#endif + + delete[] sd->temporaries[iam]; + + t.tic("copy back"); + + t.print(); + } + + /** @brief PMWMS main call. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @param n Length of sequence. + * @param num_threads Number of threads to use. + * @param stable Stable sorting. 
+ */ + template + inline void + parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, typename std::iterator_traits::difference_type n, int num_threads, bool stable) + { + _GLIBCXX_CALL(n) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (n <= 1) + return; + + // At least one element per thread: never use more threads than elements. + if (num_threads > n) + num_threads = static_cast(n); + + PMWMSSortingData sd; + + sd.source = begin; + sd.temporaries = new value_type*[num_threads]; + +#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST + sd.sorting_places = new RandomAccessIterator[num_threads]; + sd.merging_places = new value_type*[num_threads]; +#else + sd.sorting_places = new value_type*[num_threads]; + sd.merging_places = new RandomAccessIterator[num_threads]; +#endif + + if (Settings::sort_splitting == Settings::SAMPLING) + sd.samples = new value_type[num_threads * (Settings::sort_mwms_oversampling * num_threads - 1)]; + else + sd.samples = NULL; + + sd.offsets = new difference_type[num_threads - 1]; + sd.pieces = new std::vector >[num_threads]; + for (int s = 0; s < num_threads; s++) + sd.pieces[s].resize(num_threads); + PMWMSSorterPU* pus = new PMWMSSorterPU[num_threads]; + difference_type* starts = sd.starts = new difference_type[num_threads + 1]; + + // Chunk i begins at starts[i]; the first (n % num_threads) chunks get one extra element, and starts[num_threads] ends up as n. + difference_type chunk_length = n / num_threads, split = n % num_threads, start = 0; + for (int i = 0; i < num_threads; i++) + { + starts[i] = start; + start += (i < split) ? (chunk_length + 1) : chunk_length; + pus[i].num_threads = num_threads; + pus[i].iam = i; + pus[i].sd = &sd; + pus[i].stable = stable; + } + starts[num_threads] = start; + + // Now sort in parallel: each thread picks its PMWMSSorterPU by omp_get_thread_num(). NOTE(review): this presumes the team really has num_threads threads -- confirm what happens if OpenMP delivers fewer.
+#pragma omp parallel num_threads(num_threads) + parallel_sort_mwms_pu(&(pus[omp_get_thread_num()]), comp); + + // XXX sd as RAII; until then every array allocated above is released by hand here. + delete[] starts; + delete[] sd.temporaries; + delete[] sd.sorting_places; + delete[] sd.merging_places; + + if (Settings::sort_splitting == Settings::SAMPLING) + delete[] sd.samples; + + delete[] sd.offsets; + delete[] sd.pieces; + + delete[] pus; + } + +} + +#endif diff --git a/libstdc++-v3/include/parallel/numeric b/libstdc++-v3/include/parallel/numeric new file mode 100644 index 00000000000..3209a58a3e6 --- /dev/null +++ b/libstdc++-v3/include/parallel/numeric @@ -0,0 +1,322 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License.
This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** + * @file parallel/numeric + * + * @brief Parallel STL function calls corresponding to stl_numeric.h. + * The functions defined here mainly do case switches and + * call the actual parallelized versions in other files. + * Inlining policy: Functions that basically only contain one function call, + * are declared inline. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_NUMERIC_H +#define _GLIBCXX_PARALLEL_NUMERIC_H 1 + +#include +#include +#include +#include +#include +#include +#include + +namespace std +{ +namespace __parallel +{ + // Sequential fallback: forwards to _GLIBCXX_STD_P::accumulate. + template + inline T + accumulate(InputIterator begin, InputIterator end, T init, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::accumulate(begin, end, init); } + + // Sequential fallback with a user-supplied operation: forwards to _GLIBCXX_STD_P::accumulate. + template + inline T + accumulate(InputIterator begin, InputIterator end, T init, + BinaryOperation binary_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::accumulate(begin, end, init, binary_op); } + + // Sequential fallback for input iterator case (no operation given); the parallelism tag is ignored. + template + inline T + accumulate_switch(InputIterator begin, InputIterator end, T init, IteratorTag, __gnu_parallel::parallelism parallelism_tag) + { return accumulate(begin, end, init, __gnu_parallel::sequential_tag()); } + + // Public interface: adds with std::plus and dispatches on the iterator category via accumulate_switch. + template + inline T + accumulate(InputIterator begin, InputIterator end, T init, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) + { + return accumulate_switch(begin, end, init, std::plus::value_type>(), typename std::iterator_traits::iterator_category(), parallelism_tag); + } + + // Sequential fallback for input iterator case.
+ template + T + accumulate_switch(InputIterator begin, InputIterator end, T init, BinaryOperation binary_op, IteratorTag, __gnu_parallel::parallelism parallelism_tag) + { + return accumulate(begin, end, init, binary_op, __gnu_parallel::sequential_tag()); + } + + // Parallel algorithm for random access iterators. Runs for_each_template_random_access with init as start value when the parallel condition holds (length at least Settings::accumulate_minimal_n and a parallel tag); otherwise uses the sequential fallback with the same operation. + template + T + accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end, T init, BinaryOperation binary_op, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::accumulate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag))) + { + T res = init; + __gnu_parallel::accumulate_selector<_RandomAccessIterator> my_selector; + __gnu_parallel::for_each_template_random_access(begin, end, __gnu_parallel::nothing(), my_selector, __gnu_parallel::accumulate_binop_reduct(binary_op), res, res, -1, parallelism_tag); + return res; + } + else + return accumulate(begin, end, init, binary_op, __gnu_parallel::sequential_tag()); + } + + // Public interface: dispatches on the iterator category via accumulate_switch. + template + inline T + accumulate(InputIterator begin, InputIterator end, T init, BinaryOperation binary_op, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) + { + return accumulate_switch(begin, end, init, binary_op, typename std::iterator_traits::iterator_category(), parallelism_tag); + } + + + // Sequential fallback with user-supplied operations: forwards to _GLIBCXX_STD_P::inner_product. + template + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryFunction1 binary_op1, BinaryFunction2 binary_op2, __gnu_parallel::sequential_tag) + { + return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init, binary_op1, binary_op2); + } + + // Sequential fallback.
+  template
+  inline T
+  inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init);
+  }
+
+  // Parallel algorithm for random access iterators.
+  // binary_op1 combines partial results, binary_op2 is applied to each
+  // pair of elements.  When the parallel condition does not hold
+  // (range shorter than Settings::accumulate_minimal_n or a
+  // non-parallel tag) the sequential version is called with the very
+  // same operations.
+  template
+  T
+  inner_product_switch(RandomAccessIterator1 first1, RandomAccessIterator1 last1,
+                       RandomAccessIterator2 first2, T init,
+                       BinaryFunction1 binary_op1, BinaryFunction2 binary_op2,
+                       random_access_iterator_tag, random_access_iterator_tag,
+                       __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1) >= __gnu_parallel::Settings::accumulate_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+      {
+        T res = init;
+        __gnu_parallel::inner_product_selector my_selector(first1, first2);
+        __gnu_parallel::for_each_template_random_access(first1, last1, binary_op2, my_selector, binary_op1, res, res, -1, parallelism_tag);
+        return res;
+      }
+    else
+      // Pass the caller's operations on; the overload without them
+      // would silently compute with plus/multiplies instead.
+      return inner_product(first1, last1, first2, init, binary_op1, binary_op2, __gnu_parallel::sequential_tag());
+  }
+
+  // No parallelism for input iterators.
+ template + inline T + inner_product_switch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryFunction1 binary_op1, BinaryFunction2 binary_op2, IteratorTag1, IteratorTag2, __gnu_parallel::parallelism parallelism_tag) + { + return _GLIBCXX_STD_P::inner_product(first1, last1, first2, init, binary_op1, binary_op2); + } + /* Public interface with explicit operations: dispatches on both iterator categories via inner_product_switch. */ + template + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryFunction1 binary_op1, BinaryFunction2 binary_op2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) + { + typedef iterator_traits traits1_type; + typedef typename traits1_type::iterator_category iterator1_category; + + typedef iterator_traits traits2_type; + typedef typename traits2_type::iterator_category iterator2_category; + + return inner_product_switch(first1, last1, first2, init, binary_op1, binary_op2, iterator1_category(), iterator2_category(), parallelism_tag); + } + /* Public interface without operations: uses std::plus and std::multiplies and forwards the parallelism tag. */ + template + inline T + inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced) + { + typedef iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + + return inner_product(first1, last1, first2, init, std::plus(), + std::multiplies(), parallelism_tag); + } + + // Sequential fallback: forwards to _GLIBCXX_STD_P::partial_sum. + template + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result, + __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::partial_sum(begin, end, result); } + + // Sequential fallback with a user-supplied operation: forwards to _GLIBCXX_STD_P::partial_sum. + template + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result, + BinaryOperation bin_op, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); } + + // Sequential fallback for input iterator case.
+ template + inline OutputIterator + partial_sum_switch(InputIterator begin, InputIterator end, OutputIterator result, BinaryOperation bin_op, IteratorTag1, IteratorTag2) + { + return _GLIBCXX_STD_P::partial_sum(begin, end, result, bin_op); + } + + // Parallel algorithm for random access iterators: calls parallel_partial_sum when the range has at least Settings::partial_sum_minimal_n elements, the sequential fallback otherwise. + template + OutputIterator + partial_sum_switch(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + random_access_iterator_tag, random_access_iterator_tag) + { + if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::partial_sum_minimal_n)) + return __gnu_parallel::parallel_partial_sum(begin, end, result, bin_op); + else + return partial_sum(begin, end, result, bin_op, __gnu_parallel::sequential_tag()); + } + + // Public interface: adds with std::plus. + template + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result) + { + typedef typename iterator_traits::value_type value_type; + return partial_sum(begin, end, result, std::plus()); + } + + // Public interface: dispatches on the categories of the input and the output iterator. + template + inline OutputIterator + partial_sum(InputIterator begin, InputIterator end, OutputIterator result, + BinaryOperation binary_op) + { + typedef iterator_traits traitsi_type; + typedef typename traitsi_type::iterator_category iteratori_category; + + typedef iterator_traits traitso_type; + typedef typename traitso_type::iterator_category iteratoro_category; + + return partial_sum_switch(begin, end, result, binary_op, + iteratori_category(), iteratoro_category()); + } + + // Sequential fallback: forwards to _GLIBCXX_STD_P::adjacent_difference. + template + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, __gnu_parallel::sequential_tag) + { return _GLIBCXX_STD_P::adjacent_difference(begin, end, result); } + + // Sequential fallback.
+  template
+  inline OutputIterator
+  adjacent_difference(InputIterator begin, InputIterator end,
+                      OutputIterator result, BinaryOperation bin_op,
+                      __gnu_parallel::sequential_tag)
+  {
+    return _GLIBCXX_STD_P::adjacent_difference(begin, end, result, bin_op);
+  }
+
+  // Sequential fallback for input iterator case.
+  // The sequential_tag is required here: a call without any tag
+  // selects the public interface, which dispatches straight back to
+  // this function and never terminates.
+  template
+  inline OutputIterator
+  adjacent_difference_switch(InputIterator begin, InputIterator end,
+                             OutputIterator result, BinaryOperation bin_op,
+                             IteratorTag1, IteratorTag2, __gnu_parallel::parallelism)
+  { return adjacent_difference(begin, end, result, bin_op, __gnu_parallel::sequential_tag()); }
+
+  // Parallel algorithm for random access iterators.
+  // The first output element is copied from the input directly; the
+  // remaining positions are handed to for_each_template_random_access
+  // as pairs of (input, output) iterators.  Falls back to the
+  // sequential version when the parallel condition does not hold.
+  template
+  OutputIterator
+  adjacent_difference_switch(InputIterator begin, InputIterator end,
+                             OutputIterator result, BinaryOperation bin_op,
+                             random_access_iterator_tag, random_access_iterator_tag,
+                             __gnu_parallel::parallelism parallelism_tag)
+  {
+    if (_GLIBCXX_PARALLEL_CONDITION(static_cast<__gnu_parallel::sequence_index_t>(end - begin) >= __gnu_parallel::Settings::adjacent_difference_minimal_n && __gnu_parallel::is_parallel(parallelism_tag)))
+      {
+        bool dummy = true;
+        typedef __gnu_parallel::iterator_pair ip;
+        *result = *begin;
+        ip begin_pair(begin + 1, result + 1), end_pair(end, result + (end - begin));
+        __gnu_parallel::adjacent_difference_selector functionality;
+        __gnu_parallel::for_each_template_random_access(begin_pair, end_pair, bin_op, functionality, __gnu_parallel::dummy_reduct(), dummy, dummy, -1, parallelism_tag);
+        return functionality.finish_iterator;
+      }
+    else
+      return adjacent_difference(begin, end, result, bin_op, __gnu_parallel::sequential_tag());
+  }
+
+  // Public interface.
+ template + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, + __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced) + { + typedef iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + return adjacent_difference(begin, end, result, std::minus()); + } + + // Public interface. + template + inline OutputIterator + adjacent_difference(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation binary_op, + __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced) + { + typedef iterator_traits traitsi_type; + typedef typename traitsi_type::iterator_category iteratori_category; + + typedef iterator_traits traitso_type; + typedef typename traitso_type::iterator_category iteratoro_category; + + return adjacent_difference_switch(begin, end, result, binary_op, + iteratori_category(), + iteratoro_category(), parallelism_tag); + } +} // end namespace +} // end namespace + +#endif /* _GLIBCXX_NUMERIC_H */ diff --git a/libstdc++-v3/include/parallel/numericfwd.h b/libstdc++-v3/include/parallel/numericfwd.h new file mode 100644 index 00000000000..75fa3505f97 --- /dev/null +++ b/libstdc++-v3/include/parallel/numericfwd.h @@ -0,0 +1,152 @@ +// parallel extensions -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/numericfwd.h + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +#ifndef _GLIBCXX_PARALLEL_NUMERICFWD_H +#define _GLIBCXX_PARALLEL_NUMERICFWD_H 1 + +#pragma GCC system_header + +#include +#include + +namespace std +{ +namespace __parallel +{ + template + inline T + accumulate(_IIter, _IIter, T, __gnu_parallel::sequential_tag); + + template + inline T + accumulate(_IIter, _IIter, T, _BinaryOper, __gnu_parallel::sequential_tag); + + template + inline T + accumulate(_IIter, _IIter, T, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced); + + template + inline T + accumulate(_IIter, _IIter, T, _BinaryOper, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced); + + template + inline T + accumulate_switch(_IIter, _IIter, T, _Tag, __gnu_parallel::parallelism parallelism_tag); + + template + T + accumulate_switch(_IIter, _IIter, T, _BinaryOper, _Tag, __gnu_parallel::parallelism parallelism_tag); + + template + T + accumulate_switch(_RAIter, _RAIter, T, _BinaryOper, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag); + + + template + inline _OIter + 
adjacent_difference(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, __gnu_parallel::sequential_tag); + + template + inline _OIter + adjacent_difference(_IIter, _IIter, _OIter, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + inline _OIter + adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_balanced); + + template + inline _OIter + adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2, __gnu_parallel::parallelism); + + template + _OIter + adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag); + + + template + inline T + inner_product(_IIter1, _IIter1, _IIter2, T, BinaryFunction1, BinaryFunction2, __gnu_parallel::sequential_tag); + + template + inline T + inner_product(_IIter1, _IIter1, _IIter2, T, __gnu_parallel::sequential_tag); + + template + inline T + inner_product(_IIter1, _IIter1, _IIter2, T, BinaryFunction1, BinaryFunction2, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced); + + template + inline T + inner_product(_IIter1, _IIter1, _IIter2, T, __gnu_parallel::parallelism parallelism_tag = __gnu_parallel::parallel_unbalanced); + + template + T + inner_product_switch(_RAIter1, _RAIter1, _RAIter2, T, BinaryFunction1, BinaryFunction2, random_access_iterator_tag, random_access_iterator_tag, __gnu_parallel::parallelism parallelism_tag); + + template + inline T + inner_product_switch(_IIter1, _IIter1, _IIter2, T, BinaryFunction1, BinaryFunction2, _Tag1, _Tag2, __gnu_parallel::parallelism parallelism_tag); + + + template + inline _OIter + partial_sum(_IIter, _IIter, _OIter, __gnu_parallel::sequential_tag); + + template + inline _OIter + partial_sum(_IIter, _IIter, _OIter, _BinaryOper, 
__gnu_parallel::sequential_tag); + + template + inline _OIter + partial_sum(_IIter, _IIter, _OIter result); + + template + inline _OIter + partial_sum(_IIter, _IIter, _OIter, _BinaryOper); + + template + inline _OIter + partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2); + + template + _OIter + partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, random_access_iterator_tag, random_access_iterator_tag); +} // end namespace +} // end namespace + +#endif diff --git a/libstdc++-v3/include/parallel/omp_loop.h b/libstdc++-v3/include/parallel/omp_loop.h new file mode 100644 index 00000000000..23fe6f4a95f --- /dev/null +++ b/libstdc++-v3/include/parallel/omp_loop.h @@ -0,0 +1,105 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. 
This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/omp_loop.h + * @brief Parallelization of embarrassingly parallel execution by + * means of an OpenMP for loop. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_H +#define _GLIBCXX_PARALLEL_OMP_LOOP_H 1 + +#include + +#include +#include + +namespace __gnu_parallel +{ + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop. + * + * @param begin Begin iterator of element sequence. + * @param end End iterator of element sequence. + * @param o User-supplied functor (comparator, predicate, adding + * functor, etc.). + * @param f Functor to "process" an element with op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param r Functor to "add" a single result to the already + * processed elements (depends on functionality). + * @param base Base value for reduction. + * @param output Pointer to position where final result is written to + * @param bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ + template + Op + for_each_template_random_access_omp_loop(RandomAccessIterator begin, RandomAccessIterator end, Op o, Fu& f, Red r, Result base, Result& output, typename std::iterator_traits::difference_type bound) + { + typedef typename std::iterator_traits::difference_type difference_type; + + thread_index_t num_threads = (get_max_threads() < (end - begin)) ? 
get_max_threads() : static_cast((end - begin)); + Result *thread_results = new Result[num_threads]; + difference_type length = end - begin; + + for (thread_index_t i = 0; i < num_threads; i++) + { + thread_results[i] = r(thread_results[i], f(o, begin+i)); + } + +#pragma omp parallel num_threads(num_threads) + { +#pragma omp for schedule(dynamic, Settings::workstealing_chunk_size) + for (difference_type pos = 0; pos < length; pos++) + { + thread_results[omp_get_thread_num()] = r(thread_results[omp_get_thread_num()], f(o, begin+pos)); + } + } + + for (thread_index_t i = 0; i < num_threads; i++) + { + output = r(output, thread_results[i]); + } + + delete [] thread_results; + + // Points to last element processed (needed as return value for + // some algorithms like transform). + f.finish_iterator = begin + length; + + return o; + } +} // end namespace + +#endif diff --git a/libstdc++-v3/include/parallel/omp_loop_static.h b/libstdc++-v3/include/parallel/omp_loop_static.h new file mode 100644 index 00000000000..22acb2de348 --- /dev/null +++ b/libstdc++-v3/include/parallel/omp_loop_static.h @@ -0,0 +1,111 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. 
+ +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/omp_loop_static.h + * @brief Parallelization of embarrassingly parallel execution by + * means of an OpenMP for loop with static scheduling. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H +#define _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 1 + +#include + +#include +#include + +namespace __gnu_parallel +{ + + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop with static scheduling. + * + * @param begin Begin iterator of element sequence. + * @param end End iterator of element sequence. + * @param o User-supplied functor (comparator, predicate, adding + * functor, ...). + * @param f Functor to "process" an element with op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param r Functor to "add" a single result to the already processed + * elements (depends on functionality). + * @param base Base value for reduction. + * @param output Pointer to position where final result is written to + * @param bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). 
+ */ + template + Op + for_each_template_random_access_omp_loop_static(RandomAccessIterator begin, + RandomAccessIterator end, + Op o, Fu& f, Red r, + Result base, Result& output, + typename std::iterator_traits::difference_type bound) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + + thread_index_t num_threads = (get_max_threads() < (end - begin)) ? get_max_threads() : (end - begin); + Result *thread_results = new Result[num_threads]; + difference_type length = end - begin; + + for (thread_index_t i = 0; i < num_threads; i++) + { + thread_results[i] = r(thread_results[i], f(o, begin+i)); + } + +#pragma omp parallel num_threads(num_threads) + { +#pragma omp for schedule(static, Settings::workstealing_chunk_size) + for (difference_type pos = 0; pos < length; pos++) + { + thread_results[omp_get_thread_num()] = r(thread_results[omp_get_thread_num()], f(o, begin+pos)); + } + } + + for (thread_index_t i = 0; i < num_threads; i++) + { + output = r(output, thread_results[i]); + } + + delete [] thread_results; + + // Points to last element processed (needed as return value for + // some algorithms like transform). + f.finish_iterator = begin + length; + + return o; + } +} // end namespace + +#endif diff --git a/libstdc++-v3/include/parallel/par_loop.h b/libstdc++-v3/include/parallel/par_loop.h new file mode 100644 index 00000000000..98604cf1da4 --- /dev/null +++ b/libstdc++-v3/include/parallel/par_loop.h @@ -0,0 +1,120 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. 
+ +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/par_loop.h + * @brief Parallelization of embarrassingly parallel execution by + * means of equal splitting. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_PAR_LOOP_H +#define _GLIBCXX_PARALLEL_PAR_LOOP_H 1 + +#include +#include + +namespace __gnu_parallel +{ + + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using hand-crafted parallelization by equal splitting + * the work. + * + * @param begin Begin iterator of element sequence. + * @param end End iterator of element sequence. + * @param o User-supplied functor (comparator, predicate, adding + * functor, ...) + * @param f Functor to "process" an element with op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param r Functor to "add" a single result to the already + * processed elements (depends on functionality). 
+ * @param base Base value for reduction. + * @param output Pointer to position where final result is written to + * @param bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ + template + Op + for_each_template_random_access_ed(RandomAccessIterator begin, + RandomAccessIterator end, Op o, Fu& f, + Red r, Result base, Result& output, + typename std::iterator_traits::difference_type bound) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + + const difference_type length = end - begin; + const difference_type settings_threads = static_cast(get_max_threads()); + const difference_type dmin = settings_threads < length ? settings_threads : length; + const difference_type dmax = dmin > 1 ? dmin : 1; + + thread_index_t num_threads = static_cast(dmax); + + + Result *thread_results = new Result[num_threads]; + +#pragma omp parallel num_threads(num_threads) + { + // Neutral element. + Result reduct = Result(); + + thread_index_t p = num_threads; + thread_index_t iam = omp_get_thread_num(); + difference_type start = iam * length / p; + difference_type limit = (iam == p - 1) ? length : (iam + 1) * length / p; + + if (start < limit) + { + reduct = f(o, begin + start); + start++; + } + + for (; start < limit; start++) + reduct = r(reduct, f(o, begin + start)); + + thread_results[iam] = reduct; + } + + for (thread_index_t i = 0; i < num_threads; i++) + output = r(output, thread_results[i]); + + // Points to last element processed (needed as return value for + // some algorithms like transform). 
+ f.finish_iterator = begin + length; + + return o; + } + +} // end namespace + +#endif diff --git a/libstdc++-v3/include/parallel/parallel.h b/libstdc++-v3/include/parallel/parallel.h new file mode 100644 index 00000000000..63246126b8f --- /dev/null +++ b/libstdc++-v3/include/parallel/parallel.h @@ -0,0 +1,48 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/parallel.h + * @brief End-user include file. Provides advanced settings and + * tuning options. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze and Johannes Singler. 
+ +#ifndef _GLIBCXX_PARALLEL_PARALLEL_H +#define _GLIBCXX_PARALLEL_PARALLEL_H 1 + +#include +#include +#include +#include +#include + +#endif diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h new file mode 100644 index 00000000000..909b171d6f4 --- /dev/null +++ b/libstdc++-v3/include/parallel/partial_sum.h @@ -0,0 +1,191 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/partial_sum.h + * @brief Parallel implementation of std::partial_sum(), i. e. prefix + * sums. + * This file is a GNU parallel extension to the Standard C++ Library. 
/** @brief Sequential prefix sum, used as the base case.
 *
 *  Writes bin_op(value, x0), bin_op(bin_op(value, x0), x1), ... to
 *  the output sequence, where x0, x1, ... are the input elements.
 *  There is no 0-element available in general, which is why the
 *  running value has to be supplied by the caller.
 *
 *  @param begin Begin iterator of input sequence.
 *  @param end End iterator of input sequence.
 *  @param result Begin iterator of output sequence.
 *  @param bin_op Associative binary function.
 *  @param value Start value. Must be passed since the neutral
 *  element is unknown in general.
 *  @return End iterator of output sequence. */
template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
inline OutputIterator
parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
                              OutputIterator result, BinaryOperation bin_op,
                              typename std::iterator_traits<InputIterator>::value_type value)
{
  // An empty input falls straight through and returns result unchanged.
  for (; begin != end; ++begin, ++result)
    {
      value = bin_op(value, *begin);
      *result = value;
    }
  return result;
}
+ */ + template + OutputIterator + parallel_partial_sum_linear(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op, + typename std::iterator_traits::difference_type n, int num_threads) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (num_threads > (n - 1)) + num_threads = static_cast(n - 1); + if (num_threads < 2) + { + *result = *begin; + return parallel_partial_sum_basecase(begin + 1, end, result + 1, bin_op, *begin); + } + + difference_type borders[num_threads + 2]; + + if (Settings::partial_sum_dilatation == 1.0f) + equally_split(n, num_threads + 1, borders); + else + { + difference_type chunk_length = (int)((double)n / ((double)num_threads + Settings::partial_sum_dilatation)), borderstart = n - num_threads * chunk_length; + borders[0] = 0; + for (int i = 1; i < (num_threads + 1); i++) + { + borders[i] = borderstart; + borderstart += chunk_length; + } + borders[num_threads + 1] = n; + } + + value_type* sums = new value_type[num_threads]; + OutputIterator target_end; + +#pragma omp parallel num_threads(num_threads) + { + int id = omp_get_thread_num(); + if (id == 0) + { + *result = *begin; + parallel_partial_sum_basecase(begin + 1, begin + borders[1], result + 1, bin_op, *begin); + sums[0] = *(result + borders[1] - 1); + } + else + { + sums[id] = std::accumulate(begin + borders[id] + 1, begin + borders[id + 1], *(begin + borders[id]), bin_op, __gnu_parallel::sequential_tag()); + } + +#pragma omp barrier + +#pragma omp single + parallel_partial_sum_basecase(sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]); + +#pragma omp barrier + + // Still same team. + parallel_partial_sum_basecase(begin + borders[id + 1], begin + borders[id + 2], result + borders[id + 1], bin_op, sums[id]); + } + + delete[] sums; + + return result + n; + } + + /** @brief Parallel partial sum front-end. 
+ * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence. + * @param result Begin iterator of output sequence. + * @param bin_op Associative binary function. + * @return End iterator of output sequence. */ + template + OutputIterator + parallel_partial_sum(InputIterator begin, InputIterator end, + OutputIterator result, BinaryOperation bin_op) + { + _GLIBCXX_CALL(begin - end); + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + int num_threads = get_max_threads(); + + switch (Settings::partial_sum_algorithm) + { + case Settings::LINEAR: + // Need an initial offset. + return parallel_partial_sum_linear(begin, end, result, bin_op, + n, num_threads); + default: + // Partial_sum algorithm not implemented. + _GLIBCXX_PARALLEL_ASSERT(0); + return end; + } + } +} + +#endif diff --git a/libstdc++-v3/include/parallel/partition.h b/libstdc++-v3/include/parallel/partition.h new file mode 100644 index 00000000000..3c2917f0e2a --- /dev/null +++ b/libstdc++-v3/include/parallel/partition.h @@ -0,0 +1,389 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. 
If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/partition.h + * @brief Parallel implementation of std::partition(), + * std::nth_element(), and std::partial_sort(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_PARTITION_H +#define _GLIBCXX_PARALLEL_PARTITION_H 1 + +#include +#include +#include +#include + +/** @brief Decide whether to declare certain variable volatile in this file. */ +#define _GLIBCXX_VOLATILE volatile + +namespace __gnu_parallel +{ + /** @brief Parallel implementation of std::partition. + * @param begin Begin iterator of input sequence to split. + * @param end End iterator of input sequence to split. + * @param pred Partition predicate, possibly including some kind of pivot. + * @param max_num_threads Maximum number of threads to use for this task. + * @return Number of elements not fulfilling the predicate. 
*/ + template + inline typename std::iterator_traits::difference_type + parallel_partition(RandomAccessIterator begin, RandomAccessIterator end, + Predicate pred, thread_index_t max_num_threads) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + _GLIBCXX_CALL(n) + + // Shared. + _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1; + _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right, leftnew, rightnew; + bool* reserved_left, * reserved_right; + + reserved_left = new bool[max_num_threads]; + reserved_right = new bool[max_num_threads]; + + difference_type chunk_size; + if (Settings::partition_chunk_share > 0.0) + chunk_size = std::max((difference_type)Settings::partition_chunk_size, (difference_type)((double)n * Settings::partition_chunk_share / (double)max_num_threads)); + else + chunk_size = Settings::partition_chunk_size; + + // At least good for two processors. + while (right - left + 1 >= 2 * max_num_threads * chunk_size) + { + difference_type num_chunks = (right - left + 1) / chunk_size; + thread_index_t num_threads = (int)std::min((difference_type)max_num_threads, num_chunks / 2); + + for (int r = 0; r < num_threads; r++) + { + reserved_left[r] = false; + reserved_right[r] = false; + } + leftover_left = 0; + leftover_right = 0; + +#pragma omp parallel num_threads(num_threads) + { + // Private. + difference_type thread_left, thread_left_border, thread_right, thread_right_border; + thread_left = left + 1; + + // Just to satify the condition below. 
+ thread_left_border = thread_left - 1; + thread_right = n - 1; + thread_right_border = thread_right + 1; + + bool iam_finished = false; + while (!iam_finished) + { + if (thread_left > thread_left_border) +#pragma omp critical + { + if (left + (chunk_size - 1) > right) + iam_finished = true; + else + { + thread_left = left; + thread_left_border = left + (chunk_size - 1); + left += chunk_size; + } + } + + if (thread_right < thread_right_border) +#pragma omp critical + { + if (left > right - (chunk_size - 1)) + iam_finished = true; + else + { + thread_right = right; + thread_right_border = right - (chunk_size - 1); + right -= chunk_size; + } + } + + if (iam_finished) + break; + + // Swap as usual. + while (thread_left < thread_right) + { + while (pred(begin[thread_left]) && thread_left <= thread_left_border) + thread_left++; + while (!pred(begin[thread_right]) && thread_right >= thread_right_border) + thread_right--; + + if (thread_left > thread_left_border || thread_right < thread_right_border) + // Fetch new chunk(s). + break; + + std::swap(begin[thread_left], begin[thread_right]); + thread_left++; + thread_right--; + } + } + + // Now swap the leftover chunks to the right places. + if (thread_left <= thread_left_border) +#pragma omp atomic + leftover_left++; + if (thread_right >= thread_right_border) +#pragma omp atomic + leftover_right++; + +#pragma omp barrier + +#pragma omp single + { + leftnew = left - leftover_left * chunk_size; + rightnew = right + leftover_right * chunk_size; + } + +#pragma omp barrier + + // <=> thread_left_border + (chunk_size - 1) >= leftnew + if (thread_left <= thread_left_border + && thread_left_border >= leftnew) + { + // Chunk already in place, reserve spot. + reserved_left[(left - (thread_left_border + 1)) / chunk_size] = true; + } + + // <=> thread_right_border - (chunk_size - 1) <= rightnew + if (thread_right >= thread_right_border + && thread_right_border <= rightnew) + { + // Chunk already in place, reserve spot. 
+ reserved_right[((thread_right_border - 1) - right) / chunk_size] = true; + } + +#pragma omp barrier + + if (thread_left <= thread_left_border && thread_left_border < leftnew) + { + // Find spot and swap. + difference_type swapstart = -1; +#pragma omp critical + { + for (int r = 0; r < leftover_left; r++) + if (!reserved_left[r]) + { + reserved_left[r] = true; + swapstart = left - (r + 1) * chunk_size; + break; + } + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(swapstart != -1); +#endif + + std::swap_ranges(begin + thread_left_border - (chunk_size - 1), begin + thread_left_border + 1, begin + swapstart); + } + + if (thread_right >= thread_right_border + && thread_right_border > rightnew) + { + // Find spot and swap + difference_type swapstart = -1; +#pragma omp critical + { + for (int r = 0; r < leftover_right; r++) + if (!reserved_right[r]) + { + reserved_right[r] = true; + swapstart = right + r * chunk_size + 1; + break; + } + } + +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(swapstart != -1); +#endif + + std::swap_ranges(begin + thread_right_border, begin + thread_right_border + chunk_size, begin + swapstart); + } +#if _GLIBCXX_ASSERTIONS +#pragma omp barrier + +#pragma omp single + { + for (int r = 0; r < leftover_left; r++) + _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]); + for (int r = 0; r < leftover_right; r++) + _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]); + } + +#pragma omp barrier +#endif + +#pragma omp barrier + left = leftnew; + right = rightnew; + } + } // end "recursion" + + difference_type final_left = left, final_right = right; + + while (final_left < final_right) + { + // Go right until key is geq than pivot. + while (pred(begin[final_left]) && final_left < final_right) + final_left++; + + // Go left until key is less than pivot. 
+ while (!pred(begin[final_right]) && final_left < final_right) + final_right--; + + if (final_left == final_right) + break; + std::swap(begin[final_left], begin[final_right]); + final_left++; + final_right--; + } + + // All elements on the left side are < piv, all elements on the + // right are >= piv + delete[] reserved_left; + delete[] reserved_right; + + // Element "between" final_left and final_right might not have + // been regarded yet + if (final_left < n && !pred(begin[final_left])) + // Really swapped. + return final_left; + else + return final_left + 1; + } + + /** + * @brief Parallel implementation of std::nth_element(). + * @param begin Begin iterator of input sequence. + * @param nth Iterator of element that must be in position afterwards. + * @param end End iterator of input sequence. + * @param comp Comparator. + */ + template + void + parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, Comparator comp) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + _GLIBCXX_CALL(end - begin) + + RandomAccessIterator split; + value_type pivot; + random_number rng; + + difference_type minimum_length = std::max(2, Settings::partition_minimal_n); + + // Break if input range to small. + while (static_cast(end - begin) >= minimum_length) + { + difference_type n = end - begin; + + RandomAccessIterator pivot_pos = begin + rng(n); + + // Swap pivot_pos value to end. + if (pivot_pos != (end - 1)) + std::swap(*pivot_pos, *(end - 1)); + pivot_pos = end - 1; + + // XXX Comparator must have first_value_type, second_value_type, result_type + // Comparator == __gnu_parallel::lexicographic > + // pivot_pos == std::pair* + // XXX binder2nd only for RandomAccessIterators?? + __gnu_parallel::binder2nd pred(comp, *pivot_pos); + + // Divide, leave pivot unchanged in last place. 
+ RandomAccessIterator split_pos1, split_pos2; + split_pos1 = begin + parallel_partition(begin, end - 1, pred, get_max_threads()); + + // Left side: < pivot_pos; right side: >= pivot_pos + + // Swap pivot back to middle. + if (split_pos1 != pivot_pos) + std::swap(*split_pos1, *pivot_pos); + pivot_pos = split_pos1; + + // In case all elements are equal, split_pos1 == 0 + if ((split_pos1 + 1 - begin) < (n >> 7) || (end - split_pos1) < (n >> 7)) + { + // Very unequal split, one part smaller than one 128th + // elements not stricly larger than the pivot. + __gnu_parallel::unary_negate<__gnu_parallel::binder1st, value_type> pred(__gnu_parallel::binder1st(comp, *pivot_pos)); + + // Find other end of pivot-equal range. + split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred); + } + else + // Only skip the pivot. + split_pos2 = split_pos1 + 1; + + // Compare iterators. + if (split_pos2 <= nth) + begin = split_pos2; + else if (nth < split_pos1) + end = split_pos1; + else + break; + } + + // Only at most Settings::partition_minimal_n elements left. + __gnu_sequential::sort(begin, end, comp); + } + + /** @brief Parallel implementation of std::partial_sort(). + * @param begin Begin iterator of input sequence. + * @param middle Sort until this position. + * @param end End iterator of input sequence. + * @param comp Comparator. */ + template + void + parallel_partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, RandomAccessIterator end, Comparator comp) + { + parallel_nth_element(begin, middle, end, comp); + std::sort(begin, middle, comp); + } + +} //namespace __gnu_parallel + +#undef _GLIBCXX_VOLATILE + +#endif diff --git a/libstdc++-v3/include/parallel/queue.h b/libstdc++-v3/include/parallel/queue.h new file mode 100644 index 00000000000..9d2143b5787 --- /dev/null +++ b/libstdc++-v3/include/parallel/queue.h @@ -0,0 +1,153 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. 
+// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/queue.h + * @brief Lock-free double-ended queue. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_QUEUE_H +#define _GLIBCXX_PARALLEL_QUEUE_H 1 + +#include +#include +#include + +/** @brief Decide whether to declare certain variable volatile in this file. */ +#define _GLIBCXX_VOLATILE volatile + +namespace __gnu_parallel +{ + /**@brief Double-ended queue of bounded size, allowing lock-free + * atomic access. 
push_front() and pop_front() must not be called + * concurrently to each other, while pop_back() can be called + * concurrently at all times. + * @c empty(), @c size(), and @c top() are intentionally not provided. + * Calling them would not make sense in a concurrent setting. + * @param T Contained element type. */ + template + class RestrictedBoundedConcurrentQueue + { + private: + /** @brief Array of elements, seen as cyclic buffer. */ + T* base; + + /** @brief Maximal number of elements contained at the same time. */ + sequence_index_t max_size; + + /** @brief Cyclic begin and end pointers contained in one + atomically changeable value. */ + _GLIBCXX_VOLATILE lcas_t borders; + + public: + /** @brief Constructor. Not to be called concurrent, of course. + * @param max_size Maximal number of elements to be contained. */ + RestrictedBoundedConcurrentQueue(sequence_index_t max_size) + { + this->max_size = max_size; + base = new T[max_size]; + borders = encode2(0, 0); +#pragma omp flush + } + + /** @brief Destructor. Not to be called concurrent, of course. */ + ~RestrictedBoundedConcurrentQueue() + { + delete[] base; + } + + /** @brief Pushes one element into the queue at the front end. + * Must not be called concurrently with pop_front(). */ + void push_front(const T& t) + { + lcas_t former_borders = borders; + int former_front, former_back; + decode2(former_borders, former_front, former_back); + *(base + former_front % max_size) = t; +#if _GLIBCXX_ASSERTIONS + // Otherwise: front - back > max_size eventually. + _GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back) <= max_size); +#endif + fetch_and_add(&borders, encode2(1, 0)); + } + + /** @brief Pops one element from the queue at the front end. + * Must not be called concurrently with pop_front(). */ + bool pop_front(T& t) + { + int former_front, former_back; +#pragma omp flush + decode2(borders, former_front, former_back); + while (former_front > former_back) + { + // Chance. 
+ lcas_t former_borders = encode2(former_front, former_back); + lcas_t new_borders = encode2(former_front - 1, former_back); + if (compare_and_swap(&borders, former_borders, new_borders)) + { + t = *(base + (former_front - 1) % max_size); + return true; + } +#pragma omp flush + decode2(borders, former_front, former_back); + } + return false; + } + + /** @brief Pops one element from the queue at the front end. + * Must not be called concurrently with pop_front(). */ + bool pop_back(T& t) //queue behavior + { + int former_front, former_back; +#pragma omp flush + decode2(borders, former_front, former_back); + while (former_front > former_back) + { + // Chance. + lcas_t former_borders = encode2(former_front, former_back); + lcas_t new_borders = encode2(former_front, former_back + 1); + if (compare_and_swap(&borders, former_borders, new_borders)) + { + t = *(base + former_back % max_size); + return true; + } +#pragma omp flush + decode2(borders, former_front, former_back); + } + return false; + } + }; +} //namespace __gnu_parallel + +#undef _GLIBCXX_VOLATILE + +#endif diff --git a/libstdc++-v3/include/parallel/quicksort.h b/libstdc++-v3/include/parallel/quicksort.h new file mode 100644 index 00000000000..e3df87a66d6 --- /dev/null +++ b/libstdc++-v3/include/parallel/quicksort.h @@ -0,0 +1,172 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/quicksort.h + * @brief Implementation of an unbalanced parallel quicksort (in-place). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_QUICKSORT_H +#define _GLIBCXX_PARALLEL_QUICKSORT_H 1 + +#include +#include + +namespace __gnu_parallel +{ + /** @brief Unbalanced quicksort divide step. + * @param begin Begin iterator of subsequence. + * @param end End iterator of subsequence. + * @param comp Comparator. + * @param pivot_rank Desired rank of the pivot. + * @param num_samples Choose pivot from that many samples. + * @param num_threads Number of threads that are allowed to work on + * this part.
+ */ + template + inline typename std::iterator_traits::difference_type + parallel_sort_qs_divide(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, + typename std::iterator_traits::difference_type pivot_rank, + typename std::iterator_traits::difference_type num_samples, thread_index_t num_threads) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + num_samples = std::min(num_samples, n); + value_type samples[num_samples]; /* variable-length array: num_samples is a run-time value */ + + for (difference_type s = 0; s < num_samples; s++) + samples[s] = begin[(unsigned long long)s * n / num_samples]; + + __gnu_sequential::sort(samples, samples + num_samples, comp); + + value_type& pivot = samples[pivot_rank * num_samples / n]; /* sample at the position proportional to pivot_rank */ + + __gnu_parallel::binder2nd pred(comp, pivot); + difference_type split = parallel_partition(begin, end, pred, num_threads); + + return split; + } + + /** @brief Unbalanced quicksort conquer step. + * @param begin Begin iterator of subsequence. + * @param end End iterator of subsequence. + * @param comp Comparator. + * @param num_threads Number of threads that are allowed to work on + * this part.
+ */ + template + inline void + parallel_sort_qs_conquer(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, int num_threads) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (num_threads <= 1) + { + __gnu_sequential::sort(begin, end, comp); + return; + } + + difference_type n = end - begin, pivot_rank; + + if (n <= 1) + return; + + thread_index_t num_processors_left; + + if ((num_threads % 2) == 1) + num_processors_left = num_threads / 2 + 1; + else + num_processors_left = num_threads / 2; /* either way: num_threads / 2 rounded up goes to the left part */ + + pivot_rank = n * num_processors_left / num_threads; /* split the elements in proportion to the thread split */ + + difference_type split = parallel_sort_qs_divide(begin, end, comp, pivot_rank, +Settings::sort_qs_num_samples_preset, num_threads); + +#pragma omp parallel sections + { +#pragma omp section + parallel_sort_qs_conquer(begin, begin + split, comp, num_processors_left); +#pragma omp section + parallel_sort_qs_conquer(begin + split, end, comp, num_threads - num_processors_left); + } + } + + + + /** @brief Unbalanced quicksort main call. + * @param begin Begin iterator of input sequence. + * @param end End iterator of input sequence, ignored (begin + n is used instead). + * @param comp Comparator. + * @param n Length of input sequence. + * @param num_threads Number of threads that are allowed to work on + * this part. + */ + template + inline void + parallel_sort_qs(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, + typename std::iterator_traits::difference_type n, int num_threads) + { + _GLIBCXX_CALL(n) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + if (n == 0) + return; + + // At least one element per processor. + if (num_threads > n) + num_threads = static_cast(n); + + Settings::sort_qs_num_samples_preset = 100; /* NOTE(review): unconditionally overwrites the preset; confirm this is intended */ + + // Hard to avoid.
+ omp_set_num_threads(num_threads); /* NOTE: this thread count is not reset before returning */ + + bool old_nested = (omp_get_nested() != 0); + omp_set_nested(true); + parallel_sort_qs_conquer(begin, begin + n, comp, num_threads); + omp_set_nested(old_nested); /* restore the nesting setting saved above */ + } + +} //namespace __gnu_parallel + +#endif diff --git a/libstdc++-v3/include/parallel/random_number.h b/libstdc++-v3/include/parallel/random_number.h new file mode 100644 index 00000000000..266ed0ab251 --- /dev/null +++ b/libstdc++-v3/include/parallel/random_number.h @@ -0,0 +1,386 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License.
+ +/** @file parallel/random_number.h + * @brief Random number generator based on the Mersenne twister. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_RANDOM_NUMBER_H +#define _GLIBCXX_PARALLEL_RANDOM_NUMBER_H 1 + +#include + +namespace __gnu_parallel +{ + // XXX use tr1 random number. + // http://www.math.keio.ac.jp/matumoto/emt.html + template + class mersenne_twister + { + public: + typedef UIntType result_type; + static const int word_size = w; + static const int state_size = n; + static const int shift_size = m; + static const int mask_bits = r; + static const UIntType parameter_a = a; + static const int output_u = u; + static const int output_s = s; + static const UIntType output_b = b; + static const int output_t = t; + static const UIntType output_c = c; + static const int output_l = l; + + static const bool has_fixed_range = false; + + mersenne_twister() { seed(); } + +#if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x520) + // Work around overload resolution problem (Gennadiy E. Rozental) + explicit mersenne_twister(const UIntType& value) +#else + explicit mersenne_twister(UIntType value) +#endif + { seed(value); } + template mersenne_twister(It& first, It last) { seed(first,last); } + + template + explicit mersenne_twister(Generator & gen) { seed(gen); } + + // compiler-generated copy ctor and assignment operator are fine + + void seed() { seed(UIntType(5489)); } + +#if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x520) + // Work around overload resolution problem (Gennadiy E. Rozental) + void seed(const UIntType& value) +#else + void seed(UIntType value) +#endif + { + // New seeding algorithm from + // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html + // In the previous versions, MSBs of the seed affected only MSBs of the + // state x[].
+ const UIntType mask = ~0u; + x[0] = value & mask; + for (i = 1; i < n; i++) { + // See Knuth "The Art of Computer Programming" Vol. 2, 3rd ed., page 106 + x[i] = (1812433253UL * (x[i-1] ^ (x[i-1] >> (w-2))) + i) & mask; + } + } + + // For GCC, moving this function out-of-line prevents inlining, which may + // reduce overall object code size. However, MSVC does not grok + // out-of-line definitions of member function templates. + template + void seed(Generator & gen) + { + // I could have used std::generate_n, but it takes "gen" by value + for (int j = 0; j < n; j++) + x[j] = gen(); + i = n; + } + + template + void seed(It& first, It last) + { + int j; + for (j = 0; j < n && first != last; ++j, ++first) + x[j] = *first; + i = n; + /* if (first == last && j < n) + throw std::invalid_argument("mersenne_twister::seed");*/ + } + + result_type min() const { return 0; } + result_type max() const + { + // avoid "left shift count >= width of type" warning + result_type res = 0; + for (int i = 0; i < w; ++i) + res |= (1u << i); + return res; + } + + result_type operator()(); + static bool validation(result_type v) { return val == v; } + +#ifndef BOOST_NO_OPERATORS_IN_NAMESPACE + + friend bool operator==(const mersenne_twister& x, const mersenne_twister& y) + { + for (int j = 0; j < state_size; ++j) + if (x.compute(j) != y.compute(j)) + return false; + return true; + } + + friend bool operator!=(const mersenne_twister& x, const mersenne_twister& y) + { return !(x == y); } +#else + // Use a member function; Streamable concept not supported.
+ bool operator==(const mersenne_twister& rhs) const + { + for (int j = 0; j < state_size; ++j) + if (compute(j) != rhs.compute(j)) + return false; + return true; + } + + bool operator!=(const mersenne_twister& rhs) const + { return !(*this == rhs); } +#endif + + private: + // returns x(i-n+index), where index is in 0..n-1 + UIntType compute(unsigned int index) const + { + // equivalent to (i-n+index) % 2n, but doesn't produce negative numbers + return x[ (i + n + index) % (2*n) ]; + } + void twist(int block); + + // state representation: next output is o(x(i)) + // x[0] ... x[k] x[k+1] ... x[n-1] x[n] ... x[2*n-1] represents + // x(i-k) ... x(i) x(i+1) ... x(i-k+n-1) x(i-k-n) ... x[i(i-k-1)] + // The goal is to always have x(i-n) ... x(i-1) available for + // operator== and save/restore. + + UIntType x[2*n]; + int i; + }; + +#ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION + // A definition is required even for integral static constants + template + const bool mersenne_twister::has_fixed_range; + template + const int mersenne_twister::state_size; + template + const int mersenne_twister::shift_size; + template + const int mersenne_twister::mask_bits; + template + const UIntType mersenne_twister::parameter_a; + template + const int mersenne_twister::output_u; + template + const int mersenne_twister::output_s; + template + const UIntType mersenne_twister::output_b; + template + const int mersenne_twister::output_t; + template + const UIntType mersenne_twister::output_c; + template + const int mersenne_twister::output_l; +#endif + + template + void mersenne_twister::twist(int block) + { + const UIntType upper_mask = (~0u) << r; + const UIntType lower_mask = ~upper_mask; + + if (block == 0) { + for (int j = n; j < 2*n; j++) { + UIntType y = (x[j-n] & upper_mask) | (x[j-(n-1)] & lower_mask); + x[j] = x[j-(n-m)] ^ (y >> 1) ^ (y&1 ? a : 0); + } + } else if (block == 1) { + // split loop to avoid costly modulo operations + { // extra scope for MSVC brokenness w.r.t.
for scope + for (int j = 0; j < n-m; j++) { + UIntType y = (x[j+n] & upper_mask) | (x[j+n+1] & lower_mask); + x[j] = x[j+n+m] ^ (y >> 1) ^ (y&1 ? a : 0); + } + } + + for (int j = n-m; j < n-1; j++) { + UIntType y = (x[j+n] & upper_mask) | (x[j+n+1] & lower_mask); + x[j] = x[j-(n-m)] ^ (y >> 1) ^ (y&1 ? a : 0); + } + // last iteration + UIntType y = (x[2*n-1] & upper_mask) | (x[0] & lower_mask); + x[n-1] = x[m-1] ^ (y >> 1) ^ (y&1 ? a : 0); + i = 0; + } + } + + template + inline typename mersenne_twister::result_type + mersenne_twister::operator()() + { + if (i == n) + twist(0); + else if (i >= 2*n) + twist(1); + // Step 4 + UIntType z = x[i]; + ++i; + z ^= (z >> u); + z ^= ((z << s) & b); + z ^= ((z << t) & c); + z ^= (z >> l); + return z; + } + + + typedef mersenne_twister mt11213b; + + // validation by experiment from mt19937.c + typedef mersenne_twister mt19937; + + /** @brief Random number generator, based on the Mersenne twister. */ + class random_number + { + private: + mt19937 mt; + uint64 supremum, RAND_SUP; + double supremum_reciprocal, RAND_SUP_REC; + + uint64 cache; /* assumed to be twice as long as the usual random number */ + int bits_left; /* bit results */ + + static inline uint32 scale_down(uint64 x, +#if _GLIBCXX_SCALE_DOWN_FPU + uint64 /*supremum*/, double supremum_reciprocal) +#else + uint64 supremum, double /*supremum_reciprocal*/) +#endif + { +#if _GLIBCXX_SCALE_DOWN_FPU + return (uint32)(x * supremum_reciprocal); +#else + return static_cast(x % supremum); +#endif + } + +public: + /** @brief Default constructor. Seed with 0. */ + random_number() : + mt(0), + supremum(0x100000000ULL), + RAND_SUP(1ULL << (sizeof(uint32) * 8)), + supremum_reciprocal((double)supremum / (double)RAND_SUP), + RAND_SUP_REC(1.0 / (double)RAND_SUP), + cache(0), bits_left(0) + { + } + + /** @brief Constructor. + * @param seed Random seed. + * @param supremum Generate integer random numbers in the interval @c [0,supremum).
*/ + random_number(uint32 seed, uint64 supremum = 0x100000000ULL) : + mt(seed), + supremum(supremum), + RAND_SUP(1ULL << (sizeof(uint32) * 8)), + supremum_reciprocal((double)supremum / (double)RAND_SUP), + RAND_SUP_REC(1.0 / (double)RAND_SUP), + cache(0), bits_left(0) + { + } + + /** @brief Generate unsigned random 32-bit integer. */ + inline uint32 operator()() + { + return scale_down(mt(), supremum, supremum_reciprocal); + } + + /** @brief Generate unsigned random 32-bit integer in the interval @c [0,local_supremum). */ + inline uint32 operator()(uint64 local_supremum) + { + return scale_down(mt(), local_supremum, (double)local_supremum * RAND_SUP_REC); + } + + /** @brief Set the random seed. + * @param seed to set. */ + inline void set_seed(uint32 seed) + { + mt.seed(seed); + cache = mt(); + bits_left = 32; + } + + /** @brief Generate a number of random bits, compile-time parameter; the bit count must lie in [0, 32]. */ + template + inline unsigned long genrand_bits() + { + if (bits_left < bits) /* refill before extracting: the constructors leave the cache empty (bits_left == 0) */ + { + cache |= (((uint64)mt()) << bits_left); /* here 0 <= bits_left < 32, so the shift count is valid */ + bits_left += 32; + } + unsigned long res = cache & ((((uint64)1) << bits) - 1); /* 64-bit mask: no int overflow for bits >= 31 */ + cache = cache >> bits; + bits_left -= bits; + return res; + } + + /** @brief Generate a number of random bits, run-time parameter. + * @param bits Number of bits to generate, must lie in [0, 32]. */ + inline unsigned long genrand_bits(int bits) + { + if (bits_left < bits) /* refill before extracting: the constructors leave the cache empty (bits_left == 0) */ + { + cache |= (((uint64)mt()) << bits_left); /* here 0 <= bits_left < 32, so the shift count is valid */ + bits_left += 32; + } + unsigned long res = cache & ((((uint64)1) << bits) - 1); /* 64-bit mask: no int overflow for bits >= 31 */ + cache = cache >> bits; + bits_left -= bits; + return res; + } + +}; + +} // namespace __gnu_parallel + +#endif diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h new file mode 100644 index 00000000000..f18f7774840 --- /dev/null +++ b/libstdc++-v3/include/parallel/random_shuffle.h @@ -0,0 +1,516 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library.
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/random_shuffle.h + * @brief Parallel implementation of std::random_shuffle(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H +#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1 + +#include + +#include +#include + +#include +#include +#include +#include + +namespace __gnu_parallel +{ + /** @brief Type to hold the index of a bin. + * + * Since many variables of this type are allocated, it should be + * chosen as small as possible. + */ + typedef unsigned short bin_index; + + /** @brief Data known to every thread participating in + __gnu_parallel::parallel_random_shuffle(). 
*/ + template + struct DRandomShufflingGlobalData + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + /** @brief Begin iterator of the source. */ + RandomAccessIterator& source; + + /** @brief Temporary arrays for each thread. */ + value_type** temporaries; + + /** @brief Two-dimensional array to hold the thread-bin distribution. + * + * Dimensions (num_bins + 1) x (num_threads + 1), indexed as dist[bin][thread]. */ + difference_type** dist; + + /** @brief Start indexes of the threads' chunks. */ + difference_type* starts; + + /** @brief Number of the thread that will further process the + corresponding bin. */ + thread_index_t* bin_proc; + + /** @brief Number of bins to distribute to. */ + int num_bins; + + /** @brief Number of bits needed to address the bins. */ + int num_bits; + + /** @brief Constructor. */ + DRandomShufflingGlobalData(RandomAccessIterator& _source) + : source(_source) { } + }; + + /** @brief Local data for a thread participating in + __gnu_parallel::parallel_random_shuffle(). + */ + template + struct DRSSorterPU + { + /** @brief Number of threads participating in total. */ + int num_threads; + + /** @brief Number of owning thread. */ + int iam; + + /** @brief Begin index for bins taken care of by this thread. */ + bin_index bins_begin; + + /** @brief End index for bins taken care of by this thread. */ + bin_index bins_end; + + /** @brief Random seed for this thread. */ + uint32 seed; + + /** @brief Pointer to global data. */ + DRandomShufflingGlobalData* sd; + }; + + /** @brief Generate a random number in @c [0,2^logp). + * @param logp Logarithm (base 2) of the upper range bound. + * @param rng Random number generator to use. + */ + template + inline int random_number_pow2(int logp, RandomNumberGenerator& rng) + { + return rng.genrand_bits(logp); + } + + /** @brief Random shuffle code executed by each thread.
+ * @param pus Array of thread-local data records. */ + template + inline void parallel_random_shuffle_drs_pu(DRSSorterPU* pus) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + Timing t; + t.tic(); + + DRSSorterPU* d = &pus[omp_get_thread_num()]; + DRandomShufflingGlobalData* sd = d->sd; + thread_index_t iam = d->iam; + + // Indexing: dist[bin][processor] + difference_type length = sd->starts[iam + 1] - sd->starts[iam]; + bin_index* oracles = new bin_index[length]; + difference_type* dist = new difference_type[sd->num_bins + 1]; + bin_index* bin_proc = new bin_index[sd->num_bins]; /* thread-local copy of sd->bin_proc, filled further below */ + value_type** temporaries = new value_type*[d->num_threads]; + + // Compute oracles and count appearances. + for (bin_index b = 0; b < sd->num_bins + 1; b++) + dist[b] = 0; + int num_bits = sd->num_bits; + + random_number rng(d->seed); + + // First main loop. + for (difference_type i = 0; i < length; i++) + { + bin_index oracle = random_number_pow2(num_bits, rng); + oracles[i] = oracle; + + // To allow prefix (partial) sum.
+ dist[oracle + 1]++; + } + + for (bin_index b = 0; b < sd->num_bins + 1; b++) + sd->dist[b][iam + 1] = dist[b]; + + t.tic(); + +#pragma omp barrier + + t.tic(); + +#pragma omp single + { + // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the + // total number of items in bin s + for (bin_index s = 0; s < sd->num_bins; s++) + partial_sum(sd->dist[s + 1], sd->dist[s + 1] + d->num_threads + 1, sd->dist[s + 1]); + } + +#pragma omp barrier + + t.tic(); + + sequence_index_t offset = 0, global_offset = 0; + for (bin_index s = 0; s < d->bins_begin; s++) + global_offset += sd->dist[s + 1][d->num_threads]; + +#pragma omp barrier + + for (bin_index s = d->bins_begin; s < d->bins_end; s++) + { + for (int t = 0; t < d->num_threads + 1; t++) + sd->dist[s + 1][t] += offset; + offset = sd->dist[s + 1][d->num_threads]; + } + + sd->temporaries[iam] = new value_type[offset]; + + t.tic(); + +#pragma omp barrier + + t.tic(); + + // Draw local copies to avoid false sharing. + for (bin_index b = 0; b < sd->num_bins + 1; b++) + dist[b] = sd->dist[b][iam]; + for (bin_index b = 0; b < sd->num_bins; b++) + bin_proc[b] = sd->bin_proc[b]; + for (thread_index_t t = 0; t < d->num_threads; t++) + temporaries[t] = sd->temporaries[t]; + + RandomAccessIterator source = sd->source; + difference_type start = sd->starts[iam]; + + // Distribute according to oracles, second main loop. + for (difference_type i = 0; i < length; i++) + { + bin_index target_bin = oracles[i]; + thread_index_t target_p = bin_proc[target_bin]; + + // Last column [d->num_threads] stays unchanged. + temporaries[target_p][dist[target_bin + 1]++] = *(source + i + start); + } + + delete[] oracles; + delete[] dist; + delete[] bin_proc; + delete[] temporaries; + + t.tic(); + +#pragma omp barrier + + t.tic(); + + // Shuffle bins internally. + for (bin_index b = d->bins_begin; b < d->bins_end; b++) + { + value_type* begin = sd->temporaries[iam] + ((b == d->bins_begin) ?
0 : sd->dist[b][d->num_threads]), + * end = sd->temporaries[iam] + sd->dist[b + 1][d->num_threads]; + sequential_random_shuffle(begin, end, rng); + std::copy(begin, end, sd->source + global_offset + ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads])); + } + + delete[] sd->temporaries[iam]; + + t.tic(); + + t.print(); + } + + /** @brief Round up to the next greater power of 2. + * @param x Integer to round up */ + template + T round_up_to_pow2(T x) + { + if (x <= 1) + return 1; + else + return (T)1 << (log2(x - 1) + 1); + } + + /** @brief Main parallel random shuffle step. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param n Length of sequence. + * @param num_threads Number of threads to use. + * @param rng Random number generator to use. + */ + template + inline void + parallel_random_shuffle_drs(RandomAccessIterator begin, RandomAccessIterator end, typename std::iterator_traits::difference_type n, int num_threads, RandomNumberGenerator& rng) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + _GLIBCXX_CALL(n) + + if (num_threads > n) + num_threads = static_cast(n); + + bin_index num_bins, num_bins_cache; + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + // Try the L1 cache first. + + // Must fit into L1. + num_bins_cache = std::max((difference_type)1, (difference_type)(n / (Settings::L1_cache_size_lb / sizeof(value_type)))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. + num_bins = std::min(n, (difference_type)num_bins_cache); + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin.
+ num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); + + if (num_bins < num_bins_cache) + { +#endif + // Now try the L2 cache + // Must fit into L2 + num_bins_cache = static_cast(std::max((difference_type)1, (difference_type)(n / (Settings::L2_cache_size / sizeof(value_type))))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2. + num_bins = static_cast(std::min(n, (difference_type)num_bins_cache)); + // Power of 2 and at least one element per bin, at most the TLB size. +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin. + num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + } +#endif + + num_threads = std::min((bin_index)num_threads, (bin_index)num_bins); /* never more threads than bins */ + + if (num_threads <= 1) + return sequential_random_shuffle(begin, end, rng); + + DRandomShufflingGlobalData sd(begin); + + DRSSorterPU* pus = new DRSSorterPU[num_threads]; + + sd.temporaries = new value_type*[num_threads]; + //sd.oracles = new bin_index[n]; + sd.dist = new difference_type*[num_bins + 1]; + sd.bin_proc = new thread_index_t[num_bins]; + for (bin_index b = 0; b < num_bins + 1; b++) + sd.dist[b] = new difference_type[num_threads + 1]; + for (bin_index b = 0; b < (num_bins + 1); b++) + { + sd.dist[0][0] = 0; /* redundant: the next statement already covers b == 0 */ + sd.dist[b][0] = 0; + } + difference_type* starts = sd.starts = new difference_type[num_threads + 1]; + int bin_cursor = 0; + sd.num_bins = num_bins; + sd.num_bits = log2(num_bins); + + difference_type chunk_length = n / num_threads, split = n % num_threads, start = 0; + int bin_chunk_length = num_bins / num_threads, bin_split = num_bins % num_threads; + for (int i = 0; i < num_threads; i++) + { + starts[i] = start; + start += (i < split) ?
(chunk_length + 1) : chunk_length; /* the first `split` threads take one extra element */ + int j = pus[i].bins_begin = bin_cursor; + + // Range of bins for this processor. + bin_cursor += (i < bin_split) ? (bin_chunk_length + 1) : bin_chunk_length; + pus[i].bins_end = bin_cursor; + for (; j < bin_cursor; j++) + sd.bin_proc[j] = i; + pus[i].num_threads = num_threads; + pus[i].iam = i; + pus[i].seed = rng(std::numeric_limits::max()); + pus[i].sd = &sd; + } + starts[num_threads] = start; + + // Now shuffle in parallel. +#pragma omp parallel num_threads(num_threads) + parallel_random_shuffle_drs_pu(pus); + + delete[] starts; + delete[] sd.bin_proc; + for (int s = 0; s < (num_bins + 1); s++) + delete[] sd.dist[s]; + delete[] sd.dist; + delete[] sd.temporaries; + + delete[] pus; + } + + /** @brief Sequential cache-efficient random shuffle. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param rng Random number generator to use. + */ + template + inline void + sequential_random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator& rng) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + bin_index num_bins, num_bins_cache; + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + // Try the L1 cache first, must fit into L1.
+ num_bins_cache = std::max((difference_type)1, (difference_type)(n / (Settings::L1_cache_size_lb / sizeof(value_type)))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size + num_bins = std::min(n, (difference_type)num_bins_cache); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin + num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); + + if (num_bins < num_bins_cache) + { +#endif + // Now try the L2 cache, must fit into L2. + num_bins_cache = static_cast(std::max((difference_type)1, (difference_type)(n / (Settings::L2_cache_size / sizeof(value_type))))); + num_bins_cache = round_up_to_pow2(num_bins_cache); + + // No more buckets than TLB entries, power of 2 + // Power of 2 and at least one element per bin, at most the TLB size. + num_bins = static_cast(std::min(n, (difference_type)num_bins_cache)); + +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB + // 2 TLB entries needed per bin + num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins); +#endif + num_bins = round_up_to_pow2(num_bins); +#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 + } +#endif + + int num_bits = log2(num_bins); + + if (num_bins > 1) + { + value_type* target = new value_type[n]; + bin_index* oracles = new bin_index[n]; + difference_type* dist0 = new difference_type[num_bins + 1], * dist1 = new difference_type[num_bins + 1]; + + for (int b = 0; b < num_bins + 1; b++) + dist0[b] = 0; + + Timing t; + t.tic(); + + random_number bitrng(rng(0xFFFFFFFF)); /* separate bit generator, seeded with a value drawn from rng */ + + for (difference_type i = 0; i < n; i++) + { + bin_index oracle = random_number_pow2(num_bits, bitrng); + oracles[i] = oracle; + + // To allow prefix (partial) sum. + dist0[oracle + 1]++; + } + + t.tic(); + + // Sum up bins.
+ partial_sum(dist0, dist0 + num_bins + 1, dist0); /* afterwards dist0[b] is the start offset of bin b in target */ + + for (int b = 0; b < num_bins + 1; b++) + dist1[b] = dist0[b]; /* keep the bin boundaries; dist0 is advanced while distributing */ + + t.tic(); + + // Distribute according to oracles. + for (difference_type i = 0; i < n; i++) + target[(dist0[oracles[i]])++] = *(begin + i); + + for (int b = 0; b < num_bins; b++) + { + sequential_random_shuffle(target + dist1[b], target + dist1[b + 1], + rng); + t.tic(); + } + t.print(); + /* Copy the shuffled elements back; without this the result in target would be discarded. */ std::copy(target, target + n, begin); + delete[] dist0; + delete[] dist1; + delete[] oracles; + delete[] target; + } + else + __gnu_sequential::random_shuffle(begin, end, rng); + } + + /** @brief Parallel random shuffle public call. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param rng Random number generator to use. + */ + template + inline void + parallel_random_shuffle(RandomAccessIterator begin, RandomAccessIterator end, RandomNumberGenerator rng = random_number()) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + difference_type n = end - begin; + parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ; + } + +} + +#endif diff --git a/libstdc++-v3/include/parallel/search.h b/libstdc++-v3/include/parallel/search.h new file mode 100644 index 00000000000..754150ced9d --- /dev/null +++ b/libstdc++-v3/include/parallel/search.h @@ -0,0 +1,157 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details.
+ +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/search.h + * @brief Parallel implementation base for std::search() and + * std::search_n(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_SEARCH_H +#define _GLIBCXX_PARALLEL_SEARCH_H 1 + +#include + +#include +#include + + +namespace __gnu_parallel +{ + /** + * @brief Precalculate advances for Knuth-Morris-Pratt algorithm. + * @param elements Begin iterator of sequence to search for. + * @param length Length of sequence to search for. + * @param advances Returned offsets. + */ + template + void + calc_borders(RandomAccessIterator elements, _DifferenceTp length, _DifferenceTp* off) + { + typedef _DifferenceTp difference_type; + + off[0] = -1; + if (length > 1) + off[1] = 0; + difference_type k = 0; + for (difference_type j = 2; j <= length; j++) + { + while ((k >= 0) && (elements[k] != elements[j-1])) + k = off[k]; + off[j] = ++k; + } + } + + // Generic parallel find algorithm (requires random access iterator). + + /** @brief Parallel std::search. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. 
+ * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param pred Find predicate. + * @return Place of finding in first sequences. */ + template + _RandomAccessIterator1 + search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1, + _RandomAccessIterator2 begin2, _RandomAccessIterator2 end2, + Pred pred) + { + typedef std::iterator_traits<_RandomAccessIterator1> traits_type; + typedef typename traits_type::difference_type difference_type; + + _GLIBCXX_CALL((end1 - begin1) + (end2 - begin2)); + + difference_type pattern_length = end2 - begin2; + + // Pattern too short. + if(pattern_length <= 0) + return end1; + + // Last point to start search. + difference_type input_length = (end1 - begin1) - pattern_length; + + // Where is first occurence of pattern? defaults to end. + difference_type res = (end1 - begin1); + + // Pattern too long. + if (input_length < 0) + return end1; + + thread_index_t num_threads = std::max(1, std::min(input_length, __gnu_parallel::get_max_threads())); + + difference_type borders[num_threads + 1]; + __gnu_parallel::equally_split(input_length, num_threads, borders); + + difference_type advances[pattern_length]; + calc_borders(begin2, pattern_length, advances); + +#pragma omp parallel num_threads(num_threads) + { + thread_index_t iam = omp_get_thread_num(); + + difference_type start = borders[iam], stop = borders[iam + 1]; + + difference_type pos_in_pattern = 0; + bool found_pattern = false; + + while (start <= stop && !found_pattern) + { + // Get new value of res. +#pragma omp flush(res) + // No chance for this thread to find first occurence. + if (res < start) + break; + while (pred(begin1[start + pos_in_pattern], begin2[pos_in_pattern])) + { + ++pos_in_pattern; + if (pos_in_pattern == pattern_length) + { + // Found new candidate for res. +#pragma omp critical (res) + res = std::min(res, start); + found_pattern = true; + break; + } + } + // Make safe jump. 
+ start += (pos_in_pattern - advances[pos_in_pattern]); + pos_in_pattern = (advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern]; + } + } + + // Return iterator on found element. + return (begin1 + res); + } +} // end namespace + +#endif diff --git a/libstdc++-v3/include/parallel/set_operations.h b/libstdc++-v3/include/parallel/set_operations.h new file mode 100644 index 00000000000..006176de46f --- /dev/null +++ b/libstdc++-v3/include/parallel/set_operations.h @@ -0,0 +1,529 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. 
+ +/** + * @file parallel/set_operations.h + * @brief Parallel implementations of set operations for random-access + * iterators. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Marius Elvert and Felix Bondarenko. + +#ifndef _GLIBCXX_PARALLEL_SET_OPERATIONS_H +#define _GLIBCXX_PARALLEL_SET_OPERATIONS_H 1 + +#include + +#include +#include + +namespace __gnu_parallel +{ + /** @brief Copy the remainder of one of two sequences. + * Copies [b.first, e.first) to r if that range is not empty, + * otherwise copies [b.second, e.second). + * @return Output iterator past the last element written. */ + template + inline OutputIterator + copy_tail(std::pair b, + std::pair e, OutputIterator r) + { + if (b.first != e.first) + { + do + { + *r++ = *b.first++; + } + while (b.first != e.first); + } + else + { + while (b.second != e.second) + *r++ = *b.second++; + } + return r; + } + + /** @brief Sequential kernel for the symmetric difference of [a, b) and [c, d). + * invoke() writes the elements found in only one of the ranges, + * count() returns how many elements invoke() would write, + * first_empty()/second_empty() handle one input being empty. */ + template + struct symmetric_difference_func + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename std::pair iterator_pair; + + symmetric_difference_func(Comparator c) : comp(c) {} + + Comparator comp; + + inline OutputIterator invoke(InputIterator a, InputIterator b, + InputIterator c, InputIterator d, + OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { + *r = *a; + ++a; + ++r; + } + else if (comp(*c, *a)) + { + *r = *c; + ++c; + ++r; + } + else + { + ++a; + ++c; + } + } + return std::copy(c, d, std::copy(a, b, r)); + } + + inline difference_type + count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const + { + difference_type counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { + ++a; + ++counter; + } + else if (comp(*c, *a)) + { + ++c; + ++counter; + } + else + { + ++a; + ++c; + } + } + + return counter + (b - a) + (d - c); + } + + inline OutputIterator + first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return std::copy(c, d, out); } + + inline OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return std::copy(a, b, out); } + + }; + + + /** @brief Sequential kernel for the difference [a, b) minus [c, d). + * invoke() writes the elements of [a, b) that have no equivalent in + * [c, d); count() returns how many elements invoke() would write. */ + template + struct 
difference_func + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename std::pair iterator_pair; + + difference_func(Comparator c) : comp(c) {} + + Comparator comp; + + inline OutputIterator + invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d, + OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { + *r = *a; + ++a; + ++r; + } + else if (comp(*c, *a)) + { ++c; } + else + { + ++a; + ++c; + } + } + return std::copy(a, b, r); + } + + inline difference_type + count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const + { + difference_type counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { + ++a; + ++counter; + } + else if (comp(*c, *a)) + { ++c; } + else + { ++a; ++c; } + } + + return counter + (b - a); + } + + inline OutputIterator + first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return out; } + + inline OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return std::copy(a, b, out); } + }; + + + /** @brief Sequential kernel for the intersection of [a, b) and [c, d). + * invoke() writes one element (taken from the first range) per pair + * of equivalent elements; count() returns the number of such pairs. */ + template + struct intersection_func + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename std::pair iterator_pair; + + intersection_func(Comparator c) : comp(c) {} + + Comparator comp; + + inline OutputIterator + invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d, + OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { ++a; } + else if (comp(*c, *a)) + { ++c; } + else + { + *r = *a; + ++a; + ++c; + ++r; + } + } + + return r; + } + + inline difference_type + count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const + { + difference_type counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { ++a; } + else if (comp(*c, *a)) + { ++c; } + else + { + ++a; + ++c; + ++counter; + } + } 
+ + return counter; + } + + inline OutputIterator + first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return out; } + + inline OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return out; } + }; + + /** @brief Sequential kernel for the union of [a, b) and [c, d). + * invoke() writes every element once, taking the one from the first + * range when both ranges hold equivalent elements; count() returns + * how many elements invoke() would write. */ + template + struct union_func + { + typedef typename std::iterator_traits::difference_type difference_type; + + union_func(Comparator c) : comp(c) {} + + Comparator comp; + + inline OutputIterator + invoke(InputIterator a, const InputIterator b, InputIterator c, + const InputIterator d, OutputIterator r) const + { + while (a != b && c != d) + { + if (comp(*a, *c)) + { + *r = *a; + ++a; + } + else if (comp(*c, *a)) + { + *r = *c; + ++c; + } + else + { + *r = *a; + ++a; + ++c; + } + ++r; + } + return std::copy(c, d, std::copy(a, b, r)); + } + + inline difference_type + count(InputIterator a, const InputIterator b, InputIterator c, + const InputIterator d) const + { + difference_type counter = 0; + + while (a != b && c != d) + { + if (comp(*a, *c)) + { ++a; } + else if (comp(*c, *a)) + { ++c; } + else + { + ++a; + ++c; + } + ++counter; + } + + counter += (b - a); + counter += (d - c); + return counter; + } + + inline OutputIterator + first_empty(InputIterator c, InputIterator d, OutputIterator out) const + { return std::copy(c, d, out); } + + inline OutputIterator + second_empty(InputIterator a, InputIterator b, OutputIterator out) const + { return std::copy(a, b, out); } + }; + + /** @brief Generic parallel driver for the set operations above. + * Empty inputs are delegated to op.first_empty()/op.second_empty(). + * Otherwise the combined input is cut into num_threads + 1 blocks + * with multiseq_partition(); thread 0 writes the first block directly + * while the other threads only count their output length, and after + * a barrier every thread writes its block at its final offset, with + * thread 0 also producing the last block. + * @param begin1 Begin iterator of first sequence. + * @param end1 End iterator of first sequence. + * @param begin2 Begin iterator of second sequence. + * @param end2 End iterator of second sequence. + * @param result Begin iterator of the output sequence. + * @param op Kernel providing comp, invoke(), count(), first_empty() + * and second_empty(). + * @return Output iterator past the last element written. + * NOTE(review): length[] and block_begins[] are sized from the + * requested num_threads and thread 0 sums length[0 .. num_threads); + * this presumably relies on the parallel region really running with + * that many threads - confirm. */ + template + OutputIterator + parallel_set_operation(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Operation op) + { + _GLIBCXX_CALL((end1 - begin1) + (end2 - begin2)) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + typedef typename std::pair iterator_pair; + + + if (begin1 == end1) + return op.first_empty(begin2, end2, result); + + if (begin2 == end2) + return op.second_empty(begin1, end1, result); + 
+ const difference_type size = (end1 - begin1) + (end2 - begin2); + + thread_index_t num_threads = std::min(std::min(end1 - begin1, end2 - begin2), get_max_threads()); + + difference_type borders[num_threads + 2]; + equally_split(size, num_threads + 1, borders); + + const iterator_pair sequence[ 2 ] = { std::make_pair(begin1, end1), std::make_pair(begin2, end2) } ; + + iterator_pair block_begins[num_threads + 1]; + + // Very start. + block_begins[0] = std::make_pair(begin1, begin2); + difference_type length[num_threads]; + + OutputIterator return_value = result; + +#pragma omp parallel num_threads(num_threads) + { + Timing t; + + t.tic(); + + // Result from multiseq_partition. + InputIterator offset[2]; + const int iam = omp_get_thread_num(); + + const difference_type rank = borders[iam + 1]; + + multiseq_partition(sequence, sequence + 2, rank, offset, op.comp); + + // allowed to read? + // together + // *(offset[ 0 ] - 1) == *offset[ 1 ] + if (offset[ 0 ] != begin1 && offset[ 1 ] != end2 + && !op.comp(*(offset[ 0 ] - 1), *offset[ 1 ]) + && !op.comp(*offset[ 1 ], *(offset[ 0 ] - 1))) + { + // Avoid split between globally equal elements: move one to + // front in first sequence. + --offset[ 0 ]; + } + + iterator_pair block_end = block_begins[ iam + 1 ] = iterator_pair(offset[ 0 ], offset[ 1 ]); + + t.tic(); + + // Make sure all threads have their block_begin result written out. +#pragma omp barrier + + t.tic(); + + iterator_pair block_begin = block_begins[ iam ]; + + // Begin working for the first block, while the others except + // the last start to count. + if (iam == 0) + { + // The first thread can copy already. + length[ iam ] = op.invoke(block_begin.first, block_end.first, block_begin.second, block_end.second, result) - result; + } + else + { + length[ iam ] = op.count(block_begin.first, block_end.first, + block_begin.second, block_end.second); + } + + t.tic(); + + // Make sure everyone wrote their lengths. 
+#pragma omp barrier + + t.tic(); + OutputIterator r = result; + + if (iam == 0) + { + // Do the last block. + for (int i = 0; i < num_threads; ++i) + r += length[i]; + + block_begin = block_begins[num_threads]; + + // Return the result iterator of the last block. + return_value = op.invoke(block_begin.first, end1, block_begin.second, end2, r); + + } + else + { + for (int i = 0; i < iam; ++i) + r += length[ i ]; + + // Write this thread's block at its final output offset. + op.invoke(block_begin.first, block_end.first, + block_begin.second, block_end.second, r); + } + + t.tic(); + t.print(); + } + return return_value; + } + + + /** @brief Parallel set union: parallel_set_operation() with union_func. */ + template + OutputIterator + parallel_set_union(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + union_func< InputIterator, OutputIterator, Comparator>(comp)); + } + + /** @brief Parallel set intersection: parallel_set_operation() with intersection_func. */ + template + OutputIterator + parallel_set_intersection(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + intersection_func(comp)); + } + + + /** @brief Set intersection using std::less as comparator. + * NOTE(review): this forwards to a six-argument set_intersection, + * which is not declared in this file; presumably + * parallel_set_intersection was intended - confirm. */ + template + OutputIterator + set_intersection(InputIterator begin1, InputIterator end1, InputIterator begin2, InputIterator end2, OutputIterator result) + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + + return set_intersection(begin1, end1, begin2, end2, result, + std::less()); + } + + /** @brief Parallel set difference: parallel_set_operation() with difference_func. */ + template + OutputIterator + parallel_set_difference(InputIterator begin1, InputIterator end1, + InputIterator begin2, InputIterator end2, + OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + difference_func(comp)); + } + + /** @brief Parallel set symmetric difference: parallel_set_operation() with symmetric_difference_func. */ + template + OutputIterator + parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1, InputIterator begin2, InputIterator end2, 
OutputIterator result, Comparator comp) + { + return parallel_set_operation(begin1, end1, begin2, end2, result, + symmetric_difference_func(comp)); + } + +} + +#endif // _GLIBCXX_PARALLEL_SET_OPERATIONS_H + + + + + + + + diff --git a/libstdc++-v3/include/parallel/settings.h b/libstdc++-v3/include/parallel/settings.h new file mode 100644 index 00000000000..cec9d8225c9 --- /dev/null +++ b/libstdc++-v3/include/parallel/settings.h @@ -0,0 +1,388 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/settings.h + * @brief Settings and tuning parameters, heuristics to decide + * whether to use parallelized algorithms. 
+ * This file is a GNU parallel extension to the Standard C++ Library. + * + * @section parallelization_decision The decision whether to run an algorithm in parallel. + * + * There are several ways the user can switch on and off the + * parallel execution of an algorithm, both at compile- and + * run-time. + * + * Only sequential execution can be forced at compile-time. + * This reduces code size and protects code parts that have + * non-thread-safe side effects. + * + * Ultimately forcing parallel execution at compile-time does + * not make much sense. + * Often, the sequential algorithm implementation is used as + * a subroutine, so no reduction in code size can be achieved. + * Also, the machine the program is run on might have only one + * processor core, so to avoid overhead, the algorithm is + * executed sequentially. + * + * To force sequential execution of an algorithm ultimately + * at compile-time, the user must add the tag + * __gnu_parallel::sequential_tag() to the end of the + * parameter list, e. g. + * + * \code + * std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag()); + * \endcode + * + * This is compatible with all overloaded algorithm variants. + * No additional code will be instantiated, at all. + * The same holds for most algorithm calls with iterators + * not providing random access. + * + * If the algorithm call is not forced to be executed sequentially + * at compile-time, the decision is made at run-time, for each call. + * First, the two (conceptually) global variables + * __gnu_parallel::Settings::force_sequential and + * __gnu_parallel::Settings::force_parallel are examined. + * If the former one is true, the sequential algorithm is executed. + * If the latter one is true and the former one is false, + * the algorithm is executed in parallel. + * + * If none of these conditions has fired so far, a heuristic is used. + * The parallel algorithm implementation is called only if the + * input size is sufficiently large. 
+ * For most algorithms, the input size is the (combined) length of + * the input sequence(s). + * The threshold can be set by the user, individually for each + * algorithm. + * The according variables are called + * __gnu_parallel::Settings::[algorithm]_minimal_n . + * + * For some of the algorithms, there are even more tuning options, + * e. g. the ability to choose from multiple algorithm variants. + * See the __gnu_parallel::Settings class for details. + */ + +// Written by Johannes Singler and Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_SETTINGS_H +#define _GLIBCXX_PARALLEL_SETTINGS_H 1 + +#include +#include + +/** + * @brief The extensible condition on whether the parallel variant of + * an algorithm should be called. + * @param c A condition that is overruled by + * __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on + * the input size. + */ +#define _GLIBCXX_PARALLEL_CONDITION(c) (!(__gnu_parallel::Settings::force_sequential) && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::Settings::force_parallel)) + +namespace __gnu_parallel +{ + // NB: Including this file cannot produce (unresolved) symbols from + // the OpenMP runtime unless the parallel mode is actually invoked + // and active, which implies that the OpenMP runtime is actually + // going to be linked in. + /** @brief Number of threads to use: omp_get_max_threads(), but at least 1. */ + inline int + get_max_threads() + { return omp_get_max_threads() > 1 ? omp_get_max_threads() : 1; } + +namespace +{ + // XXX look at _Tune in mt_allocator.h + /* NOTE(review): Settings and its static members live in an unnamed + namespace inside a header, so every translation unit including + this file presumably gets its own copy of each setting - confirm + that this is intended. */ + /** @brief Run-time settings for the parallel mode. */ + struct Settings + { + /** @brief Different parallel sorting algorithms to choose + from: multi-way mergesort, quicksort, load-balanced + quicksort. 
*/ + enum SortAlgorithm + { MWMS, QS, QS_BALANCED }; + + /** @brief Different merging algorithms: bubblesort-alike, + loser-tree variants, enum sentinel */ + enum MultiwayMergeAlgorithm + { BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED, LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST }; + + /** @brief Different splitting strategies for sorting/merging: + by sampling, exact */ + enum Splitting + { SAMPLING, EXACT }; + + /** @brief Different partial sum algorithms: recursive, linear */ + enum PartialSumAlgorithm + { RECURSIVE, LINEAR }; + + /** @brief Different find distribution strategies: growing + blocks, equal-sized blocks, equal splitting. */ + enum FindDistribution + { GROWING_BLOCKS, CONSTANT_SIZE_BLOCKS, EQUAL_SPLIT }; + + /** @brief Force all algorithms to be executed sequentially. + * This setting cannot be overridden. */ + static volatile bool force_sequential; + + /** @brief Force all algorithms to be executed in parallel. + * This setting can be overridden by __gnu_parallel::sequential_tag + * (compile-time), and force_sequential (run-time). */ + static volatile bool force_parallel; + + /** @brief Algorithm to use for sorting. */ + static volatile SortAlgorithm sort_algorithm; + + /** @brief Strategy to use for splitting the input when + sorting (MWMS). */ + static volatile Splitting sort_splitting; + + /** @brief Minimal input size for parallel sorting. */ + static volatile sequence_index_t sort_minimal_n; + /** @brief Oversampling factor for parallel std::sort (MWMS). */ + static volatile unsigned int sort_mwms_oversampling; + /** @brief Number of samples to take to find a good pivot + (quicksort). */ + static volatile unsigned int sort_qs_num_samples_preset; + + /** @brief Maximal subsequence length to switch to unbalanced + * base case. Applies to std::sort with dynamically + * load-balanced quicksort. */ + static volatile sequence_index_t sort_qsb_base_case_maximal_n; + + /** @brief Minimal input size for parallel std::partition. 
*/ + static volatile sequence_index_t partition_minimal_n; + + /** @brief Chunk size for parallel std::partition. */ + static volatile sequence_index_t partition_chunk_size; + + /** @brief Chunk size for parallel std::partition, relative to + * input size. If >0.0, this value overrides + * partition_chunk_size. */ + static volatile double partition_chunk_share; + + /** @brief Minimal input size for parallel std::nth_element. */ + static volatile sequence_index_t nth_element_minimal_n; + + /** @brief Minimal input size for parallel std::partial_sort. */ + static volatile sequence_index_t partial_sort_minimal_n; + + /** @brief Minimal input size for parallel std::adjacent_difference. */ + static volatile unsigned int adjacent_difference_minimal_n; + + /** @brief Minimal input size for parallel std::partial_sum. */ + static volatile unsigned int partial_sum_minimal_n; + + /** @brief Algorithm to use for std::partial_sum. */ + static volatile PartialSumAlgorithm partial_sum_algorithm; + + /** @brief Assume "sum and write result" to be that factor + * slower than just "sum". This value is used for + * std::partial_sum. */ + static volatile float partial_sum_dilatation; + + /** @brief Minimal input size for parallel std::random_shuffle. */ + static volatile unsigned int random_shuffle_minimal_n; + + /** @brief Minimal input size for parallel std::merge. */ + static volatile sequence_index_t merge_minimal_n; + + /** @brief Splitting strategy for parallel std::merge. */ + static volatile Splitting merge_splitting; + + /** @brief Oversampling factor for parallel std::merge. + * This many samples per thread are collected. */ + static volatile unsigned int merge_oversampling; + + /** @brief Algorithm to use for parallel + __gnu_parallel::multiway_merge. */ + static volatile MultiwayMergeAlgorithm multiway_merge_algorithm; + + /** @brief Splitting strategy to use for parallel + __gnu_parallel::multiway_merge. 
*/ + static volatile Splitting multiway_merge_splitting; + + /// Oversampling factor for parallel __gnu_parallel::multiway_merge. + static volatile unsigned int multiway_merge_oversampling; + + /// Minimal input size for parallel __gnu_parallel::multiway_merge. + static volatile sequence_index_t multiway_merge_minimal_n; + + /// Minimal k for parallel __gnu_parallel::multiway_merge (presumably the number of input sequences; the previous comment repeated the oversampling text - confirm). + static volatile int multiway_merge_minimal_k; + + /** @brief Minimal input size for parallel std::unique_copy. */ + static volatile sequence_index_t unique_copy_minimal_n; + + /** @brief Chunk size for work stealing (presumably; undocumented in the original - confirm). */ + static volatile sequence_index_t workstealing_chunk_size; + + /** @brief Minimal input size for parallel std::for_each. */ + static volatile sequence_index_t for_each_minimal_n; + + /** @brief Minimal input size for parallel std::count and + std::count_if. */ + static volatile sequence_index_t count_minimal_n; + + /** @brief Minimal input size for parallel std::transform. */ + static volatile sequence_index_t transform_minimal_n; + + /** @brief Minimal input size for parallel std::replace and + std::replace_if. */ + static volatile sequence_index_t replace_minimal_n; + + /** @brief Minimal input size for parallel std::generate. */ + static volatile sequence_index_t generate_minimal_n; + + /** @brief Minimal input size for parallel std::fill. */ + static volatile sequence_index_t fill_minimal_n; + + /** @brief Minimal input size for parallel std::min_element. */ + static volatile sequence_index_t min_element_minimal_n; + + /** @brief Minimal input size for parallel std::max_element. */ + static volatile sequence_index_t max_element_minimal_n; + + /** @brief Minimal input size for parallel std::accumulate. */ + static volatile sequence_index_t accumulate_minimal_n; + + /** @brief Distribution strategy for parallel std::find. */ + static volatile FindDistribution find_distribution; + + /** @brief Start with looking for that many elements + sequentially, for std::find. 
*/ + static volatile sequence_index_t find_sequential_search_size; + + /** @brief Initial block size for parallel std::find. */ + static volatile sequence_index_t find_initial_block_size; + + /** @brief Maximal block size for parallel std::find. */ + static volatile sequence_index_t find_maximum_block_size; + + /** @brief Block size increase factor for parallel std::find. */ + static volatile double find_increasing_factor; + + //set operations + /** @brief Minimal input size for parallel std::set_union. */ + static volatile sequence_index_t set_union_minimal_n; + + /** @brief Minimal input size for parallel + std::set_symmetric_difference. */ + static volatile sequence_index_t set_symmetric_difference_minimal_n; + + /** @brief Minimal input size for parallel std::set_difference. */ + static volatile sequence_index_t set_difference_minimal_n; + + /** @brief Minimal input size for parallel std::set_intersection. */ + static volatile sequence_index_t set_intersection_minimal_n; + + //hardware dependent tuning parameters + /** @brief Size of the L1 cache in bytes (underestimation). */ + static volatile unsigned long long L1_cache_size; + + /** @brief Size of the L2 cache in bytes (underestimation). */ + static volatile unsigned long long L2_cache_size; + + /** @brief Size of the Translation Lookaside Buffer + (underestimation). */ + static volatile unsigned int TLB_size; + + /** @brief Overestimation of cache line size. Used to avoid + * false sharing, i. e. elements of different threads are at + * least this amount apart. 
*/ + static unsigned int cache_line_size; + + //statistics + /** @brief Statistic on the number of stolen ranges in + load-balanced quicksort.*/ + static volatile sequence_index_t qsb_steals; + }; + + /* Default values of the run-time settings declared above. */ + volatile bool Settings::force_parallel = false; + volatile bool Settings::force_sequential = false; + volatile Settings::SortAlgorithm Settings::sort_algorithm = Settings::MWMS; + volatile Settings::Splitting Settings::sort_splitting = Settings::EXACT; + volatile sequence_index_t Settings::sort_minimal_n = 1000; + + volatile unsigned int Settings::sort_mwms_oversampling = 10; + volatile unsigned int Settings::sort_qs_num_samples_preset = 100; + volatile sequence_index_t Settings::sort_qsb_base_case_maximal_n = 100; + volatile sequence_index_t Settings::partition_minimal_n = 1000; + volatile sequence_index_t Settings::nth_element_minimal_n = 1000; + volatile sequence_index_t Settings::partial_sort_minimal_n = 1000; + volatile sequence_index_t Settings::partition_chunk_size = 1000; + volatile double Settings::partition_chunk_share = 0.0; + volatile unsigned int Settings::adjacent_difference_minimal_n = 1000; + volatile Settings::PartialSumAlgorithm Settings::partial_sum_algorithm = Settings::LINEAR; + volatile unsigned int Settings::partial_sum_minimal_n = 1000; + volatile float Settings::partial_sum_dilatation = 1.0f; + volatile unsigned int Settings::random_shuffle_minimal_n = 1000; + volatile Settings::Splitting Settings::merge_splitting = Settings::EXACT; + volatile sequence_index_t Settings::merge_minimal_n = 1000; + volatile unsigned int Settings::merge_oversampling = 10; + volatile sequence_index_t Settings::multiway_merge_minimal_n = 1000; + volatile int Settings::multiway_merge_minimal_k = 2; + + // unique copy + volatile sequence_index_t Settings::unique_copy_minimal_n = 10000; + volatile Settings::MultiwayMergeAlgorithm Settings::multiway_merge_algorithm = Settings::LOSER_TREE; + volatile Settings::Splitting Settings::multiway_merge_splitting = 
Settings::EXACT; + volatile unsigned int Settings::multiway_merge_oversampling = 10; + volatile Settings::FindDistribution Settings::find_distribution = Settings::CONSTANT_SIZE_BLOCKS; + volatile sequence_index_t Settings::find_sequential_search_size = 256; + volatile sequence_index_t Settings::find_initial_block_size = 256; + volatile sequence_index_t Settings::find_maximum_block_size = 8192; + volatile double Settings::find_increasing_factor = 2.0; + volatile sequence_index_t Settings::workstealing_chunk_size = 100; + volatile sequence_index_t Settings::for_each_minimal_n = 1000; + volatile sequence_index_t Settings::count_minimal_n = 1000; + volatile sequence_index_t Settings::transform_minimal_n = 1000; + volatile sequence_index_t Settings::replace_minimal_n = 1000; + volatile sequence_index_t Settings::generate_minimal_n = 1000; + volatile sequence_index_t Settings::fill_minimal_n = 1000; + volatile sequence_index_t Settings::min_element_minimal_n = 1000; + volatile sequence_index_t Settings::max_element_minimal_n = 1000; + volatile sequence_index_t Settings::accumulate_minimal_n = 1000; + + //set operations + volatile sequence_index_t Settings::set_union_minimal_n = 1000; + volatile sequence_index_t Settings::set_intersection_minimal_n = 1000; + volatile sequence_index_t Settings::set_difference_minimal_n = 1000; + volatile sequence_index_t Settings::set_symmetric_difference_minimal_n = 1000; + volatile unsigned long long Settings::L1_cache_size = 16 << 10; + volatile unsigned long long Settings::L2_cache_size = 256 << 10; + volatile unsigned int Settings::TLB_size = 128; + unsigned int Settings::cache_line_size = 64; + + //statistics + volatile sequence_index_t Settings::qsb_steals = 0; +} // end anonymous namespace + +} + +#endif /* _GLIBCXX_PARALLEL_SETTINGS_H */ diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h new file mode 100644 index 00000000000..6b20edd9fab --- /dev/null +++ b/libstdc++-v3/include/parallel/sort.h @@ -0,0 
+1,104 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING. If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License. This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/sort.h
+ *  @brief Parallel sorting algorithm switch.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_SORT_H
+#define _GLIBCXX_PARALLEL_SORT_H 1
+
+#include
+#include
+#include
+
+#if _GLIBCXX_ASSERTIONS
+#include
+#endif
+
+#if _GLIBCXX_MERGESORT
+#include
+#endif
+
+#if _GLIBCXX_QUICKSORT
+#include
+#endif
+
+#if _GLIBCXX_BAL_QUICKSORT
+#include
+#endif
+
+namespace __gnu_parallel
+{
+  /**
+   *  @brief Dispatch to the sort selected by Settings::sort_algorithm.
+   *  @param begin Begin iterator of input sequence.
+   *  @param end End iterator of input sequence; an empty range is a no-op.
+   *  @param comp Comparator, handed unchanged to the chosen algorithm.
+   *  @param stable Request a stable sort; selects the MWMS branch if compiled in.
+   *  @callgraph
+   */
+  template
+  inline void
+  parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
+                Comparator comp, bool stable)
+  {
+    _GLIBCXX_CALL(end - begin)
+    typedef std::iterator_traits traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    if (begin != end)
+      {
+        difference_type n = end - begin;
+
+        if (false) ;  // Dummy head: lets every #if-guarded branch start with "else if".
+#if _GLIBCXX_MERGESORT
+        else if (Settings::sort_algorithm == Settings::MWMS || stable)
+          parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable);
+#endif
+#if _GLIBCXX_QUICKSORT
+        else if (Settings::sort_algorithm == Settings::QS && !stable)
+          parallel_sort_qs(begin, end, comp, n, get_max_threads());
+#endif
+#if _GLIBCXX_BAL_QUICKSORT
+        else if (Settings::sort_algorithm == Settings::QS_BALANCED && !stable)
+          parallel_sort_qsb(begin, end, comp, n, get_max_threads());
+#endif
+        else  // NOTE(review): also reached with stable set when the MWMS branch is compiled out.
+          __gnu_sequential::sort(begin, end, comp);  // presumably not a stable sort - confirm
+      }
+  }
+} // end namespace __gnu_parallel
+
+#endif /* _GLIBCXX_PARALLEL_SORT_H */
diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h
new file mode 100644
index 00000000000..80926b44384
--- /dev/null
+++ b/libstdc++-v3/include/parallel/tags.h
@@ -0,0 +1,124 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING. If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License. This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/**
+ * @file parallel/tags.h
+ * @brief Tags for compile-time options.
+ * This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler and Felix Putze.
+
+#ifndef _GLIBCXX_PARALLEL_TAGS_H
+#define _GLIBCXX_PARALLEL_TAGS_H 1
+
+// Parallel mode namespaces, opened here so the using-directive below can name them.
+namespace std
+{
+  namespace __parallel { }
+}
+
+/**
+ * @namespace __gnu_sequential
+ * @brief GNU sequential classes for public use.
+ */
+namespace __gnu_sequential { }
+
+/**
+ * @namespace __gnu_parallel
+ * @brief GNU parallel classes for public use.
+ */
+namespace __gnu_parallel
+{
+  // Import all the parallel versions of components in namespace std.
+  using namespace std::__parallel;
+
+  enum parallelism  // Execution strategies; all but sequential count as parallel.
+  {
+    /// Not parallel.
+    sequential,
+
+    /// Parallel unbalanced (equal-sized chunks).
+    parallel_unbalanced,
+
+    /// Parallel balanced (work-stealing).
+    parallel_balanced,
+
+    /// Parallel with OpenMP dynamic load-balancing.
+    parallel_omp_loop,
+
+    /// Parallel with OpenMP static load-balancing.
+    parallel_omp_loop_static,
+
+    /// Parallel with OpenMP taskqueue construct.
+    parallel_taskqueue
+  };
+
+  inline bool  // True for every strategy except sequential.
+  is_parallel(const parallelism __p) { return __p != sequential; }
+
+
+  /** @brief Forces sequential execution at compile time. */
+  struct sequential_tag { };
+
+  /** @brief Recommends parallel execution at compile time; base of the tags below. */
+  struct parallel_tag { };
+
+  /** @brief Recommends parallel execution using dynamic
+      load-balancing at compile time. */
+  struct balanced_tag : public parallel_tag { };
+
+  /** @brief Recommends parallel execution using static
+      load-balancing at compile time. */
+  struct unbalanced_tag : public parallel_tag { };
+
+  /** @brief Recommends parallel execution using OpenMP dynamic
+      load-balancing at compile time. */
+  struct omp_loop_tag : public parallel_tag { };
+
+  /** @brief Recommends parallel execution using OpenMP static
+      load-balancing at compile time. */
+  struct omp_loop_static_tag : public parallel_tag { };
+
+
+  /** @brief Selects the growing block size variant for std::find().
+      @see _GLIBCXX_FIND_GROWING_BLOCKS */
+  struct growing_blocks_tag { };
+
+  /** @brief Selects the constant block size variant for std::find().
+      @see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */
+  struct constant_size_blocks_tag { };
+
+  /** @brief Selects the equal splitting variant for std::find().
+      @see _GLIBCXX_FIND_EQUAL_SPLIT */
+  struct equal_split_tag { };
+}
+
+#endif /* _GLIBCXX_PARALLEL_TAGS_H */
diff --git a/libstdc++-v3/include/parallel/timing.h b/libstdc++-v3/include/parallel/timing.h
new file mode 100644
index 00000000000..f1f75225c15
--- /dev/null
+++ b/libstdc++-v3/include/parallel/timing.h
@@ -0,0 +1,217 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING. If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License. This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/timing.h
+ *  @brief Provides a simple tool to do performance debugging, also in
+ *  parallel code.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_TIMING_H
+#define _GLIBCXX_PARALLEL_TIMING_H 1
+
+#include
+#include
+#include
+#include
+
+namespace __gnu_parallel
+{
+  // XXX integrate with existing performance testing infrastructure.
+  /** @brief Type of a point in time, used for the Timing classes. */
+  typedef double point_in_time;
+
+  template
+  class Timing;
+
+  /** @brief A class that provides simple run time measurements, also
+      for parallel code.
+   *  @param tag If parallel_tag, then the measurements are actually done.
+   *  Otherwise, no code at all is emitted by the compiler. */
+  template
+  class Timing
+  {
+    private:
+      static const int max_points_in_time = 100;  // Capacity of both arrays below.
+      point_in_time points_in_time[max_points_in_time];  // Samples taken by tic().
+      point_in_time active, last_start;  // last_start == -1.0 means "not running".
+      int pos;  // Number of samples stored so far.
+      char* str;  // Report buffer owned by this object; rebuilt by c_str().
+      const char* tags[max_points_in_time];  // Caller-owned labels, not copied.
+
+    public:
+      Timing()
+      {
+        str = NULL;
+        pos = 0;
+        active = 0.0;
+        last_start = -1.0;
+      }
+
+      ~Timing()
+      {
+        delete[] str;
+      }
+
+      /** @brief Take a running time measurement.
+       *  @param tag Optional description that will be output again with
+       *  the timings. It should describe the operation before the tic().
+       *  To time a series of @c n operations, there should be @c n+1
+       *  calls to tic(), and one call to print(). Calls made once
+       *  max_points_in_time samples are stored are ignored. */
+      inline void
+      tic(const char* tag = NULL)
+      {
+        if (pos >= max_points_in_time) return;  // Full: drop, never overflow.
+        points_in_time[pos] = omp_get_wtime();
+        tags[pos] = tag;
+        pos++;
+      }
+
+      /** @brief Start the running time measurement.
+       *
+       *  Should be paired with stop(). */
+      inline void
+      start()
+      {
+        _GLIBCXX_PARALLEL_ASSERT(last_start == -1.0);
+        last_start = omp_get_wtime();
+      }
+
+      /** @brief Stop the running time measurement.
+       *
+       *  Should be paired with start(). */
+      inline void
+      stop()
+      {
+        _GLIBCXX_PARALLEL_ASSERT(last_start != -1.0);
+        active += (omp_get_wtime() - last_start);
+        last_start = -1.0;
+      }
+
+      /** @brief Reset running time accumulation. */
+      inline void
+      reset()
+      {
+        active = 0.0;
+        last_start = -1.0;
+      }
+
+      /** @brief Accumulate the time between all pairs of start() and
+          stop() so far */
+      inline point_in_time
+      active_time()
+      { return active; }
+
+      /** @brief Time between first and last tic(), times 1000; 0 if none */
+      inline point_in_time
+      total_time()
+      { return pos == 0 ? 0.0
+          : (points_in_time[pos - 1] - points_in_time[0]) * 1000.0; }
+
+    private:
+      /** @brief Construct string to print out, presenting the timings. */
+      const char*
+      c_str()
+      {
+        // Avoid stream library here, to avoid cyclic dependencies in
+        // header files.  Size the buffer from the real tag lengths: 64
+        // bytes for the header, 400 per row besides the tag itself.
+        size_t capacity = 64;
+        for (int i = 1; i < pos; ++i)
+          capacity += 400 + (tags[i] == NULL ? 0 : strlen(tags[i]));
+
+        // Reallocate on every call; pos may have grown since the last one.
+        delete[] str;
+        str = new char[capacity];
+
+        char* out = str;
+        out += sprintf(out, "t %2d T[ms]", omp_get_thread_num());
+        out += sprintf(out, "\n");
+
+        for (int i = 1; i < pos; ++i)
+          {
+            if (tags[i] == NULL)
+              out += sprintf(out, "%2d: ", i - 1);
+            else
+              out += sprintf(out, "%20s: ", tags[i]);
+
+            out += sprintf(out, "%7.2f ",
+                           (points_in_time[i] - points_in_time[i - 1]) * 1000.0);
+            out += sprintf(out, "\n");
+          }
+
+        return str;
+      }
+
+    public:
+      /** @brief Print the running times between the tic()s. */
+      void
+      print()
+      {
+        printf("print\n");
+#pragma omp barrier
+#pragma omp master
+        printf("\n\n");
+#pragma omp critical
+        printf("%s\n", c_str());
+      }
+  };
+
+  /** @brief A class that provides simple run time measurements, also
+      for parallel code.
+   *  @param tag If parallel_tag, then the measurements are actually done,
+   *  otherwise, no code at all is emitted by the compiler.
+   */
+  template
+  class Timing
+  {
+    private:
+      static const char* empty_string;
+
+    public:
+      inline void tic(const char* /*tag*/ = NULL) { }
+      inline void start() { }
+      inline void stop() { }
+      inline void reset() { }
+      inline point_in_time active_time() { return -1.0; }
+      inline point_in_time total_time() { return -1.0; }
+      inline const char* c_str() { return empty_string; }
+      inline void print() { }
+  };
+
+  template
+  const char* Timing::empty_string = "";
+
+}
+
+#endif /* _GLIBCXX_PARALLEL_TIMING_H */
diff --git a/libstdc++-v3/include/parallel/tree.h b/libstdc++-v3/include/parallel/tree.h
new file mode 100644
index 00000000000..8aa9269394d
--- /dev/null
+++ b/libstdc++-v3/include/parallel/tree.h
@@ -0,0 +1,3574 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.
This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 2, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING. If not, write to
+// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+// MA 02111-1307, USA.
+
+// As a special exception, you may use this file as part of a free
+// software library without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License. This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+/** @file parallel/tree.h
+ *  @brief Parallel red-black tree operations.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Leonor Frias Moya, Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_TREE_H
+#define _GLIBCXX_PARALLEL_TREE_H 1
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+//#include
+#include
+
+#include
+
+//#define _GLIBCXX_TIMING  (uncomment to make _timing_tag name parallel_tag)
+#ifdef _GLIBCXX_TIMING
+#define _timing_tag parallel_tag
+#else
+#define _timing_tag sequential_tag
+#endif
+
+namespace std
+{
+  // XXX Declaration should go to stl_tree.h.
+  void
+  _Rb_tree_rotate_left(_Rb_tree_node_base* const __x,
+                       _Rb_tree_node_base*& __root);
+
+  void
+  _Rb_tree_rotate_right(_Rb_tree_node_base* const __x,
+                        _Rb_tree_node_base*& __root);
+}
+
+
+namespace __gnu_parallel
+{
+  // XXX move into parallel/type_traits.h if doesn't work.
+  /** @brief Helper class: remove the const modifier from the first
+      component, if present. Generic (set kind) case: T is left unchanged.
+   *  @param T Simple type, nothing to unconst */
+  template
+  struct unconst_first_component
+  {
+    /** @brief New type after removing the const */
+    typedef T type;
+  };
+
+  /** @brief Helper class: remove the const modifier from the first
+      component, if present. Map kind component
+   *  @param Key First component, from which to remove the const modifier
+   *  @param Load Second component
+   *  @sa unconst_first_component */
+  template
+  struct unconst_first_component >
+  {
+    /** @brief New type after removing the const */
+    typedef std::pair type;
+  };
+
+  /** @brief Helper class: set the appropriate comparator to deal with
+   *  repetitions. Comparator for unique dictionaries.
+   *
+   *  StrictlyLess and LessEqual are part of a mechanism to deal with
+   *  repetitions transparently whatever the actual policy is.
+   *  @param _Key Keys to compare
+   *  @param _Compare Comparator equal to conceptual < */
+  template
+  struct StrictlyLess : public std::binary_function<_Key, _Key, bool>
+  {
+    /** @brief Comparator equal to conceptual < */
+    _Compare c;
+
+    /** @brief Constructor given a Comparator */
+    StrictlyLess(const _Compare& _c) : c(_c) { }
+
+    /** @brief Copy constructor */
+    StrictlyLess(const StrictlyLess<_Key, _Compare>& strictly_less)
+    : c(strictly_less.c) { }
+
+    /** @brief Forwards to c: true iff c(k1, k2) */
+    bool operator()(const _Key& k1, const _Key& k2) const
+    {
+      return c(k1, k2);
+    }
+  };
+
+  /** @brief Helper class: set the appropriate comparator to deal with
+   *  repetitions. Comparator for non-unique dictionaries.
+   *
+   *  StrictlyLess and LessEqual are part of a mechanism to deal with
+   *  repetitions transparently whatever the actual policy is.
+   *  @param _Key Keys to compare
+   *  @param _Compare Underlying comparator (conceptual <); operator() derives <= from it */
+  template
+  struct LessEqual : public std::binary_function<_Key, _Key, bool>
+  {
+    /** @brief Comparator equal to conceptual < */
+    _Compare c;
+
+    /** @brief Constructor given a Comparator */
+    LessEqual(const _Compare& _c) : c(_c) { }
+
+    /** @brief Copy constructor */
+    LessEqual(const LessEqual<_Key, _Compare>& less_equal)
+    : c(less_equal.c) { }
+
+    /** @brief Conceptual k1 <= k2, computed as !c(k2, k1) */
+    bool operator()(const _Key& k1, const _Key& k2) const
+    { return !c(k2, k1); }
+  };
+
+
+  /** @brief Parallel red-black tree.
+   *
+   *  Extension of the sequential red-black tree. Specifically,
+   *  parallel bulk insertion operations are provided.
+   *  @param _Key Keys to compare
+   *  @param _Val Elements to store in the tree
+   *  @param _KeyOfValue Obtains the key from an element
+   *  @param _Compare Comparator equal to conceptual <
+   *  @param _Alloc Allocator for the elements */
+  template >
+  class _Rb_tree : public std::_Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>
+  {
+    private:
+      /** @brief Sequential tree type that this class extends */
+      typedef std::_Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc> base_type;
+
+      /** @brief Renaming of base node type */
+      typedef typename std::_Rb_tree_node<_Val> _Rb_tree_node;
+
+      /** @brief Renaming of libstdc++ node type */
+      typedef typename std::_Rb_tree_node_base _Rb_tree_node_base;
+
+      /** @brief Renaming of base key_type */
+      typedef typename base_type::key_type key_type;
+
+      /** @brief Renaming of base value_type */
+      typedef typename base_type::value_type value_type;
+
+      /** @brief Helper class to unconst the first component of
+       * value_type if exists.
+       *
+       * This helper class is needed for map, but may discard qualifiers
+       * for set; however, a set with a const element type is not useful
+       * and should fail in some other place anyway.
+       */
+      typedef typename unconst_first_component::type nc_value_type;
+
+      /** @brief Shorthand for a pointer to a tree node */
+      typedef _Rb_tree_node* _Rb_tree_node_ptr;
+
+      /** @brief Comparator object for dictionaries with unique keys:
+          wraps the tree's _Compare for key type _Key so that duplicate
+          handling is transparent to the bulk algorithms
+      */
+      StrictlyLess<_Key, _Compare> strictly_less;
+
+      /** @brief Comparator object for dictionaries with repeated keys:
+          wraps the tree's _Compare for key type _Key so that duplicate
+          handling is transparent to the bulk algorithms
+      */
+      LessEqual<_Key, _Compare> less_equal;
+
+    public:
+      /** @brief Shorthand for the base class size_type */
+      typedef typename base_type::size_type size_type;
+
+      /** @brief Construct from a comparator and an allocator.
+       *
+       * Both arguments go to the sequential base class; the two helper
+       * comparators then wrap the comparator stored in the base.
+       * @param c Comparator handed to the base class
+       * @param a Allocator handed to the base class */
+      _Rb_tree(const _Compare& c, const _Alloc& a)
+      : base_type(c, a),
+        strictly_less(base_type::_M_impl._M_key_compare),
+        less_equal(base_type::_M_impl._M_key_compare)
+      { }
+
+      /** @brief Copy constructor.
+       *
+       * Copies the sequential base, then builds the two helper
+       * comparators from the comparator stored in the new base.
+       * @param __x Parallel red-black tree to copy */
+      _Rb_tree(const _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& __x)
+      : base_type(__x),
+        strictly_less(base_type::_M_impl._M_key_compare),
+        less_equal(base_type::_M_impl._M_key_compare)
+      { }
+
+      /** @brief Parallel replacement of the sequential
+       * std::_Rb_tree::_M_insert_unique()
+       *
+       * When the parallel condition holds, an empty tree is bulk
+       * constructed and a non-empty one receives a bulk insertion;
+       * otherwise the sequential base implementation runs.
+       * @param __first First element of the input
+       * @param __last Last element of the input */
+      template
+      void
+      _M_insert_unique(_InputIterator __first, _InputIterator __last)
+      {
+        if (__first == __last)
+          return;
+
+        if (_GLIBCXX_PARALLEL_CONDITION(true))
+          {
+            // The flag is true exactly when the tree holds no nodes yet.
+            const bool tree_is_empty = base_type::_M_impl._M_node_count == 0;
+            _M_bulk_insertion_construction(__first, __last, tree_is_empty,
+                                           strictly_less);
+            _GLIBCXX_PARALLEL_ASSERT(rb_verify());
+          }
+        else
+          base_type::_M_insert_unique(__first, __last);
+      }
+
+      /** @brief Parallel replacement of the sequential
+       * std::_Rb_tree::_M_insert_equal()
+       *
+       * Same dispatch as _M_insert_unique(), but repeated keys are
+       * kept: the bulk routine is driven by less_equal, and the tree
+       * is verified after the sequential fallback as well.
+       * @param __first First element of the input
+       * @param __last Last element of the input */
+      template
+      void
+      _M_insert_equal(_InputIterator __first, _InputIterator __last)
+      {
+        if (__first == __last)
+          return;
+
+        if (_GLIBCXX_PARALLEL_CONDITION(true))
+          {
+            const bool tree_is_empty = base_type::_M_impl._M_node_count == 0;
+            _M_bulk_insertion_construction(__first, __last, tree_is_empty,
+                                           less_equal);
+          }
+        else
+          base_type::_M_insert_equal(__first, __last);
+        _GLIBCXX_PARALLEL_ASSERT(rb_verify());
+      }
+
+    private:
+
+      /** @brief Helper class of _Rb_tree: node linking.
+       *
+       * Nodes linking forming an almost complete tree.
The last level + * is coloured red, the rest are black + * @param ranker Calculates the position of a node in an array of nodes + */ + template + class nodes_initializer + { + /** @brief Renaming of tree size_type */ + + typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type; + public: + + /** @brief mask[%i]= 0..01..1, where the number of 1s is %i+1 */ + size_type mask[sizeof(size_type)*8]; + + /** @brief Array of nodes (initial address) */ + const _Rb_tree_node_ptr* r_init; + + /** @brief Total number of (used) nodes */ + size_type n; + + /** @brief Rank of the last tree node that can be calculated + taking into account a complete tree + */ + size_type splitting_point; + + /** @brief Rank of the tree root */ + size_type rank_root; + + /** @brief Height of the tree */ + int height; + + /** @brief Number of threads into which divide the work */ + const thread_index_t num_threads; + + /** @brief Helper object to mind potential gaps in r_init */ + const ranker& rank; + + /** @brief Constructor + * @param r Array of nodes + * @param _n Total number of (used) nodes + * @param _num_threads Number of threads into which divide the work + * @param _rank Helper object to mind potential gaps in @c r_init */ + nodes_initializer(const _Rb_tree_node_ptr* r, const size_type _n, const thread_index_t _num_threads, const ranker& _rank): + r_init(r), + n(_n), + num_threads(_num_threads), + rank(_rank) + { + height = log2(n); + splitting_point = 2 * (n - ((1 << height) - 1)) -1; + + // Rank root. 
+ size_type max = 1 << (height + 1); + rank_root= (max-2) >> 1; + if (rank_root > splitting_point) + rank_root = complete_to_original(rank_root); + + mask[0] = 0x1; + for (unsigned int i = 1; i < sizeof(size_type)*8; ++i) + { + mask[i] = (mask[i-1] << 1) + 1; + } + } + + /** @brief Query for tree height + * @return Tree height */ + int get_height() const + { + return height; + } + + /** @brief Query for the splitting point + * @return Splitting point */ + size_type get_shifted_splitting_point() const + { + return rank.get_shifted_rank(splitting_point, 0); + } + + /** @brief Query for the tree root node + * @return Tree root node */ + _Rb_tree_node_ptr get_root() const + { + return r_init[rank.get_shifted_rank(rank_root,num_threads/2)]; + } + + /** @brief Calculation of the parent position in the array of nodes + * @hideinitializer */ +#define CALCULATE_PARENT \ + if (p_s> splitting_point) \ + p_s = complete_to_original(p_s); \ + int s_r = rank.get_shifted_rank(p_s,iam); \ + r->_M_parent = r_init[s_r]; \ + \ + /** @brief Link a node with its parent and children taking into + account that its rank (without gaps) is different to that in + a complete tree + * @param r Pointer to the node + * @param iam Partition of the array in which the node is, where + * iam is in [0..num_threads) + * @sa link_complete */ + void link_incomplete(const _Rb_tree_node_ptr& r, const int iam) const + { + size_type real_pos = rank.get_real_rank(&r-r_init, iam); + size_type l_s, r_s, p_s; + int mod_pos= original_to_complete(real_pos); + int zero= first_0_right(mod_pos); + + // 1. Convert n to n', where n' will be its rank if the tree + // was complete + // 2. Calculate neighbours for n' + // 3. Convert the neighbours n1', n2' and n3' to their + // appropiate values n1, n2, n3. Note that it must be + // checked that this neighbours reallly exist. 
+ calculate_shifts_pos_level(mod_pos, zero, l_s, r_s, p_s); + if (l_s > splitting_point) + { + _GLIBCXX_PARALLEL_ASSERT(r_s > splitting_point); + if (zero == 1) + { + r->_M_left = 0; + r->_M_right = 0; + } + else + { + r->_M_left= r_init[rank.get_shifted_rank(complete_to_original(l_s),iam)]; + r->_M_right= r_init[rank.get_shifted_rank(complete_to_original(r_s),iam)]; + } + + } + else{ + r->_M_left= r_init[rank.get_shifted_rank(l_s,iam)]; + if (zero != 1) + { + r->_M_right= r_init[rank.get_shifted_rank(complete_to_original(r_s),iam)]; + } + else + { + r->_M_right = 0; + } + } + r->_M_color = std::_S_black; + CALCULATE_PARENT; + } + + /** @brief Link a node with its parent and children taking into + account that its rank (without gaps) is the same as that in + a complete tree + * @param r Pointer to the node + * @param iam Partition of the array in which the node is, where + * iam is in [0..@c num_threads) + * @sa link_incomplete + */ + void link_complete(const _Rb_tree_node_ptr& r, const int iam) const + { + size_type real_pos = rank.get_real_rank(&r-r_init, iam); + size_type p_s; + + // Test if it is a leaf on the last not necessarily full level + if ((real_pos & mask[0]) == 0) + { + if ((real_pos & 0x2) == 0) + p_s = real_pos + 1; + else + p_s = real_pos - 1; + r->_M_color = std::_S_red; + r->_M_left = 0; + r->_M_right = 0; + } + else + { + size_type l_s, r_s; + int zero = first_0_right(real_pos); + calculate_shifts_pos_level(real_pos, zero, l_s, r_s, p_s); + r->_M_color = std::_S_black; + + r->_M_left = r_init[rank.get_shifted_rank(l_s,iam)]; + if (r_s > splitting_point) + r_s = complete_to_original(r_s); + r->_M_right = r_init[rank.get_shifted_rank(r_s,iam)]; + } + CALCULATE_PARENT; + } + +#undef CALCULATE_PARENT + + private: + /** @brief Change of "base": Convert the rank in the actual tree + into the corresponding rank if the tree was complete + * @param pos Rank in the actual incomplete tree + * @return Rank in the corresponding complete tree + * @sa 
complete_to_original */ + int original_to_complete(const int pos) const + { + return (pos << 1) - splitting_point; + } + + /** @brief Change of "base": Convert the rank if the tree was + complete into the corresponding rank in the actual tree + * @param pos Rank in the complete tree + * @return Rank in the actual incomplete tree + * @sa original_to_complete */ + int complete_to_original(const int pos) const + { + return (pos + splitting_point) >> 1; + } + + + /** @brief Calculate the rank in the complete tree of the parent + and children of a node + * @param pos Rank in the complete tree of the node whose parent + * and children rank must be calculated + * @param level Tree level in which the node at pos is in + * (starting to count at leaves). @pre @c level > 1 + * @param left_shift Rank in the complete tree of the left child + * of pos (out parameter) + * @param right_shift Rank in the complete tree of the right + * child of pos (out parameter) + * @param parent_shift Rank in the complete tree of the parent + * of pos (out parameter) + */ + void calculate_shifts_pos_level(const size_type pos, const int level, size_type& left_shift, size_type& right_shift, size_type& parent_shift) const + { + int stride = 1 << (level -1); + left_shift = pos - stride; + right_shift = pos + stride; + if (((pos >> (level + 1)) & 0x1) == 0) + parent_shift = pos + 2*stride; + else + parent_shift = pos - 2*stride; + } + + /** @brief Search for the first 0 bit (growing the weight) + * @param x Binary number (corresponding to a rank in the tree) + * whose first 0 bit must be calculated + * @return Position of the first 0 bit in @c x (starting to + * count with 1) + */ + int first_0_right(const size_type x) const + { + if ((x & 0x2) == 0) + return 1; + else + return first_0_right_bs(x); + } + + /** @brief Search for the first 0 bit (growing the weight) using + * binary search + * + * Binary search can be used instead of a naïve loop using the + * masks in mask array + * @param x Binary 
number (corresponding to a rank in the tree)
+         *  whose first 0 bit must be calculated
+         *  @param k_beg Position in which to start searching. By default is 2.
+         *  @return Position of the first 0 bit in x (starting to count with 1) */
+        int first_0_right_bs(const size_type x, int k_beg=2) const
+        {
+          // Upper end of the search interval: the bit width of size_type
+          // (assuming 8-bit bytes).
+          int k_end = sizeof(size_type)*8;
+          // NOTE(review): the mask array is declared outside this excerpt;
+          // presumably mask[k_end-1] has every bit set, so that not_x is
+          // the bitwise complement of x -- confirm.
+          size_type not_x = x ^ mask[k_end-1];
+          // Halve [k_beg, k_end) until a single candidate position is left.
+          while ((k_end-k_beg) > 1)
+            {
+              int k = k_beg + (k_end-k_beg)/2;
+              if ((not_x & mask[k-1]) != 0)
+                k_end = k;
+              else
+                k_beg = k;
+            }
+          return k_beg;
+        }
+      };
+
+      /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/
+      /** @brief Helper class of nodes_initializer: mind the gaps of an
+       *  array of nodes.
+       *
+       *  Get absolute positions in an array of nodes taking into account
+       *  the gaps in it @sa ranker_no_gaps
+       *
+       *  The object owns the @c beg_partition array (allocated in the
+       *  constructor, released in the destructor). The two arrays passed
+       *  to the constructor are only referenced, not copied, so they must
+       *  outlive the ranker.
+       */
+      class ranker_gaps
+      {
+        /** @brief Renaming of tree's size_type */
+        typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type;
+
+        /** @brief Array containing the beginning ranks of all the
+         *  num_threads partitions just considering the valid nodes, not
+         *  the gaps. Owned by this object. */
+        size_type* beg_partition;
+
+        /** @brief Array containing the beginning ranks of all the
+         *  num_threads partitions considering the valid nodes and the
+         *  gaps. Not owned. */
+        const size_type* beg_shift_partition;
+
+        /** @brief Array containing the number of accumulated gaps at
+         *  the beginning of each partition. Not owned. */
+        const size_type* rank_shift;
+
+        /** @brief Number of partitions (and threads that work on it) */
+        const thread_index_t num_threads;
+
+        /** @brief Assignment is not supported (num_threads is const);
+         *  declared private and left undefined so that an accidental
+         *  use is diagnosed. */
+        ranker_gaps& operator=(const ranker_gaps&);
+
+      public:
+        /** @brief Constructor
+         *  @param size_p Pointer to the array containing the beginning
+         *  ranks of all the @c _num_threads partitions considering the
+         *  valid nodes and the gaps (@c _num_threads + 1 entries are read)
+         *  @param shift_r Array containing the number of accumulated
+         *  gaps at the beginning of each partition (@c _num_threads + 1
+         *  entries are read)
+         *  @param _num_threads Number of partitions (and threads that
+         *  work on it) */
+        ranker_gaps(const size_type* size_p, const size_type* shift_r, const thread_index_t _num_threads) :
+          beg_shift_partition(size_p),
+          rank_shift(shift_r),
+          num_threads(_num_threads)
+        {
+          beg_partition = new size_type[num_threads+1];
+          beg_partition[0] = 0;
+          for (int i = 1; i <= num_threads; ++i)
+            {
+              // Valid nodes in partition i-1 = its gapped width minus the
+              // gaps accumulated inside it.
+              beg_partition[i] = beg_partition[i-1] + (beg_shift_partition[i] - beg_shift_partition[i-1]) - (rank_shift[i] - rank_shift[i-1]);
+            }
+
+          // Ghost element, strictly larger than any index requested.
+          ++beg_partition[num_threads];
+        }
+
+        /** @brief Copy constructor
+         *
+         *  Performs a deep copy of the owned @c beg_partition array.
+         *  The implicitly generated copy would share the pointer and
+         *  both destructors would then delete the same array.
+         *  @param other Ranker to copy from */
+        ranker_gaps(const ranker_gaps& other) :
+          beg_partition(new size_type[other.num_threads+1]),
+          beg_shift_partition(other.beg_shift_partition),
+          rank_shift(other.rank_shift),
+          num_threads(other.num_threads)
+        {
+          for (int i = 0; i <= num_threads; ++i)
+            beg_partition[i] = other.beg_partition[i];
+        }
+
+        /** @brief Destructor
+         *  Needs to be defined to deallocate the dynamic memory that has
+         *  been allocated for beg_partition array
+         */
+        ~ranker_gaps()
+        {
+          delete[] beg_partition;
+        }
+
+        /** @brief Convert a rank in the array of nodes considering
+         *  valid nodes and gaps, to the corresponding considering only
+         *  the valid nodes
+         *  @param pos Rank in the array of nodes considering valid nodes and gaps
+         *  @param index Partition which the rank belongs to
+         *  @return Rank in the array of nodes considering only the valid nodes
+         *  @sa get_shifted_rank
+         */
+        size_type get_real_rank(const size_type pos, const int index) const
+        {
+          return pos - rank_shift[index];
+        }
+
+        /** @brief Inverse of get_real_rank: Convert a rank in the array
+         *  of nodes considering only valid nodes, to the corresponding
+         *  considering valid nodes and gaps
+         *  @param pos Rank in the array of nodes considering only valid nodes
+         *  @param index Partition which the rank is most likely to
+         *  belong to (ie. the corresponding if there were no gaps)
+         *  @pre 0 <= @c pos <= number_of_distinct_elements
+         *  @return Rank in the array of nodes considering valid nodes and gaps
+         *  @post 0 <= @c return <= number_of_elements
+         *  @sa get_real_rank()
+         */
+        size_type get_shifted_rank(const size_type pos, const int index) const
+        {
+          // Heuristic: the guessed partition is usually the right one.
+          if (beg_partition[index] <= pos and pos < beg_partition[index+1])
+            return pos + rank_shift[index];
+          else
+            // Called rarely, do not hinder inlining.
+            return get_shifted_rank_loop(pos,index);
+        }
+
+        /** @brief Helper method of get_shifted_rank: in case the given
+         *  index in get_shifted_rank is not correct, look for it and
+         *  then calculate the rank
+         *  @param pos Rank in the array of nodes considering only valid nodes
+         *  @param index Partition which the rank should have belonged to
+         *  if there were no gaps
+         *  @return Rank in the array of nodes considering valid nodes and gaps
+         */
+        size_type get_shifted_rank_loop(const size_type pos, int index) const
+        {
+          // Walk forwards, then backwards, until pos lies inside
+          // [beg_partition[index], beg_partition[index+1]).
+          while (pos >= beg_partition[index+1])
+            ++index;
+          while (pos < beg_partition[index])
+            --index;
+          _GLIBCXX_PARALLEL_ASSERT(0 <= index && index < num_threads);
+          return pos + rank_shift[index];
+        }
+      };
+
+      /** @brief Helper class of nodes_initializer: access an array of
+       *  nodes with no gaps
+       *
+       *  Get absolute positions in an array of nodes taking into account
+       *  that there are no gaps in it. @sa ranker_gaps */
+      class ranker_no_gaps
+      {
+        /** @brief Renaming of tree's size_type */
+        typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type;
+
+      public:
+        /** @brief Convert a rank in the array of nodes considering
+         *  valid nodes and gaps, to the corresponding considering only
+         *  the valid nodes
+         *
+         *  As there are no gaps in this case, get_shifted_rank() and
+         *  get_real_rank() are synonyms and make no change on pos
+         *  @param pos Rank in the array of nodes considering valid nodes and gaps
+         *  @param index Partition which the rank belongs to, unused here
+         *  @return Rank in the array of nodes considering only the valid nodes */
+        size_type get_real_rank(const size_type pos, const int index) const
+        {
+          return pos;
+        }
+
+        /** @brief Inverse of get_real_rank: Convert a rank in the array
+         *  of nodes considering only valid nodes, to the corresponding
+         *  considering valid nodes and gaps
+         *
+         *  As there are no gaps in this case, get_shifted_rank() and
+         *  get_real_rank() are synonyms and make no change on pos
+         *  @param
pos Rank in the array of nodes considering only valid nodes
+         *  @param index Partition which the rank belongs to, unused here
+         *  @return Rank in the array of nodes considering valid nodes and gaps
+         */
+        size_type get_shifted_rank(const size_type pos, const int index) const
+        {
+          return pos;
+        }
+      };
+
+
+      /** @brief Helper comparator class: Invert a binary comparator
+       *
+       *  The comparator is applied to the keys extracted from the two
+       *  values with _KeyOfValue, and its result is negated.
+       *  @param _Comp Comparator to invert
+       *  @param _Iterator Iterator to the elements to compare */
+      // NOTE(review): the template parameter list was lost from this text;
+      // it is reconstructed from the @param list above -- confirm the order.
+      template<typename _Comp, typename _Iterator>
+      class gr_or_eq
+      {
+        /** @brief Renaming value_type of _Iterator */
+        typedef typename std::iterator_traits<_Iterator>::value_type value_type;
+
+        /** @brief Comparator to be inverted */
+        const _Comp comp;
+
+      public:
+        /** @brief Constructor
+         *  @param c Comparator */
+        gr_or_eq(const _Comp& c) : comp(c) { }
+
+        /** @brief Operator()
+         *  @param a First value to compare
+         *  @param b Second value to compare
+         *  @return true iff comp(key(a), key(b)) is false */
+        bool operator()(const value_type& a, const value_type& b) const
+        { return not comp(_KeyOfValue()(a), _KeyOfValue()(b)); }
+      };
+
+      /** @brief Helper comparator class: Passed as a parameter of
+       *  list_partition to check that a sequence is sorted
+       *  @param _InputIterator Iterator to the elements to compare
+       *  @param _CompIsSorted Comparator to check for sortedness */
+      // NOTE(review): template parameter list reconstructed from the
+      // @param list above -- confirm the order.
+      template<typename _InputIterator, typename _CompIsSorted>
+      class is_sorted_functor
+      {
+        /** @brief Element to compare with (first parameter of comp) */
+        _InputIterator prev;
+
+        /** @brief Comparator to check for sortedness */
+        const _CompIsSorted comp;
+
+        /** @brief Sum up the history of the operator() of this
+         *  comparator class. Its value is true as long as no call to
+         *  comp made by this class has returned true, i.e. no element
+         *  has compared before its predecessor.
It is false otherwise */
+        bool sorted;
+
+      public:
+        /** @brief Constructor
+         *
+         *  Sorted is set to true
+         *  @param first Element to compare with the first time the
+         *  operator() is called
+         *  @param c Comparator to check for sortedness */
+        is_sorted_functor(const _InputIterator first, const _CompIsSorted c)
+        : prev(first), comp(c), sorted(true) { }
+
+        /** @brief Operator() with only one explicit parameter. Updates
+         *  the class members @c prev and @c sorted.
+         *
+         *  The sequence is flagged as unsorted as soon as the key of
+         *  @c *it compares before the key of @c *prev. Once unsorted, no
+         *  further comparisons are made.
+         *  @param it Iterator to the element which must be compared to
+         *  the element pointed by the class member @c prev */
+        void operator()(const _InputIterator it)
+        {
+          if (sorted and it != prev and comp(_KeyOfValue()(*it),_KeyOfValue()(*prev)))
+            sorted = false;
+          prev = it;
+        }
+
+        /** @brief Query method for sorted
+         *  @return Current value of sorted */
+        bool is_sorted() const
+        {
+          return sorted;
+        }
+      };
+
+      /** @brief Helper functor: sort the input based upon elements
+       *  instead of keys
+       *  @param KeyComparator Comparator for the key of values */
+      // The template parameter list and the binary_function arguments
+      // were lost from this text; they are reconstructed from the @param
+      // line above and from the signature of operator().
+      template<typename KeyComparator>
+      class ValueCompare
+      : public std::binary_function<value_type, value_type, bool>
+      {
+        /** @brief Comparator for the key of values */
+        const KeyComparator comp;
+
+      public:
+        /** @brief Constructor
+         *  @param c Comparator for the key of values */
+        ValueCompare(const KeyComparator& c): comp(c) { }
+
+        /** @brief Operator(): Analogous to comp but for values and not keys
+         *  @param v1 First value to compare
+         *  @param v2 Second value to compare
+         *  @return Result of the comparison */
+        bool operator()(const value_type& v1, const value_type& v2) const
+        { return comp(_KeyOfValue()(v1),_KeyOfValue()(v2)); }
+      };
+
+      /** @brief Helper comparator: compare a key with the key in a node
+       *
+       *  Only a reference to the comparator is stored, so the
+       *  comparator passed to the constructor must outlive this object.
+       *  @param _Comparator Comparator for keys */
+      template<typename _Comparator>
+      struct compare_node_key
+      {
+        /** @brief Comparator for keys */
+        const _Comparator& c;
+
+        /** @brief Constructor
+         *  @param _c Comparator for keys */
+        compare_node_key(const _Comparator& _c) : c(_c) { }
+
+        /** @brief Operator() with the
first parameter being a node
+         *  @param r Node whose key is to be compared
+         *  @param k Key to be compared
+         *  @return Result of the comparison */
+        bool operator()(const _Rb_tree_node_ptr r, const key_type& k) const
+        { return c(base_type::_S_key(r),k); }
+
+        /** @brief Operator() with the second parameter being a node
+         *  @param k Key to be compared
+         *  @param r Node whose key is to be compared
+         *  @return Result of the comparison */
+        bool operator()(const key_type& k, const _Rb_tree_node_ptr r) const
+        { return c(k, base_type::_S_key(r)); }
+      };
+
+      /** @brief Helper comparator: compare a key with the key of a
+       *  value pointed by an iterator
+       *
+       *  Only a reference to the comparator is stored, so the
+       *  comparator passed to the constructor must outlive this object.
+       *  @param _Iterator Iterator to the values whose key is compared
+       *  @param _Comparator Comparator for keys */
+      // NOTE(review): the template parameter list was lost from this text.
+      // Both names are used in the body, but their order is a
+      // reconstruction -- confirm against the call sites.
+      template<typename _Iterator, typename _Comparator>
+      struct compare_value_key
+      {
+        /** @brief Comparator for keys */
+        const _Comparator& c;
+
+        /** @brief Constructor
+         *  @param _c Comparator for keys */
+        compare_value_key(const _Comparator& _c) : c(_c){ }
+
+        /** @brief Operator() with the first parameter being an iterator
+         *  @param v Iterator to the value whose key is to be compared
+         *  @param k Key to be compared
+         *  @return Result of the comparison */
+        bool operator()(const _Iterator& v, const key_type& k) const
+        { return c(_KeyOfValue()(*v),k); }
+
+        /** @brief Operator() with the second parameter being an iterator
+         *  @param k Key to be compared
+         *  @param v Iterator to the value whose key is to be compared
+         *  @return Result of the comparison */
+        bool operator()(const key_type& k, const _Iterator& v) const
+        { return c(k, _KeyOfValue()(*v)); }
+      };
+
+      /** @brief Helper class of _Rb_tree to avoid some symmetric code
+       *  in tree operations */
+      struct LeftRight
+      {
+        /** @brief Obtain the conceptual left child of a node
+         *  @param parent Node whose child must be obtained
+         *  @return Reference to the child node */
+        static _Rb_tree_node_base*& left(_Rb_tree_node_base* parent)
+        { return parent->_M_left; }
+
+        /** @brief Obtain the conceptual right child of a node
+         *  @param parent Node whose child must be
obtained
+         *  @return Reference to the child node */
+        static _Rb_tree_node_base*& right(_Rb_tree_node_base* parent)
+        { return parent->_M_right; }
+      };
+
+      /** @brief Helper class of _Rb_tree to avoid some symmetric code
+       *  in tree operations: inverse the symmetry
+       *  @param S Symmetry to inverse
+       *  @sa LeftRight */
+      template<typename S>
+      struct Opposite
+      {
+        /** @brief Obtain the conceptual left child of a node, inversing
+         *  the symmetry
+         *  @param parent Node whose child must be obtained
+         *  @return Reference to the child node */
+        static _Rb_tree_node_base*& left(_Rb_tree_node_base* parent)
+        { return S::right(parent);}
+
+        /** @brief Obtain the conceptual right child of a node,
+         *  inversing the symmetry
+         *  @param parent Node whose child must be obtained
+         *  @return Reference to the child node */
+        static _Rb_tree_node_base*& right(_Rb_tree_node_base* parent)
+        { return S::left(parent);}
+      };
+
+      /** @brief Inverse symmetry of LeftRight */
+      typedef Opposite<LeftRight> RightLeft;
+
+      /** @brief Helper comparator to compare value pointers, so that
+       *  the value is taken
+       *  @param Comparator Comparator for values
+       *  @param _ValuePtr Pointer to values */
+      template<typename Comparator, typename _ValuePtr>
+      class PtrComparator : public std::binary_function<_ValuePtr, _ValuePtr, bool>
+      {
+        /** @brief Comparator for values */
+        Comparator comp;
+
+      public:
+        /** @brief Constructor
+         *  @param comp Comparator for values */
+        PtrComparator(Comparator comp) : comp(comp) { }
+
+        /** @brief Operator(): compare the values instead of the pointers
+         *  @param v1 Pointer to the first element to compare
+         *  @param v2 Pointer to the second element to compare */
+        bool operator()(const _ValuePtr& v1, const _ValuePtr& v2) const
+        { return comp(*v1,*v2); }
+      };
+
+      /** @brief Iterator over an array of pointers that dereferences
+       *  straight to the pointed-to values
+       *
+       *  Fixes with respect to the previous revision:
+       *  - @c reference is now @c value_type&, which is what operator*
+       *    and operator[] actually return (it was @c ValuePtr&).
+       *  - @c difference_type is now signed, so that iterator
+       *    subtraction can yield negative distances.
+       *  - The constructor from <tt>const ValuePtr&</tt>, the assignment
+       *    from @c ValuePtr and <tt>operator+(const PtrIterator&)</tt>
+       *    are removed. Each was ill-formed for every instantiation
+       *    (dropping a const qualifier, or adding two pointers), so no
+       *    compiling code could have used them.
+       *  - operator== and operator!= are const.
+       *  @param _ValueTp Type pointed by the pointers */
+      template<typename _ValueTp>
+      class PtrIterator
+      {
+      public:
+        /** @brief The iterator category is random access iterator */
+        typedef typename std::random_access_iterator_tag iterator_category;
+        typedef _ValueTp value_type;
+        typedef ptrdiff_t difference_type;
+        typedef value_type* ValuePtr;
+        typedef value_type& reference;
+        /** @brief Type of the underlying position: pointer into the
+         *  array of value pointers (kept as is, since the constructor
+         *  below is declared in terms of it) */
+        typedef value_type** pointer;
+
+        /** @brief Position in the array of value pointers */
+        value_type** ptr;
+
+        /** @brief Trivial constructor: null (singular) iterator */
+        PtrIterator() : ptr(0) { }
+
+        /** @brief Constructor from a pointer into the pointer array */
+        PtrIterator(const pointer& __i) : ptr(__i) { }
+
+        /** @brief Copy constructor */
+        PtrIterator(const PtrIterator& __i) : ptr(__i.ptr) { }
+
+        /** @brief Access the value (not the pointer) at the current
+         *  position */
+        reference
+        operator*() const
+        { return **ptr; }
+
+        /** @brief Member access on the value at the current position */
+        ValuePtr
+        operator->() const
+        { return *ptr; }
+
+        /** @brief Bidirectional iterator requirement */
+        PtrIterator&
+        operator++()
+        {
+          ++ptr;
+          return *this;
+        }
+
+        /** @brief Bidirectional iterator requirement */
+        PtrIterator
+        operator++(int)
+        { return PtrIterator(ptr++); }
+
+        /** @brief Bidirectional iterator requirement */
+        PtrIterator&
+        operator--()
+        {
+          --ptr;
+          return *this;
+        }
+
+        /** @brief Bidirectional iterator requirement */
+        PtrIterator
+        operator--(int)
+        { return PtrIterator(ptr--); }
+
+        /** @brief Random access iterator requirement */
+        reference
+        operator[](const difference_type& __n) const
+        { return *ptr[__n]; }
+
+        /** @brief Random access iterator requirement */
+        PtrIterator&
+        operator+=(const difference_type& __n)
+        {
+          ptr += __n;
+          return *this;
+        }
+
+        /** @brief Random access iterator requirement */
+        PtrIterator
+        operator+(const difference_type& __n) const
+        { return PtrIterator(ptr + __n); }
+
+        /** @brief Random access iterator requirement */
+        PtrIterator&
+        operator-=(const difference_type& __n)
+        {
+          ptr -= __n;
+          return *this;
+        }
+
+        /** @brief Random access iterator requirement */
+        PtrIterator
+        operator-(const difference_type& __n) const
+        { return PtrIterator(ptr - __n); }
+
+        /** @brief Random access iterator requirement: signed distance
+         *  between two iterators */
+        difference_type
+        operator-(const PtrIterator& iter) const
+        { return ptr - iter.ptr; }
+
+        /** @brief Copy assignment */
+        PtrIterator& operator=(const PtrIterator& piter)
+        {
+          ptr = piter.ptr;
+          return *this;
+        }
+
+        bool operator==(const PtrIterator& piter) const
+        { return ptr == piter.ptr; }
+
+        bool operator!=(const PtrIterator& piter) const
+        { return ptr != piter.ptr; }
+
+      };
+
+
+      /** @brief Bulk insertion helper: synchronization and construction
+       *  of the tree bottom up */
+      struct concat_problem
+      {
+        /** @brief Root of a tree.
+         *
+         *  Input: Middle node to concatenate two subtrees. Out: Root of
+         *  the resulting concatenated tree. */
+        _Rb_tree_node_ptr t;
+
+        /** @brief Black height of @c t */
+        int black_h;
+
+        /** @brief Synchronization variable.
+         *
+         *  \li READY_YES: the root of the tree can be concatenated with
+         *  the result of the children concatenation problems (both of
+         *  them have finished).
+         *  \li READY_NO: at least one of the children
+         *  concatenation_problem have not finished */
+        int is_ready;
+
+        /** @brief Parent concatenation problem to solve when @c
+         *  is_ready = READY_YES */
+        concat_problem* par_problem;
+
+        /** @brief Left concatenation problem */
+        concat_problem* left_problem;
+
+        /** @brief Right concatenation problem */
+        concat_problem* right_problem;
+
+        /** @brief Value NO for the synchronization variable. */
+        static const int READY_NO = 0;
+
+        /** @brief Value YES for the synchronization variable. */
+        static const int READY_YES = 1;
+
+        /** @brief Trivial constructor.
+         *
+         *  Initialize the synchronization variable to not ready, and
+         *  every other member to a null/zero value so that none is
+         *  left indeterminate. */
+        concat_problem(): t(NULL), black_h(0), is_ready(READY_NO), par_problem(NULL), left_problem(NULL), right_problem(NULL) { }
+
+        /** @brief Constructor.
+         *
+         *  Initialize the synchronization variable to not ready.
+         *  @param _t Root of a tree.
+         *  @param _black_h Black height of @c _t
+         *  @param _par_problem Parent concatenation problem to solve
+         *  when @c is_ready = READY_YES
+         */
+        concat_problem(const _Rb_tree_node_ptr _t, const int _black_h, concat_problem* _par_problem):
+          t(_t),
+          black_h(_black_h),
+          is_ready(READY_NO),
+          par_problem(_par_problem),
+          // The children are attached later; start from a well-defined
+          // null value instead of indeterminate pointers.
+          left_problem(NULL),
+          right_problem(NULL)
+        {
+          // The root of an insertion problem must be black. Recolouring
+          // a red root adds one black node to every path.
+          if (t != NULL and t->_M_color == std::_S_red)
+            {
+              t->_M_color = std::_S_black;
+              ++black_h;
+            }
+        }
+      };
+
+
+      /** @brief Bulk insertion helper: insertion of a sequence of
+       *  elements in a subtree
+       *  @invariant t, pos_beg and pos_end will not change after initialization
+       */
+      struct insertion_problem
+      {
+        /** @brief Renaming of _Rb_tree @c size_type */
+        typedef typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type size_type;
+
+        /** @brief Root of the tree where the elements are to be inserted */
+        _Rb_tree_node_ptr t;
+
+        /** @brief Position of the first node in the array of nodes to
+         *  be inserted into @c t */
+        size_type pos_beg;
+
+        /** @brief Position of the first node in the array of nodes
+         *  that won't be inserted into @c t */
+        size_type pos_end;
+
+        /** @brief Partition in the array of nodes of @c pos_beg and @c
+         *  pos_end (must be the same for both, and so gaps are
+         *  avoided) */
+        int array_partition;
+
+        /** @brief Concatenation problem to solve once the insertion
+         *  problem is finished */
+        concat_problem* conc;
+
+        /** @brief Trivial constructor. */
+        insertion_problem()
+        { }
+
+        /** @brief Constructor.
+         *  @param b Position of the first node in the array of nodes to
+         *  be inserted into @c _conc->t
+         *  @param e Position of the first node in the array of nodes
+         *  that won't be inserted into @c _conc->t
+         *  @param array_p Partition in the array of nodes of @c b and @c e
+         *  @param _conc Concatenation problem to solve once the
+         *  insertion problem is finished. It is dereferenced to obtain
+         *  the root, so it must not be null.
+         *  @pre @c b <= @c e, and the root @c _conc->t is null or not red
+         *  (both are only checked through _GLIBCXX_PARALLEL_ASSERT)
+         */
+        insertion_problem(const size_type b, const size_type e, const int array_p, concat_problem* _conc)
+        : t(_conc->t), pos_beg(b), pos_end(e), array_partition(array_p), conc(_conc)
+        {
+          _GLIBCXX_PARALLEL_ASSERT(pos_beg <= pos_end);
+
+          // The root of an insertion problem must be black.
+          _GLIBCXX_PARALLEL_ASSERT(t == NULL or t->_M_color != std::_S_red);
+        }
+      };
+
+
+      /** @brief Main bulk construction and insertion helper method
+       *  @param __first First element in a sequence to be added into the tree
+       *  @param __last End of the sequence of elements to be added into the tree
+       *  @param is_construction If true, the tree was empty and so, this
+       *  is constructed. Otherwise, the elements are added to an
+       *  existing tree.
+       *  @param strictly_less_or_less_equal Comparator to deal
+       *  transparently with repetitions with respect to the uniqueness
+       *  of the wrapping container
+       *
+       *  The input sequence is preprocessed so that the bulk
+       *  construction or insertion can be performed
+       *  efficiently. Essentially, the sequence is checked for
+       *  sortedness and iterators to the middle of the structure are
+       *  saved so that afterwards the sequence can be processed
+       *  effectively in parallel.
*/
+      // The template parameter list and the two static_cast target
+      // types below were lost from this text; they are reconstructed
+      // from the types used in the signature and in the comparisons.
+      template<typename _InputIterator, typename StrictlyLessOrLessEqual>
+      void
+      _M_bulk_insertion_construction(const _InputIterator __first, const _InputIterator __last, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+      {
+        Timing<_timing_tag> t;
+
+        t.tic();
+
+        thread_index_t num_threads = get_max_threads();
+        size_type n;
+        // One extra slot each: entry i marks the beginning of partition
+        // i, entry num_threads marks the end of the last one.
+        size_type beg_partition[num_threads+1];
+        _InputIterator access[num_threads+1];
+        beg_partition[0] = 0;
+        bool is_sorted= is_sorted_distance_accessors(__first, __last, access, beg_partition,n, num_threads, std::__iterator_category(__first));
+
+        t.tic("is_sorted");
+
+        // Empty input: there is nothing to construct or insert. Without
+        // this guard the code below would go on with num_threads == 0
+        // (sorted case) or with an empty scratch buffer (unsorted case).
+        if (n == 0)
+          {
+            t.print();
+            return;
+          }
+
+        if (not is_sorted)
+          {
+            _M_not_sorted_bulk_insertion_construction(access, beg_partition, n, num_threads, is_construction, strictly_less_or_less_equal);
+          }
+        else
+          {
+            // The vector must be moved... all ranges must have at least
+            // one element, or make just sequential???
+            if (static_cast<size_type>(num_threads) > n)
+              {
+                // Fewer elements than threads: drop the empty partitions
+                // by compacting the non-empty ones to the front.
+                int j = 1;
+                for (int i = 1; i <= num_threads; ++i)
+                  {
+                    if (beg_partition[j-1] != beg_partition[i])
+                      {
+                        beg_partition[j] = beg_partition[i];
+                        access[j] = access[i];
+                        ++j;
+                      }
+                  }
+                num_threads = static_cast<thread_index_t>(n);
+              }
+
+            if (is_construction)
+              _M_sorted_bulk_construction(access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+            else
+              _M_sorted_bulk_insertion(access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+          }
+
+        t.tic("main work");
+
+        t.print();
+      }
+
+      /** @brief Bulk construction and insertion helper method on an
+       *  input sequence which is not sorted
+       *
+       *  The elements are copied, according to the copy policy, in order
+       *  to be sorted. Then the
+       *  _M_not_sorted_bulk_insertion_construction() method is called
+       *  appropriately
+       *  @param access Array of iterators of size @c num_threads +
+       *  1. Each position contains the first element in each subsequence
+       *  to be added into the tree.
+       *  @param beg_partition Array of positions of size @c num_threads
+       *  + 1.
Each position contains the rank of the first element in
+       *  each subsequence to be added into the tree.
+       *  @param n Size of the sequence to be inserted
+       *  @param num_threads Number of threads and corresponding
+       *  subsequences in which the insertion work is going to be shared
+       *  @param is_construction If true, the tree was empty and so, this
+       *  is constructed. Otherwise, the elements are added to an
+       *  existing tree.
+       *  @param strictly_less_or_less_equal Comparator to deal
+       *  transparently with repetitions with respect to the uniqueness
+       *  of the wrapping container */
+      // Template parameter list, cast target types and explicit template
+      // arguments were lost from this text; they are reconstructed from
+      // the fragments that survived (" >", ", this_ptr_comparator>",
+      // ", nc_value_type*>") and from the parameter types of the overload
+      // being called.
+      template<typename _InputIterator, typename StrictlyLessOrLessEqual>
+      void
+      _M_not_sorted_bulk_insertion_construction(_InputIterator* access,
+                                                size_type* beg_partition,
+                                                const size_type n,
+                                                const thread_index_t num_threads,
+                                                const bool is_construction,
+                                                StrictlyLessOrLessEqual strictly_less_or_less_equal)
+      {
+        // Copy entire elements. In the case of a map, we would be
+        // copying the pair. Therefore, the copy should be reconsidered
+        // when objects are big. Essentially two cases:
+        // - The key is small: make that the pair, is a pointer to data
+        //   instead of a copy to it
+        // - The key is big: we simply have a pointer to the iterator
+        //
+        // In both cases the scratch buffer v is raw storage from
+        // ::operator new; ownership passes to the overload called below,
+        // which releases it.
+#if _GLIBCXX_TREE_FULL_COPY
+        nc_value_type* v = static_cast<nc_value_type*> (::operator new(sizeof(nc_value_type)*(n+1)));
+
+        uninitialized_copy_from_accessors(access, beg_partition, v, num_threads);
+
+        _M_not_sorted_bulk_insertion_construction<nc_value_type, nc_value_type*, ValueCompare<_Compare> >
+          (beg_partition, v, ValueCompare<_Compare>(base_type::_M_impl._M_key_compare), n, num_threads, is_construction, strictly_less_or_less_equal);
+#else
+        // For sorting, we cannot use the new PtrIterator because we
+        // want the pointers to be exchanged and not the elements.
+        typedef PtrComparator<ValueCompare<_Compare>, nc_value_type*> this_ptr_comparator;
+        nc_value_type** v = static_cast<nc_value_type**> (::operator new(sizeof(nc_value_type*)*(n+1)));
+
+        uninitialized_ptr_copy_from_accessors(access, beg_partition, v, num_threads);
+
+        _M_not_sorted_bulk_insertion_construction<nc_value_type*, PtrIterator<nc_value_type>, this_ptr_comparator>
+          (beg_partition, v, this_ptr_comparator(ValueCompare<_Compare>(base_type::_M_impl._M_key_compare)), n, num_threads, is_construction, strictly_less_or_less_equal);
+#endif
+      }
+
+      /** @brief Bulk construction and insertion helper method on an
+       *  input sequence which is not sorted
+       *
+       *  The elements are sorted and its accessors calculated. Then,
+       *  _M_sorted_bulk_construction() or _M_sorted_bulk_insertion() is
+       *  called. Finally the scratch array @c v is destroyed and
+       *  released.
+       *  @param beg_partition Array of positions of size @c num_threads
+       *  + 1. Each position contains the rank of the first element in
+       *  each subsequence to be added into the tree.
+       *  @param v Array of elements to be sorted (copy of the original
+       *  sequence). Must be storage obtained from <tt>::operator new</tt>
+       *  whose first @c n slots hold constructed objects. This method
+       *  takes ownership of it.
+       *  @param comp Comparator to be used for sorting the elements
+       *  @param n Size of the sequence to be inserted
+       *  @param num_threads Number of threads and corresponding
+       *  subsequences in which the insertion work is going to be shared
+       *  @param is_construction If true, _M_sorted_bulk_construction()
+       *  is called. Otherwise, _M_sorted_bulk_insertion() is called.
+       *  @param strictly_less_or_less_equal Comparator to deal
+       *  transparently with repetitions with respect to the uniqueness
+       *  of the wrapping container
+       */
+      template<typename ElementsToSort, typename IteratorSortedElements, typename Comparator, typename StrictlyLessOrLessEqual>
+      void
+      _M_not_sorted_bulk_insertion_construction(size_type* beg_partition, ElementsToSort* v, Comparator comp, const size_type n, thread_index_t num_threads, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+      {
+        // The accessors have been calculated for the non sorted.
+        Timing<_timing_tag> t;
+
+        t.tic();
+
+        // Never use more threads than there are elements.
+        num_threads = static_cast<thread_index_t>(std::min<size_type>(num_threads, n));
+
+        std::stable_sort(v, v+n, comp);
+
+        t.tic("sort");
+
+        IteratorSortedElements sorted_access[num_threads+1];
+        range_accessors(IteratorSortedElements(v), IteratorSortedElements(v+n), sorted_access, beg_partition, n, num_threads, std::__iterator_category(v));
+
+        t.tic("range_accessors");
+
+        // Partial template specialization not available.
+        if (is_construction)
+          _M_sorted_bulk_construction(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+        else
+          _M_sorted_bulk_insertion(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
+
+        // v is raw storage from ::operator new whose elements were
+        // constructed in place, so a plain `delete v` is wrong: it
+        // destroys only v[0], which is not even constructed when n == 0.
+        // Destroy every element explicitly (a no-op for the pointer
+        // variant), then release the raw storage.
+        // NOTE(review): assumes the caller's uninitialized_*copy helper
+        // constructed exactly n elements -- confirm.
+        for (size_type i = 0; i < n; ++i)
+          v[i].~ElementsToSort();
+        ::operator delete(v);
+
+        t.tic("actual construction or insertion");
+
+        t.print();
+      }
+
+      /** @brief Construct a tree sequentially using the parallel routine
+       *  @param r_array Array of nodes from which to take the nodes to
+       *  build the tree
+       *  @param pos_beg Position of the first node in the array of nodes
+       *  to be part of the tree
+       *  @param pos_end Position of the first node in the array of nodes
+       *  that will not be part of the tree
+       *  @param black_h Black height of the resulting tree (out)
+       */
+      static _Rb_tree_node_ptr
+      simple_tree_construct(_Rb_tree_node_ptr* r_array, const size_type pos_beg, const size_type pos_end, int& black_h)
+      {
+        if (pos_beg == pos_end)
+          {
+            black_h = 0;
+            return NULL;
+          }
+        if (pos_beg+1 == pos_end)
+          {
+            // It is needed, not only for efficiency but because the
+            // last level in our tree construction is red.
+            make_leaf(r_array[pos_beg], black_h);
+            return r_array[pos_beg];
+          }
+
+        // (The unused "dummy b_p" array of the previous revision was
+        // dropped: it was written but never read.)
+        _Rb_tree_node_ptr* r= r_array + pos_beg;
+        size_type length = pos_end - pos_beg;
+
+        // Single partition, no gaps. The template argument was lost
+        // from this text and is reconstructed from the type of rank.
+        ranker_no_gaps rank;
+        nodes_initializer<ranker_no_gaps> nodes_init(r, length, 1, rank);
+
+        black_h = nodes_init.get_height();
+
+        // Nodes before the splitting point are linked with
+        // link_complete, the remaining ones with link_incomplete.
+        size_type split = nodes_init.get_shifted_splitting_point();
+        for (size_type i = 0; i < split; ++i)
+          nodes_init.link_complete(r[i],0);
+
+        for (size_type i = split; i < length; ++i)
+          nodes_init.link_incomplete(r[i],0);
+
+        _Rb_tree_node_ptr t = nodes_init.get_root();
+        _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(t));
+        _GLIBCXX_PARALLEL_ASSERT(t->_M_color == std::_S_black);
+        return t;
+      }
+
+
+      /** @brief Allocation of an array of nodes and initialization of
+       *  their value fields from an input sequence. Done in parallel.
+       *  @param access Array of iterators of size @c num_threads +
+       *  1. Each position contains the first value in the subsequence to
+       *  be copied into the corresponding tree node.
+       *  @param beg_partition Array of positions of size @c num_threads
+       *  + 1. Each position contains the rank of the first element in
+       *  the subsequence from which to copy the data to initialize the
+       *  nodes.
+       *  @param n Size of the sequence and the array of nodes to be allocated.
+       *  @param num_threads Number of threads and corresponding
+       *  subsequences in which the allocation and initialization work is
+       *  going to be shared
+       *  @return Array of @c n + 1 node pointers obtained from
+       *  <tt>::operator new</tt>. The slots written are those of the
+       *  ranges described by @c access and @c beg_partition.
+       */
+      template<typename _Iterator>
+      _Rb_tree_node_ptr* _M_unsorted_bulk_allocation_and_initialization(const _Iterator* access, const size_type* beg_partition, const size_type n, const thread_index_t num_threads)
+      {
+        _Rb_tree_node_ptr* r = static_cast<_Rb_tree_node_ptr*> (::operator new (sizeof(_Rb_tree_node_ptr)*(n+1)));
+
+        // Allocate and initialize the nodes (don't check for uniqueness
+        // because the sequence is not necessarily sorted).
+#pragma omp parallel num_threads(num_threads)
+        {
+#if USE_PAPI
+          PAPI_register_thread();
+#endif
+
+          // Each thread fills the slots of its own partition only.
+          int iam = omp_get_thread_num();
+          _Iterator it = access[iam];
+          size_type i = beg_partition[iam];
+          while (it!= access[iam+1])
+            {
+              r[i] = base_type::_M_create_node(*it);
+              ++i;
+              ++it;
+            }
+        }
+        return r;
+      }
+
+
+      /** @brief Allocation of an array of nodes and initialization of
+       *  their value fields from an input sequence. Done in
+       *  parallel. Besides, the sequence is checked for uniqueness while
+       *  copying the elements, and if there are repetitions, gaps within
+       *  the partitions are created.
+       *
+       *  An extra ghost node pointer is reserved in the array to ease
+       *  comparisons later while linking the nodes
+       *  @pre The sequence is sorted.
+       *  @param access Array of iterators of size @c num_threads +
+       *  1. Each position contains the first value in the subsequence to
+       *  be copied into the corresponding tree node. Updated on return.
+       *  @param beg_partition Array of positions of size @c num_threads
+       *  + 1. Each position contains the rank of the first element in
+       *  the subsequence from which to copy the data to initialize the
+       *  nodes. Updated on return if partitions had to be merged.
+       *  @param rank_shift Array of size @c num_threads + 1 containing
+       *  the number of accumulated gaps at the beginning of each
+       *  partition (out)
+       *  @param n Size of the sequence and the array of nodes (-1) to be
+       *  allocated.
+       *  @param num_threads Number of threads and corresponding
+       *  subsequences in which the allocation and initialization work is
+       *  going to be shared. Reduced on return if partitions that ended
+       *  up empty were merged into their predecessor.
+       *  @param strictly_less_or_less_equal Comparator to deal
+       *  transparently with repetitions with respect to the uniqueness
+       *  of the wrapping container
+       */
+      template<typename _Iterator, typename StrictlyLessOrLessEqual>
+      _Rb_tree_node_ptr* _M_sorted_bulk_allocation_and_initialization(_Iterator* access, size_type* beg_partition, size_type* rank_shift, const size_type n, thread_index_t& num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
+      {
+        // Ghost node at the end to avoid extra comparisons in nodes_initializer.
+        _Rb_tree_node_ptr* r = static_cast<_Rb_tree_node_ptr*> (::operator new (sizeof(_Rb_tree_node_ptr)*(n+1)));
+        r[n] = NULL;
+
+        // Dealing with repetitions (EFFICIENCY ISSUE).
+        // Threads overwrite access[iam] below, so the partition ends
+        // are read from a private snapshot.
+        _Iterator access_copy[num_threads+1];
+        for (int i = 0; i <= num_threads; ++i)
+          access_copy[i] = access[i];
+        // Allocate and initialize the nodes
+#pragma omp parallel num_threads(num_threads)
+        {
+#if USE_PAPI
+          PAPI_register_thread();
+#endif
+          thread_index_t iam = omp_get_thread_num();
+          _Iterator prev = access[iam];
+          size_type i = beg_partition[iam];
+          _Iterator it = prev;
+          if (iam != 0)
+            {
+              // Compare against the last element of the previous partition.
+              --prev;
+              // Dealing with repetitions (CORRECTNESS ISSUE).
+              while (it!= access_copy[iam+1] and not strictly_less_or_less_equal(_KeyOfValue()(*prev), _KeyOfValue()(*it)))
+                {
+                  _GLIBCXX_PARALLEL_ASSERT(not base_type::_M_impl._M_key_compare(_KeyOfValue()(*it),_KeyOfValue()(*prev)));
+                  ++it;
+                }
+              // First element of this partition that is actually kept.
+              access[iam] = it;
+              if (it != access_copy[iam+1]){
+                r[i] = base_type::_M_create_node(*it);
+                ++i;
+                prev=it;
+                ++it;
+              }
+            }
+          else
+            {
+              r[i] = base_type::_M_create_node(*prev);
+              ++i;
+              ++it;
+            }
+          while (it!= access_copy[iam+1])
+            {
+              /***** Dealing with repetitions (CORRECTNESS ISSUE) *****/
+              if (strictly_less_or_less_equal(_KeyOfValue()(*prev),_KeyOfValue()(*it)))
+                {
+                  r[i] = base_type::_M_create_node(*it);
+                  ++i;
+                  prev=it;
+                }
+              else{
+                _GLIBCXX_PARALLEL_ASSERT(not base_type::_M_impl._M_key_compare(_KeyOfValue()(*it),_KeyOfValue()(*prev)));
+              }
+              ++it;
+            }
+          /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/
+          // Number of slots of this partition left unused (the gap).
+          rank_shift[iam+1] = beg_partition[iam+1] - i;
+        }
+        /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/
+        rank_shift[0] = 0;
+        /* Guarantee that there are no empty intervals.
+           - If an empty interval is found, is joined with the previous one
+           (the rank_shift of the previous is augmented with all the new
+           repetitions)
+        */
+        thread_index_t i = 1;
+        while (i <= num_threads and rank_shift[i] != (beg_partition[i] - beg_partition[i-1]))
+          {
+            rank_shift[i] += rank_shift[i-1];
+            ++i;
+          }
+        if (i <= num_threads)
+          {
+            thread_index_t j = i - 1;
+            while (true)
+              {
+                do
+                  {
+                    rank_shift[j] += rank_shift[i];
+                    ++i;
+                  } while (i <= num_threads and rank_shift[i] == (beg_partition[i] - beg_partition[i-1]));
+
+                beg_partition[j] = beg_partition[i-1];
+                access[j] = access[i-1];
+                if (i > num_threads) break;
+                ++j;
+
+                // Initialize with the previous.
+                rank_shift[j] = rank_shift[j-1];
+              }
+            num_threads = j;
+          }
+        return r;
+
+      }
+
+      /** @brief Allocation of an array of nodes and initialization of
+       *  their value fields from an input sequence.
+       *
+       *  The allocation and initialization is done in parallel. Besides,
+       *  the sequence is checked for uniqueness while copying the
+       *  elements. However, in contrast to
+       *  _M_sorted_bulk_allocation_and_initialization(), if there are
+       *  repetitions, no gaps within the partitions are created. To do
+       *  so efficiently, some extra memory is needed to compute a prefix
+       *  sum.
+       *  @pre The sequence is sorted.
+       *  @param access Array of iterators of size @c num_threads +
+       *  1. Each position contains the first value in the subsequence to
+       *  be copied into the corresponding tree node.
+       *  @param beg_partition Array of positions of size @c num_threads
+       *  + 1. Each position contains the rank of the first element in
+       *  the subsequence from which to copy the data to initialize the
+       *  nodes.
+       *  @param n Size of the sequence and the array of nodes (-1) to be
+       *  allocated.
+ * @param num_threads Number of threads and corresponding + * subsequences in which the allocation and initialization work is + * going to be shared + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + */ + template + _Rb_tree_node_ptr* _M_sorted_no_gapped_bulk_allocation_and_initialization(_Iterator* access, size_type* beg_partition, size_type& n, const thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { + size_type* sums = static_cast (::operator new (sizeof(size_type)*n)); + // Allocate and initialize the nodes + /* try + {*/ +#pragma omp parallel num_threads(num_threads) + { +#if USE_PAPI + PAPI_register_thread(); +#endif + int iam = omp_get_thread_num(); + _Iterator prev = access[iam]; + size_type i = beg_partition[iam]; + _Iterator it = prev; + if (iam !=0) + { + --prev; + + // First iteration here, to update accessor in case was + // equal to the last element of the previous range + + // Dealing with repetitions (CORRECTNESS ISSUE). + if (strictly_less_or_less_equal(_KeyOfValue()(*prev),_KeyOfValue()(*it))) + { + sums[i] = 0; + prev=it; + } + else + { + sums[i] = 1; + } + ++i; + ++it; + } + else + { + sums[i] = 0; + ++i; + ++it; + } + while (it!= access[iam+1]) + { + // Dealing with repetitions (CORRECTNESS ISSUE). + if (strictly_less_or_less_equal(_KeyOfValue()(*prev),_KeyOfValue()(*it))) + { + sums[i] = 0; + prev=it; + } + else + sums[i] = 1; + ++i; + ++it; + } + } + // Should be done in parallel. 
+ partial_sum(sums,sums + n, sums); + + n -= sums[n-1]; + _Rb_tree_node_ptr* r = static_cast<_Rb_tree_node_ptr*> (::operator new (sizeof(_Rb_tree_node_ptr)*(n+1))); + r[n]=0; + +#pragma omp parallel num_threads(num_threads) + { +#if USE_PAPI + PAPI_register_thread(); +#endif + int iam = omp_get_thread_num(); + _Iterator it = access[iam]; + size_type i = beg_partition[iam]; + size_type j = i; + size_type before = 0; + if (iam > 0) + { + before = sums[i-1]; + j -= sums[i-1]; + } + beg_partition[iam] = j; + while (it!= access[iam+1]) + { + while (it!= access[iam+1] and sums[i]!=before) + { + before = sums[i]; + ++i; + ++it; + } + if (it!= access[iam+1]) + { + r[j] = base_type::_M_create_node(*it); + ++j; + ++i; + ++it; + } + } + + } + beg_partition[num_threads] = n; + + // Update beginning of partitions. + ::operator delete(sums); + return r; + } + + /** @brief Main bulk construction method: perform the actual + initialization, allocation and finally node linking once the + input sequence has already been preprocessed. + * @param access Array of iterators of size @c num_threads + + * 1. Each position contains the first value in the subsequence to + * be copied into the corresponding tree node. + * @param beg_partition Array of positions of size @c num_threads + * + 1. Each position contains the rank of the first element in + * the subsequence from which to copy the data to initialize the + * nodes. + * @param n Size of the sequence and the array of nodes (-1) to be + * allocated. 
+ * @param num_threads Number of threads and corresponding + * subsequences in which the work is going to be shared + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + */ + template + void + _M_sorted_bulk_construction(_Iterator* access, size_type* beg_partition, const size_type n, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { + Timing<_timing_tag> t; + + // Dealing with repetitions (EFFICIENCY ISSUE). + size_type rank_shift[num_threads+1]; + + t.tic(); + + _Rb_tree_node_ptr* r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, n, num_threads, strictly_less_or_less_equal); + + t.tic("bulk allocation and initialization"); + + // Link the tree appropiately. + // Dealing with repetitions (EFFICIENCY ISSUE). + ranker_gaps rank(beg_partition, rank_shift, num_threads); + nodes_initializer nodes_init(r, n - rank_shift[num_threads], num_threads, rank); + size_type split = nodes_init.get_shifted_splitting_point(); + +#pragma omp parallel num_threads(num_threads) + { +#if USE_PAPI + PAPI_register_thread(); +#endif + int iam = omp_get_thread_num(); + size_type beg = beg_partition[iam]; + // Dealing with repetitions (EFFICIENCY ISSUE). + size_type end = beg_partition[iam+1] - (rank_shift[iam+1] - rank_shift[iam]); + if (split >= end) + { + for (size_type i = beg; i < end; ++i) + { + nodes_init.link_complete(r[i],iam); + } + } + else + { + if (split <= beg) + { + for (size_type i = beg; i < end; ++i) + nodes_init.link_incomplete(r[i],iam); + } + else + { + for (size_type i = beg; i < split; ++i) + nodes_init.link_complete(r[i],iam); + for (size_type i = split; i < end; ++i) + nodes_init.link_incomplete(r[i],iam); + } + } + } + // If the execution reachs this point, there has been no + // exception, and so the structure can be initialized. + + // Join the tree laid on the array of ptrs with the header node. 
+ // Dealing with repetitions (EFFICIENCY ISSUE). + base_type::_M_impl._M_node_count = n - rank_shift[num_threads]; + base_type::_M_impl._M_header._M_left = r[0]; + thread_index_t with_element = num_threads; + while ((beg_partition[with_element] - beg_partition[with_element-1]) == (rank_shift[with_element] - rank_shift[with_element-1])) + { + --with_element; + } + base_type::_M_impl._M_header._M_right = r[beg_partition[with_element] - (rank_shift[with_element] - rank_shift[with_element-1]) - 1]; + base_type::_M_impl._M_header._M_parent = nodes_init.get_root(); + nodes_init.get_root()->_M_parent= &base_type::_M_impl._M_header; + + t.tic("linking nodes"); + ::operator delete(r); + + t.tic("delete array of pointers"); + t.print(); + } + + + /** @brief Main bulk insertion method: perform the actual + initialization, allocation and finally insertion once the + input sequence has already been preprocessed. + * @param access Array of iterators of size @c num_threads + + * 1. Each position contains the first value in the subsequence to + * be copied into the corresponding tree node. + * @param beg_partition Array of positions of size @c num_threads + * + 1. Each position contains the rank of the first element in + * the subsequence from which to copy the data to initialize the + * nodes. 
+ * @param k Size of the sequence to be inserted (including the + * possible repeated elements among the sequence itself and + * against those elements already in the tree) + * @param num_threads Number of threads and corresponding + * subsequences in which the work is going to be shared + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + */ + template + void + _M_sorted_bulk_insertion(_Iterator* access, size_type* beg_partition, size_type k, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { + _GLIBCXX_PARALLEL_ASSERT((size_type)num_threads <= k); + Timing<_timing_tag> t; + + t.tic(); + + // num_thr-1 problems in the upper part of the tree + // num_thr problems to further parallelize + std::vector existing(num_threads,0); +#if _GLIBCXX_TREE_INITIAL_SPLITTING + /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/ + size_type rank_shift[num_threads+1]; + + // Need to create them dynamically because they are so erased + concat_problem* conc[2*num_threads-1]; +#endif + _Rb_tree_node_ptr* r; + /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/ + if (not strictly_less_or_less_equal(base_type::_S_key(base_type::_M_root()),base_type::_S_key(base_type::_M_root()) )) + { + // Unique container + // Set 1 and 2 could be done in parallel ... + // 1. Construct the nodes with their corresponding data +#if _GLIBCXX_TREE_INITIAL_SPLITTING + r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, k, num_threads, strictly_less_or_less_equal); + t.tic("bulk allocation and initialization"); +#else + r = _M_sorted_no_gapped_bulk_allocation_and_initialization(access, beg_partition, k, num_threads, strictly_less_or_less_equal); +#endif + } + else + { + // Not unique container. 
+ r = _M_unsorted_bulk_allocation_and_initialization(access, beg_partition, k, num_threads); +#if _GLIBCXX_TREE_INITIAL_SPLITTING + // Trivial initialization of rank_shift. + for (int i=0; i <= num_threads; ++i) + rank_shift[i] = 0; +#endif + } +#if _GLIBCXX_TREE_INITIAL_SPLITTING + // Calculate position of last element to be inserted: must be + // done now, or otherwise becomes messy. + + /***** Dealing with + repetitions (EFFICIENCY ISSUE) *****/ + size_type last = beg_partition[num_threads] - (rank_shift[num_threads] - rank_shift[num_threads - 1]); + + t.tic("last element to be inserted"); + + //2. Split the tree according to access in num_threads parts + //Initialize upper concat_problems + //Allocate them dinamically because they are afterwards so erased + for (int i=0; i < (2*num_threads-1); ++i) + { + conc[i] = new concat_problem (); + } + concat_problem* root_problem = _M_bulk_insertion_initialize_upper_problems(conc, 0, num_threads, NULL); + + // The first position of access and the last are ignored, so we + // have exactly num_threads subtrees. + bool before = omp_get_nested(); + omp_set_nested(true); + _M_bulk_insertion_split_tree_by_pivot(static_cast<_Rb_tree_node_ptr>(base_type::_M_root()), r, access, beg_partition, rank_shift, 0, num_threads-1, conc, num_threads, strictly_less_or_less_equal); + omp_set_nested(before); + + // Construct upper tree with the first elements of ranges if + // they are NULL We cannot do this by default because they could + // be repeated and would not be checked. 
+ size_type r_s = 0; + for (int pos = 1; pos < num_threads; ++pos) + { + _GLIBCXX_PARALLEL_ASSERT(conc[(pos-1)*2]->t == NULL or conc[pos*2-1]->t == NULL or strictly_less_or_less_equal(base_type::_S_key(base_type::_S_maximum(conc[(pos-1)*2]->t)), base_type::_S_key(conc[pos*2-1]->t))); + _GLIBCXX_PARALLEL_ASSERT(conc[pos*2]->t == NULL or conc[pos*2-1]->t == NULL or strictly_less_or_less_equal( base_type::_S_key(conc[pos*2-1]->t), base_type::_S_key(base_type::_S_minimum(conc[pos*2]->t)))); + /***** Dealing with repetitions (CORRECTNESS ISSUE) *****/ + + // The first element of the range is the root. + if (conc[pos*2-1]->t == NULL or (not(strictly_less_or_less_equal(base_type::_S_key(static_cast<_Rb_tree_node_ptr>(conc[pos*2-1]->t)), _KeyOfValue()(*access[pos]))))) + { + // There was not a candidate element + // or + // Exists an initialized position in the array which + // corresponds to conc[pos*2-1]->t */ + if (conc[pos*2-1]->t == NULL) + { + size_t np = beg_partition[pos]; + _GLIBCXX_PARALLEL_ASSERT(conc[(pos-1)*2]->t == NULL or strictly_less_or_less_equal(base_type::_S_key(base_type::_S_maximum(conc[(pos-1)*2]->t)), base_type::_S_key(r[np]))); + _GLIBCXX_PARALLEL_ASSERT(conc[pos*2]->t == NULL or strictly_less_or_less_equal( base_type::_S_key(r[np]), base_type::_S_key(base_type::_S_minimum(conc[pos*2]->t)))); + conc[pos*2-1]->t = r[np]; + r[np]->_M_color = std::_S_black; + ++base_type::_M_impl._M_node_count; + } + else + { + base_type::_M_destroy_node(r[beg_partition[pos]]); + } + ++(access[pos]); + ++(beg_partition[pos]); + ++r_s; + } + _GLIBCXX_PARALLEL_ASSERT(conc[(pos-1)*2]->t == NULL or conc[(pos-1)*2]->t->_M_color == std::_S_black); + /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/ + rank_shift[pos] += r_s; + } + /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/ + rank_shift[num_threads] += r_s; +#else + concat_problem root_problem_on_stack(static_cast<_Rb_tree_node_ptr>(base_type::_M_root()), 
black_height(static_cast<_Rb_tree_node_ptr>(base_type::_M_root())), NULL); + concat_problem * root_problem = &root_problem_on_stack; + size_type last = k; +#endif + + t.tic("sorted_no_gapped..."); + + // 3. Split the range according to tree and create + // 3. insertion/concatenation problems to be solved in parallel +#if _GLIBCXX_TREE_DYNAMIC_BALANCING + size_type min_problem = (k/num_threads) / (log2(k/num_threads + 1)+1); +#else + size_type min_problem = base_type::size() + k; +#endif + + RestrictedBoundedConcurrentQueue* ins_problems[num_threads]; + +#pragma omp parallel num_threads(num_threads) + { + int num_thread = omp_get_thread_num(); + ins_problems[num_thread] = new RestrictedBoundedConcurrentQueue(2*(log2(base_type::size())+1)); +#if _GLIBCXX_TREE_INITIAL_SPLITTING + /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/ + size_type end_k_thread = beg_partition[num_thread+1] - (rank_shift[num_thread+1] - rank_shift[num_thread]); + ins_problems[num_thread]->push_front(insertion_problem(beg_partition[num_thread], end_k_thread, num_thread, conc[num_thread*2])); +#else + // size_type end_k_thread = beg_partition[num_thread+1]; +#endif + insertion_problem ip_to_solve; + bool change; + +#if _GLIBCXX_TREE_INITIAL_SPLITTING +#pragma omp barrier +#else +#pragma omp single + ins_problems[num_thread]->push_front(insertion_problem(0, k, num_thread, root_problem)); +#endif + + do + { + // First do own work. + while (ins_problems[num_thread]->pop_front(ip_to_solve)) + { + _GLIBCXX_PARALLEL_ASSERT(ip_to_solve.pos_beg <= ip_to_solve.pos_end); + _M_bulk_insertion_split_sequence(r, ins_problems[num_thread], ip_to_solve, existing[num_thread], min_problem, strictly_less_or_less_equal); + + } + yield(); + change = false; + + //Then, try to steal from others (and become own). 
+ for (int i=1; ipop_back(ip_to_solve)) + { + change = true; + _M_bulk_insertion_split_sequence(r, ins_problems[num_thread], ip_to_solve, existing[num_thread], min_problem, strictly_less_or_less_equal); + break; + } + } + } while (change); + } + + t.tic("merging"); + + // Update root and sizes. + base_type::_M_root() = root_problem->t; + root_problem->t->_M_parent = &(base_type::_M_impl._M_header); + /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/ + + // Add the k elements that wanted to be inserted, minus the ones + // that were repeated. +#if _GLIBCXX_TREE_INITIAL_SPLITTING + base_type::_M_impl._M_node_count += (k - (rank_shift[num_threads])); +#else + base_type::_M_impl._M_node_count += k; +#endif + // Also then, take out the ones that were already existing in the tree. + for (int i = 0; i< num_threads; ++i) + { + base_type::_M_impl._M_node_count -= existing[i]; + } + // Update leftmost and rightmost. + /***** Dealing with repetitions (EFFICIENCY ISSUE) *****/ + if (not strictly_less_or_less_equal(base_type::_S_key(base_type::_M_root()), base_type::_S_key(base_type::_M_root()))){ + // Unique container. 
+ if (base_type::_M_impl._M_key_compare(_KeyOfValue()(*(access[0])), base_type::_S_key(base_type::_M_leftmost()))) + base_type::_M_leftmost() = r[0]; + if (base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_M_rightmost()), _KeyOfValue()(*(--access[num_threads])))) + base_type::_M_rightmost() = r[last - 1]; + } + else{ + if (strictly_less_or_less_equal(_KeyOfValue()(*(access[0])), base_type::_S_key(base_type::_M_leftmost()))) + base_type::_M_leftmost() = base_type::_S_minimum(base_type::_M_root()); + if (strictly_less_or_less_equal(base_type::_S_key(base_type::_M_rightmost()), _KeyOfValue()(*(--access[num_threads])))) + base_type::_M_rightmost() = base_type::_S_maximum(base_type::_M_root()); + } + + + + +#if _GLIBCXX_TREE_INITIAL_SPLITTING + // Delete root problem + delete root_problem; +#endif + + // Delete queues + for (int pos = 0; pos < num_threads; ++pos) + { + delete ins_problems[pos]; + } + + // Delete array of pointers + ::operator delete(r); + + t.tic(); + t.print(); + } + + + /** @brief Divide a tree according to the splitter elements of a + * given sequence. + * + * The tree of the intial recursive call is divided in exactly + * num_threads partitions, some of which may be empty. Besides, + * some nodes may be extracted from it to afterwards concatenate + * the subtrees resulting from inserting the elements into it. + * This is done sequentially. It could be done in parallel but the + * performance is much worse. + * @param t Root of the tree to be splitted + * @param r Array of nodes to be inserted into the tree (here only + * used to look up its elements) + * @param access Array of iterators of size @c num_threads + + * 1. Each position contains the first value in the subsequence + * that has been copied into the corresponding tree node. + * @param beg_partition Array of positions of size @c num_threads + * + 1. Each position contains the rank of the first element in + * the array of nodes to be inserted. 
+ * @param rank_shift Array of size @c num_threads + 1 containing + * the number of accumulated gaps at the beginning of each + * partition + * @param pos_beg First position in the access array to be + * considered to split @c t + * @param pos_end Last position (included) in the access array to + * be considered to split @c t + * @param conc Array of concatenation problems to be initialized + * @param num_threads Number of threads and corresponding + * subsequences in which the original sequence has been + * partitioned + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + */ + template + void + _M_bulk_insertion_split_tree_by_pivot(_Rb_tree_node_ptr t, _Rb_tree_node_ptr* r, _Iterator* access, size_type* beg_partition, size_type* rank_shift, const size_type pos_beg, const size_type pos_end, concat_problem** conc, const thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { + if (pos_beg == pos_end) + { + //Elements are in [pos_beg, pos_end] + conc[pos_beg*2]->t = t; + conc[pos_beg*2]->black_h = black_height(t); + force_black_root (conc[pos_beg*2]->t, conc[pos_beg*2]->black_h); + return; + } + if (t == 0) + { + for (size_type i = pos_beg; i < pos_end; ++i) + { + conc[i*2]->t = NULL; + conc[i*2]->black_h = 0; + conc[i*2+1]->t = NULL; + } + conc[pos_end*2]->t = NULL; + conc[pos_end*2]->black_h = 0; + return; + } + + // Return the last pos, in which key >= (pos-1). 
+ // Search in the range [pos_beg, pos_end] + size_type pos = std::upper_bound(access + pos_beg, access + pos_end + 1, base_type::_S_key(t), compare_value_key<_Iterator, _Compare>(base_type::_M_impl._M_key_compare)) - access; + if (pos != pos_beg) + { + --pos; + } + _GLIBCXX_PARALLEL_ASSERT(pos == 0 or not base_type::_M_impl._M_key_compare(base_type::_S_key(t), _KeyOfValue()(*access[pos]))); + + + _Rb_tree_node_ptr ll, lr; + int black_h_ll, black_h_lr; + _Rb_tree_node_ptr rl, rr; + int black_h_rl, black_h_rr; + + if (pos != pos_beg) + { + _Rb_tree_node_ptr prev = r[beg_partition[pos] - 1 - (rank_shift[pos] - rank_shift[pos - 1])]; + + _GLIBCXX_PARALLEL_ASSERT(strictly_less_or_less_equal(base_type::_S_key(prev), _KeyOfValue()(*access[pos]))); + + split(static_cast<_Rb_tree_node_ptr>(t->_M_left), + static_cast(_KeyOfValue()(*access[pos])), + static_cast(base_type::_S_key(prev)), + conc[pos*2-1]->t, ll, lr, black_h_ll, black_h_lr, + strictly_less_or_less_equal); + + _M_bulk_insertion_split_tree_by_pivot(ll, r, access, beg_partition, rank_shift, pos_beg, pos-1, conc,num_threads, strictly_less_or_less_equal); + } + else + { + lr = static_cast<_Rb_tree_node_ptr>(t->_M_left); + black_h_lr = black_height (lr); + force_black_root (lr, black_h_lr); + } + + if (pos != pos_end) + { + _Rb_tree_node_ptr prev = r[beg_partition[pos+1] - 1 - (rank_shift[pos+1] - rank_shift[pos])]; + + _GLIBCXX_PARALLEL_ASSERT(not base_type::_M_impl._M_key_compare(_KeyOfValue()(*access[pos+1]), base_type::_S_key(prev))); + _GLIBCXX_PARALLEL_ASSERT(strictly_less_or_less_equal(base_type::_S_key(prev), _KeyOfValue()(*access[pos+1]))); + + split(static_cast<_Rb_tree_node_ptr>(t->_M_right), + static_cast(_KeyOfValue()(*access[pos+1])), + static_cast(base_type::_S_key(prev)), + conc[pos*2+1]->t, rl, rr, black_h_rl, black_h_rr, + strictly_less_or_less_equal); + + _M_bulk_insertion_split_tree_by_pivot(rr, r, access, beg_partition, rank_shift, pos+1, pos_end, conc,num_threads, strictly_less_or_less_equal); + 
} + else + { + rl = static_cast<_Rb_tree_node_ptr>(t->_M_right); + black_h_rl = black_height (rl); + force_black_root (rl, black_h_rl); + } + + // When key(t) is equal to key(access[pos]) and no other key in + // the left tree satisfies the criteria to be conc[pos*2-1]->t, + // key(t) must be assigned to it to avoid repetitions. + // Therefore, we do not have a root parameter for the + // concatenate function and a new concatenate function must be + // provided. + if (pos != pos_beg and conc[pos*2-1]->t == NULL and not strictly_less_or_less_equal(_KeyOfValue()(*access[pos]), base_type::_S_key(t))) + { + conc[pos*2-1]->t = t; + t = NULL; + } + concatenate(t, lr, rl, black_h_lr, black_h_rl, conc[pos*2]->t, conc[pos*2]->black_h); + } + + /** @brief Divide the insertion problem until a leaf is reached or + * the problem is small. + * + * During the recursion, the right subproblem is queued, so that + * it can be handled by any thread. The left subproblem is + * divided recursively, and finally, solved right away + * sequentially. + * @param r Array of nodes containing the nodes to added into the tree + * @param ins_problems Pointer to a queue of insertion + * problems. The calling thread owns this queue, i.e. it is the + * only one to push elements, but other threads could pop elements + * from it in other methods. 
+ * @param ip Current insertion problem to be solved + * @param existing Number of existing elements found when solving + * the insertion problem (out) + * @param min_problem Threshold size on the size of the insertion + * problem in which to stop recursion + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + */ + template + void + _M_bulk_insertion_split_sequence(_Rb_tree_node_ptr* r, RestrictedBoundedConcurrentQueue* ins_problems, insertion_problem& ip, size_type& existing, const size_type min_problem, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { + _GLIBCXX_PARALLEL_ASSERT(ip.t == ip.conc->t); + if (ip.t == NULL or (ip.pos_end- ip.pos_beg) <= min_problem) + { + // SOLVE PROBLEM SEQUENTIALLY + // Start solving the problem. + _GLIBCXX_PARALLEL_ASSERT(ip.pos_beg <= ip.pos_end); + _M_bulk_insertion_merge_concatenate(r, ip, existing, strictly_less_or_less_equal); + return; + } + + size_type pos_beg_right; + size_type pos_end_left = divide(r, ip.pos_beg, ip.pos_end, base_type::_S_key(ip.t), pos_beg_right, existing, strictly_less_or_less_equal); + + int black_h_l, black_h_r; + if (ip.t->_M_color == std::_S_black) + { + black_h_l = black_h_r = ip.conc->black_h - 1; + } + else + { + black_h_l = black_h_r = ip.conc->black_h; + } + + // Right problem into the queue. + ip.conc->right_problem = new concat_problem(static_cast<_Rb_tree_node_ptr>(ip.t->_M_right), black_h_r, ip.conc); + ip.conc->left_problem = new concat_problem(static_cast<_Rb_tree_node_ptr>(ip.t->_M_left), black_h_l, ip.conc); + + ins_problems->push_front(insertion_problem(pos_beg_right, ip.pos_end, ip.array_partition, ip.conc->right_problem)); + + // Solve left problem. 
+ insertion_problem ip_left(ip.pos_beg, pos_end_left, ip.array_partition, ip.conc->left_problem); + _M_bulk_insertion_split_sequence(r, ins_problems, ip_left, existing, min_problem, strictly_less_or_less_equal); + } + + + /** @brief Insert a sequence of elements into a tree using a + * divide-and-conquer scheme. + * + * The problem is solved recursively and sequentially dividing the + * sequence to be inserted according to the root of the tree. This + * is done until a leaf is reached or the proportion of elements + * to be inserted is small. Finally, the two resulting trees are + * concatenated. + * @param r_array Array of nodes containing the nodes to be added + * into the tree (among others) + * @param t Root of the tree + * @param pos_beg Position of the first node in the array of + * nodes to be inserted into the tree + * @param pos_end Position of the first node in the array of + * nodes that will not be inserted into the tree + * @param existing Number of existing elements found while + * inserting the range [@c pos_beg, @c pos_end) (out) + * @param black_h Height of the tree @c t and of the resulting + * tree after the recursive calls (in and out) + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + * @return Resulting tree after the elements have been inserted + */ + template + _Rb_tree_node_ptr _M_bulk_insertion_merge(_Rb_tree_node_ptr* r_array, _Rb_tree_node_ptr t, const size_type pos_beg, const size_type pos_end, size_type& existing, int& black_h, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { +#ifndef NDEBUG + int count; +#endif + _GLIBCXX_PARALLEL_ASSERT(pos_beg<=pos_end); + + // Leaf: a tree with the range must be constructed. Returns its + // height in black nodes and its root (in ip.t) If there is + // nothing to insert, we still need the height for balancing. 
+ if (t == NULL) + { + if (pos_end == pos_beg) return NULL; + t = simple_tree_construct(r_array,pos_beg, pos_end, black_h); + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(t,count)); + return t; + } + if (pos_end == pos_beg) + return t; + if ((pos_end - pos_beg) <= (size_type)(black_h)) + { + // Exponential size tree with respect the number of elements + // to be inserted. + for (size_type p = pos_beg; p < pos_end; ++p) + { + t = _M_insert_local(t, r_array[p], existing, black_h, strictly_less_or_less_equal); + } + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(t,count)); + return t; + } + + size_type pos_beg_right; + size_type pos_end_left = divide(r_array, pos_beg, pos_end, base_type::_S_key(t), pos_beg_right, existing, strictly_less_or_less_equal); + + + int black_h_l, black_h_r; + if (t->_M_color == std::_S_black) + { + black_h_l = black_h_r = black_h - 1; + } + else + { + black_h_l = black_h_r = black_h; + } + force_black_root(t->_M_left, black_h_l); + _Rb_tree_node_ptr l = _M_bulk_insertion_merge(r_array, static_cast<_Rb_tree_node_ptr>(t->_M_left), pos_beg, pos_end_left, existing, black_h_l, strictly_less_or_less_equal); + force_black_root(t->_M_right, black_h_r); + _Rb_tree_node_ptr r = _M_bulk_insertion_merge(r_array, static_cast<_Rb_tree_node_ptr>(t->_M_right), pos_beg_right, pos_end, existing, black_h_r, strictly_less_or_less_equal); + + concatenate(t, l, r, black_h_l, black_h_r, t, black_h); + + return t; + } + + /** @brief Solve a given insertion problem and all the parent + * concatenation problem that are ready to be solved. + * + * First, solve an insertion problem. + + * Then, check if it is possible to solve the parent + * concatenation problem. If this is the case, solve it and go + * up recursively, as far as possible. Quit otherwise. + * + * @param r Array of nodes containing the nodes to be added into + * the tree (among others) + * @param ip Insertion problem to solve initially. 
+ * @param existing Number of existing elements found while + * inserting the range defined by the insertion problem (out) + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + */ + template + void _M_bulk_insertion_merge_concatenate(_Rb_tree_node_ptr* r, insertion_problem& ip, size_type& existing, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { + concat_problem* conc = ip.conc; + _GLIBCXX_PARALLEL_ASSERT(ip.pos_beg <= ip.pos_end); + + conc->t = _M_bulk_insertion_merge(r, ip.t, ip.pos_beg, ip.pos_end, existing, conc->black_h, strictly_less_or_less_equal); + _GLIBCXX_PARALLEL_ASSERT(conc->t == NULL or conc->t->_M_color == std::_S_black); + + bool is_ready = true; + while (conc->par_problem != NULL and is_ready) + { + // Pre: exists left and right problem, so there is not a deadlock + if (compare_and_swap(&conc->par_problem->is_ready, concat_problem::READY_NO, concat_problem::READY_YES)) + is_ready = false; + + if (is_ready) + { + conc = conc->par_problem; + _GLIBCXX_PARALLEL_ASSERT(conc->left_problem!=NULL and conc->right_problem!=NULL); + _GLIBCXX_PARALLEL_ASSERT (conc->left_problem->black_h >=0 and conc->right_problem->black_h>=0); + // Finished working with the problems. + concatenate(conc->t, conc->left_problem->t, conc->right_problem->t, conc->left_problem->black_h, conc->right_problem->black_h, conc->t, conc->black_h); + + delete conc->left_problem; + delete conc->right_problem; + } + } + } + + // Begin of sorting, searching and related comparison-based helper methods. + + /** @brief Check whether a random-access sequence is sorted, and + * calculate its size. + * + * @param __first Begin iterator of sequence. + * @param __last End iterator of sequence. + * @param dist Size of the sequence (out) + * @return sequence is sorted. 
*/ + template + bool + is_sorted_distance(const _RandomAccessIterator __first, const _RandomAccessIterator __last, size_type& dist, std::random_access_iterator_tag) const + { + gr_or_eq<_Compare, _RandomAccessIterator> geq(base_type::_M_impl._M_key_compare); + dist = __last - __first; + + // In parallel. + return equal(__first + 1, __last, __first, geq); + } + + /** @brief Check whether an input sequence is sorted, and + * calculate its size. + * + * The list partitioning tool is used so that all the work is + * done in only one traversal. + * @param __first Begin iterator of sequence. + * @param __last End iterator of sequence. + * @param dist Size of the sequence (out) + * @return sequence is sorted. */ + template + bool + is_sorted_distance(const _InputIterator __first, const _InputIterator __last, size_type& dist, std::input_iterator_tag) const + { + dist = 1; + bool is_sorted = true; + _InputIterator it = __first; + _InputIterator prev = it++; + while (it != __last) + { + ++dist; + if (base_type::_M_impl._M_key_compare(_KeyOfValue()(*it),_KeyOfValue()(*prev))) + { + is_sorted = false; + ++it; + break; + } + prev = it; + ++it; + } + while (it != __last) + { + ++dist; + ++it; + } + return is_sorted; + } + + /** @brief Check whether a random-access sequence is sorted, + * calculate its size, and obtain intermediate accessors to the + * sequence to ease parallelization. + * + * @param __first Begin iterator of sequence. + * @param __last End iterator of sequence. + * @param access Array of size @c num_pieces + 1 that defines @c + * num_pieces subsequences of the original sequence (out). Each + * position @c i will contain an iterator to the first element in + * the subsequence @c i. + * @param beg_partition Array of size @c num_pieces + 1 that + * defines @c num_pieces subsequences of the original sequence + * (out). Each position @c i will contain the rank of the first + * element in the subsequence @c i. 
+ * @param dist Size of the sequence (out) + * @param num_pieces Number of pieces to generate. + * @return Sequence is sorted. */ + template + bool + is_sorted_distance_accessors(const _RandomAccessIterator __first, const _RandomAccessIterator __last, _RandomAccessIterator* access, size_type* beg_partition, size_type& dist, thread_index_t& num_pieces, std::random_access_iterator_tag) const + { + bool is_sorted = is_sorted_distance(__first, __last, dist,std::__iterator_category(__first)); + if (dist < (unsigned int) num_pieces) + num_pieces = dist; + + // Do it opposite way to use accessors in equal function??? + range_accessors(__first,__last, access, beg_partition, dist, num_pieces, std::__iterator_category(__first)); + return is_sorted; + } + + /** @brief Check whether an input sequence is sorted, calculate + * its size, and obtain intermediate accessors to the sequence to + * ease parallelization. + * + * The list partitioning tool is used so that all the work is + * done in only one traversal. + * @param __first Begin iterator of sequence. + * @param __last End iterator of sequence. + * @param access Array of size @c num_pieces + 1 that defines @c + * num_pieces subsequences of the original sequence (out). Each + * position @c i will contain an iterator to the first element in + * the subsequence @c i. + * @param beg_partition Array of size @c num_pieces + 1 that + * defines @c num_pieces subsequences of the original sequence + * (out). Each position @c i will contain the rank of the first + * element in the subsequence @c i. + * @param dist Size of the sequence (out) + * @param num_pieces Number of pieces to generate. + * @return Sequence is sorted. 
*/ + template + bool + is_sorted_distance_accessors(const _InputIterator __first, const _InputIterator __last, _InputIterator* access, size_type* beg_partition, size_type& dist, thread_index_t& num_pieces, std::input_iterator_tag) const + { + is_sorted_functor<_InputIterator, _Compare> sorted(__first, base_type::_M_impl._M_key_compare); + dist = list_partition(__first, __last, access, (beg_partition+1), num_pieces, sorted, 0); + + // Calculate the rank of the begining each partition from the + // sequence sizes (what is stored at this point in beg_partition + // array). + beg_partition[0] = 0; + for (int i = 0; i < num_pieces; ++i) + { + beg_partition[i+1] += beg_partition[i]; + } + + return sorted.is_sorted(); + } + + /** @brief Make a full copy of the elements of a sequence + * + * The unitialized_copy method from the stl is called in parallel + * using the access array to point to the beginning of each + * partition + * @param access Array of size @c num_threads + 1 that defines @c + * num_threads subsequences. Each position @c i contains an + * iterator to the first element in the subsequence @c i. + * @param beg_partition Array of size @c num_threads + 1 that + * defines @c num_threads subsequences. Each position @c i + * contains the rank of the first element in the subsequence @c + * i. + * @param out Begin iterator of output sequence. + * @param num_threads Number of threads to use. */ + template + static void + uninitialized_copy_from_accessors(_InputIterator* access, size_type* beg_partition, _OutputIterator out, const thread_index_t num_threads) + { +#pragma omp parallel num_threads(num_threads) + { + int iam = omp_get_thread_num(); + uninitialized_copy(access[iam], access[iam+1], out+beg_partition[iam]); + } + } + + /** @brief Make a copy of the pointers of the elements of a sequence + * @param access Array of size @c num_threads + 1 that defines @c + * num_threads subsequences. 
Each position @c i contains an + * iterator to the first element in the subsequence @c i. + * @param beg_partition Array of size @c num_threads + 1 that + * defines @c num_threads subsequences. Each position @c i + * contains the rank of the first element in the subsequence @c + * i. + * @param out Begin iterator of output sequence. + * @param num_threads Number of threads to use. */ + template + static void + uninitialized_ptr_copy_from_accessors(_InputIterator* access, size_type* beg_partition, _OutputIterator out, const thread_index_t num_threads) + { +#pragma omp parallel num_threads(num_threads) + { + int iam = omp_get_thread_num(); + _OutputIterator itout = out + beg_partition[iam]; + for (_InputIterator it = access[iam]; it != access[iam+1]; ++it) + { + *itout = &(*it); + ++itout; + } + } + } + + /** @brief Split a sorted node array in two parts according to a key. + * + * For unique containers, if the splitting key is in the array of + * nodes, the corresponding node is erased. + * @param r Array of nodes containing the nodes to split (among others) + * @param pos_beg Position of the first node in the array of + * nodes to be considered + * @param pos_end Position of the first node in the array of + * nodes to be not considered + * @param key Splitting key + * @param pos_beg_right Position of the first node in the + * resulting right partition (out) + * @param existing Number of existing elements before dividing + * (in) and after (out). Specificically, the counter is + * incremented by one for unique containers if the splitting key + * was already in the array of nodes. 
+ * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + * @return Position of the last node (not included) in the + * resulting left partition (out) + */ + template + size_type + divide(_Rb_tree_node_ptr* r, const size_type pos_beg, const size_type pos_end, const key_type& key, size_type& pos_beg_right, size_type& existing, StrictlyLessOrLessEqual strictly_less_or_less_equal) + { + pos_beg_right = std::lower_bound(r + pos_beg, r + pos_end, key, compare_node_key<_Compare>(base_type::_M_impl._M_key_compare)) - r; + + //Check if the element exists. + size_type pos_end_left = pos_beg_right; + + // If r[pos_beg_right] is equal to key, must be erased + /***** Dealing with repetitions (CORRECTNESS ISSUE) *****/ + _GLIBCXX_PARALLEL_ASSERT((pos_beg_right == pos_end) or not base_type::_M_impl._M_key_compare(base_type::_S_key(r[pos_beg_right]),key)); + _GLIBCXX_PARALLEL_ASSERT((pos_beg_right + 1 >= pos_end) or strictly_less_or_less_equal(key, base_type::_S_key(r[pos_beg_right + 1]))); + if (pos_beg_right != pos_end and not strictly_less_or_less_equal(key, base_type::_S_key(r[pos_beg_right]))) + { + _M_destroy_node(r[pos_beg_right]); + r[pos_beg_right] = NULL; + ++pos_beg_right; + ++existing; + } + _GLIBCXX_PARALLEL_ASSERT(pos_end_left <= pos_beg_right and pos_beg_right <= pos_end and pos_end_left >= pos_beg); + return pos_end_left; + } + + + /** @brief Parallelization helper method: Given a random-access + sequence of known size, divide it into pieces of almost the + same size. + * @param __first Begin iterator of sequence. + * @param __last End iterator of sequence. + * @param access Array of size @c num_pieces + 1 that defines @c + * num_pieces subsequences. Each position @c i contains an + * iterator to the first element in the subsequence @c i. + * @param beg_partition Array of size @c num_pieces + 1 that + * defines @c num_pieces subsequences. 
Each position @c i + * contains the rank of the first element in the subsequence @c + * i. + * @param n Sequence size + * @param num_pieces Number of pieces. */ + template + static void + range_accessors(const _RandomAccessIterator __first, const _RandomAccessIterator __last, _RandomAccessIterator* access, size_type* beg_partition, const size_type n, const thread_index_t num_pieces, std::random_access_iterator_tag) + { + access[0] = __first; + for (int i=1; i< num_pieces; ++i) + { + access[i] = access[i-1] + (__last-__first)/num_pieces; + beg_partition[i]= beg_partition[i-1]+ (__last-__first)/num_pieces; + } + beg_partition[num_pieces] = __last - access[num_pieces-1] + beg_partition[num_pieces-1]; + access[num_pieces]= __last; + } + + /** @brief Parallelization helper method: Given an input-access + sequence of known size, divide it into pieces of almost the + same size. + * @param __first Begin iterator of sequence. + * @param __last End iterator of sequence. + * @param access Array of size @c num_pieces + 1 that defines @c + * num_pieces subsequences. Each position @c i contains an + * iterator to the first element in the subsequence @c i. + * @param beg_partition Array of size @c num_pieces + 1 that + * defines @c num_pieces subsequences. Each position @c i + * contains the rank of the first element in the subsequence @c + * i. + * @param n Sequence size + * @param num_pieces Number of pieces. 
+       */
+      template<typename _InputIterator>
+        static void
+        range_accessors(const _InputIterator __first,
+                        const _InputIterator __last,
+                        _InputIterator* access, size_type* beg_partition,
+                        const size_type n, const thread_index_t num_pieces,
+                        std::input_iterator_tag)
+        {
+          // The first piece always starts at the beginning, rank 0.
+          // beg_partition[0] must be written here: every later entry is
+          // derived from it.
+          access[0] = __first;
+          beg_partition[0] = 0;
+
+          // No pieces requested: nothing more to fill in, and the code
+          // below would divide by zero and index element -1.
+          if (num_pieces < 1)
+            return;
+
+          // All pieces but the last get this many elements; the last
+          // piece additionally receives the remainder.
+          const size_type piece_len = n / num_pieces;
+          _InputIterator it = __first;
+          for (int i = 1; i < num_pieces; ++i)
+            {
+              // Input iterators can only be advanced step by step.
+              for (size_type j = 0; j < piece_len; ++j)
+                ++it;
+              access[i] = it;
+              beg_partition[i] = piece_len + beg_partition[i - 1];
+            }
+
+          // Sentinel entries: end of the last piece.
+          access[num_pieces] = __last;
+          beg_partition[num_pieces] = n;
+        }
+
+      /** @brief Initialize an array of concatenation problems for bulk
+       *  insertion. They are linked as a tree with (end - beg) leaves.
+       *
+       *  Leaf @c i is stored at index @c 2*i, the inner problem that
+       *  joins the halves meeting at @c mid is stored at @c 2*mid-1.
+       *  @param conc Array of concatenation problems pointers to initialize.
+       *  @param beg Rank of the first leaf to initialize
+       *  @param end Rank of the last (not included) leaf to initialize
+       *  @param parent Pointer to the parent concatenation problem.
+       *  @pre @c beg < @c end (the recursion does not terminate
+       *  otherwise).
+       *  @return Root problem of the initialized (sub)tree.
+       */
+      static concat_problem*
+      _M_bulk_insertion_initialize_upper_problems(concat_problem** conc,
+                                                  const int beg,
+                                                  const int end,
+                                                  concat_problem* parent)
+      {
+        // A single leaf: just hook it to its parent.
+        if (beg + 1 == end)
+          {
+            conc[2*beg]->par_problem = parent;
+            return conc[2*beg];
+          }
+
+        // Split the leaf range in two halves and recurse; the inner
+        // problem at 2*mid-1 becomes the parent of both halves.
+        int size = end - beg;
+        int mid = beg + size/2;
+        conc[2*mid-1]->par_problem = parent;
+        conc[2*mid-1]->left_problem =
+          _M_bulk_insertion_initialize_upper_problems(conc, beg, mid,
+                                                      conc[2*mid-1]);
+        conc[2*mid-1]->right_problem =
+          _M_bulk_insertion_initialize_upper_problems(conc, mid, end,
+                                                      conc[2*mid-1]);
+        return conc[2*mid-1];
+      }
+
+
+      /** @brief Determine black height of a node recursively.
+       *
+       *  Only the leftmost path is followed.
+       *  @param t Node.
+       *  @return Black height of the node. */
+      static int
+      black_height(const _Rb_tree_node_ptr t)
+      {
+        if (t == NULL)
+          return 0;
+        int bh = black_height(static_cast<const _Rb_tree_node_ptr>(t->_M_left));
+        if (t->_M_color == std::_S_black)
+          ++bh;
+        return bh;
+      }
+
+      /** @brief Color a leaf black
+       *  @param t Leaf pointer.
+       *  @param black_h Black height of @c t (out): 0 if @c t is NULL,
+       *  1 otherwise. */
+      static void
+      make_black_leaf(const _Rb_tree_node_ptr t, int& black_h)
+      {
+        black_h = 0;
+        if (t != NULL)
+          {
+            _GLIBCXX_PARALLEL_ASSERT(t->_M_left == NULL and t->_M_right == NULL);
+            black_h = 1;
+            t->_M_color = std::_S_black;
+          }
+      }
+
+      /** @brief Turn a node into a black leaf: color it black and
+       *  detach both of its children.
+       *  @param t Node to turn into a leaf. Must not be NULL.
+       *  @param black_h Black height of @c t (out), always 1. */
+      static void
+      make_leaf(const _Rb_tree_node_ptr t, int& black_h)
+      {
+        _GLIBCXX_PARALLEL_ASSERT(t != NULL);
+        black_h = 1;
+        t->_M_color = std::_S_black;
+        t->_M_left = NULL;
+        t->_M_right = NULL;
+      }
+
+      /** @brief Construct a tree from a root, a left subtree and a
+       *  right subtree.
+       *
+       *  @c S is the symmetry parameter: @c S::left and @c S::right
+       *  select which child link plays the role of "left" and "right".
+       *  The new root is colored red.
+       *  @param root Root of constructed tree.
+       *  @param l Root of left subtree.
+       *  @param r Root of right subtree.
+       *  @pre @c l, @c r are black.
+       *  @return @c root
+       */
+      template<typename S>
+        static _Rb_tree_node_ptr
+        plant(const _Rb_tree_node_ptr root, const _Rb_tree_node_ptr l,
+              const _Rb_tree_node_ptr r)
+        {
+          S::left(root) = l;
+          S::right(root) = r;
+          if (l != NULL)
+            l->_M_parent = root;
+          if (r != NULL)
+            r->_M_parent = root;
+          root->_M_color = std::_S_red;
+          return root;
+        }
+
+      /** @brief Concatenate two red-black subtrees using an
+       *  intermediate node, which might be NULL
+       *  @param root Intermediate node.
+       *  @param l Left subtree.
+       *  @param r Right subtree.
+       *  @param black_h_l Black height of left subtree.
+       *  @param black_h_r Black height of right subtree.
+       *  @param t Tree resulting of the concatenation
+       *  @param black_h Black height of the resulting tree
+       *  @pre @c l and @c r are red-black trees with black roots;
+       *  either of them may be the higher one.
+       *  @post @c t is correct red-black tree with height @c black_h.
+ */ + void + concatenate(_Rb_tree_node_ptr root, _Rb_tree_node_ptr l, _Rb_tree_node_ptr r, int black_h_l, int black_h_r, _Rb_tree_node_ptr& t, int& black_h) const + { +#ifndef NDEBUG + int count = 0, count1 = 0, count2 = 0; +#endif + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(l, count1)); + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(r, count2)); + + _GLIBCXX_PARALLEL_ASSERT(l != NULL ? l->_M_color != std::_S_red and black_h_l > 0 : black_h_l == 0); + _GLIBCXX_PARALLEL_ASSERT(r != NULL ? r->_M_color != std::_S_red and black_h_r > 0 : black_h_r == 0); + + if (black_h_l > black_h_r) + if (root != NULL) + concatenate(root, l, r, black_h_l, black_h_r, t, black_h); + else + { + if (r == NULL) + { + t = l; + black_h = black_h_l; + } + else + { + // XXX SHOULD BE the same as extract_min but slower. + /* + root = static_cast<_Rb_tree_node_ptr>(_Rb_tree_node_base::_S_minimum(r)); + split(r, _S_key(_Rb_tree_increment(root)), _S_key(root), root, t, r, black_h, black_h_r); + */ + extract_min(r, root, r, black_h_r); + _GLIBCXX_PARALLEL_ASSERT(root != NULL); + concatenate(root, l, r, black_h_l, black_h_r, t, black_h); + } + } + else + if (root != NULL) + concatenate(root, r, l, black_h_r, black_h_l, t, black_h); + else + { + if (l == NULL) + { + t = r; + black_h = black_h_r; + } + else + { + // XXX SHOULD BE the same as extract_max but slower + /* + root = static_cast<_Rb_tree_node_ptr>(_Rb_tree_node_base::_S_maximum(l)); + split(l, _S_key(root), _S_key(_Rb_tree_decrement(root)), root, l, t, black_h_l, black_h); + */ + extract_max(l, root, l, black_h_l); + _GLIBCXX_PARALLEL_ASSERT(root != NULL); + concatenate(root, r, l, black_h_r, black_h_l, t, black_h); + } + } +#ifndef NDEBUG + if (root!=NULL) ++count1; + _GLIBCXX_PARALLEL_ASSERT(t == NULL or t->_M_color == std::_S_black); + bool b = rb_verify_tree(t, count); + if (not b){ + _GLIBCXX_PARALLEL_ASSERT(false); + } + _GLIBCXX_PARALLEL_ASSERT(count1+count2 == count); +#endif + } + + /** @brief Concatenate two red-black subtrees using 
and a not NULL + * intermediate node. + * + * @c S is the symmetry parameter. + * @param rt Intermediate node. + * @param l Left subtree. + * @param r Right subtree. + * @param black_h_l Black height of left subtree. + * @param black_h_r Black height of right subtree. + * @param t Tree resulting of the concatenation + * @param black_h Black height of the resulting tree + * @pre Left tree is higher than right tree. @c rt != NULL + * @post @c t is correct red-black tree with height @c black_h. + */ + template + static void + concatenate(const _Rb_tree_node_ptr rt, _Rb_tree_node_ptr l, _Rb_tree_node_ptr r, int black_h_l, int black_h_r, _Rb_tree_node_ptr& t, int& black_h) + { + _Rb_tree_node_base* root = l; + _Rb_tree_node_ptr parent = NULL; + black_h = black_h_l; + _GLIBCXX_PARALLEL_ASSERT(black_h_l >= black_h_r); + while (black_h_l != black_h_r) + { + if (l->_M_color == std::_S_black) + --black_h_l; + parent = l; + l = static_cast<_Rb_tree_node_ptr>(S::right(l)); + _GLIBCXX_PARALLEL_ASSERT((black_h_l == 0 and (l == NULL or l->_M_color == std::_S_red)) or (black_h_l != 0 and l != NULL)); + _GLIBCXX_PARALLEL_ASSERT((black_h_r == 0 and (r == NULL or r->_M_color == std::_S_red)) or (black_h_r != 0 and r != NULL)); + } + if (l != NULL and l->_M_color == std::_S_red) + { + //the root needs to be black + parent = l; + l = static_cast<_Rb_tree_node_ptr>(S::right(l)); + } + _GLIBCXX_PARALLEL_ASSERT(l != NULL ? l->_M_color == std::_S_black : true); + _GLIBCXX_PARALLEL_ASSERT(r != NULL ? r->_M_color == std::_S_black : true); + t = plant(rt, l, r); + t->_M_parent = parent; + if (parent != NULL) + { + S::right(parent) = t; + black_h += _Rb_tree_rebalance(t, root); + t = static_cast<_Rb_tree_node_ptr> (root); + } + else + { + ++black_h; + t->_M_color = std::_S_black; + } + _GLIBCXX_PARALLEL_ASSERT(t->_M_color == std::_S_black); + } + + /** @brief Split a tree according to key in three parts: a left + * child, a right child and an intermediate node. 
+ * + * Trees are concatenated once the recursive call returns. That + * is, from bottom to top (ie. smaller to larger), so the cost + * bounds for split hold. + * @param t Root of the tree to split. + * @param key Key to split according to. + * @param prev_k Key to split the intermediate node + * @param root Out parameter. If a node exists whose key is + * smaller or equal than @c key, but strictly larger than @c + * prev_k, this is returned. Otherwise, it is null. + * @param l Root of left subtree returned, nodes less than @c key. + * @param r Root of right subtree returned, nodes greater or + * equal than @c key. + * @param black_h_l Black height of the left subtree. + * @param black_h_r Black height of the right subtree. + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + * @return Black height of t */ + template + int + split(_Rb_tree_node_ptr t, const key_type& key, const key_type& prev_k, _Rb_tree_node_ptr& root, _Rb_tree_node_ptr& l, _Rb_tree_node_ptr& r, int& black_h_l, int& black_h_r, StrictlyLessOrEqual strictly_less_or_less_equal) const + { + if (t != NULL) + { + // Must be initialized, in case we never go left!!! 
+ root = NULL; + int h = split_not_null(t, key, prev_k, root, l, r, black_h_l, black_h_r, strictly_less_or_less_equal); +#ifndef NDEBUG + _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key)); + _GLIBCXX_PARALLEL_ASSERT(r == NULL or not base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_minimum(r)),key)); + int count1, count2; + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(l, count1)); + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(r, count2)); + _GLIBCXX_PARALLEL_ASSERT(root == NULL or base_type::_M_impl._M_key_compare(prev_k, base_type::_S_key(root)) and not base_type::_M_impl._M_key_compare(key, base_type::_S_key(root))); + _GLIBCXX_PARALLEL_ASSERT(root != NULL or l==NULL or not base_type::_M_impl._M_key_compare(prev_k, base_type::_S_key(base_type::_S_maximum(l)))); +#endif + return h; + } + + r = NULL; + root = NULL; + l = NULL; + black_h_r = 0; + black_h_l = 0; + return 0; + } + + /** @brief Split a tree according to key in three parts: a left + * child, a right child and an intermediate node. + * + * @param t Root of the tree to split. + * @param key Key to split according to. + * @param prev_k Key to split the intermediate node + * @param root Out parameter. If a node exists whose key is + * smaller or equal than @c key, but strictly larger than @c + * prev_k, this is returned. Otherwise, it is null. + * @param l Root of left subtree returned, nodes less than @c key. + * @param r Root of right subtree returned, nodes greater or + * equal than @c key. + * @param black_h_l Black height of the left subtree. + * @param black_h_r Black height of the right subtree. 
+ * @param strictly_less_or_equal Comparator to deal transparently + * with repetitions with respect to the uniqueness of the + * wrapping container + * @pre t != NULL + * @return Black height of t */ + template + int + split_not_null(const _Rb_tree_node_ptr t, const key_type& key, const key_type& prev_k, _Rb_tree_node_ptr& root, _Rb_tree_node_ptr& l, _Rb_tree_node_ptr& r, int& black_h_l, int& black_h_r, StrictlyLessOrEqual strictly_less_or_equal) const + { + _GLIBCXX_PARALLEL_ASSERT (t != NULL); + int black_h, b_h; + int black_node = 0; + if (t->_M_color == std::_S_black) + ++black_node; + if (strictly_less_or_equal(key, base_type::_S_key(t))) + { + if (t->_M_left != NULL ) + { + // t->M_right is at most one node + // go to the left + b_h = black_h = split_not_null( static_cast<_Rb_tree_node_ptr>(t->_M_left), key, prev_k, root, l, r, black_h_l, black_h_r, strictly_less_or_equal); + // Moin root and right subtree to already existing right + // half, leave left subtree. + force_black_root(t->_M_right, b_h); + concatenate(t, r, static_cast<_Rb_tree_node_ptr>(t->_M_right), black_h_r, b_h, r, black_h_r); + } + else + { + // t->M_right is at most one node + r = t; + black_h_r = black_node; + force_black_root(r, black_h_r); + + black_h = 0; + l = NULL; + black_h_l = 0; + } + _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key)); + _GLIBCXX_PARALLEL_ASSERT(r == NULL or not base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_minimum(r)),key)); + } + else + { + if (t->_M_right != NULL ) + { + // Go to the right. + if (strictly_less_or_equal(prev_k, base_type::_S_key(t))) + root = t; + b_h = black_h = split_not_null(static_cast<_Rb_tree_node_ptr>(t->_M_right), key, prev_k, root, l, r, black_h_l, black_h_r, strictly_less_or_equal); + // Join root and left subtree to already existing left + // half, leave right subtree. 
+ force_black_root(t->_M_left, b_h); + if (root != t) + { + // There was another point where we went right. + concatenate(t, static_cast<_Rb_tree_node_ptr>(t->_M_left), l, b_h, black_h_l, l, black_h_l); + } + else + { + l = static_cast<_Rb_tree_node_ptr>(t->_M_left); + black_h_l = b_h; + } + _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key)); + _GLIBCXX_PARALLEL_ASSERT(r == NULL or not base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_minimum(r)),key)); + } + else + { + if (strictly_less_or_equal(prev_k, base_type::_S_key(t))) + { + root = t; + l= static_cast<_Rb_tree_node_ptr>(t->_M_left); + make_black_leaf(l, black_h_l); + _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key)); + } + else + { + l= t; + black_h_l = black_node; + force_black_root(l, black_h_l); + _GLIBCXX_PARALLEL_ASSERT(l == NULL or base_type::_M_impl._M_key_compare(base_type::_S_key(base_type::_S_maximum(l)),key)); + } + + r = NULL; + black_h = 0; + black_h_r = 0; + } + } + return black_h + black_node; + } + + /** @brief Color the root black and update the black height accordingly. + * + * @param t Root of the tree. + * @param black_h Black height of the tree @c t (out) */ + static void force_black_root(_Rb_tree_node_base* t, int& black_h) + { + if (t != NULL and t->_M_color == std::_S_red) + { + t->_M_color = std::_S_black; + ++ black_h; + } + } + + /** @brief Split the tree in two parts: the minimum element from a + tree (i.e. leftmost) and the rest (right subtree) + * @param t Root of the tree + * @param root Minimum element (out) + * @param r Right subtree: @c t - {@c root} + * @param black_h_r Black height of the right subtree. 
+       *  @return Black height of the original tree */
+      int
+      extract_min(const _Rb_tree_node_ptr t, _Rb_tree_node_ptr& root,
+                  _Rb_tree_node_ptr& r, int& black_h_r) const
+      {
+        _GLIBCXX_PARALLEL_ASSERT (t != NULL);
+        int black_h, b_h;
+        int black_node = 0;
+        if (t->_M_color == std::_S_black)
+          ++black_node;
+
+        if (t->_M_left != NULL )
+          {
+            // go to the left
+            b_h = black_h = extract_min( static_cast<_Rb_tree_node_ptr>(t->_M_left), root, r, black_h_r);
+
+            // Join root and right subtree to already existing right
+            // half, leave left subtree
+            force_black_root(t->_M_right, b_h);
+            concatenate(t, r, static_cast<_Rb_tree_node_ptr>(t->_M_right), black_h_r, b_h, r, black_h_r);
+          }
+        else
+          {
+            // t is the minimum.
+            // t->M_right is at most one node
+            root = t;
+            if (t->_M_right == NULL)
+              {
+                r = NULL;
+                black_h_r = 0;
+              }
+            else
+              {
+                r = static_cast<_Rb_tree_node_ptr>(t->_M_right);
+                black_h_r = 1;
+                r->_M_color = std::_S_black;
+              }
+            black_h = 0;
+          }
+        return black_h + black_node;
+      }
+
+
+      /** @brief Split the tree in two parts: the greatest element from
+       *  a tree (i.e. rightmost) and the rest (left subtree)
+       *  @param t Root of the tree
+       *  @param root Maximum element (out)
+       *  @param l Left subtree: @c t - {@c root}
+       *  @param black_h_l Black height of the left subtree.
+       *  @return Black height of the original tree */
+      int
+      extract_max(const _Rb_tree_node_ptr t, _Rb_tree_node_ptr& root,
+                  _Rb_tree_node_ptr& l, int& black_h_l) const
+      {
+        _GLIBCXX_PARALLEL_ASSERT (t != NULL);
+        int black_h, b_h;
+        int black_node = 0;
+        if (t->_M_color == std::_S_black)
+          ++black_node;
+
+        if (t->_M_right != NULL )
+          {
+            // Go to the right.
+            b_h = black_h = extract_max(static_cast<_Rb_tree_node_ptr>(t->_M_right), root, l, black_h_l);
+
+            // Join root and left subtree to already existing left half,
+            // leave right subtree.
+            force_black_root(t->_M_left, b_h);
+
+            concatenate(t, static_cast<_Rb_tree_node_ptr>(t->_M_left), l, b_h, black_h_l, l, black_h_l);
+          }
+        else
+          {
+            // t is the maximum; its left subtree is at most one node.
+            root = t;
+            if (t->_M_left == NULL)
+              {
+                l = NULL;
+                black_h_l = 0;
+              }
+            else
+              {
+                l = static_cast<_Rb_tree_node_ptr>(t->_M_left);
+                black_h_l = 1;
+                l->_M_color = std::_S_black;
+              }
+            black_h = 0;
+          }
+        return black_h + black_node;
+      }
+
+      /** @brief Split tree according to key in two parts: a left tree
+       *  and a right subtree
+       *
+       *  Trees are concatenated once the recursive call returns. That
+       *  is, from bottom to top (ie. smaller to larger), so the cost
+       *  bounds for split hold.
+       *  @param t Root of the tree to split.
+       *  @param key Key to split according to.
+       *  @param l Root of left subtree returned, nodes less than @c key.
+       *  @param r Root of right subtree returned, nodes not less than
+       *  @c key.
+       *  @param black_h_l Black height of the left subtree.
+       *  @param black_h_r Black height of the right subtree.
+       *  @return Black height of the original tree */
+      int
+      split(const _Rb_tree_node_ptr t, const key_type& key,
+            _Rb_tree_node_ptr& l, _Rb_tree_node_ptr& r,
+            int& black_h_l, int& black_h_r) const
+      {
+        if (t != NULL)
+          {
+            int black_h, b_h;
+            int black_node = 0;
+            if (t->_M_color == std::_S_black)
+              ++black_node;
+            if (not (base_type::_M_impl._M_key_compare(base_type::_S_key(t), key)))
+              {
+                // Go to the left.
+                b_h = black_h = split( static_cast<_Rb_tree_node_ptr>(t->_M_left), key, l, r, black_h_l, black_h_r);
+
+                // Join root and right subtree to already existing right
+                // half, leave left subtree.
+                force_black_root(t->_M_right, b_h);
+                concatenate(t, r, static_cast<_Rb_tree_node_ptr>(t->_M_right), black_h_r, b_h, r, black_h_r);
+              }
+            else
+              {
+                // Go to the right.
+                b_h = black_h = split(static_cast<_Rb_tree_node_ptr>(t->_M_right), key, l, r, black_h_l, black_h_r);
+
+                // Join root and left subtree to already existing left
+                // half, leave right subtree.
+                force_black_root(t->_M_left, b_h);
+                concatenate(t, static_cast<_Rb_tree_node_ptr>(t->_M_left), l, b_h, black_h_l, l, black_h_l);
+              }
+            return black_h + black_node;
+          }
+        else
+          {
+            // Empty tree: both parts are empty.
+            r = NULL;
+            l = NULL;
+            black_h_r = 0;
+            black_h_l = 0;
+            return 0;
+          }
+      }
+
+      /** @brief Insert an existing node in tree and rebalance it, if
+       *  appropriate.
+       *
+       *  The keyword "local" is used because no attributes of the
+       *  red-black tree are changed, so this insertion is not yet seen
+       *  by the global data structure.
+       *  @param t Root of tree to insert into. Must not be NULL.
+       *  @param new_t Existing node to insert. It is destroyed if the
+       *  insertion is rejected.
+       *  @param existing Number of existing elements before insertion
+       *  (in) and after (out). Specifically, the counter is incremented
+       *  by one for unique containers if the key of new_t was already
+       *  in the tree.
+       *  @param black_h Black height of the tree before the insertion
+       *  (in) and of the resulting tree (out)
+       *  @param strictly_less_or_less_equal Comparator to deal
+       *  transparently with repetitions with respect to the uniqueness
+       *  of the wrapping container
+       *  @return Resulting tree after insertion */
+      template<typename StrictlyLessOrLessEqual>
+        _Rb_tree_node_ptr
+        _M_insert_local(_Rb_tree_node_base* t, const _Rb_tree_node_ptr new_t,
+                        size_type& existing, int& black_h,
+                        StrictlyLessOrLessEqual strictly_less_or_less_equal)
+        {
+          _GLIBCXX_PARALLEL_ASSERT(t != NULL);
+          if (_M_insert_local_top_down(t, new_t, NULL, NULL, true,
+                                       strictly_less_or_less_equal))
+            {
+              // Inserted as a red leaf: restore the red-black
+              // properties.  The rebalancing may replace the root t.
+              t->_M_parent = NULL;
+              black_h += _Rb_tree_rebalance(new_t, t);
+              _GLIBCXX_PARALLEL_ASSERT(t->_M_color == std::_S_black);
+              return static_cast<_Rb_tree_node_ptr>(t);
+            }
+          else
+            {
+              // Rejected as a duplicate: the node is not needed anymore.
+              base_type::_M_destroy_node(new_t);
+              ++existing;
+              force_black_root(t, black_h);
+              return static_cast<_Rb_tree_node_ptr>(t);
+            }
+        }
+
+      /***** Dealing with repetitions (CORRECTNESS ISSUE) *****/
+      /** @brief Insert an existing node in tree, do no rebalancing.
+       *  @param t Root of tree to insert into.
+       *  @param new_t Existing node to insert.
+ * @param eq_t Node candidate to be equal than new_t, only + * relevant for unique containers + * @param parent Parent node of @c t + * @param is_left True if @c t is a left child of @c + * parent. False otherwise. + * @param strictly_less_or_less_equal Comparator to deal + * transparently with repetitions with respect to the uniqueness + * of the wrapping container + + * @return Success of the insertion + */ + template + bool + _M_insert_local_top_down(_Rb_tree_node_base* t, const _Rb_tree_node_ptr new_t, _Rb_tree_node_base* eq_t, _Rb_tree_node_base* parent, const bool is_left, StrictlyLessOrLessEqual strictly_less_or_less_equal) const + { + if (t != NULL) + { + if (strictly_less_or_less_equal(_S_key(new_t), _S_key(static_cast<_Rb_tree_node_ptr>(t)))) + { + return _M_insert_local_top_down(t->_M_left, new_t, eq_t, t, true, strictly_less_or_less_equal); + } + else + { + return _M_insert_local_top_down(t->_M_right, new_t, t, t, false, strictly_less_or_less_equal); + } + } + + _GLIBCXX_PARALLEL_ASSERT(parent != NULL); + + // Base case. + if (eq_t == NULL or strictly_less_or_less_equal(_S_key(static_cast<_Rb_tree_node_ptr>(eq_t)), _S_key(new_t))) + { + // The element to be inserted did not existed. + if (is_left) + { + parent->_M_left = new_t; + } + else + { + parent->_M_right = new_t; + } + + new_t->_M_parent = parent; + new_t->_M_left = NULL; + new_t->_M_right = NULL; + new_t->_M_color = std::_S_red; + + return true; + } + else + return false; + } + + /** @brief Rebalance a tree locally. + * + * Essentially, it is the same function as insert_erase from the + * base class, but without the insertion and without using any + * tree attributes. + * @param __x Root of the current subtree to rebalance. 
+ * @param __root Root of tree where @c __x is in (rebalancing + * stops when root is reached) + * @return Increment in the black height after rebalancing + */ + static int + _Rb_tree_rebalance(_Rb_tree_node_base* __x, _Rb_tree_node_base*& __root) + { + _GLIBCXX_PARALLEL_ASSERT(__root->_M_color == std::_S_black); + // Rebalance. + while (__x != __root and __x->_M_parent != __root and + __x->_M_parent->_M_color == std::_S_red) + { + _Rb_tree_node_base* const __xpp = __x->_M_parent->_M_parent; + + if (__x->_M_parent == __xpp->_M_left) + { + _Rb_tree_node_base* const __y = __xpp->_M_right; + if (__y && __y->_M_color == std::_S_red) + { + __x->_M_parent->_M_color = std::_S_black; + __y->_M_color = std::_S_black; + __xpp->_M_color = std::_S_red; + __x = __xpp; + } + else + { + if (__x == __x->_M_parent->_M_right) + { + __x = __x->_M_parent; + std::_Rb_tree_rotate_left(__x, __root); + } + __x->_M_parent->_M_color = std::_S_black; + __xpp->_M_color = std::_S_red; + std::_Rb_tree_rotate_right(__xpp, __root); + } + } + else + { + _Rb_tree_node_base* const __y = __xpp->_M_left; + if (__y && __y->_M_color == std::_S_red) + { + __x->_M_parent->_M_color = std::_S_black; + __y->_M_color = std::_S_black; + __xpp->_M_color = std::_S_red; + __x = __xpp; + } + else + { + if (__x == __x->_M_parent->_M_left) + { + __x = __x->_M_parent; + std::_Rb_tree_rotate_right(__x, __root); + } + __x->_M_parent->_M_color = std::_S_black; + __xpp->_M_color = std::_S_red; + std::_Rb_tree_rotate_left(__xpp, __root); + } + } + } + if (__root->_M_color == std::_S_red) + { + __root->_M_color = std::_S_black; + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(static_cast(__root))); + return 1; + } + _GLIBCXX_PARALLEL_ASSERT(rb_verify_tree(static_cast(__root))); + return 0; + } + + /** @brief Analogous to class method rb_verify() but only for a subtree. + * @param __x Pointer to root of subtree to check. + * @param count Returned number of nodes. + * @return Tree correct. 
+       */
+      bool
+      rb_verify_tree(const typename base_type::_Const_Link_type __x,
+                     int& count) const
+      {
+        // Key ordering first; the structural check (which also fills
+        // in count) only runs if the ordering holds.
+        if (not rb_verify_tree_node(__x))
+          return false;
+        int unused_black_h;
+        return rb_verify_tree(__x, count, unused_black_h);
+      }
+
+      /** @brief Verify that a subtree is a binary search tree (checks
+       *  every node of the subtree with rb_verify_node)
+       *  @param __x Pointer to root of subtree to check.
+       *  @return Tree correct.
+       */
+      bool
+      rb_verify_tree_node(const typename base_type::_Const_Link_type __x) const
+      {
+        // The empty tree is correct.
+        if (__x == NULL)
+          return true;
+
+        // Node itself, then left subtree, then right subtree; stop at
+        // the first failure.
+        if (not rb_verify_node(__x))
+          return false;
+        if (not rb_verify_tree_node(base_type::_S_left(__x)))
+          return false;
+        return rb_verify_tree_node(base_type::_S_right(__x));
+      }
+
+      /** @brief Verify the black-height balance of a red-black tree;
+       *  key ordering is not checked.
+       *  @param __x Pointer to (subtree) root node.
+       *  @return Tree correct.
+       */
+      static bool
+      rb_verify_tree(const typename base_type::_Const_Link_type __x)
+      {
+        int unused_count, unused_black_h;
+        return rb_verify_tree(__x, unused_count, unused_black_h);
+      }
+
+      /** @brief Verify the black-height balance of a red-black tree;
+       *  key ordering is not checked.
+       *  @param __x Pointer to (subtree) root node.
+       *  @param count Number of nodes of @c __x (out).
+       *  @param black_h Black height of @c __x (out).
+       *  @return Tree correct.
+       */
+      static bool
+      rb_verify_tree(const typename base_type::_Const_Link_type __x,
+                     int& count, int& black_h)
+      {
+        if (__x == NULL)
+          {
+            count = 0;
+            black_h = 0;
+            return true;
+          }
+
+        int left_count, left_bh;
+        int right_count = 0, right_bh;
+
+        // The right subtree is only visited if the left one is correct;
+        // both sides must then have the same black height.
+        bool balanced = false;
+        if (rb_verify_tree(base_type::_S_left(__x), left_count, left_bh)
+            and rb_verify_tree(base_type::_S_right(__x),
+                               right_count, right_bh))
+          balanced = (left_bh == right_bh);
+
+        count = left_count + right_count + 1;
+
+        // The height reported is the one along the left path.
+        black_h = left_bh;
+        if (__x->_M_color != std::_S_red)
+          ++black_h;
+        return balanced;
+      }
+
+      /** @brief Verify red-black properties (including key based) for a node
+       *  @param __x Pointer to node.
+       *  @return Node correct.
+ */ + bool + rb_verify_node(const typename base_type::_Const_Link_type __x) const + { + typename base_type::_Const_Link_type __L = base_type::_S_left(__x); + typename base_type::_Const_Link_type __R = base_type::_S_right(__x); + if (__x->_M_color == std::_S_red) + if ((__L && __L->_M_color == std::_S_red) + || (__R && __R->_M_color == std::_S_red)) + { + return false; + } + if (__L != NULL) + { + __L = static_cast(base_type::_S_maximum(__L)); + if (base_type::_M_impl._M_key_compare(base_type::_S_key(__x), base_type::_S_key(__L))) + { + return false; + } + } + + if (__R != NULL) + { + __R = static_cast(base_type::_S_minimum(__R)); + if (base_type::_M_impl._M_key_compare(base_type::_S_key(__R), base_type::_S_key(__x))) + { + return false; + } + } + + return true; + } + + /** @brief Print all the information of the root. + * @param t Root of the tree. + */ + static void + print_root(_Rb_tree_node_base* t) + { + /* + if (t != NULL) + std::cout<< base_type::_S_key(t) << std::endl; + else + std::cout<< "NULL" << std::endl; + */ + } + + /** @brief Print all the information of the tree. + * @param t Root of the tree. + */ + static void + print_tree(_Rb_tree_node_base* t) + { + /* + if (t != NULL) + { + print_tree(t->_M_left); + std::cout<< base_type::_S_key(t) << std::endl; + print_tree(t->_M_right); + } + */ + } + + /** @brief Print blanks. + * @param b Number of blanks to print. + * @return A string with @c b blanks */ + inline static std::string + blanks(int b) + { + /* + std::string s = ""; + for (int i=0; i < b; ++i) + s += " "; + return s; + */ + } + + /** @brief Print all the information of the tree. + * @param t Root of the tree. + * @param c Width of a printed key. 
+ */ + template + static void + draw_tree(Pointer t, const int c) + { + /* + if (t == NULL) + { + std::cout << blanks(c) << "NULL" << std::endl; + return; + } + draw_tree(static_cast(t->_M_right), c + 8); + std::cout << blanks(c) << "" << base_type::_S_key(t) << " "; + if (t->_M_color == std::_S_black) + std::cout << "B" << std::endl; + else + std::cout << "R" << std::endl; + draw_tree(static_cast(t->_M_left), c + 8); + */ + } + + public: + /** @brief Verify that all the red-black tree properties hold for + the stored tree, as well as the additional properties that the + STL implementation imposes. + */ + bool + rb_verify() + { + if (base_type::_M_impl._M_node_count == 0 || base_type::begin() == base_type::end()) + { + bool res = base_type::_M_impl._M_node_count == 0 && base_type::begin() == base_type::end() + && base_type::_M_impl._M_header._M_left ==base_type::_M_end() + && base_type::_M_impl._M_header._M_right == base_type::_M_end(); + _GLIBCXX_PARALLEL_ASSERT(res); + return res; + } + size_type i=0; + unsigned int __len = _Rb_tree_black_count(base_type::_M_leftmost(), base_type::_M_root()); + for (typename base_type::const_iterator __it =base_type::begin(); __it != base_type::end(); ++__it) + { + typename base_type::_Const_Link_type __x = static_cast(__it._M_node); + if (not rb_verify_node(__x)) return false; + if (!base_type::_S_left(__x)&& !base_type::_S_right(__x) && _Rb_tree_black_count(__x,base_type::_M_root()) != __len) + { + _GLIBCXX_PARALLEL_ASSERT(false); + return false; + } + ++i; + } + + if (i != base_type::_M_impl._M_node_count) + printf("%ld != %ld\n", i, base_type::_M_impl._M_node_count); + + if (base_type::_M_leftmost() != std::_Rb_tree_node_base::_S_minimum(base_type::_M_root())) + { + _GLIBCXX_PARALLEL_ASSERT(false); + return false; + } + if (base_type::_M_rightmost() != std::_Rb_tree_node_base::_S_maximum(base_type::_M_root())) + { + _GLIBCXX_PARALLEL_ASSERT(false); + return false; + } + _GLIBCXX_PARALLEL_ASSERT(i == 
base_type::_M_impl._M_node_count); + return true; + } + }; + +} + +#endif diff --git a/libstdc++-v3/include/parallel/types.h b/libstdc++-v3/include/parallel/types.h new file mode 100644 index 00000000000..5f8014f5c71 --- /dev/null +++ b/libstdc++-v3/include/parallel/types.h @@ -0,0 +1,98 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/types.h + * @brief Basic typedefs. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler and Felix Putze. 
 + +#ifndef _GLIBCXX_PARALLEL_TYPES_H +#define _GLIBCXX_PARALLEL_TYPES_H 1 + +#include + +namespace __gnu_parallel +{ + // XXX need to use + /** @brief 8-bit signed integer. */ + typedef char int8; + + /** @brief 8-bit unsigned integer. */ + typedef unsigned char uint8; + + /** @brief 16-bit signed integer. */ + typedef short int16; + + /** @brief 16-bit unsigned integer. */ + typedef unsigned short uint16; + + /** @brief 32-bit signed integer. */ + typedef int int32; + + /** @brief 32-bit unsigned integer. */ + typedef unsigned int uint32; + + /** @brief 64-bit signed integer. */ + typedef long long int64; + + /** @brief 64-bit unsigned integer. */ + typedef unsigned long long uint64; + + /** + * @brief Unsigned integer to index elements. + * The total number of elements for each algorithm must fit into this type. + */ + typedef uint64 sequence_index_t; + + /** + * @brief Unsigned integer to index a thread number. + * The maximum thread number (for each processor) must fit into this type. + */ + typedef uint16 thread_index_t; + + /** + * @brief Longest compare-and-swappable integer type on this platform. + */ + typedef int64 lcas_t; + + /** + * @brief Number of bits of ::lcas_t. + */ + static const int lcas_t_bits = sizeof(lcas_t) * 8; + + /** + * @brief ::lcas_t with the right half of bits set to 1. + */ + static const lcas_t lcas_t_mask = (((lcas_t)1 << (lcas_t_bits / 2)) - 1); +} + +#endif /* _GLIBCXX_PARALLEL_TYPES_H */ diff --git a/libstdc++-v3/include/parallel/unique_copy.h b/libstdc++-v3/include/parallel/unique_copy.h new file mode 100644 index 00000000000..93a030429eb --- /dev/null +++ b/libstdc++-v3/include/parallel/unique_copy.h @@ -0,0 +1,193 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library.
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/unique_copy.h + * @brief Parallel implementations of std::unique_copy(). + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Robert Geisberger and Robin Dapp. + +#ifndef _GLIBCXX_PARALLEL_UNIQUE_H +#define _GLIBCXX_PARALLEL_UNIQUE_H 1 + +#include +#include + +namespace __gnu_parallel +{ + + /** @brief Parallel std::unique_copy(), with explicit equality predicate. + * @param first Begin iterator of input sequence. + * @param last End iterator of input sequence. + * @param result Begin iterator of result sequence. + * @param binary_pred Equality predicate. + * @return End iterator of result sequence.
*/ + template + inline OutputIterator + parallel_unique_copy(InputIterator first, InputIterator last, + OutputIterator result, BinaryPredicate binary_pred) + { + _GLIBCXX_CALL(last - first) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type size = last - first; + int num_threads = __gnu_parallel::get_max_threads(); + difference_type counter[num_threads + 1]; + + if (size == 0) + return result; + + // Let the first thread process two parts. + difference_type borders[num_threads + 2]; + __gnu_parallel::equally_split(size, num_threads + 1, borders); + + // First part contains at least one element. +#pragma omp parallel num_threads(num_threads) + { + int iam = omp_get_thread_num(); + + difference_type begin, end; + + // Check for length without duplicates + // Needed for position in output + difference_type i = 0; + OutputIterator out = result; + if (iam == 0) + { + begin = borders[0] + 1; // == 1 + end = borders[iam + 1]; + + i++; + new (static_cast(&*out)) value_type(*first); + out++; + + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (!binary_pred(*iter, *(iter-1))) + { + i++; + new (static_cast(&*out)) value_type(*iter); + out++; + } + } + } + else + { + begin = borders[iam]; //one part + end = borders[iam + 1]; + + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (!binary_pred(*iter, *(iter-1))) + { + i++; + } + } + } + counter[iam] = i; + + // Last part still untouched. + difference_type begin_output; + +#pragma omp barrier + + // Store result in output on calculated positions. 
+ begin_output = 0; + + if (iam == 0) + { + for (int t = 0; t < num_threads; t++) + begin_output += counter[t]; + + i = 0; + + OutputIterator iter_out = result + begin_output; + + begin = borders[num_threads]; + end = size; + + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (iter == first || !binary_pred(*iter, *(iter-1))) + { + i++; + new (static_cast(&*iter_out)) value_type(*iter); + iter_out++; + } + } + + counter[num_threads] = i; + } + else + { + for (int t = 0; t < iam; t++) + begin_output += counter[t]; + + OutputIterator iter_out = result + begin_output; + for (InputIterator iter = first + begin; iter < first + end; ++iter) + { + if (!binary_pred(*iter, *(iter-1))) + { + new (static_cast (&*iter_out)) value_type(*iter); + iter_out++; + } + } + } + } + + difference_type end_output = 0; + for (int t = 0; t < num_threads + 1; t++) + end_output += counter[t]; + + return result + end_output; + } + + /** @brief Parallel std::unique_copy(), without explicit equality predicate + * @param first Begin iterator of input sequence. + * @param last End iterator of input sequence. + * @param result Begin iterator of result sequence. + * @return End iterator of result sequence. */ + template + inline OutputIterator + parallel_unique_copy(InputIterator first, InputIterator last, + OutputIterator result) + { + typedef typename std::iterator_traits::value_type value_type; + + return parallel_unique_copy(first, last, result, std::equal_to()); + } + +}//namespace __gnu_parallel + +#endif diff --git a/libstdc++-v3/include/parallel/workstealing.h b/libstdc++-v3/include/parallel/workstealing.h new file mode 100644 index 00000000000..cc8f37e8d09 --- /dev/null +++ b/libstdc++-v3/include/parallel/workstealing.h @@ -0,0 +1,289 @@ +// -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. If not, write to +// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, +// MA 02111-1307, USA. + +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +/** @file parallel/workstealing.h + * @brief Parallelization of embarrassingly parallel execution by + * means of work-stealing. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Felix Putze. + +#ifndef _GLIBCXX_PARALLEL_WORKSTEALING_H +#define _GLIBCXX_PARALLEL_WORKSTEALING_H 1 + +#include +#include +#include + +namespace __gnu_parallel +{ + +#define _GLIBCXX_JOB_VOLATILE volatile + + /** @brief One job for a certain thread. */ + template + struct Job + { + typedef _DifferenceTp difference_type; + + /** @brief First element. + * + * Changed by owning and stealing thread. By stealing thread, + * always incremented. 
*/ + _GLIBCXX_JOB_VOLATILE difference_type first; + + /** @brief Last element. + * + * Changed by owning thread only. */ + _GLIBCXX_JOB_VOLATILE difference_type last; + + /** @brief Number of elements, i. e. @c last-first+1. + * + * Changed by owning thread only. */ + _GLIBCXX_JOB_VOLATILE difference_type load; + }; + + /** @brief Work stealing algorithm for random access iterators. + * + * Uses O(1) additional memory. Synchronization at job lists is + * done with atomic operations. + * @param begin Begin iterator of element sequence. + * @param end End iterator of element sequence. + * @param op User-supplied functor (comparator, predicate, adding + * functor, ...). + * @param f Functor to "process" an element with op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param r Functor to "add" a single result to the already + * processed elements (depends on functionality). + * @param base Base value for reduction. + * @param output Pointer to position where final result is written to + * @param bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ + template + Op + for_each_template_random_access_workstealing(RandomAccessIterator begin, + RandomAccessIterator end, + Op op, Fu& f, Red r, + Result base, Result& output, + typename std::iterator_traits::difference_type bound) + { + _GLIBCXX_CALL(end - begin) + + typedef std::iterator_traits traits_type; + typedef typename traits_type::difference_type difference_type; + + + difference_type chunk_size = static_cast(Settings::workstealing_chunk_size); + + // How many jobs? + difference_type length = (bound < 0) ? (end - begin) : bound; + + // To avoid false sharing in a cache line. + const int stride = Settings::cache_line_size * 10 / sizeof(Job) + 1; + + // Total number of threads currently working. 
+ thread_index_t busy = 0; + thread_index_t num_threads = get_max_threads(); + difference_type num_threads_min = num_threads < end - begin ? num_threads : end - begin; + + // No more threads than jobs, at least one thread. + difference_type num_threads_max = num_threads_min > 1 ? num_threads_min : 1; + num_threads = static_cast(num_threads_max); + + // Create job description array. + Job *job = new Job[num_threads * stride]; + + // Write base value to output. + output = base; + +#pragma omp parallel shared(busy) num_threads(num_threads) + { + // Initialization phase. + + // Flags for every thread if it is doing productive work. + bool iam_working = false; + + // Thread id. + thread_index_t iam = omp_get_thread_num(); + + // This job. + Job& my_job = job[iam * stride]; + + // Random number (for work stealing). + thread_index_t victim; + + // Local value for reduction. + Result result = Result(); + + // Number of elements to steal in one attempt. + difference_type steal; + + // Every thread has its own random number generator (modulo num_threads). + random_number rand_gen(iam, num_threads); + +#pragma omp atomic + // This thread is currently working. + busy++; + + iam_working = true; + + // How many jobs per thread? last thread gets the rest. + my_job.first = static_cast(iam * (length / num_threads)); + + my_job.last = (iam == (num_threads - 1)) ? (length - 1) : ((iam + 1) * (length / num_threads) - 1); + my_job.load = my_job.last - my_job.first + 1; + + // Init result with first value (to have a base value for reduction). + if (my_job.first <= my_job.last) + { + // Cannot use volatile variable directly. + difference_type my_first = my_job.first; + result = f(op, begin + my_first); + my_job.first++; + my_job.load--; + } + + RandomAccessIterator current; + +#pragma omp barrier + + // Actual work phase + // Work on own or stolen start + while (busy > 0) + { + // Work until no productive thread left. 
+#pragma omp flush(busy) + + // Thread has own work to do + while (my_job.first <= my_job.last) + { + // fetch-and-add call + // Reserve current job block (size chunk_size) in my queue. + difference_type current_job = fetch_and_add(&(my_job.first), chunk_size); + + // Update load, to make the three values consistent, + // first might have been changed in the meantime + my_job.load = my_job.last - my_job.first + 1; + for (difference_type job_counter = 0; job_counter < chunk_size && current_job <= my_job.last; job_counter++) + { + // Yes: process it! + current = begin + current_job; + current_job++; + + // Do actual work. + result = r(result, f(op, current)); + } + +#pragma omp flush(busy) + + } + + // After reaching this point, a thread's job list is empty. + if (iam_working) + { +#pragma omp atomic + // This thread no longer has work. + busy--; + + iam_working = false; + } + + difference_type supposed_first, supposed_last, supposed_load; + do + { + // Find random nonempty deque (not own) and do consistency check. + yield(); +#pragma omp flush(busy) + victim = rand_gen(); + supposed_first = job[victim * stride].first; + supposed_last = job[victim * stride].last; + supposed_load = job[victim * stride].load; + } + while (busy > 0 + && ((supposed_load <= 0) || ((supposed_first + supposed_load - 1) != supposed_last))); + + if (busy == 0) + break; + + if (supposed_load > 0) + { + // Has work and work to do. + // Number of elements to steal (at least one). + steal = (supposed_load < 2) ? 1 : supposed_load / 2; + + // Protects against stealing threads + // omp_set_lock(&(job[victim * stride].lock)); + + // Push victim's start forward. + difference_type stolen_first = fetch_and_add(&(job[victim * stride].first), steal); + difference_type stolen_try = stolen_first + steal - difference_type(1); + + // Protects against working thread + // omp_unset_lock(&(job[victim * stride].lock)); + + my_job.first = stolen_first; + + // Avoid std::min dependencies. 
+ my_job.last = stolen_try < supposed_last ? stolen_try : supposed_last; + + my_job.load = my_job.last - my_job.first + 1; + + //omp_unset_lock(&(my_job.lock)); + +#pragma omp atomic + // Has potential work again. + busy++; + iam_working = true; + +#pragma omp flush(busy) + } +#pragma omp flush(busy) + } // end while busy > 0 +#pragma omp critical(writeOutput) + // Add accumulated result to output. + output = r(output, result); + + //omp_destroy_lock(&(my_job.lock)); + } + + delete[] job; + + // Points to last element processed (needed as return value for + // some algorithms like transform) + f.finish_iterator = begin + length; + + return op; + } +} // end namespace + +#endif diff --git a/libstdc++-v3/include/std/algorithm b/libstdc++-v3/include/std/algorithm index 8f88cc0378c..9c61f2854fb 100644 --- a/libstdc++-v3/include/std/algorithm +++ b/libstdc++-v3/include/std/algorithm @@ -66,4 +66,8 @@ #include #include +#ifdef _GLIBCXX_PARALLEL +# include +#endif + #endif /* _GLIBCXX_ALGORITHM */ diff --git a/libstdc++-v3/include/std/bitset b/libstdc++-v3/include/std/bitset index 9ead05915dc..c9f911195e9 100644 --- a/libstdc++-v3/include/std/bitset +++ b/libstdc++-v3/include/std/bitset @@ -62,7 +62,7 @@ ((__n) < 1 ? 
0 : ((__n) + _GLIBCXX_BITSET_BITS_PER_WORD - 1) \ / _GLIBCXX_BITSET_BITS_PER_WORD) -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) /** * @if maint diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric index 7f21b3d9e8e..dcb32943fd9 100644 --- a/libstdc++-v3/include/std/numeric +++ b/libstdc++-v3/include/std/numeric @@ -67,4 +67,8 @@ #include #include +#ifdef _GLIBCXX_PARALLEL +# include +#endif + #endif /* _GLIBCXX_NUMERIC */ diff --git a/libstdc++-v3/include/std/string b/libstdc++-v3/include/std/string index 9bf779666c3..6848326d569 100644 --- a/libstdc++-v3/include/std/string +++ b/libstdc++-v3/include/std/string @@ -49,8 +49,12 @@ #include #include // For operators >>, <<, and getline. #include +#include +#include #include #include // For less +#include +#include #include #ifndef _GLIBCXX_EXPORT_TEMPLATE diff --git a/libstdc++-v3/libmath/Makefile.in b/libstdc++-v3/libmath/Makefile.in index f72792aebce..084d5a5ef71 100644 --- a/libstdc++-v3/libmath/Makefile.in +++ b/libstdc++-v3/libmath/Makefile.in @@ -86,6 +86,7 @@ AMTAR = @AMTAR@ AR = @AR@ AS = @AS@ ATOMICITY_SRCDIR = @ATOMICITY_SRCDIR@ +ATOMIC_FLAGS = @ATOMIC_FLAGS@ ATOMIC_WORD_SRCDIR = @ATOMIC_WORD_SRCDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -123,6 +124,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_PARALLEL_FALSE = @ENABLE_PARALLEL_FALSE@ +ENABLE_PARALLEL_TRUE = @ENABLE_PARALLEL_TRUE@ ENABLE_SYMVERS_DARWIN_FALSE = @ENABLE_SYMVERS_DARWIN_FALSE@ ENABLE_SYMVERS_DARWIN_TRUE = @ENABLE_SYMVERS_DARWIN_TRUE@ ENABLE_SYMVERS_FALSE = @ENABLE_SYMVERS_FALSE@ diff --git a/libstdc++-v3/libsupc++/Makefile.in b/libstdc++-v3/libsupc++/Makefile.in index 3e714743a24..a7437cc55fa 100644 --- a/libstdc++-v3/libsupc++/Makefile.in +++ b/libstdc++-v3/libsupc++/Makefile.in @@ -141,6 +141,7 @@ AMTAR = @AMTAR@ AR = @AR@ AS = @AS@ ATOMICITY_SRCDIR = @ATOMICITY_SRCDIR@ +ATOMIC_FLAGS = @ATOMIC_FLAGS@ 
ATOMIC_WORD_SRCDIR = @ATOMIC_WORD_SRCDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -178,6 +179,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_PARALLEL_FALSE = @ENABLE_PARALLEL_FALSE@ +ENABLE_PARALLEL_TRUE = @ENABLE_PARALLEL_TRUE@ ENABLE_SYMVERS_DARWIN_FALSE = @ENABLE_SYMVERS_DARWIN_FALSE@ ENABLE_SYMVERS_DARWIN_TRUE = @ENABLE_SYMVERS_DARWIN_TRUE@ ENABLE_SYMVERS_FALSE = @ENABLE_SYMVERS_FALSE@ diff --git a/libstdc++-v3/po/Makefile.in b/libstdc++-v3/po/Makefile.in index f9ec7e38ece..99047c85cb5 100644 --- a/libstdc++-v3/po/Makefile.in +++ b/libstdc++-v3/po/Makefile.in @@ -71,6 +71,7 @@ AMTAR = @AMTAR@ AR = @AR@ AS = @AS@ ATOMICITY_SRCDIR = @ATOMICITY_SRCDIR@ +ATOMIC_FLAGS = @ATOMIC_FLAGS@ ATOMIC_WORD_SRCDIR = @ATOMIC_WORD_SRCDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -108,6 +109,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_PARALLEL_FALSE = @ENABLE_PARALLEL_FALSE@ +ENABLE_PARALLEL_TRUE = @ENABLE_PARALLEL_TRUE@ ENABLE_SYMVERS_DARWIN_FALSE = @ENABLE_SYMVERS_DARWIN_FALSE@ ENABLE_SYMVERS_DARWIN_TRUE = @ENABLE_SYMVERS_DARWIN_TRUE@ ENABLE_SYMVERS_FALSE = @ENABLE_SYMVERS_FALSE@ diff --git a/libstdc++-v3/scripts/check_performance b/libstdc++-v3/scripts/check_performance index b7bbd6d0202..4e70106a005 100755 --- a/libstdc++-v3/scripts/check_performance +++ b/libstdc++-v3/scripts/check_performance @@ -32,10 +32,12 @@ SH_FLAG="-Wl,--rpath -Wl,$BUILD_DIR/../../gcc \ -Wl,--rpath -Wl,$BUILD_DIR/src/.libs" ST_FLAG="-static" LINK=$SH_FLAG -CXX="$COMPILER $INCLUDES $PCH_FLAGS $FLAGS $LINK" +CXX="$COMPILER $INCLUDES $FLAGS $CXXFLAGS $LINK" LIBS="./libtestc++.a" TESTS_FILE="testsuite_files_performance" +echo CXX is $CXX + for NAME in `cat $TESTS_FILE` do RUN=true @@ -79,7 +81,7 @@ do EXE_NAME="`echo $FILE_NAME.exe`" $CXX $TESTNAME $LIBS -o $EXE_NAME if [ -f $EXE_NAME ]; then - ./$EXE_NAME >& tmp.$FILE_NAME + ./$EXE_NAME >& tmp.$FILE_NAME else echo "compile error:" echo "$CXX $TESTNAME $LIBS -o 
$EXE_NAME" diff --git a/libstdc++-v3/scripts/testsuite_flags.in b/libstdc++-v3/scripts/testsuite_flags.in index 16623e0f615..457adaf4d5f 100755 --- a/libstdc++-v3/scripts/testsuite_flags.in +++ b/libstdc++-v3/scripts/testsuite_flags.in @@ -58,6 +58,13 @@ case ${query} in CXXFLAGS_config="@SECTION_FLAGS@ @CXXFLAGS@ @EXTRA_CXX_FLAGS@" echo ${CXXFLAGS_default} ${CXXFLAGS_config} ;; + --cxxparallelflags) + CXXFLAGS_parallel="-D_GLIBCXX_PARALLEL -fopenmp + -B${BUILD_DIR}/../libgomp + -I${BUILD_DIR}/../libgomp + -L${BUILD_DIR}/../libgomp/.libs -lgomp" + echo ${CXXFLAGS_parallel} + ;; --cxxpchflags) PCHFLAGS="@glibcxx_PCHFLAGS@" echo ${PCHFLAGS} diff --git a/libstdc++-v3/src/Makefile.am b/libstdc++-v3/src/Makefile.am index 419580190c5..0c000b07ba6 100644 --- a/libstdc++-v3/src/Makefile.am +++ b/libstdc++-v3/src/Makefile.am @@ -112,8 +112,7 @@ atomicity.cc: ${atomicity_file} # Source files linked in via configuration/make substitution for a # particular host, but with ad hoc naming rules. host_sources_extra = \ - basic_file.cc \ - c++locale.cc + basic_file.cc c++locale.cc ${ldbl_compat_sources} ${parallel_sources} c++locale.cc: ${glibcxx_srcdir}/$(CLOCALE_CC) $(LN_S) ${glibcxx_srcdir}/$(CLOCALE_CC) ./$@ || true @@ -121,6 +120,12 @@ c++locale.cc: ${glibcxx_srcdir}/$(CLOCALE_CC) basic_file.cc: ${glibcxx_srcdir}/$(BASIC_FILE_CC) $(LN_S) ${glibcxx_srcdir}/$(BASIC_FILE_CC) ./$@ || true +if ENABLE_PARALLEL +parallel_sources = parallel_list.cc +else +parallel_sources = +endif + if GLIBCXX_LDBL_COMPAT ldbl_compat_sources = compatibility-ldbl.cc else @@ -137,7 +142,6 @@ sources = \ complex_io.cc \ ctype.cc \ debug.cc \ - debug_list.cc \ functexcept.cc \ hash.cc \ hash_c++0x.cc \ @@ -148,6 +152,7 @@ sources = \ ios_locale.cc \ limits.cc \ list.cc \ + debug_list.cc \ locale.cc \ locale_init.cc \ locale_facets.cc \ @@ -175,8 +180,7 @@ sources = \ wlocale-inst.cc \ wstring-inst.cc \ ${host_sources} \ - ${host_sources_extra} \ - ${ldbl_compat_sources} + ${host_sources_extra} VPATH = 
$(top_srcdir)/src:$(top_srcdir) @@ -207,6 +211,12 @@ concept-inst.lo: concept-inst.cc concept-inst.o: concept-inst.cc $(CXXCOMPILE) -D_GLIBCXX_CONCEPT_CHECKS -fimplicit-templates -c $< +# Use special rules for parallel_list.cc compile. +parallel_list.lo: parallel_list.cc + $(LTCXXCOMPILE) -I$(glibcxx_builddir)/../libgomp -c $< +parallel_list.o: parallel_list.cc + $(CXXCOMPILE) -I$(glibcxx_builddir)/../libgomp -c $< + # Use special rules for the C++0x sources so that the proper flags are passed. system_error.lo: system_error.cc $(LTCXXCOMPILE) -std=gnu++0x -c $< diff --git a/libstdc++-v3/src/Makefile.in b/libstdc++-v3/src/Makefile.in index eb6e7235358..168be1cf761 100644 --- a/libstdc++-v3/src/Makefile.in +++ b/libstdc++-v3/src/Makefile.in @@ -69,9 +69,9 @@ toolexeclibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(toolexeclib_LTLIBRARIES) am__libstdc___la_SOURCES_DIST = bitmap_allocator.cc pool_allocator.cc \ mt_allocator.cc codecvt.cc compatibility.cc complex_io.cc \ - ctype.cc debug.cc debug_list.cc functexcept.cc hash.cc \ - hash_c++0x.cc globals_io.cc ios.cc ios_failure.cc ios_init.cc \ - ios_locale.cc limits.cc list.cc locale.cc locale_init.cc \ + ctype.cc debug.cc functexcept.cc hash.cc hash_c++0x.cc \ + globals_io.cc ios.cc ios_failure.cc ios_init.cc ios_locale.cc \ + limits.cc list.cc debug_list.cc locale.cc locale_init.cc \ locale_facets.cc localename.cc stdexcept.cc strstream.cc \ system_error.cc tree.cc allocator-inst.cc concept-inst.cc \ fstream-inst.cc ext-inst.cc ios-inst.cc iostream-inst.cc \ @@ -81,25 +81,28 @@ am__libstdc___la_SOURCES_DIST = bitmap_allocator.cc pool_allocator.cc \ wstring-inst.cc atomicity.cc codecvt_members.cc \ collate_members.cc ctype_members.cc messages_members.cc \ monetary_members.cc numeric_members.cc time_members.cc \ - basic_file.cc c++locale.cc compatibility-ldbl.cc + basic_file.cc c++locale.cc compatibility-ldbl.cc \ + parallel_list.cc am__objects_1 = atomicity.lo codecvt_members.lo collate_members.lo \ ctype_members.lo 
messages_members.lo monetary_members.lo \ numeric_members.lo time_members.lo -am__objects_2 = basic_file.lo c++locale.lo -@GLIBCXX_LDBL_COMPAT_TRUE@am__objects_3 = compatibility-ldbl.lo -am__objects_4 = bitmap_allocator.lo pool_allocator.lo mt_allocator.lo \ +@GLIBCXX_LDBL_COMPAT_TRUE@am__objects_2 = compatibility-ldbl.lo +@ENABLE_PARALLEL_TRUE@am__objects_3 = parallel_list.lo +am__objects_4 = basic_file.lo c++locale.lo $(am__objects_2) \ + $(am__objects_3) +am__objects_5 = bitmap_allocator.lo pool_allocator.lo mt_allocator.lo \ codecvt.lo compatibility.lo complex_io.lo ctype.lo debug.lo \ - debug_list.lo functexcept.lo hash.lo hash_c++0x.lo \ - globals_io.lo ios.lo ios_failure.lo ios_init.lo ios_locale.lo \ - limits.lo list.lo locale.lo locale_init.lo locale_facets.lo \ + functexcept.lo hash.lo hash_c++0x.lo globals_io.lo ios.lo \ + ios_failure.lo ios_init.lo ios_locale.lo limits.lo list.lo \ + debug_list.lo locale.lo locale_init.lo locale_facets.lo \ localename.lo stdexcept.lo strstream.lo system_error.lo \ tree.lo allocator-inst.lo concept-inst.lo fstream-inst.lo \ ext-inst.lo ios-inst.lo iostream-inst.lo istream-inst.lo \ istream.lo locale-inst.lo misc-inst.lo ostream-inst.lo \ sstream-inst.lo streambuf-inst.lo streambuf.lo string-inst.lo \ valarray-inst.lo wlocale-inst.lo wstring-inst.lo \ - $(am__objects_1) $(am__objects_2) $(am__objects_3) -am_libstdc___la_OBJECTS = $(am__objects_4) + $(am__objects_1) $(am__objects_4) +am_libstdc___la_OBJECTS = $(am__objects_5) libstdc___la_OBJECTS = $(am_libstdc___la_OBJECTS) DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir) depcomp = @@ -121,6 +124,7 @@ AMTAR = @AMTAR@ AR = @AR@ AS = @AS@ ATOMICITY_SRCDIR = @ATOMICITY_SRCDIR@ +ATOMIC_FLAGS = @ATOMIC_FLAGS@ ATOMIC_WORD_SRCDIR = @ATOMIC_WORD_SRCDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -158,6 +162,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_PARALLEL_FALSE = @ENABLE_PARALLEL_FALSE@ +ENABLE_PARALLEL_TRUE = @ENABLE_PARALLEL_TRUE@ ENABLE_SYMVERS_DARWIN_FALSE = @ENABLE_SYMVERS_DARWIN_FALSE@ ENABLE_SYMVERS_DARWIN_TRUE = @ENABLE_SYMVERS_DARWIN_TRUE@ ENABLE_SYMVERS_FALSE = @ENABLE_SYMVERS_FALSE@ @@ -348,9 +354,10 @@ atomicity_file = ${glibcxx_srcdir}/$(ATOMICITY_SRCDIR)/atomicity.h # Source files linked in via configuration/make substitution for a # particular host, but with ad hoc naming rules. host_sources_extra = \ - basic_file.cc \ - c++locale.cc + basic_file.cc c++locale.cc ${ldbl_compat_sources} ${parallel_sources} +@ENABLE_PARALLEL_FALSE@parallel_sources = +@ENABLE_PARALLEL_TRUE@parallel_sources = parallel_list.cc @GLIBCXX_LDBL_COMPAT_FALSE@ldbl_compat_sources = @GLIBCXX_LDBL_COMPAT_TRUE@ldbl_compat_sources = compatibility-ldbl.cc @@ -364,7 +371,6 @@ sources = \ complex_io.cc \ ctype.cc \ debug.cc \ - debug_list.cc \ functexcept.cc \ hash.cc \ hash_c++0x.cc \ @@ -375,6 +381,7 @@ sources = \ ios_locale.cc \ limits.cc \ list.cc \ + debug_list.cc \ locale.cc \ locale_init.cc \ locale_facets.cc \ @@ -402,8 +409,7 @@ sources = \ wlocale-inst.cc \ wstring-inst.cc \ ${host_sources} \ - ${host_sources_extra} \ - ${ldbl_compat_sources} + ${host_sources_extra} libstdc___la_SOURCES = $(sources) libstdc___la_LIBADD = \ @@ -790,6 +796,12 @@ concept-inst.lo: concept-inst.cc concept-inst.o: concept-inst.cc $(CXXCOMPILE) -D_GLIBCXX_CONCEPT_CHECKS -fimplicit-templates -c $< +# Use special rules for parallel_list.cc compile. 
+parallel_list.lo: parallel_list.cc + $(LTCXXCOMPILE) -I$(glibcxx_builddir)/../libgomp -c $< +parallel_list.o: parallel_list.cc + $(CXXCOMPILE) -I$(glibcxx_builddir)/../libgomp -c $< + # Use special rules for the C++0x sources so that the proper flags are passed. system_error.lo: system_error.cc $(LTCXXCOMPILE) -std=gnu++0x -c $< diff --git a/libstdc++-v3/src/list.cc b/libstdc++-v3/src/list.cc index e1f82e69c5d..0e70d231998 100644 --- a/libstdc++-v3/src/list.cc +++ b/libstdc++-v3/src/list.cc @@ -55,7 +55,7 @@ #include -_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD) +_GLIBCXX_BEGIN_NESTED_NAMESPACE(std, _GLIBCXX_STD_D) void _List_node_base::swap(_List_node_base& __x, _List_node_base& __y) diff --git a/libstdc++-v3/src/parallel_list.cc b/libstdc++-v3/src/parallel_list.cc new file mode 100644 index 00000000000..a79a886904c --- /dev/null +++ b/libstdc++-v3/src/parallel_list.cc @@ -0,0 +1,32 @@ +// Parallel mode support code for list -*- C++ -*- + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +// As a special exception, you may use this file as part of a free software +// library without restriction. 
Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License. + +#define _GLIBCXX_PARALLEL + +#include "list.cc" diff --git a/libstdc++-v3/testsuite/25_algorithms/headers/algorithm_parallel_mode.cc b/libstdc++-v3/testsuite/25_algorithms/headers/algorithm_parallel_mode.cc new file mode 100644 index 00000000000..c2e56c2bd35 --- /dev/null +++ b/libstdc++-v3/testsuite/25_algorithms/headers/algorithm_parallel_mode.cc @@ -0,0 +1,23 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-D_GLIBCXX_PARALLEL -fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. 
+ +#include diff --git a/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm.cc b/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm.cc new file mode 100644 index 00000000000..90e02796263 --- /dev/null +++ b/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm.cc @@ -0,0 +1,23 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +#include diff --git a/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm_mixed1.cc b/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm_mixed1.cc new file mode 100644 index 00000000000..c6c4ce2ff43 --- /dev/null +++ b/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm_mixed1.cc @@ -0,0 +1,37 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +#include +#include +#include + +void test() +{ + typedef unsigned short value_type; + typedef std::vector vector_type; + + const value_type c(0); + + vector_type v(10); + std::find(v.begin(), v.end(), c); + __gnu_parallel::find(v.begin(), v.end(), c); +} diff --git a/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm_mixed2.cc b/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm_mixed2.cc new file mode 100644 index 00000000000..94794d46c9f --- /dev/null +++ b/libstdc++-v3/testsuite/25_algorithms/headers/parallel_algorithm_mixed2.cc @@ -0,0 +1,42 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +// Make sure to test without _GLIBCXX_PARALLEL +#ifdef _GLIBCXX_PARALLEL +# undef _GLIBCXX_PARALLEL +#endif + +#include +#include +#include + +void test() +{ + typedef unsigned short value_type; + typedef std::vector vector_type; + + const value_type c(0); + + vector_type v(10); + std::find(v.begin(), v.end(), c); + __gnu_parallel::find(v.begin(), v.end(), c); +} diff --git a/libstdc++-v3/testsuite/26_numerics/headers/numeric/numeric_parallel_mode.cc b/libstdc++-v3/testsuite/26_numerics/headers/numeric/numeric_parallel_mode.cc new file mode 100644 index 00000000000..3e3b3de15dd --- /dev/null +++ b/libstdc++-v3/testsuite/26_numerics/headers/numeric/numeric_parallel_mode.cc @@ -0,0 +1,23 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-D_GLIBCXX_PARALLEL -fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. 
+ +#include diff --git a/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric.cc b/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric.cc new file mode 100644 index 00000000000..d2d531e9ab6 --- /dev/null +++ b/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric.cc @@ -0,0 +1,23 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +#include diff --git a/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric_mixed1.cc b/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric_mixed1.cc new file mode 100644 index 00000000000..425fd59c34b --- /dev/null +++ b/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric_mixed1.cc @@ -0,0 +1,38 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +#include +#include +#include +#include + +void test() +{ + typedef unsigned short value_type; + typedef std::vector vector_type; + + const value_type c(0); + + vector_type v(10); + std::accumulate(v.begin(), v.end(), value_type(1)); + __gnu_parallel::accumulate(v.begin(), v.end(), value_type(1)); +} diff --git a/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric_mixed2.cc b/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric_mixed2.cc new file mode 100644 index 00000000000..d7bdfc1a511 --- /dev/null +++ b/libstdc++-v3/testsuite/26_numerics/headers/numeric/parallel_numeric_mixed2.cc @@ -0,0 +1,43 @@ +// { dg-do compile } +// { dg-require-parallel-mode "" } +// { dg-options "-fopenmp" { target *-*-* } } + +// Copyright (C) 2007 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. 
+ +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +// Make sure to test without _GLIBCXX_PARALLEL +#ifdef _GLIBCXX_PARALLEL +# undef _GLIBCXX_PARALLEL +#endif + +#include +#include +#include +#include + +void test() +{ + typedef unsigned short value_type; + typedef std::vector vector_type; + + const value_type c(0); + + vector_type v(10); + std::accumulate(v.begin(), v.end(), value_type(1)); + __gnu_parallel::accumulate(v.begin(), v.end(), value_type(1)); +} diff --git a/libstdc++-v3/testsuite/Makefile.am b/libstdc++-v3/testsuite/Makefile.am index a881b9f1a2a..4a4dc442dd3 100644 --- a/libstdc++-v3/testsuite/Makefile.am +++ b/libstdc++-v3/testsuite/Makefile.am @@ -123,11 +123,41 @@ doc-performance: ${glibcxx_srcdir}/testsuite/data/make_graph_htmls.xml \ ${glibcxx_srcdir}/testsuite/data/make_graph_test_infos.xml local g++) +# Runs the testsuite in parallel mode. +libgomp_dir=${glibcxx_builddir}/../libgomp +libgomp_flags=-B${glibcxx_builddir}/../libgomp \ + -I${glibcxx_builddir}/../libgomp \ + -L${glibcxx_builddir}/../libgomp/.libs -lgomp + +atomic_flags=$(ATOMIC_FLAGS) +parallel_flags="unix/-D_GLIBCXX_PARALLEL/-fopenmp" + +check-parallel: site.exp + -@(if test ! -d $${libgomp_dir}; then \ + echo "Testing parallel mode failed as libgomp not present."; \ + exit 1; \ + fi; \ + outputdir=parallel; export outputdir; \ + if test ! 
-d $${outputdir}; then \ + mkdir $${outputdir}; \ + fi; \ + srcdir=`$(am__cd) $(srcdir) && pwd`; export srcdir; \ + EXPECT=$(EXPECT); export EXPECT; \ + $(MAKE) CXXFLAGS="$(atomic_flags) $(libgomp_flags)" $(AM_MAKEFLAGS) RUNTESTFLAGS="$(RUNTESTFLAGS) conformance.exp --outdir $${outputdir} --objdir $${outputdir} --target_board=$(parallel_flags)" check-DEJAGNU; ) + +check-performance-parallel: testsuite_files_performance ${performance_script} + -@(chmod + ${check_performance_script}; \ + CXXFLAGS="-D_GLIBCXX_PARALLEL -fopenmp $(atomic_flags) $(libgomp_flags)"; export CXXFLAGS; \ + ${check_performance_script} ${glibcxx_srcdir} ${glibcxx_builddir}) .PHONY: baseline_symbols new-abi-baseline \ - check-abi check-compile check-performance + check-abi check-compile check-performance check-parallel # By adding these files here, automake will remove them for 'make clean' CLEANFILES = *.txt *.tst *.exe core* filebuf_* tmp* ostream_* *.log *.sum \ testsuite_* site.exp abi_check baseline_symbols *TEST* *.dat \ - *.o *.cc *.a *.so *.xml + *.s *.o *.cc *.a *.so *.xml + +# To remove directories. 
+clean-local: + rm -rf parallel diff --git a/libstdc++-v3/testsuite/Makefile.in b/libstdc++-v3/testsuite/Makefile.in index d943b3f836d..b76a5ed194e 100644 --- a/libstdc++-v3/testsuite/Makefile.in +++ b/libstdc++-v3/testsuite/Makefile.in @@ -75,6 +75,7 @@ AMTAR = @AMTAR@ AR = @AR@ AS = @AS@ ATOMICITY_SRCDIR = @ATOMICITY_SRCDIR@ +ATOMIC_FLAGS = @ATOMIC_FLAGS@ ATOMIC_WORD_SRCDIR = @ATOMIC_WORD_SRCDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -112,6 +113,8 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_PARALLEL_FALSE = @ENABLE_PARALLEL_FALSE@ +ENABLE_PARALLEL_TRUE = @ENABLE_PARALLEL_TRUE@ ENABLE_SYMVERS_DARWIN_FALSE = @ENABLE_SYMVERS_DARWIN_FALSE@ ENABLE_SYMVERS_DARWIN_TRUE = @ENABLE_SYMVERS_DARWIN_TRUE@ ENABLE_SYMVERS_FALSE = @ENABLE_SYMVERS_FALSE@ @@ -303,10 +306,19 @@ check_performance_script = ${glibcxx_srcdir}/scripts/check_performance # Generates the plots and graphs for performance testing. doc_performance_script = ${glibcxx_srcdir}/scripts/make_graphs.py +# Runs the testsuite in parallel mode. +libgomp_dir = ${glibcxx_builddir}/../libgomp +libgomp_flags = -B${glibcxx_builddir}/../libgomp \ + -I${glibcxx_builddir}/../libgomp \ + -L${glibcxx_builddir}/../libgomp/.libs -lgomp + +atomic_flags = $(ATOMIC_FLAGS) +parallel_flags = "unix/-D_GLIBCXX_PARALLEL/-fopenmp" + # By adding these files here, automake will remove them for 'make clean' CLEANFILES = *.txt *.tst *.exe core* filebuf_* tmp* ostream_* *.log *.sum \ testsuite_* site.exp abi_check baseline_symbols *TEST* *.dat \ - *.o *.cc *.a *.so *.xml + *.s *.o *.cc *.a *.so *.xml all: all-am @@ -434,7 +446,7 @@ maintainer-clean-generic: @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am -clean-am: clean-generic clean-libtool mostlyclean-am +clean-am: clean-generic clean-libtool clean-local mostlyclean-am distclean: distclean-am -rm -f Makefile @@ -480,14 +492,15 @@ ps-am: uninstall-am: uninstall-info-am .PHONY: all all-am check check-DEJAGNU check-am clean clean-generic \ - clean-libtool distclean distclean-DEJAGNU distclean-generic \ - distclean-libtool distdir dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-exec \ - install-exec-am install-info install-info-am install-man \ - install-strip installcheck installcheck-am installdirs \ - maintainer-clean maintainer-clean-generic mostlyclean \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - uninstall uninstall-am uninstall-info-am + clean-libtool clean-local distclean distclean-DEJAGNU \ + distclean-generic distclean-libtool distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-exec install-exec-am install-info \ + install-info-am install-man install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am \ + uninstall-info-am # This rule generates all of the testsuite_files* lists at once. @@ -557,8 +570,30 @@ doc-performance: ${glibcxx_srcdir}/testsuite/data/make_graph_htmls.xml \ ${glibcxx_srcdir}/testsuite/data/make_graph_test_infos.xml local g++) +check-parallel: site.exp + -@(if test ! -d $${libgomp_dir}; then \ + echo "Testing parallel mode failed as libgomp not present."; \ + exit 1; \ + fi; \ + outputdir=parallel; export outputdir; \ + if test ! 
-d $${outputdir}; then \ + mkdir $${outputdir}; \ + fi; \ + srcdir=`$(am__cd) $(srcdir) && pwd`; export srcdir; \ + EXPECT=$(EXPECT); export EXPECT; \ + $(MAKE) CXXFLAGS="$(atomic_flags) $(libgomp_flags)" $(AM_MAKEFLAGS) RUNTESTFLAGS="$(RUNTESTFLAGS) conformance.exp --outdir $${outputdir} --objdir $${outputdir} --target_board=$(parallel_flags)" check-DEJAGNU; ) + +check-performance-parallel: testsuite_files_performance ${performance_script} + -@(chmod + ${check_performance_script}; \ + CXXFLAGS="-D_GLIBCXX_PARALLEL -fopenmp $(atomic_flags) $(libgomp_flags)"; export CXXFLAGS; \ + ${check_performance_script} ${glibcxx_srcdir} ${glibcxx_builddir}) + .PHONY: baseline_symbols new-abi-baseline \ - check-abi check-compile check-performance + check-abi check-compile check-performance check-parallel + +# To remove directories. +clean-local: + rm -rf parallel # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/libstdc++-v3/testsuite/lib/dg-options.exp b/libstdc++-v3/testsuite/lib/dg-options.exp index 29943ce2a6b..a13fa74cf63 100644 --- a/libstdc++-v3/testsuite/lib/dg-options.exp +++ b/libstdc++-v3/testsuite/lib/dg-options.exp @@ -35,6 +35,15 @@ proc dg-require-debug-mode { args } { return } +proc dg-require-parallel-mode { args } { + if { ![ check_v3_target_parallel_mode ] } { + upvar dg-do-what dg-do-what + set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"] + return + } + return +} + proc dg-require-fileio { args } { if { ![ check_v3_target_fileio ] } { upvar dg-do-what dg-do-what diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp b/libstdc++-v3/testsuite/lib/libstdc++.exp index 460330527fd..136c2f7e7cb 100644 --- a/libstdc++-v3/testsuite/lib/libstdc++.exp +++ b/libstdc++-v3/testsuite/lib/libstdc++.exp @@ -383,6 +383,7 @@ proc v3_target_compile_as_c { source dest type options } { global flags_file global blddir global cc + global cxxflags if { [target_info 
needs_status_wrapper] != "" && [info exists gluefile] } { lappend options "libs=${gluefile}" @@ -393,6 +394,7 @@ proc v3_target_compile_as_c { source dest type options } { set cc_final $cc set cxxlibglossflags [libgloss_link_flags] set cc_final [concat $cc_final $cxxlibglossflags] + set cc_final [concat $cc_final $cxxflags] set cc_final [concat $cc_final $includes] regsub -all {\s[-]nostdinc[+][+]} $cc_final "" cc_final @@ -884,3 +886,65 @@ proc check_v3_target_debug_mode { } { verbose "check_v3_target_debug_mode: $et_debug_mode" 2 return $et_debug_mode }
+
+# Return 1 if a minimal program that includes <omp.h> can be built as an
+# executable with $cxxflags, $DEFAULT_CXXFLAGS and -Werror, else 0.  The
+# answer is cached in et_parallel_mode until the target name changes.
+proc check_v3_target_parallel_mode { } {
+    global cxxflags
+    global DEFAULT_CXXFLAGS
+    global et_parallel_mode
+    # Must be global as well: as a proc-local it started out empty on every
+    # call, so the cached result was always discarded and recomputed.
+    global et_parallel_mode_target_name
+
+    if { ![info exists et_parallel_mode_target_name] } {
+        set et_parallel_mode_target_name ""
+    }
+
+    # If the target has changed since we set the cached value, clear it.
+    set current_target [current_target_name]
+    if { $current_target != $et_parallel_mode_target_name } {
+        verbose "check_v3_target_parallel_mode: `$et_parallel_mode_target_name'" 2
+        set et_parallel_mode_target_name $current_target
+        if [info exists et_parallel_mode] {
+            verbose "check_v3_target_parallel_mode: removing cached result" 2
+            unset et_parallel_mode
+        }
+    }
+
+    if [info exists et_parallel_mode] {
+        verbose "check_v3_target_parallel_mode: using cached result" 2
+    } else {
+        set et_parallel_mode 0
+
+        # Write and build (but do not run) a C++ test program that
+        # depends on parallel mode working.
+        set src parallel_mode[pid].cc
+        set exe parallel_mode[pid].exe
+
+        set f [open $src "w"]
+        puts $f "#include <omp.h>"
+        puts $f "int main()"
+        puts $f "{ return 0; }"
+        close $f
+
+        # Build with the default flags plus -Werror, then restore cxxflags.
+        set cxxflags_saved $cxxflags
+        set cxxflags "$cxxflags $DEFAULT_CXXFLAGS -Werror"
+
+        set lines [v3_target_compile $src $exe executable ""]
+        set cxxflags $cxxflags_saved
+        # Remove the probe source and any executable the build produced.
+        file delete $src $exe
+
+        if [string match "" $lines] {
+            # No error message, compilation succeeded.
+            set et_parallel_mode 1
+        } else {
+            verbose "check_v3_target_parallel_mode: compilation failed" 2
+        }
+    }
+    verbose "check_v3_target_parallel_mode: $et_parallel_mode" 2
+    return $et_parallel_mode
+}