From 7e2f0d2d77e4bc273fe00f99d970605d8e38d4d6 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 4 Feb 2013 10:16:33 +0100 Subject: [PATCH] Fix handling of collating symbols in regexps --- ChangeLog | 9 ++++++ NEWS | 4 +-- posix/Makefile | 3 +- posix/bug-regex35.c | 52 ++++++++++++++++++++++++++++++++ posix/regcomp.c | 72 +++++++++++++++++---------------------------- 5 files changed, 92 insertions(+), 48 deletions(-) create mode 100644 posix/bug-regex35.c diff --git a/ChangeLog b/ChangeLog index edb023475f..9ed5fa4d72 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2013-02-12 Andreas Schwab + + [BZ #11561] + * posix/regcomp.c (parse_bracket_exp): When looking up collating + elements compare against the byte sequence of it, not its name. + * posix/Makefile (tests): Add bug-regex35. + (bug-regex35-ENV): Define. + * posix/bug-regex35.c: New file. + 2013-02-11 Tom de Vries * string/str-two-way.h: Fix typo RESULT_TYPE -> RETURN_TYPE in diff --git a/NEWS b/NEWS index d6467e6af6..f5c6b5215f 100644 --- a/NEWS +++ b/NEWS @@ -9,8 +9,8 @@ Version 2.18 * The following bugs are resolved with this release: - 13951, 14142, 14200, 14317, 14327, 14496, 14964, 14981, 14982, 14985, - 14994, 14996, 15003, 15006, 15020, 15023, 15036, 15054, 15062. + 11561, 13951, 14142, 14200, 14317, 14327, 14496, 14964, 14981, 14982, + 14985, 14994, 14996, 15003, 15006, 15020, 15023, 15036, 15054, 15062. Version 2.17 diff --git a/posix/Makefile b/posix/Makefile index 57672d8837..88d409f303 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -86,7 +86,7 @@ tests := tstgetopt testfnm runtests runptests \ tst-rfc3484-3 \ tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \ bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \ - bug-getopt5 tst-getopt_long1 + bug-getopt5 tst-getopt_long1 bug-regex35 xtests := bug-ga2 ifeq (yes,$(build-shared)) test-srcs := globtest @@ -199,6 +199,7 @@ bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata +bug-regex35-ENV = LOCPATH=$(common-objpfx)localedata tst-rxspencer-ARGS = --utf8 rxspencer/tests tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata tst-pcre-ARGS = PCRE.tests diff --git a/posix/bug-regex35.c b/posix/bug-regex35.c new file mode 100644 index 0000000000..7957e7f860 --- /dev/null +++ b/posix/bug-regex35.c @@ -0,0 +1,52 @@ +/* Test regcomp with collating symbols in bracket expressions + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +static int +do_test (void) +{ + regex_t r; + + if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL) + { + puts ("setlocale failed"); + return 1; + } + + if (regcomp (&r, "[[.ch.]]", REG_NOSUB) != 0) + { + puts ("regcomp failed"); + return 1; + } + + if (regexec (&r, "ch", 0, 0, 0) != 0) + { + puts ("regexec failed"); + return 1; + } + + regfree (&r); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/posix/regcomp.c b/posix/regcomp.c index 88bd581d96..578044155b 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -2776,40 +2776,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Local function for parse_bracket_exp used in _LIBC environement. Seek the collating symbol entry correspondings to NAME. - Return the index of the symbol in the SYMB_TABLE. */ + Return the index of the symbol in the SYMB_TABLE, + or -1 if not found. */ auto inline int32_t __attribute ((always_inline)) - seek_collating_symbol_entry (name, name_len) - const unsigned char *name; - size_t name_len; + seek_collating_symbol_entry (const unsigned char *name, size_t name_len) { - int32_t hash = elem_hash ((const char *) name, name_len); - int32_t elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - int32_t second = hash % (table_size - 2) + 1; + int32_t elem; - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - /* Compare the length of the name. */ - && name_len == extra[symb_table[2 * elem + 1]] - /* Compare the name. */ - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], - name_len) == 0) - { - /* Yep, this is the entry. */ - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - return elem; + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + int32_t idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + if (/* Compare the length of the name. */ + name_len == extra[idx] + /* Compare the name. */ + && memcmp (name, &extra[idx + 1], name_len) == 0) + /* Yep, this is the entry. */ + return elem; + } + return -1; } /* Local function for parse_bracket_exp used in _LIBC environment. @@ -2818,8 +2807,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline unsigned int __attribute ((always_inline)) - lookup_collation_sequence_value (br_elem) - bracket_elem_t *br_elem; + lookup_collation_sequence_value (bracket_elem_t *br_elem) { if (br_elem->type == SB_CHAR) { @@ -2847,7 +2835,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, int32_t elem, idx; elem = seek_collating_symbol_entry (br_elem->opr.name, sym_name_len); - if (symb_table[2 * elem] != 0) + if (elem != -1) { /* We found the entry. */ idx = symb_table[2 * elem + 1]; @@ -2865,7 +2853,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Return the collation sequence value. */ return *(unsigned int *) (extra + idx); } - else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + else if (sym_name_len == 1) { /* No valid character. Match it as a single byte character. */ @@ -2887,11 +2875,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline reg_errcode_t __attribute ((always_inline)) - build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - int *range_alloc; - bitset_t sbcset; - bracket_elem_t *start_elem, *end_elem; + build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) { unsigned int ch; uint32_t start_collseq; @@ -2970,25 +2955,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline reg_errcode_t __attribute ((always_inline)) - build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - int *coll_sym_alloc; - bitset_t sbcset; - const unsigned char *name; + build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) { int32_t elem, idx; size_t name_len = strlen ((const char *) name); if (nrules != 0) { elem = seek_collating_symbol_entry (name, name_len); - if (symb_table[2 * elem] != 0) + if (elem != -1) { /* We found the entry. */ idx = symb_table[2 * elem + 1]; /* Skip the name of collating element name. */ idx += 1 + extra[idx]; } - else if (symb_table[2 * elem] == 0 && name_len == 1) + else if (name_len == 1) { /* No valid character, treat it as a normal character. */