Fix handling of collating symbols in regexps
This commit is contained in:
parent
a175b684e2
commit
7e2f0d2d77
@ -1,3 +1,12 @@
|
|||||||
|
2013-02-12 Andreas Schwab <schwab@suse.de>
|
||||||
|
|
||||||
|
[BZ #11561]
|
||||||
|
* posix/regcomp.c (parse_bracket_exp): When looking up collating
|
||||||
|
elements compare against the byte sequence of it, not its name.
|
||||||
|
* posix/Makefile (tests): Add bug-regex35.
|
||||||
|
(bug-regex35-ENV): Define.
|
||||||
|
* posix/bug-regex35.c: New file.
|
||||||
|
|
||||||
2013-02-11 Tom de Vries <tom@codesourcery.com>
|
2013-02-11 Tom de Vries <tom@codesourcery.com>
|
||||||
|
|
||||||
* string/str-two-way.h: Fix typo RESULT_TYPE -> RETURN_TYPE in
|
* string/str-two-way.h: Fix typo RESULT_TYPE -> RETURN_TYPE in
|
||||||
|
4
NEWS
4
NEWS
@ -9,8 +9,8 @@ Version 2.18
|
|||||||
|
|
||||||
* The following bugs are resolved with this release:
|
* The following bugs are resolved with this release:
|
||||||
|
|
||||||
13951, 14142, 14200, 14317, 14327, 14496, 14964, 14981, 14982, 14985,
|
11561, 13951, 14142, 14200, 14317, 14327, 14496, 14964, 14981, 14982,
|
||||||
14994, 14996, 15003, 15006, 15020, 15023, 15036, 15054, 15062.
|
14985, 14994, 14996, 15003, 15006, 15020, 15023, 15036, 15054, 15062.
|
||||||
|
|
||||||
|
|
||||||
Version 2.17
|
Version 2.17
|
||||||
|
@ -86,7 +86,7 @@ tests := tstgetopt testfnm runtests runptests \
|
|||||||
tst-rfc3484-3 \
|
tst-rfc3484-3 \
|
||||||
tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \
|
tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \
|
||||||
bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
|
bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
|
||||||
bug-getopt5 tst-getopt_long1
|
bug-getopt5 tst-getopt_long1 bug-regex35
|
||||||
xtests := bug-ga2
|
xtests := bug-ga2
|
||||||
ifeq (yes,$(build-shared))
|
ifeq (yes,$(build-shared))
|
||||||
test-srcs := globtest
|
test-srcs := globtest
|
||||||
@ -199,6 +199,7 @@ bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata
|
|||||||
bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata
|
bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata
|
bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata
|
bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
|
bug-regex35-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
tst-rxspencer-ARGS = --utf8 rxspencer/tests
|
tst-rxspencer-ARGS = --utf8 rxspencer/tests
|
||||||
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
|
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
tst-pcre-ARGS = PCRE.tests
|
tst-pcre-ARGS = PCRE.tests
|
||||||
|
52
posix/bug-regex35.c
Normal file
52
posix/bug-regex35.c
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/* Test regcomp with collating symbols in bracket expressions
|
||||||
|
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <locale.h>
|
||||||
|
#include <regex.h>
|
||||||
|
|
||||||
|
/* Check that a bracket expression consisting of the collating symbol
   [.ch.] can be compiled and that it matches the string "ch" once the
   cs_CZ.UTF-8 locale has been selected.

   Returns 0 when every step succeeds.  Returns 1 after printing which
   step failed (setlocale, regcomp or regexec) otherwise.  */
static int
do_test (void)
{
  regex_t r;
  int result = 0;

  if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
    {
      puts ("setlocale failed");
      return 1;
    }

  /* Nothing to release if compilation itself fails.  */
  if (regcomp (&r, "[[.ch.]]", REG_NOSUB) != 0)
    {
      puts ("regcomp failed");
      return 1;
    }

  /* REG_NOSUB was requested, so no match array is passed.  */
  if (regexec (&r, "ch", 0, NULL, 0) != 0)
    {
      puts ("regexec failed");
      result = 1;
    }

  /* Release the compiled pattern on the failure path as well as on
     success, so a failed match does not leak it.  */
  regfree (&r);
  return result;
}
|
||||||
|
|
||||||
|
#define TEST_FUNCTION do_test ()
|
||||||
|
#include "../test-skeleton.c"
|
@ -2776,40 +2776,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|||||||
|
|
||||||
/* Local function for parse_bracket_exp used in _LIBC environment.
|
/* Local function for parse_bracket_exp used in _LIBC environment.
|
||||||
Seek the collating symbol entry corresponding to NAME.
|
Seek the collating symbol entry corresponding to NAME.
|
||||||
Return the index of the symbol in the SYMB_TABLE. */
|
Return the index of the symbol in the SYMB_TABLE,
|
||||||
|
or -1 if not found. */
|
||||||
|
|
||||||
auto inline int32_t
|
auto inline int32_t
|
||||||
__attribute ((always_inline))
|
__attribute ((always_inline))
|
||||||
seek_collating_symbol_entry (name, name_len)
|
seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
|
||||||
const unsigned char *name;
|
|
||||||
size_t name_len;
|
|
||||||
{
|
{
|
||||||
int32_t hash = elem_hash ((const char *) name, name_len);
|
int32_t elem;
|
||||||
int32_t elem = hash % table_size;
|
|
||||||
if (symb_table[2 * elem] != 0)
|
|
||||||
{
|
|
||||||
int32_t second = hash % (table_size - 2) + 1;
|
|
||||||
|
|
||||||
do
|
for (elem = 0; elem < table_size; elem++)
|
||||||
{
|
if (symb_table[2 * elem] != 0)
|
||||||
/* First compare the hashing value. */
|
{
|
||||||
if (symb_table[2 * elem] == hash
|
int32_t idx = symb_table[2 * elem + 1];
|
||||||
/* Compare the length of the name. */
|
/* Skip the name of collating element name. */
|
||||||
&& name_len == extra[symb_table[2 * elem + 1]]
|
idx += 1 + extra[idx];
|
||||||
/* Compare the name. */
|
if (/* Compare the length of the name. */
|
||||||
&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
|
name_len == extra[idx]
|
||||||
name_len) == 0)
|
/* Compare the name. */
|
||||||
{
|
&& memcmp (name, &extra[idx + 1], name_len) == 0)
|
||||||
/* Yep, this is the entry. */
|
/* Yep, this is the entry. */
|
||||||
break;
|
return elem;
|
||||||
}
|
}
|
||||||
|
return -1;
|
||||||
/* Next entry. */
|
|
||||||
elem += second;
|
|
||||||
}
|
|
||||||
while (symb_table[2 * elem] != 0);
|
|
||||||
}
|
|
||||||
return elem;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Local function for parse_bracket_exp used in _LIBC environment.
|
/* Local function for parse_bracket_exp used in _LIBC environment.
|
||||||
@ -2818,8 +2807,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|||||||
|
|
||||||
auto inline unsigned int
|
auto inline unsigned int
|
||||||
__attribute ((always_inline))
|
__attribute ((always_inline))
|
||||||
lookup_collation_sequence_value (br_elem)
|
lookup_collation_sequence_value (bracket_elem_t *br_elem)
|
||||||
bracket_elem_t *br_elem;
|
|
||||||
{
|
{
|
||||||
if (br_elem->type == SB_CHAR)
|
if (br_elem->type == SB_CHAR)
|
||||||
{
|
{
|
||||||
@ -2847,7 +2835,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|||||||
int32_t elem, idx;
|
int32_t elem, idx;
|
||||||
elem = seek_collating_symbol_entry (br_elem->opr.name,
|
elem = seek_collating_symbol_entry (br_elem->opr.name,
|
||||||
sym_name_len);
|
sym_name_len);
|
||||||
if (symb_table[2 * elem] != 0)
|
if (elem != -1)
|
||||||
{
|
{
|
||||||
/* We found the entry. */
|
/* We found the entry. */
|
||||||
idx = symb_table[2 * elem + 1];
|
idx = symb_table[2 * elem + 1];
|
||||||
@ -2865,7 +2853,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|||||||
/* Return the collation sequence value. */
|
/* Return the collation sequence value. */
|
||||||
return *(unsigned int *) (extra + idx);
|
return *(unsigned int *) (extra + idx);
|
||||||
}
|
}
|
||||||
else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
|
else if (sym_name_len == 1)
|
||||||
{
|
{
|
||||||
/* No valid character. Match it as a single byte
|
/* No valid character. Match it as a single byte
|
||||||
character. */
|
character. */
|
||||||
@ -2887,11 +2875,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|||||||
|
|
||||||
auto inline reg_errcode_t
|
auto inline reg_errcode_t
|
||||||
__attribute ((always_inline))
|
__attribute ((always_inline))
|
||||||
build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
|
build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
|
||||||
re_charset_t *mbcset;
|
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
|
||||||
int *range_alloc;
|
|
||||||
bitset_t sbcset;
|
|
||||||
bracket_elem_t *start_elem, *end_elem;
|
|
||||||
{
|
{
|
||||||
unsigned int ch;
|
unsigned int ch;
|
||||||
uint32_t start_collseq;
|
uint32_t start_collseq;
|
||||||
@ -2970,25 +2955,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|||||||
|
|
||||||
auto inline reg_errcode_t
|
auto inline reg_errcode_t
|
||||||
__attribute ((always_inline))
|
__attribute ((always_inline))
|
||||||
build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
|
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
|
||||||
re_charset_t *mbcset;
|
int *coll_sym_alloc, const unsigned char *name)
|
||||||
int *coll_sym_alloc;
|
|
||||||
bitset_t sbcset;
|
|
||||||
const unsigned char *name;
|
|
||||||
{
|
{
|
||||||
int32_t elem, idx;
|
int32_t elem, idx;
|
||||||
size_t name_len = strlen ((const char *) name);
|
size_t name_len = strlen ((const char *) name);
|
||||||
if (nrules != 0)
|
if (nrules != 0)
|
||||||
{
|
{
|
||||||
elem = seek_collating_symbol_entry (name, name_len);
|
elem = seek_collating_symbol_entry (name, name_len);
|
||||||
if (symb_table[2 * elem] != 0)
|
if (elem != -1)
|
||||||
{
|
{
|
||||||
/* We found the entry. */
|
/* We found the entry. */
|
||||||
idx = symb_table[2 * elem + 1];
|
idx = symb_table[2 * elem + 1];
|
||||||
/* Skip the name of collating element name. */
|
/* Skip the name of collating element name. */
|
||||||
idx += 1 + extra[idx];
|
idx += 1 + extra[idx];
|
||||||
}
|
}
|
||||||
else if (symb_table[2 * elem] == 0 && name_len == 1)
|
else if (name_len == 1)
|
||||||
{
|
{
|
||||||
/* No valid character, treat it as a normal
|
/* No valid character, treat it as a normal
|
||||||
character. */
|
character. */
|
||||||
|
Loading…
Reference in New Issue
Block a user