2001-04-12  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/TESTS2: New file.
	* iconvdata/run-iconv-test.sh: Also run tests from TESTS2.
	* iconvdata/testdata/alfabeta..UTF-8: New file.
	* iconvdata/testdata/alfabeta..UTF-16.BE: New file.
	* iconvdata/testdata/alfabeta..UTF-16.LE: New file.
	* iconvdata/testdata/alfabeta..UTF-32.BE: New file.
	* iconvdata/testdata/alfabeta..UTF-32.LE: New file.

2001-04-11  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/utf-32.c: New file.
	* iconvdata/gconv-modules: Add entries for UTF-32, UTF-32LE, UTF-32BE.
	* iconvdata/Makefile (modules): Add UTF-32.
	(distribute): Add utf-32.c.

2001-04-11  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/utf-16.c (PREPARE_LOOP): Initialize 'swap' after possibly
	changing it in the state. After incrementing 'inptr', store it back.
	* iconvdata/unicode.c (PREPARE_LOOP): After incrementing 'inptr',
	store it back.

2001-04-11  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/utf-16.c (gconv_init): Use MAX_NEEDED_FROM, not
	MIN_NEEDED_FROM.
This commit is contained in:
Ulrich Drepper 2001-04-12 20:26:40 +00:00
parent 9dd7309cee
commit b721a2c03c
16 changed files with 412 additions and 20 deletions

View File

@ -1,3 +1,32 @@
2001-04-12 Bruno Haible <haible@clisp.cons.org>
* iconvdata/TESTS2: New file.
* iconvdata/run-iconv-test.sh: Also run tests from TESTS2.
* iconvdata/testdata/alfabeta..UTF-8: New file.
* iconvdata/testdata/alfabeta..UTF-16.BE: New file.
* iconvdata/testdata/alfabeta..UTF-16.LE: New file.
* iconvdata/testdata/alfabeta..UTF-32.BE: New file.
* iconvdata/testdata/alfabeta..UTF-32.LE: New file.
2001-04-11 Bruno Haible <haible@clisp.cons.org>
* iconvdata/utf-32.c: New file.
* iconvdata/gconv-modules: Add entries for UTF-32, UTF-32LE, UTF-32BE.
* iconvdata/Makefile (modules): Add UTF-32.
(distribute): Add utf-32.c.
2001-04-11 Bruno Haible <haible@clisp.cons.org>
* iconvdata/utf-16.c (PREPARE_LOOP): Initialize 'swap' after possibly
changing it in the state. After incrementing 'inptr', store it back.
* iconvdata/unicode.c (PREPARE_LOOP): After incrementing 'inptr',
store it back.
2001-04-11 Bruno Haible <haible@clisp.cons.org>
* iconvdata/utf-16.c (gconv_init): Use MAX_NEEDED_FROM, not
MIN_NEEDED_FROM.
2001-04-11 David Mosberger <davidm@hpl.hp.com>
* sysdeps/ia64/htonl.S: Drop superfluous "alloc".

View File

@ -46,7 +46,7 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \
INIS-CYRILLIC ISO_6937-2 ISO_2033 ISO_5427 ISO_5427-EXT \
ISO_5428 ISO_10367-BOX MAC-IS MAC-UK NATS-DANO NATS-SEFI \
SAMI-WS2 ISO-IR-197 TIS-620 KOI8-U GBK ISIRI-3342 GBGBK \
ISO-2022-CN libISOIR165 UTF-16 UNICODE UTF-7 BIG5HKSCS \
ISO-2022-CN libISOIR165 UTF-16 UNICODE UTF-32 UTF-7 BIG5HKSCS \
GB18030 ISO-2022-CN-EXT VISCII GBBIG5
modules.so := $(addsuffix .so, $(modules))
@ -134,7 +134,7 @@ distribute := gconv-modules extra-module.mk gap.awk gaptab.awk \
macintosh.c mac-is.c mac-uk.c nats-dano.c nats-sefi.c sjis.c \
t.61.c uhc.c sami-ws2.c iso-ir-197.c tis-620.c koi8-u.c \
isiri-3342.c isiri-3342.h gbgbk.c iso-2022-cn.c cns11643l2.h \
iso8859-16.c utf-16.c unicode.c utf-7.c big5hkscs.c \
iso8859-16.c utf-16.c unicode.c utf-32.c utf-7.c big5hkscs.c \
iso-ir-165.c iso-ir-165.h gb18030.c iso-2022-cn-ext.c \
ibm932.c ibm932.h ibm943.c ibm943.h gbbig5.c

27
iconvdata/TESTS2 Normal file
View File

@ -0,0 +1,27 @@
# Tests for endianness dependent iconv(1) (and therefore iconv(3)) in GNU libc.
# Copyright (C) 2001 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# Contributed by Bruno Haible <haible@clisp.cons.org>, 2001.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with the GNU C Library; see the file COPYING.LIB. If not,
# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
# Each line consists of three fields:
# 1. The endianness independent encoding.
# 2. The endianness dependent encoding.
# 3. The filename stem.
UTF-8 UTF-16 alfabeta
UTF-8 UTF-32 alfabeta

View File

@ -1318,6 +1318,18 @@ alias CSUNICODE// UNICODE//
module UNICODE// INTERNAL UNICODE 1
module INTERNAL UNICODE// UNICODE 1
# from to module cost
module UTF-32// INTERNAL UTF-32 1
module INTERNAL UTF-32// UTF-32 1
# from to module cost
module UTF-32LE// INTERNAL UTF-32 1
module INTERNAL UTF-32LE// UTF-32 1
# from to module cost
module UTF-32BE// INTERNAL UTF-32 1
module INTERNAL UTF-32BE// UTF-32 1
# from to module cost
module UTF-7// INTERNAL UTF-7 1
module INTERNAL UTF-7// UTF-7 1

View File

@ -127,7 +127,7 @@ while read from to subset targets; do
fi
if test "$subset" != Y; then
echo $ac_n " suntzu: ASCII -> $to -> ASCII $ac_c"
echo $ac_n " suntzu: ASCII -> $to -> ASCII $ac_c"
$PROG -f ASCII -t $to testdata/suntzus |
$PROG -f $to -t ASCII > $temp1 ||
{ if test $? -gt 128; then exit 1; fi
@ -139,6 +139,49 @@ while read from to subset targets; do
fi
done < TESTS
# We read the file named TESTS2.  Every line that is neither empty nor a
# comment holds three whitespace-separated fields: the endianness
# independent encoding, the endianness dependent encoding, and the stem of
# the test data file names below testdata/.
while read utf8 from filename; do
# Ignore empty and comment lines.  A line with fewer than three words
# leaves $filename empty and is skipped; a comment of three or more words
# is only recognized when its first word is exactly `#'.
if test -z "$filename" || test "$utf8" = '#'; then continue; fi
# Expand the variables now.
PROG=`eval echo $ICONV`
# Test conversion to the endianness dependent encoding.
# The result is accepted if it matches either the .BE or the .LE
# reference file.
echo $ac_n "test encoder: $utf8 -> $from $ac_c"
$PROG -f $utf8 -t $from < testdata/${filename}..${utf8} > $temp1
cmp $temp1 testdata/${filename}..${from}.BE > /dev/null 2>&1 ||
cmp $temp1 testdata/${filename}..${from}.LE > /dev/null 2>&1 ||
{ echo "/FAILED"; failed=1; continue; }
echo "OK"
# Test conversion from the endianness dependent encoding.
# Both the .BE and the .LE reference file have to convert to the same
# endianness independent reference file.
echo $ac_n "test decoder: $from -> $utf8 $ac_c"
$PROG -f $from -t $utf8 < testdata/${filename}..${from}.BE > $temp1
cmp $temp1 testdata/${filename}..${utf8} > /dev/null 2>&1 ||
{ echo "/FAILED"; failed=1; continue; }
$PROG -f $from -t $utf8 < testdata/${filename}..${from}.LE > $temp1
cmp $temp1 testdata/${filename}..${utf8} > /dev/null 2>&1 ||
{ echo "/FAILED"; failed=1; continue; }
echo "OK"
# Test byte swapping behaviour: the explicit BE variant converted to the
# explicit LE variant must reproduce the .LE reference file.
echo $ac_n "test non-BOM: ${from}BE -> ${from}LE $ac_c"
$PROG -f ${from}BE -t ${from}LE < testdata/${filename}..${from}.BE > $temp1
cmp $temp1 testdata/${filename}..${from}.LE > /dev/null 2>&1 ||
{ echo "/FAILED"; failed=1; continue; }
echo "OK"
# Test byte swapping behaviour in the opposite direction: LE to BE must
# reproduce the .BE reference file.
echo $ac_n "test non-BOM: ${from}LE -> ${from}BE $ac_c"
$PROG -f ${from}LE -t ${from}BE < testdata/${filename}..${from}.LE > $temp1
cmp $temp1 testdata/${filename}..${from}.BE > /dev/null 2>&1 ||
{ echo "/FAILED"; failed=1; continue; }
echo "OK"
done < TESTS2
exit $failed
# Local Variables:
# mode:shell-script

BIN
iconvdata/testdata/alfabeta..UTF-16.BE vendored Normal file

Binary file not shown.

BIN
iconvdata/testdata/alfabeta..UTF-16.LE vendored Normal file

Binary file not shown.

BIN
iconvdata/testdata/alfabeta..UTF-32.BE vendored Normal file

Binary file not shown.

BIN
iconvdata/testdata/alfabeta..UTF-32.LE vendored Normal file

Binary file not shown.

6
iconvdata/testdata/alfabeta..UTF-8 vendored Normal file
View File

@ -0,0 +1,6 @@
ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ <- Greek
𐌀𐌁𐌂𐌃𐌄𐌅𐌆𐌇𐌈𐌉𐌊𐌋𐌌𐌍𐌎𐌏𐌐𐌑𐌒𐌓𐌔𐌕𐌖𐌗𐌘𐌙𐌚𐌛𐌜𐌝 <- Etruscan
ABCDEFGHIJKLMNOPQRSTUVWXYZ <- Latin
АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ <- Cyrillic
𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿𐍀𐍁𐍂𐍃𐍄𐍅𐍆𐍇𐍈 <- Gothic
אבגדהוזחטיךכלםמןנסעףפץצקרש <- Hebrew

View File

@ -53,11 +53,11 @@
\
if (get16u (inptr) == BOM) \
/* Simply ignore the BOM character. */ \
inptr += 2; \
*inptrp = inptr += 2; \
else if (get16u (inptr) == BOM_OE) \
{ \
((struct unicode_data *) step->__data)->swap = 1; \
inptr += 2; \
*inptrp = inptr += 2; \
} \
} \
} \

View File

@ -1,5 +1,5 @@
/* Conversion module for UTF-16.
Copyright (C) 1999, 2000 Free Software Foundation, Inc.
Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
@ -44,7 +44,7 @@
#define PREPARE_LOOP \
enum direction dir = ((struct utf16_data *) step->__data)->dir; \
enum variant var = ((struct utf16_data *) step->__data)->var; \
int swap = ((struct utf16_data *) step->__data)->swap; \
int swap; \
if (FROM_DIRECTION && var == UTF_16) \
{ \
if (data->__invocation_counter == 0) \
@ -55,11 +55,11 @@
\
if (get16u (inptr) == BOM) \
/* Simply ignore the BOM character. */ \
inptr += 2; \
*inptrp = inptr += 2; \
else if (get16u (inptr) == BOM_OE) \
{ \
((struct utf16_data *) step->__data)->swap = 1; \
inptr += 2; \
*inptrp = inptr += 2; \
} \
} \
} \
@ -72,7 +72,8 @@
\
put16u (outbuf, BOM); \
outbuf += 2; \
}
} \
swap = ((struct utf16_data *) step->__data)->swap;
#define EXTRA_LOOP_ARGS , var, swap
@ -159,7 +160,7 @@ gconv_init (struct __gconv_step *step)
if (dir == from_utf16)
{
step->__min_needed_from = MIN_NEEDED_FROM;
step->__max_needed_from = MIN_NEEDED_FROM;
step->__max_needed_from = MAX_NEEDED_FROM;
step->__min_needed_to = MIN_NEEDED_TO;
step->__max_needed_to = MIN_NEEDED_TO;
}
@ -168,7 +169,7 @@ gconv_init (struct __gconv_step *step)
step->__min_needed_from = MIN_NEEDED_TO;
step->__max_needed_from = MIN_NEEDED_TO;
step->__min_needed_to = MIN_NEEDED_FROM;
step->__max_needed_to = MIN_NEEDED_FROM;
step->__max_needed_to = MAX_NEEDED_FROM;
}
step->__stateful = 0;

270
iconvdata/utf-32.c Normal file
View File

@ -0,0 +1,270 @@
/* Conversion module for UTF-32.
Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <byteswap.h>
#include <dlfcn.h>
#include <gconv.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* This is the Byte Order Mark character (BOM), U+FEFF, as the 32-bit value
   PREPARE_LOOP compares the first input unit against. */
#define BOM 0x0000feffu
/* And in the other byte order.  PREPARE_LOOP treats a leading unit with
   this value as a BOM as well and turns on byte swapping for the stream. */
#define BOM_OE 0xfffe0000u
/* Definitions used in the body of the `gconv' function. */
/* Names of the two loop functions; they are handed to iconv/loop.c through
   LOOPFCT further down. */
#define FROM_LOOP from_utf32_loop
#define TO_LOOP to_utf32_loop
/* This file provides its own gconv_init and gconv_end below.
   NOTE(review): presumably the zeros keep iconv/skeleton.c from generating
   default versions -- confirm against the skeleton. */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
/* Both loops advance the input and the output pointer by four bytes per
   character. */
#define MIN_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
/* True when converting from UTF-32 to the internal format.  `dir' is the
   local variable that PREPARE_LOOP declares. */
#define FROM_DIRECTION (dir == from_utf32)
/* Per-call setup that runs before the conversion loop.

   It declares the locals `dir', `var' and `swap' from the step's
   struct utf32_data.  `dir' is used by FROM_DIRECTION; `var' and `swap'
   are passed on to the loop functions through EXTRA_LOOP_ARGS.

   Decoding plain "UTF-32" (var == UTF_32): on the first invocation the
   leading four bytes are examined.  If get32u yields BOM the mark is
   skipped; if it yields BOM_OE the mark is skipped and the step's `swap'
   flag is set so that all following units are byte-swapped.  Anything else
   is left in place and converted as data.

   Encoding plain "UTF-32": on the first invocation, unless the step is used
   internally, a BOM is written with put32u.

   The UTF-32LE and UTF-32BE variants neither consume nor emit a BOM.

   When fewer than four bytes are available on the first decoding call the
   byte order cannot be decided yet.  An empty buffer is reported as
   __GCONV_EMPTY_INPUT.  One to three bytes form a truncated code unit and
   are reported as __GCONV_INCOMPLETE_INPUT, so that the caller does not
   take unconsumed trailing bytes for a cleanly finished conversion.

   `swap' is read only after the BOM handling because that handling may just
   have changed the value stored in the step data.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf32_data *) step->__data)->dir;	      \
  enum variant var = ((struct utf32_data *) step->__data)->var;		      \
  int swap;								      \
  if (FROM_DIRECTION && var == UTF_32)					      \
    {									      \
      if (data->__invocation_counter == 0)				      \
	{								      \
	  /* We have to find out which byte order the file is encoded in.  */ \
	  if (inptr + 4 > inend)					      \
	    return (inptr == inend					      \
		    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);	      \
									      \
	  if (get32u (inptr) == BOM)					      \
	    /* Simply ignore the BOM character.  */			      \
	    *inptrp = inptr += 4;					      \
	  else if (get32u (inptr) == BOM_OE)				      \
	    {								      \
	      ((struct utf32_data *) step->__data)->swap = 1;		      \
	      *inptrp = inptr += 4;					      \
	    }								      \
	}								      \
    }									      \
  else if (!FROM_DIRECTION && var == UTF_32 && !data->__internal_use	      \
	   && data->__invocation_counter == 0)				      \
    {									      \
      /* Emit the Byte Order Mark.  */					      \
      if (__builtin_expect (outbuf + 4 > outend, 0))			      \
	return __GCONV_FULL_OUTPUT;					      \
									      \
      put32u (outbuf, BOM);						      \
      outbuf += 4;							      \
    }									      \
  swap = ((struct utf32_data *) step->__data)->swap;
#define EXTRA_LOOP_ARGS		, var, swap
/* Direction of the transformation.  illegal_dir is the initial value in
   gconv_init and means that neither charset name of the step matched. */
enum direction
{
illegal_dir,
to_utf32,
from_utf32
};
/* Which of the three charset names the step was set up for.  Only UTF_32
   takes part in the BOM handling of PREPARE_LOOP; UTF_32LE and UTF_32BE
   fix the byte order through the `swap' value computed in gconv_init. */
enum variant
{
illegal_var,
UTF_32,
UTF_32LE,
UTF_32BE
};
/* Per-step data, allocated in gconv_init, stored in the step's __data
   field and released in gconv_end. */
struct utf32_data
{
/* Conversion direction of this step. */
enum direction dir;
/* Charset variant this step was set up for. */
enum variant var;
/* Nonzero if each 32-bit unit has to be byte-swapped.  Set in gconv_init
   for the LE/BE variants and in PREPARE_LOOP after a BOM_OE was seen. */
int swap;
};
extern int gconv_init (struct __gconv_step *step);
int
gconv_init (struct __gconv_step *step)
{
/* Determine which direction. */
struct utf32_data *new_data;
enum direction dir = illegal_dir;
enum variant var = illegal_var;
int result;
if (__strcasecmp (step->__from_name, "UTF-32//") == 0)
{
dir = from_utf32;
var = UTF_32;
}
else if (__strcasecmp (step->__to_name, "UTF-32//") == 0)
{
dir = to_utf32;
var = UTF_32;
}
else if (__strcasecmp (step->__from_name, "UTF-32BE//") == 0)
{
dir = from_utf32;
var = UTF_32BE;
}
else if (__strcasecmp (step->__to_name, "UTF-32BE//") == 0)
{
dir = to_utf32;
var = UTF_32BE;
}
else if (__strcasecmp (step->__from_name, "UTF-32LE//") == 0)
{
dir = from_utf32;
var = UTF_32LE;
}
else if (__strcasecmp (step->__to_name, "UTF-32LE//") == 0)
{
dir = to_utf32;
var = UTF_32LE;
}
result = __GCONV_NOCONV;
if (__builtin_expect (dir, to_utf32) != illegal_dir)
{
new_data = (struct utf32_data *) malloc (sizeof (struct utf32_data));
result = __GCONV_NOMEM;
if (new_data != NULL)
{
new_data->dir = dir;
new_data->var = var;
new_data->swap = ((var == UTF_32LE && BYTE_ORDER == BIG_ENDIAN)
|| (var == UTF_32BE
&& BYTE_ORDER == LITTLE_ENDIAN));
step->__data = new_data;
if (dir == from_utf32)
{
step->__min_needed_from = MIN_NEEDED_FROM;
step->__max_needed_from = MIN_NEEDED_FROM;
step->__min_needed_to = MIN_NEEDED_TO;
step->__max_needed_to = MIN_NEEDED_TO;
}
else
{
step->__min_needed_from = MIN_NEEDED_TO;
step->__max_needed_from = MIN_NEEDED_TO;
step->__min_needed_to = MIN_NEEDED_FROM;
step->__max_needed_to = MIN_NEEDED_FROM;
}
step->__stateful = 0;
result = __GCONV_OK;
}
}
return result;
}
extern void gconv_end (struct __gconv_step *data);
/* Tear down a step: release the struct utf32_data that gconv_init
   allocated and stored in the step's __data field. */
void
gconv_end (struct __gconv_step *data)
{
free (data->__data);
}
/* Convert from the internal (UCS4-like) format to UTF-32.

   BODY handles one character per iteration.  It reads a 32-bit value with
   get32 and then:
   - hands values >= 0x110000 to STANDARD_ERR_HANDLER;
   - for surrogates 0xd800..0xdfff either stops with __GCONV_ILLEGAL_INPUT
     or, when ignore_errors_p () holds, skips the character and counts it
     in *irreversible;
   - writes every other value with put32, byte-swapped first if `swap' is
     nonzero.
   Input and output pointer both advance by four bytes per character. */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define LOOPFCT TO_LOOP
#define BODY \
{ \
uint32_t c = get32 (inptr); \
\
if (__builtin_expect (c >= 0x110000, 0)) \
{ \
STANDARD_ERR_HANDLER (4); \
} \
else if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0)) \
{ \
/* Surrogate characters in UCS-4 input are not valid. \
We must catch this. If we let surrogates pass through, \
attackers could make a security hole exploit by \
generating "irregular UTF-32" sequences. */ \
if (! ignore_errors_p ()) \
{ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
inptr += 4; \
++*irreversible; \
continue; \
} \
\
if (swap) \
put32 (outptr, bswap_32 (c)); \
else \
put32 (outptr, c); \
\
outptr += 4; \
inptr += 4; \
}
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS \
, enum variant var, int swap
#include <iconv/loop.c>
/* Convert from UTF-32 to the internal (UCS4-like) format.

   BODY handles one character per iteration.  It reads a 32-bit unit with
   get32, byte-swaps it if `swap' is nonzero, and validates it before
   storing it with put32.

   A unit is rejected if it is >= 0x110000 or if it is a surrogate
   (0xd800..0xdfff).  Surrogate code points are not valid in UTF-32, and
   the encoding direction of this module already refuses them; accepting
   them here would let ill-formed input reach the internal format.  A
   rejected unit ends the loop with __GCONV_ILLEGAL_INPUT, or, when
   ignore_errors_p () holds, is skipped and counted in *irreversible.

   Input and output pointer both advance by four bytes per character.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  {									      \
    uint32_t u1 = get32 (inptr);					      \
									      \
    if (swap)								      \
      u1 = bswap_32 (u1);						      \
									      \
    if (__builtin_expect (u1 >= 0x110000				      \
			  || (u1 >= 0xd800 && u1 < 0xe000), 0))		      \
      {									      \
	/* Out of range, or a surrogate: this is illegal.  */		      \
	if (! ignore_errors_p ())					      \
	  {								      \
	    /* This is an illegal character.  */			      \
	    result = __GCONV_ILLEGAL_INPUT;				      \
	    break;							      \
	  }								      \
									      \
	inptr += 4;							      \
	++*irreversible;						      \
	continue;							      \
      }									      \
									      \
    put32 (outptr, u1);							      \
    inptr += 4;								      \
    outptr += 4;							      \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS \
	, enum variant var, int swap
#include <iconv/loop.c>
/* Now define the toplevel functions. */
#include <iconv/skeleton.c>

View File

@ -1,3 +1,9 @@
2001-04-12 Ulrich Drepper <drepper@redhat.com>
* sysdeps/i386/pt-machine.h (CURRENT_STACK_FRAME): Define using
__builtin_frame_address.
* sysdeps/i386/i686/pt-machine.h: Likewise.
2001-04-11 Ulrich Drepper <drepper@redhat.com>
* Makefile (tests): Comment out tst-cancel for now.

View File

@ -1,6 +1,6 @@
/* Machine-dependent pthreads configuration and inline functions.
i686 version.
Copyright (C) 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
Copyright (C) 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Richard Henderson <rth@tamu.edu>.
@ -26,8 +26,7 @@
/* Get some notion of the current stack. Need not be exactly the top
of the stack, just something somewhere in the current frame. */
#define CURRENT_STACK_FRAME stack_pointer
register char * stack_pointer __asm__ ("%esp");
#define CURRENT_STACK_FRAME __builtin_frame_address (0)
/* Spinlock implementation; required. */
@ -38,8 +37,8 @@ testandset (int *spinlock)
__asm__ __volatile__ (
"xchgl %0, %1"
: "=r"(ret), "=m"(*spinlock)
: "0"(1), "m"(*spinlock)
: "=r" (ret), "=m" (*spinlock)
: "0" (1), "m" (*spinlock)
: "memory");
return ret;

View File

@ -1,6 +1,6 @@
/* Machine-dependent pthreads configuration and inline functions.
i386 version.
Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
Copyright (C) 1996,1997,1998,1999,2000,2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Richard Henderson <rth@tamu.edu>.
@ -25,8 +25,7 @@
/* Get some notion of the current stack. Need not be exactly the top
of the stack, just something somewhere in the current frame. */
#define CURRENT_STACK_FRAME stack_pointer
register char * stack_pointer __asm__ ("%esp");
#define CURRENT_STACK_FRAME __builtin_frame_address (0)
/* Spinlock implementation; required. */