1998-04-26  Ulrich Drepper  <drepper@cygnus.com>

	* iconvdata/gconv-modules: Add entry for ISO-2022-JP and
	ISO-2022-JP-2.
This commit is contained in:
Ulrich Drepper 1998-04-26 10:28:28 +00:00
parent 918b9d72a9
commit bc900b1118
10 changed files with 2079 additions and 1599 deletions

View File

@ -1,3 +1,8 @@
1998-04-26 Ulrich Drepper <drepper@cygnus.com>
* iconvdata/gconv-modules: Add entry for ISO-2022-JP and
ISO-2022-JP-2.
1998-04-25 18:39 Ulrich Drepper <drepper@cygnus.com>
* iconvdata/Makefile: Use gap method for iso8859-5, iso8859-7,

View File

@ -353,7 +353,7 @@ $(objpfx)ibm1047.h: ../localedata/charmaps/IBM1047 Makefile
$(objpfx)iso8859-7jp.h: ../localedata/charmaps/ISO-8859-7 Makefile
$(make-target-directory)
( echo "static const uint32_t iso88597_to_ucs4[96] = {"; \
sed -e '/^[^[:space:]]*[[:space:]]*.x00/d' -e 's/^[^[:space:]]*[[:space:]]*.x\([A-F].\)[[:space:]]*<U\(....\)>.*/ [0x\1-0xA0] = 0x\2,/p' -e d $^ | sort -u; \
sed -e '/^[^[:space:]]*[[:space:]]*.x00/d' -e 's/^[^[:space:]]*[[:space:]]*.x\([A-F].\)[[:space:]]*<U\(....\)>.*/ [0x\1 - 0xA0] = 0x\2,/p' -e d $^ | sort -u; \
echo "};"; \
echo "static struct gap from_idx[] = {"; \
sed -e '/^[^[:space:]]*[[:space:]]*.x00/d' -e 's/^[^[:space:]]*[[:space:]]*.x\([A-F].\)[[:space:]]*<U\(....\)>.*/0x\2 0x\1/p' -e d $^ | sort -u | $(PERL) gap.pl; \

View File

@ -54,7 +54,7 @@
{ \
/* Two or more byte character. First test whether the \
next character is also available. */ \
const char *endp; \
const unsigned char *endp; \
\
if (NEED_LENGTH_TEST && inptr + 1 >= inend) \
{ \

View File

@ -28,13 +28,11 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
{
if (ch > 0x7f)
{
uint16_t idx = 0;
if (ucs4_to_ksc5601 (ch, &idx))
idx |= 0x8080;
cp[0] = (unsigned char) (idx / 256);
cp[1] = (unsigned char) (idx & 0xff);
if (ucs4_to_ksc5601 (ch, cp, 2) != UNKNOWN_10646_CHAR)
{
cp[0] |= 0x80;
cp[1] |= 0x80;
}
}
/* XXX Think about 0x5c ; '\'. */
else
@ -89,14 +87,14 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
/* Two-byte character. First test whether the next character \
is also available. */ \
ch = ksc5601_to_ucs4 (&inptr, \
NEED_LENGTH_TEST ? inptr - inbufend : 2, x080); \
NEED_LENGTH_TEST ? inptr - inend : 2, 0x80); \
if (NEED_LENGTH_TEST && ch == 0) \
{ \
/* The second character is not available. */ \
result = GCONV_INCOMPLETE_INPUT; \
break; \
} \
if (ch == UNKNOWN_10646_CHAR)) \
if (ch == UNKNOWN_10646_CHAR) \
{ \
/* This is an illegal character. */ \
result = GCONV_ILLEGAL_INPUT; \

View File

@ -23,6 +23,7 @@
#include <gconv.h>
#include <stdint.h>
#include <assert.h>
/* Conversion table. */
extern const uint16_t __gb2312_to_ucs[];
@ -66,7 +67,7 @@ extern const char __gb2312_from_ucs4_tab8[][2];
extern const char __gb2312_from_ucs4_tab9[][2];
static inline size_t
ucs4_to_gb2312 (uint32_t wch, unsigned char **s, size_t avail)
ucs4_to_gb2312 (uint32_t wch, unsigned char *s, size_t avail)
{
unsigned int ch = (unsigned int) wch;
char buf[2];
@ -220,8 +221,8 @@ ucs4_to_gb2312 (uint32_t wch, unsigned char **s, size_t avail)
if (avail < 2)
return 0;
*(*s)++ = cp[0];
*(*s)++ = cp[1];
s[0] = cp[0];
s[1] = cp[1];
return 2;
}

View File

@ -750,3 +750,10 @@ module INTERNAL CP737// CP737 1
# from to module cost
module CP775// INTERNAL CP775 1
module INTERNAL CP775// CP775 1
# from to module cost
module ISO-2022-JP// INTERNAL ISO-2022-JP 1
module INTERNAL ISO-2022-JP// ISO-2022-JP 1
module ISO-2022-JP-2// INTERNAL ISO-2022-JP 1
module INTERNAL ISO-2022-JP-2// ISO-2022-JP 1

View File

@ -22,10 +22,18 @@
#include <stdint.h>
#include <string.h>
#include "jis0201.h"
#include "jis0208.h"
#include "jis0212.h"
#include "jis0201.h"
#include "gb2312.h"
#include "ksc5601.h"
struct gap
{
uint16_t start;
uint16_t end;
int32_t idx;
};
#include "iso8859-7jp.h"
/* This makes obvious what everybody knows: 0x1b is the Esc character. */
@ -36,11 +44,12 @@
#define DEFINE_FINI 0
/* Definitions used in the body of the `gconv' function. */
#define FROM_LOOP from_iso2022jp
#define TO_LOOP to_iso2022jp
#define FROM_LOOP from_iso2022jp_loop
#define TO_LOOP to_iso2022jp_loop
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
#define MAX_NEEDED_TO 4
#define FROM_DIRECTION dir == from_iso2022jp
#define PREPARE_LOOP \
enum direction dir = ((struct iso2022jp_data *) step->data)->dir; \
@ -98,7 +107,7 @@ gconv_init (struct gconv_step *step)
/* Determine which direction. */
struct iso2022jp_data *new_data;
enum direction dir = illegal_dir;
enum variant var;
enum variant var = illegal_var;
int result;
if (__strcasecmp (step->from_name, "ISO-2022-JP//") == 0)
@ -233,9 +242,9 @@ gconv_end (struct gconv_step *data)
then the input buffer ends we terminate with an error since \
we must not risk missing an escape sequence just because it \
is not entirely in the current input buffer. */ \
if (inptr + 2 >= inbufend \
if (inptr + 2 >= inend \
|| (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
&& inptr +3 >= inbufend)) \
&& inptr + 3 >= inend)) \
{ \
/* Not enough input available. */ \
result = GCONV_EMPTY_INPUT; \
@ -244,7 +253,7 @@ gconv_end (struct gconv_step *data)
\
if (inptr[1] == '(') \
{ \
if (inptr[2] = 'B') \
if (inptr[2] == 'B') \
{ \
/* ASCII selected. */ \
set = ASCII_set; \
@ -293,7 +302,7 @@ gconv_end (struct gconv_step *data)
inptr += 4; \
continue; \
} \
else (inptr[3] == 'D') \
else if (inptr[3] == 'D') \
{ \
/* JIS X 0212-1990 selected. */ \
set = JISX0212_set; \
@ -364,23 +373,22 @@ gconv_end (struct gconv_step *data)
0208-1990. If somebody has problems with this please \
provide the appropriate tables. */ \
ch = jisx0208_to_ucs4 (&inptr, \
NEED_LENGTH_TEST ? inbufend - inptr : 2, 0); \
NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
else if (set == JISX0212_set) \
/* Use the JIS X 0212 table. */ \
ch = jisx0212_to_ucs4 (&inptr, \
NEED_LENGTH_TEST ? inbufend - inptr : 2, 0); \
NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
else if (set == GB2312_set) \
/* Use the GB 2312 table. */ \
ch = gb2312_to_ucs4 (&inptr, \
NEED_LENGTH_TEST ? inbufend - inptr : 2, 0); \
NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
else \
{ \
assert (set == KSC5601_set); \
\
/* Use the KSC 5601 table. */ \
ch = ksc5601_to_ucs4 (&inptr, \
NEED_LENGTH_TEST ? inbufend - inptr : 2, \
0); \
NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
} \
\
if (NEED_LENGTH_TEST && ch == 0) \
@ -459,21 +467,21 @@ gconv_end (struct gconv_step *data)
if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
written = ucs4_to_jisx0208 (ch, outptr, \
(NEED_LENGTH_TEST \
? outbufend - outptr : 2)); \
? outend - outptr : 2)); \
else if (set == JISX0212_set) \
written = ucs4_to_jisx0212 (ch, outptr, \
(NEED_LENGTH_TEST \
? outbufend - outptr : 2)); \
? outend - outptr : 2)); \
else if (set == GB2312_set) \
written = ucs4_to_gb2312 (ch, outptr, (NEED_LENGTH_TEST \
? outbufend - outptr : 2)); \
? outend - outptr : 2)); \
else \
{ \
assert (set == KSC5601_set); \
\
written = ucs4_to_ksc5601 (ch, outptr, \
(NEED_LENGTH_TEST \
? outbufend - outptr : 2)); \
? outend - outptr : 2)); \
} \
\
if (NEED_LENGTH_TEST && written == 0) \
@ -499,7 +507,7 @@ gconv_end (struct gconv_step *data)
/* First test whether we have at least three more bytes for \
the escape sequence. The two charsets which require four \
bytes will be handled later. */ \
if (NEED_LENGTH_TEST && outptr + 3 > outbufend) \
if (NEED_LENGTH_TEST && outptr + 3 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -514,7 +522,7 @@ gconv_end (struct gconv_step *data)
*outptr++ = 'B'; \
set = ASCII_set; \
\
if (NEED_LENGTH_TEST && outptr == outbufend) \
if (NEED_LENGTH_TEST && outptr == outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -527,7 +535,7 @@ gconv_end (struct gconv_step *data)
/* This character set is not available in ISO-2022-JP. */ \
if (var == iso2022jp) \
{ \
result == GCONV_ILLEGAL_INPUT; \
result = GCONV_ILLEGAL_INPUT; \
break; \
} \
\
@ -537,7 +545,7 @@ gconv_end (struct gconv_step *data)
*outptr++ = 'A'; \
set = ISO88591_set; \
\
if (NEED_LENGTH_TEST && outptr == outbufend) \
if (NEED_LENGTH_TEST && outptr == outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -563,7 +571,7 @@ gconv_end (struct gconv_step *data)
*outptr++ = '@'; \
set = JISX0201_set; \
\
if (NEED_LENGTH_TEST && outptr == outbufend) \
if (NEED_LENGTH_TEST && outptr == outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -582,7 +590,7 @@ gconv_end (struct gconv_step *data)
*outptr++ = 'B'; \
set = JISX0208_1983_set; \
\
if (NEED_LENGTH_TEST && outptr + 2 > outbufend) \
if (NEED_LENGTH_TEST && outptr + 2 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -603,7 +611,7 @@ gconv_end (struct gconv_step *data)
if (written != UNKNOWN_10646_CHAR) \
{ \
/* We use JIS X 0212. */ \
if (outptr + 4 > outbufend) \
if (outptr + 4 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -614,7 +622,7 @@ gconv_end (struct gconv_step *data)
*outptr++ = 'D'; \
set = JISX0212_set; \
\
if (NEED_LENGTH_TEST && outptr + 2 > outbufend) \
if (NEED_LENGTH_TEST && outptr + 2 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -634,7 +642,7 @@ gconv_end (struct gconv_step *data)
*outptr++ = 'A'; \
set = GB2312_set; \
\
if (NEED_LENGTH_TEST && outptr + 2 > outbufend) \
if (NEED_LENGTH_TEST && outptr + 2 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -649,7 +657,7 @@ gconv_end (struct gconv_step *data)
if (written != UNKNOWN_10646_CHAR) \
{ \
/* We use KSC 5601. */ \
if (outptr + 4 > outbufend) \
if (outptr + 4 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
@ -661,7 +669,7 @@ gconv_end (struct gconv_step *data)
set = KSC5601_set; \
\
if (NEED_LENGTH_TEST \
&& outptr + 2 > outbufend) \
&& outptr + 2 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \

View File

@ -141,84 +141,6 @@ johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
- (c2 > 0x90 ? 0x43 : 0x31)];
}
/* Convert the UCS4 character CH to a two-byte Hanja code in JOHAB
   layout.  The character is first looked up with ucs4_to_ksc5601_hanja;
   if that lookup reports failure (zero), 0 is returned.  Otherwise the
   16-bit KS C 5601 value is rearranged into the JOHAB form and returned.  */
static uint16_t
johab_hanja_from_ucs (uint32_t ch)
{
  uint16_t ksc;
  int row;
  int col;
  int adjust;

  if (!ucs4_to_ksc5601_hanja (ch, &ksc))
    return 0;

  /* Hanja begins at the 42nd row.  42 = 0x2a, and 0x2a + 0x20 = 0x4a.  */
  row = ksc / 256 - 0x4a;
  col = ksc % 256 + 0x80;

  /* Two consecutive KS C 5601 rows share one JOHAB lead byte.  Odd rows
     keep the column value as is; even rows are shifted down, with the
     amount depending on whether the column lies above 0xee.  */
  if (row % 2 != 0)
    adjust = 0;
  else
    adjust = (col > 0xee ? 0x43 : 0x31) - 0xa1;

  return (row / 2) * 256 + 0xe000 + col + adjust;
}
/* Convert the UCS4 character CH to a two-byte symbol code in JOHAB
   layout.  The character is first looked up with ucs4_to_ksc5601_sym;
   if that lookup reports failure (zero), 0 is returned.  Otherwise the
   16-bit KS C 5601 value is rearranged into the JOHAB form and returned.  */
static uint16_t
johab_sym_from_ucs (uint32_t ch)
{
  uint16_t ksc;
  int row;
  int col;
  int adjust;

  if (!ucs4_to_ksc5601_sym (ch, &ksc))
    return 0;

  /* The row is counted relative to 0x21; the column gets the high bit
     added.  */
  row = ksc / 256 - 0x21;
  col = ksc % 256 + 0x80;

  /* Two consecutive KS C 5601 rows share one JOHAB lead byte.  Odd rows
     keep the column value as is; even rows are shifted down, with the
     amount depending on whether the column lies above 0xee.  */
  if (row % 2 != 0)
    adjust = 0;
  else
    adjust = (col > 0xee ? 0x43 : 0x31) - 0xa1;

  return (row / 2) * 256 + 0xd900 + col + adjust;
}
/* Store the JOHAB representation of the UCS4 character CH in CP[0] and
   CP[1].  Values below 0x7f are stored unchanged in CP[0] with CP[1]
   set to zero.  For everything else a 16-bit code is computed and split
   into its high byte (CP[0]) and low byte (CP[1]); the lookup helpers
   return 0 on failure, which then leaves both bytes zero.  */
static inline void
johab_from_ucs4 (uint32_t ch, unsigned char *cp)
{
  int code;

  if (ch < 0x7f)
    {
      /* Single-byte case.  */
      cp[0] = (unsigned char) ch;
      cp[1] = 0;
      return;
    }

  if (ch >= 0xac00 && ch <= 0xd7a3)
    {
      /* Precomposed Hangul syllable: combine the bit patterns of the
	 initial, medial and final components.  21 * 28 = 588.  */
      uint32_t syllable = ch - 0xac00;

      code = (init_to_bit[syllable / 588]
	      + mid_to_bit[(syllable / 28) % 21]
	      + final_to_bit[syllable % 28]);
    }
  /* KS C 5601-1992 Annex 3 regards 0xA4DA (Hangul Filler: U3164) as a
     symbol, so the Jamo range stops at U3163.  */
  else if (ch >= 0x3131 && ch <= 0x3163)
    code = jamo_from_ucs_table[ch - 0x3131];
  else if ((ch >= 0x4e00 && ch <= 0x9fa5) || (ch >= 0xf900 && ch <= 0xfa0b))
    code = johab_hanja_from_ucs (ch);
  /* A special case mapping the half-width Korean currency Won sign
     (0x20a9) to 0x5c00 is intentionally left disabled; that character
     goes through the symbol lookup like everything else.  */
  else
    code = johab_sym_from_ucs (ch);

  cp[0] = (unsigned char) (code / 256);
  cp[1] = (unsigned char) (code & 0xff);
}
/* Definitions used in the body of the `gconv' function. */
#define CHARSET_NAME "JOHAB//"
#define FROM_LOOP from_johab
@ -365,7 +287,6 @@ johab_from_ucs4 (uint32_t ch, unsigned char *cp)
#define BODY \
{ \
uint32_t ch = *((uint32_t *) inptr); \
unsigned char cp[2]; \
/* \
if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0]))) \
{ \
@ -379,27 +300,101 @@ johab_from_ucs4 (uint32_t ch, unsigned char *cp)
else \
cp = from_ucs4_lat1[ch]; \
*/ \
johab_from_ucs4 (ch, cp); \
\
if (cp[0] == '\0' && ch != 0) \
if (ch < 0x7f) \
*outptr++ = ch; \
else \
{ \
/* Illegal character. */ \
result = GCONV_ILLEGAL_INPUT; \
break; \
} \
\
*outptr++ = cp[0]; \
/* Now test for a possible second byte and write this if possible. */ \
if (cp[1] != '\0') \
{ \
if (NEED_LENGTH_TEST && outptr >= outend) \
if (ch >= 0xac00 && ch <= 0xd7a3) \
{ \
/* The result does not fit into the buffer. */ \
--outptr; \
result = GCONV_FULL_OUTPUT; \
break; \
ch -= 0xac00; \
\
ch = (init_to_bit[ch / 588] /* 21 * 28 = 588 */ \
+ mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */ \
+ final_to_bit[ch % 28]); /* (ch % (21 * 28)) % 28 */ \
\
if (NEED_LENGTH_TEST && outptr + 2 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
*outptr++ = ch / 256; \
*outptr++ = ch % 256; \
} \
/* KS C 5601-1992 Annex 3 regards 0xA4DA(Hangul Filler : U3164) \
as symbol */ \
else if (ch >= 0x3131 && ch <= 0x3163) \
{ \
ch = jamo_from_ucs_table[ch - 0x3131]; \
\
if (NEED_LENGTH_TEST && outptr + 2 > outend) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
\
*outptr++ = ch / 256; \
*outptr++ = ch % 256; \
} \
if ((ch >= 0x4e00 && ch <= 0x9fa5) || (ch >= 0xf900 && ch <= 0xfa0b)) \
{ \
size_t written; \
\
written = ucs4_to_ksc5601_hanja (ch, outptr, \
(NEED_LENGTH_TEST \
? outend - outptr : 2)); \
if (NEED_LENGTH_TEST && written == 0) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
if (written == UNKNOWN_10646_CHAR) \
{ \
result = GCONV_ILLEGAL_INPUT; \
break; \
} \
\
outptr[0] -= 0x4a; \
outptr[1] += 0x80; \
\
outptr[1] += (outptr[0] % 2 \
? 0 : (outptr[1] > 0xee ? 0x43 : 0x31)); \
outptr[1] -= 0xa1; \
outptr[0] /= 2; \
outptr[0] += 0xe0; \
\
outptr += 2; \
} \
else \
{ \
size_t written; \
\
written = ucs4_to_ksc5601_sym (ch, outptr, \
(NEED_LENGTH_TEST \
? outend - outptr : 2)); \
if (NEED_LENGTH_TEST && written == 0) \
{ \
result = GCONV_FULL_OUTPUT; \
break; \
} \
if (written == UNKNOWN_10646_CHAR) \
{ \
result = GCONV_ILLEGAL_INPUT; \
break; \
} \
\
outptr[0] -= 0x4a; \
outptr[1] += 0x80; \
\
outptr[1] += (outptr[0] % 2 \
? 0 : (outptr[1] > 0xee ? 0x43 : 0x31)); \
outptr[1] -= 0xa1; \
outptr[0] /= 2; \
outptr[0] += 0xe0; \
\
outptr += 2; \
} \
*outptr++ = cp[1]; \
} \
\
inptr += 4; \

View File

@ -82,7 +82,7 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
}
static inline size_t
ucs4_to_ksc5601_hangul (uint32_t wch, unsigned char **s, size-t avail)
ucs4_to_ksc5601_hangul (uint32_t wch, unsigned char *s, size_t avail)
{
int l = 0;
int m;
@ -102,8 +102,8 @@ ucs4_to_ksc5601_hangul (uint32_t wch, unsigned char **s, size-t avail)
if (avail < 2)
return 0;
*(*s)++ = (m / 94) + 0x30;
*(*s)++ = (m % 94) + 0x21;
s[0] = (m / 94) + 0x30;
s[1] = (m % 94) + 0x21;
return 2;
}
@ -114,7 +114,7 @@ ucs4_to_ksc5601_hangul (uint32_t wch, unsigned char **s, size-t avail)
static inline size_t
ucs4_to_ksc5601_hanja (uint32_t wch, unsigned char **s, size_t avail)
ucs4_to_ksc5601_hanja (uint32_t wch, unsigned char *s, size_t avail)
{
int l = 0;
int m;
@ -134,8 +134,8 @@ ucs4_to_ksc5601_hanja (uint32_t wch, unsigned char **s, size_t avail)
if (avail < 2)
return 0;
*(*s)++ = __ksc5601_hanja_from_ucs[m].val[0];
*(*s)++ = __ksc5601_hanja_from_ucs[m].val[1];
s[0] = __ksc5601_hanja_from_ucs[m].val[0];
s[1] = __ksc5601_hanja_from_ucs[m].val[1];
return 2;
}
@ -145,7 +145,7 @@ ucs4_to_ksc5601_hanja (uint32_t wch, unsigned char **s, size_t avail)
}
static inline size_t
ucs4_to_ksc5601_sym (uint32_t wch, unsigned char **s, size_t avail)
ucs4_to_ksc5601_sym (uint32_t wch, unsigned char *s, size_t avail)
{
int l = 0;
int m;
@ -165,8 +165,8 @@ ucs4_to_ksc5601_sym (uint32_t wch, unsigned char **s, size_t avail)
if (avail < 2)
return 0;
*(*s)++ = __ksc5601_sym_from_ucs[m].val[0];
*(*s)++ = __ksc5601_sym_from_ucs[m].val[1];
s[0] = __ksc5601_sym_from_ucs[m].val[0];
s[1] = __ksc5601_sym_from_ucs[m].val[1];
return 2;
}
@ -177,10 +177,10 @@ ucs4_to_ksc5601_sym (uint32_t wch, unsigned char **s, size_t avail)
static inline size_t
ucs4_to_ksc5601 (uint32_t wch, unsigned char **s, size_t avail)
ucs4_to_ksc5601 (uint32_t wch, unsigned char *s, size_t avail)
{
if (wch >= 0xac00 && wch <= 0xd7a3)
return ucs4_to_ksc5601_hangul (wch, (uint16_t *) s);
return ucs4_to_ksc5601_hangul (wch, s, avail);
else if ((wch >= 0x4e00 && wch <= 0x9fff)
|| (wch >= 0xf900 && wch <= 0xfa0b))
return ucs4_to_ksc5601_hanja (wch, s, avail);

File diff suppressed because it is too large Load Diff