cppcharset.c (one_iso88591_to_utf8): New function.
2004-01-16 Eric Christopher <echristo@redhat.com> Chandrakala Chavva <cchavva@redhat.com> * cppcharset.c (one_iso88591_to_utf8): New function. (convert_iso88591_utf8): Ditto. Use. (conversion_tab): Use. (_cpp_input_to_utf8): New function. (_cpp_init_iconv_buffer): Ditto. (_cpp_close_iconv_buffer): Ditto. * cpphash.h: Prototype new functions. (cpp_buffer): Add input_cset_desc. * cppinit.c: Add input_charset default. * cpplib.c (cpp_push_buffer): Support init and close of iconv. * cpplib.h (cpp_options): Add input_charset. From-SVN: r76000
This commit is contained in:
parent
2f9c39f8fc
commit
cf551fbaca
@ -1,3 +1,19 @@
|
||||
2004-01-16 Eric Christopher <echristo@redhat.com>
|
||||
Chandrakala Chavva <cchavva@redhat.com>
|
||||
|
||||
* cppcharset.c (one_iso88591_to_utf8): New function.
|
||||
(convert_iso88591_utf8): Ditto. Use.
|
||||
(conversion_tab): Use.
|
||||
(_cpp_input_to_utf8): New function.
|
||||
(_cpp_init_iconv_buffer): Ditto.
|
||||
(_cpp_close_iconv_buffer): Ditto.
|
||||
* cpphash.h: Prototype new functions.
|
||||
(cpp_buffer): Add input_cset_desc.
|
||||
* cppinit.c: Add input_charset default.
|
||||
* cpplib.c (cpp_push_buffer): Support init and
|
||||
close of iconv.
|
||||
* cpplib.h (cpp_options): Add input_charset.
|
||||
|
||||
2004-01-16 Kazu Hirata <kazu@cs.umass.edu>
|
||||
|
||||
* system.h (ASM_OUTPUT_SECTION_NAME): Poison.
|
||||
|
@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The first 256 code points of ISO 8859.1 have the same numeric
|
||||
values as the first 256 code points of Unicode, therefore the
|
||||
incoming ISO 8859.1 character can be passed directly to
|
||||
one_cppchar_to_utf8 (which expects a Unicode value). */
|
||||
|
||||
static int
|
||||
one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
|
||||
size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
|
||||
{
|
||||
const uchar *inbuf = *inbufp;
|
||||
int rval;
|
||||
|
||||
if (*inbytesleftp > 1)
|
||||
return EINVAL;
|
||||
|
||||
rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
|
||||
if (rval)
|
||||
return rval;
|
||||
|
||||
*inbufp += 1;
|
||||
*inbytesleftp -= 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper routine for the next few functions. The 'const' on
|
||||
one_conversion means that we promise not to modify what function is
|
||||
pointed to, which lets the inliner see through it. */
|
||||
@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
|
||||
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
|
||||
}
|
||||
|
||||
static bool
|
||||
convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
|
||||
struct _cpp_strbuf *to)
|
||||
{
|
||||
return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
|
||||
}
|
||||
|
||||
|
||||
/* Identity conversion, used when we have no alternative. */
|
||||
static bool
|
||||
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
|
||||
@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
|
||||
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
|
||||
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
|
||||
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
|
||||
{ "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
|
||||
};
|
||||
|
||||
/* Subroutine of cpp_init_iconv: initialize and return a
|
||||
@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
|
||||
if (ret.cd == (iconv_t) -1)
|
||||
{
|
||||
if (errno == EINVAL)
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
|
||||
"conversion from %s to %s not supported by iconv",
|
||||
from, to);
|
||||
else
|
||||
@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
|
||||
}
|
||||
else
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
|
||||
cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
|
||||
"no iconv implementation, cannot convert from %s to %s",
|
||||
from, to);
|
||||
ret.func = convert_no_conversion;
|
||||
@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
uchar *
|
||||
_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
|
||||
{
|
||||
struct _cpp_strbuf tbuf;
|
||||
struct cset_converter cvt = pfile->buffer->input_cset_desc;
|
||||
|
||||
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
|
||||
tbuf.text = xmalloc (tbuf.asize);
|
||||
tbuf.len = 0;
|
||||
|
||||
if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (length)
|
||||
tbuf.text[tbuf.len] = '\n';
|
||||
else
|
||||
tbuf.text[0] = '\n';
|
||||
|
||||
return tbuf.text;
|
||||
}
|
||||
|
||||
/* Check the input file format. At present assuming the input file
|
||||
is in iso-8859-1 format. Convert this input character set to
|
||||
source character set format (UTF-8). */
|
||||
|
||||
void
|
||||
_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
|
||||
{
|
||||
pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
|
||||
from);
|
||||
}
|
||||
|
||||
void
|
||||
_cpp_close_iconv_buffer (cpp_reader *pfile)
|
||||
{
|
||||
if (HAVE_ICONV
|
||||
&& pfile->buffer->input_cset_desc.func == convert_using_iconv)
|
||||
iconv_close (pfile->buffer->input_cset_desc.cd);
|
||||
}
|
||||
|
@ -313,6 +313,10 @@ struct cpp_buffer
|
||||
|
||||
/* Used for buffer overlays by cpptrad.c. */
|
||||
const uchar *saved_cur, *saved_rlimit;
|
||||
|
||||
/* Descriptor for converting from the input character set to the
|
||||
source character set. */
|
||||
struct cset_converter input_cset_desc;
|
||||
};
|
||||
|
||||
/* A cpp_reader encapsulates the "state" of a pre-processor run.
|
||||
@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
|
||||
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
|
||||
unsigned int, unsigned int);
|
||||
extern void _cpp_pop_buffer (cpp_reader *);
|
||||
extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
|
||||
extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
|
||||
extern void _cpp_close_iconv_buffer (cpp_reader *);
|
||||
|
||||
/* In cpptrad.c. */
|
||||
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
|
||||
|
@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
|
||||
CPP_OPTION (pfile, narrow_charset) = 0;
|
||||
CPP_OPTION (pfile, wide_charset) = 0;
|
||||
|
||||
/* Default the input character set to iso-8859-1 for now. */
|
||||
CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
|
||||
|
||||
/* A fake empty "directory" used as the starting point for files
|
||||
looked up without a search path. Name cannot be '/' because we
|
||||
don't want to prepend anything at all to filenames using it. All
|
||||
|
@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
|
||||
int from_stage3)
|
||||
{
|
||||
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
|
||||
const char *input = CPP_OPTION (pfile, input_charset);
|
||||
|
||||
/* Clears, amongst other things, if_stack and mi_cmacro. */
|
||||
memset (new, 0, sizeof (cpp_buffer));
|
||||
@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
|
||||
new->need_line = true;
|
||||
|
||||
pfile->buffer = new;
|
||||
_cpp_init_iconv_buffer (pfile, input);
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile)
|
||||
/* In case of a missing #endif. */
|
||||
pfile->state.skipping = 0;
|
||||
|
||||
_cpp_close_iconv_buffer (pfile);
|
||||
|
||||
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
|
||||
pfile->buffer = buffer->prev;
|
||||
|
||||
|
@ -332,6 +332,9 @@ struct cpp_options
|
||||
/* Holds the name of the target wide character set. */
|
||||
const char *wide_charset;
|
||||
|
||||
/* Holds the name of the input character set. */
|
||||
const char *input_charset;
|
||||
|
||||
/* True to warn about precompiled header files we couldn't use. */
|
||||
bool warn_invalid_pch;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user