cppcharset.c (one_iso88591_to_utf8): New function.

2004-01-16  Eric Christopher  <echristo@redhat.com>
	    Chandrakala Chavva <cchavva@redhat.com>

	* cppcharset.c (one_iso88591_to_utf8): New function.
	(convert_iso88591_utf8): Ditto. Use.
	(conversion_tab): Use.
	(_cpp_input_to_utf8): New function.
	(_cpp_init_iconv_buffer): Ditto.
	(_cpp_close_iconv_buffer): Ditto.
	* cpphash.h: Prototype new functions.
	(cpp_buffer): Add input_cset_desc.
	* cppinit.c: Add input_charset default.
	* cpplib.c (cpp_push_buffer): Support init and
	close of iconv.
	* cpplib.h (cpp_options): Add input_charset.

From-SVN: r76000
This commit is contained in:
Eric Christopher 2004-01-16 22:37:49 +00:00
parent 2f9c39f8fc
commit cf551fbaca
6 changed files with 129 additions and 18 deletions

View File

@ -1,3 +1,19 @@
2004-01-16 Eric Christopher <echristo@redhat.com>
Chandrakala Chavva <cchavva@redhat.com>
* cppcharset.c (one_iso88591_to_utf8): New function.
(convert_iso88591_utf8): Ditto. Use.
(conversion_tab): Use.
(_cpp_input_to_utf8): New function.
(_cpp_init_iconv_buffer): Ditto.
(_cpp_close_iconv_buffer): Ditto.
* cpphash.h: Prototype new functions.
(cpp_buffer): Add input_cset_desc.
* cppinit.c: Add input_charset default.
* cpplib.c (cpp_push_buffer): Support init and
close of iconv.
* cpplib.h (cpp_options): Add input_charset.
2004-01-16 Kazu Hirata <kazu@cs.umass.edu>
* system.h (ASM_OUTPUT_SECTION_NAME): Poison.

View File

@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
return 0;
}
/* Convert the single ISO 8859-1 character at *INBUFP to UTF-8,
   storing the result at *OUTBUFP.

   The first 256 code points of ISO 8859-1 have the same numeric
   values as the first 256 code points of Unicode, therefore the
   incoming ISO 8859-1 character can be passed directly to
   one_cppchar_to_utf8 (which expects a Unicode value).

   Returns 0 on success, after advancing *INBUFP by one byte and
   decrementing *INBYTESLEFTP by one.  Returns EINVAL if no input
   remains; otherwise returns whatever nonzero value
   one_cppchar_to_utf8 reports, leaving the input position untouched.
   BIGEND is unused.  */
static int
one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
                      size_t *inbytesleftp, uchar **outbufp,
                      size_t *outbytesleftp)
{
  int rval;

  /* Every ISO 8859-1 character is exactly one byte, so the only
     unusable input is an empty one.  Rejecting it here keeps the read
     below in bounds and stops the decrement from wrapping around.  */
  if (*inbytesleftp < 1)
    return EINVAL;

  rval = one_cppchar_to_utf8 ((cppchar_t) **inbufp, outbufp, outbytesleftp);
  if (rval)
    return rval;

  *inbufp += 1;
  *inbytesleftp -= 1;
  return 0;
}
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
/* Convert FLEN bytes of ISO 8859-1 text at FROM to UTF-8, appending
   the result to TO.  All of the work is delegated to conversion_loop,
   which is driven one character at a time by one_iso88591_to_utf8.
   Returns conversion_loop's result unchanged.  */
static bool
convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
                       struct _cpp_strbuf *to)
{
  bool converted;

  converted = conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
  return converted;
}
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
{ "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
};
/* Subroutine of cpp_init_iconv: initialize and return a
@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
if (ret.cd == (iconv_t) -1)
{
if (errno == EINVAL)
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv",
from, to);
else
@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
}
else
{
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s",
from, to);
ret.func = convert_no_conversion;
@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
return result;
}
/* Convert LENGTH bytes at INPUT using the converter stored in the
   current buffer (pfile->buffer->input_cset_desc).

   On success, returns a buffer obtained from xmalloc that holds the
   converted text followed by a single '\n'; no NUL is stored and the
   converted length is not reported to the caller.  On failure, an
   error is issued through cpp_error, the scratch buffer is released
   and NULL is returned.  */
uchar *
_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
{
  struct _cpp_strbuf tbuf;
  struct cset_converter cvt = pfile->buffer->input_cset_desc;
  size_t end;

  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
  tbuf.text = xmalloc (tbuf.asize);
  tbuf.len = 0;

  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
    {
      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
      /* Do not leak the scratch buffer on the error path.
         NOTE(review): assumes the converter leaves tbuf.text pointing
         at a live allocation when it fails -- confirm.  */
      free (tbuf.text);
      return NULL;
    }

  /* The terminator goes right after the converted text, or at the
     very start of the buffer for empty input.  */
  end = length ? tbuf.len : 0;

  /* The converted text may fill the allocation exactly, leaving no
     room for the terminator; grow by one byte in that case rather
     than writing past the end.  */
  if (end >= tbuf.asize)
    tbuf.text = xrealloc (tbuf.text, end + 1);

  tbuf.text[end] = '\n';
  return tbuf.text;
}
/* Set up the input character set converter for the current buffer.
   FROM names the character set the input is written in; the
   descriptor returned by init_iconv_desc for converting FROM to
   SOURCE_CHARSET is stored in pfile->buffer->input_cset_desc.  The
   format of the input file is not inspected here; the caller chooses
   FROM.  */
void
_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
{
  struct cset_converter desc = init_iconv_desc (pfile, SOURCE_CHARSET, from);

  pfile->buffer->input_cset_desc = desc;
}
/* Release the iconv handle held by the current buffer's input
   converter, if there is one.  Nothing is done unless HAVE_ICONV is
   nonzero and the converter's function is convert_using_iconv.  */
void
_cpp_close_iconv_buffer (cpp_reader *pfile)
{
  if (!HAVE_ICONV)
    return;

  /* Any other conversion function is left alone; only this one leads
     to the iconv_close call below.  */
  if (pfile->buffer->input_cset_desc.func != convert_using_iconv)
    return;

  iconv_close (pfile->buffer->input_cset_desc.cd);
}

View File

@ -313,6 +313,10 @@ struct cpp_buffer
/* Used for buffer overlays by cpptrad.c. */
const uchar *saved_cur, *saved_rlimit;
/* Descriptor for converting from the input character set to the
source character set. */
struct cset_converter input_cset_desc;
};
/* A cpp_reader encapsulates the "state" of a pre-processor run.
@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
unsigned int, unsigned int);
extern void _cpp_pop_buffer (cpp_reader *);
extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
extern void _cpp_close_iconv_buffer (cpp_reader *);
/* In cpptrad.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);

View File

@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
CPP_OPTION (pfile, narrow_charset) = 0;
CPP_OPTION (pfile, wide_charset) = 0;
/* Default the input character set to iso-8859-1 for now. */
CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
/* A fake empty "directory" used as the starting point for files
looked up without a search path. Name cannot be '/' because we
don't want to prepend anything at all to filenames using it. All

View File

@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
int from_stage3)
{
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
const char *input = CPP_OPTION (pfile, input_charset);
/* Clears, amongst other things, if_stack and mi_cmacro. */
memset (new, 0, sizeof (cpp_buffer));
@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
new->need_line = true;
pfile->buffer = new;
_cpp_init_iconv_buffer (pfile, input);
return new;
}
@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile)
/* In case of a missing #endif. */
pfile->state.skipping = 0;
_cpp_close_iconv_buffer (pfile);
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
pfile->buffer = buffer->prev;

View File

@ -332,6 +332,9 @@ struct cpp_options
/* Holds the name of the target wide character set. */
const char *wide_charset;
/* Holds the name of the input character set. */
const char *input_charset;
/* True to warn about precompiled header files we couldn't use. */
bool warn_invalid_pch;