re PR preprocessor/33415 (Can't compile .cpp file with UTF-8 BOM.)

libcpp
	PR libcpp/33415:
	* charset.c (_cpp_convert_input): Add buffer_start argument.
	Ignore UTF-8 BOM if seen.
	* internal.h (_cpp_convert_input): Add argument.
	* files.c (struct _cpp_file) <buffer_start>: New field.
	(destroy_cpp_file): Free buffer_start, not buffer.
	(_cpp_pop_file_buffer): Likewise.
	(read_file_guts): Update.
gcc/testsuite
	PR libcpp/33415:
	* gcc.dg/cpp/pr33415.c: New file.

From-SVN: r134507
This commit is contained in:
Tom Tromey 2008-04-21 14:02:00 +00:00 committed by Tom Tromey
parent 009890be6c
commit 688e7a5344
6 changed files with 68 additions and 16 deletions

View File

@ -1,3 +1,8 @@
2008-04-21 Tom Tromey <tromey@redhat.com>
PR libcpp/33415:
* gcc.dg/cpp/pr33415.c: New file.
2008-04-21 Olivier Hainque <hainque@adacore.com>
* gnat.dg/bltins.adb: New testcase.

View File

@ -0,0 +1,6 @@
 /* Test case for PR 33415. Note that the first bytes of this file
are a UTF-8 BOM. */
/* { dg-do compile } */
int f(void) { return 5; }

View File

@ -1,3 +1,14 @@
2008-04-21 Tom Tromey <tromey@redhat.com>
PR libcpp/33415:
* charset.c (_cpp_convert_input): Add buffer_start argument.
Ignore UTF-8 BOM if seen.
* internal.h (_cpp_convert_input): Add argument.
* files.c (struct _cpp_file) <buffer_start>: New field.
(destroy_cpp_file): Free buffer_start, not buffer.
(_cpp_pop_file_buffer): Likewise.
(read_file_guts): Update.
2008-04-18 Kris Van Hees <kris.van.hees@oracle.com>
* include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal.

View File

@ -1,5 +1,5 @@
/* CPP Library - charsets
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008
Free Software Foundation, Inc.
Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
@ -1637,18 +1637,24 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
source file) from INPUT_CHARSET to the source character set. INPUT
points to the input buffer, SIZE is its allocated size, and LEN is
the length of the meaningful data within the buffer. The
translated buffer is returned, and *ST_SIZE is set to the length of
the meaningful data within the translated buffer.
translated buffer is returned, *ST_SIZE is set to the length of
the meaningful data within the translated buffer, and *BUFFER_START
is set to the start of the returned buffer. *BUFFER_START may
differ from the return value in the case of a BOM or other ignored
marker information.
INPUT is expected to have been allocated with xmalloc. This function
will either return INPUT, or free it and return a pointer to another
xmalloc-allocated block of memory. */
INPUT is expected to have been allocated with xmalloc. This
function will either set *BUFFER_START to INPUT, or free it and set
*BUFFER_START to a pointer to another xmalloc-allocated block of
memory. */
uchar *
_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
uchar *input, size_t size, size_t len, off_t *st_size)
uchar *input, size_t size, size_t len,
const unsigned char **buffer_start, off_t *st_size)
{
struct cset_converter input_cset;
struct _cpp_strbuf to;
unsigned char *buffer;
input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
if (input_cset.func == convert_no_conversion)
@ -1689,8 +1695,24 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
else
to.text[to.len] = '\n';
buffer = to.text;
*st_size = to.len;
return to.text;
#if HOST_CHARSET == HOST_CHARSET_ASCII
/* The HOST_CHARSET test just above ensures that the source charset
is UTF-8. So, ignore a UTF-8 BOM if we see one. Note that
glibc's UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
BOM -- however, even if it did, we would still need this code due
to the 'convert_no_conversion' case. */
if (to.len >= 3 && to.text[0] == 0xef && to.text[1] == 0xbb
&& to.text[2] == 0xbf)
{
*st_size -= 3;
buffer += 3;
}
#endif
*buffer_start = to.text;
return buffer;
}
/* Decide on the default encoding to assume for input files. */

View File

@ -74,6 +74,10 @@ struct _cpp_file
/* The contents of NAME after calling read_file(). */
const uchar *buffer;
/* Pointer to the real start of BUFFER. read_file() might increment
BUFFER; when freeing, this pointer must be used instead. */
const uchar *buffer_start;
/* The macro, if any, preventing re-inclusion. */
const cpp_hashnode *cmacro;
@ -635,8 +639,11 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
cpp_error (pfile, CPP_DL_WARNING,
"%s is shorter than expected", file->path);
file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset),
buf, size, total, &file->st.st_size);
file->buffer = _cpp_convert_input (pfile,
CPP_OPTION (pfile, input_charset),
buf, size, total,
&file->buffer_start,
&file->st.st_size);
file->buffer_valid = true;
return true;
@ -969,8 +976,8 @@ make_cpp_file (cpp_reader *pfile, cpp_dir *dir, const char *fname)
static void
destroy_cpp_file (_cpp_file *file)
{
if (file->buffer)
free ((void *) file->buffer);
if (file->buffer_start)
free ((void *) file->buffer_start);
free ((void *) file->name);
free (file);
}
@ -1302,9 +1309,10 @@ _cpp_pop_file_buffer (cpp_reader *pfile, _cpp_file *file)
/* Invalidate control macros in the #including file. */
pfile->mi_valid = false;
if (file->buffer)
if (file->buffer_start)
{
free ((void *) file->buffer);
free ((void *) file->buffer_start);
file->buffer_start = NULL;
file->buffer = NULL;
file->buffer_valid = false;
}

View File

@ -1,5 +1,5 @@
/* Part of CPP library.
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
@ -644,7 +644,7 @@ extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **,
extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t,
off_t *);
const unsigned char **, off_t *);
extern const char *_cpp_default_encoding (void);
extern cpp_hashnode * _cpp_interpret_identifier (cpp_reader *pfile,
const unsigned char *id,