Makefile.in (LIBCPP_OBJS): Add cppcharset.o.

* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
	(cppcharset.o): New target.
	* c-lex.c (is_extended_char): Move to cppcharset.c.
	(utf8_extend_token): Delete.
	* cppcharset.c: New file.
	* cpphash.h (_cpp_valid_ucn): New.
	* cpplex.c (lex_identifier): Update prototype.
	(continues_identifier_p): Rename forms_identifier_p.  Handle UCN
	escapes.
	(maybe_read_ucs): Rename maybe_read_ucn.  Update to use code
	in cppcharset.c.
	(lex_number, lex_identifier, cpp_parse_escape): Update.
	(_cpp_lex_direct): Update to handle UCNs.
	(cpp_avoid_paste): Don't paste to form a UCN.
testsuite:
	* ucs.c: Update diagnostic messages.

From-SVN: r65845
This commit is contained in:
Neil Booth 2003-04-20 07:29:23 +00:00 committed by Neil Booth
parent 0a45ec5c78
commit 1613e52bdd
8 changed files with 705 additions and 426 deletions

View File

@ -1,3 +1,20 @@
2003-04-20 Neil Booth <neil@daikokuya.co.uk>
* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
(cppcharset.o): New target.
* c-lex.c (is_extended_char): Move to cppcharset.c.
(utf8_extend_token): Delete.
* cppcharset.c: New file.
* cpphash.h (_cpp_valid_ucn): New.
* cpplex.c (lex_identifier): Update prototype.
(continues_identifier_p): Rename forms_identifier_p. Handle UCN
escapes.
(maybe_read_ucs): Rename maybe_read_ucn. Update to use code
in cppcharset.c.
(lex_number, lex_identifier, cpp_parse_escape): Update.
(_cpp_lex_direct): Update to handle UCNs.
(cpp_avoid_paste): Don't paste to form a UCN.
2003-04-19 Roger Sayle <roger@eyesopen.com>
* builtins.c (expand_builtin): Don't expand a pure or const

View File

@ -2320,7 +2320,7 @@ PREPROCESSOR_DEFINES = \
@TARGET_SYSTEM_ROOT_DEFINE@
LIBCPP_OBJS = cpplib.o cpplex.o cppmacro.o cppexp.o cppfiles.o cpptrad.o \
cpphash.o cpperror.o cppinit.o \
cpphash.o cpperror.o cppinit.o cppcharset.o \
hashtable.o line-map.o mkdeps.o mbchar.o cpppch.o
LIBCPP_DEPS = $(CPPLIB_H) cpphash.h line-map.h hashtable.h intl.h \
@ -2333,6 +2333,7 @@ libcpp.a: $(LIBCPP_OBJS)
$(AR) $(AR_FLAGS) libcpp.a $(LIBCPP_OBJS)
-$(RANLIB) libcpp.a
cppcharset.o: cppcharset.c $(LIBCPP_DEPS)
cpperror.o: cpperror.c $(LIBCPP_DEPS)
cppexp.o: cppexp.c $(LIBCPP_DEPS)
cpplex.o: cpplex.c $(LIBCPP_DEPS) mbchar.h

View File

@ -322,315 +322,6 @@ cb_undef (pfile, line, node)
(*debug_hooks->undef) (SOURCE_LINE (map, line),
(const char *) NODE_NAME (node));
}
#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Return value, as implemented below:
     - 0 on a TARGET_EBCDIC build, and 0 for any C below 0x7f;
     - 1 for every other C.  When C lies outside the Basic Multilingual
       Plane, or is in none of the ranges listed below, an error is
       reported before 1 is returned.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */
static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c == 0x0384)
      || (c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040d)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f4))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao.  The single code point between 0x0e8a and the 0x0e94 range is
     0x0e8d; 0x0e0d is a Thai character that the Thai test above has
     already accepted.  */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e8d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab)
      || (c >= 0x0ead && c <= 0x0eb0)
      || (c == 0x0eb2)
      || (c == 0x0eb3)
      || (c == 0x0ebd)
      || (c >= 0x0ec0 && c <= 0x0ec4)
      || (c == 0x0ec6))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3094)
      || (c >= 0x309b && c <= 0x309e))
    return 1;

  /* Katakana */
  if ((c >= 0x30a1 && c <= 0x30fe))
    return 1;

  /* Bopomofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  /* Hangul */
  if ((c >= 0x1100 && c <= 0x1159)
      || (c >= 0x1161 && c <= 0x11a2)
      || (c >= 0x11a8 && c <= 0x11f9))
    return 1;

  /* CJK Unified Ideographs */
  if ((c >= 0xf900 && c <= 0xfa2d)
      || (c >= 0xfb1f && c <= 0xfb36)
      || (c >= 0xfb38 && c <= 0xfb3c)
      || (c == 0xfb3e)
      || (c >= 0xfb40 && c <= 0xfb41)
      || (c >= 0xfb42 && c <= 0xfb44)
      || (c >= 0xfb46 && c <= 0xfbb1)
      || (c >= 0xfbd3 && c <= 0xfd3f)
      || (c >= 0xfd50 && c <= 0xfd8f)
      || (c >= 0xfd92 && c <= 0xfdc7)
      || (c >= 0xfdf0 && c <= 0xfdfb)
      || (c >= 0xfe70 && c <= 0xfe72)
      || (c == 0xfe74)
      || (c >= 0xfe76 && c <= 0xfefc)
      || (c >= 0xff21 && c <= 0xff3a)
      || (c >= 0xff41 && c <= 0xff5a)
      || (c >= 0xff66 && c <= 0xffbe)
      || (c >= 0xffc2 && c <= 0xffc7)
      || (c >= 0xffca && c <= 0xffcf)
      || (c >= 0xffd2 && c <= 0xffd7)
      || (c >= 0xffda && c <= 0xffdc)
      || (c >= 0x4e00 && c <= 0x9fa5))
    return 1;

  /* Not in any permitted range: diagnose, but still report C as a
     universal-character-name.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are emitted as a single byte.  Larger values are
   emitted as a lead byte, whose high bits (MASK) encode the sequence
   length, followed by one continuation byte per remaining group of six
   bits.  Bytes are handed to extend_token in order.  */
static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the most significant payload bits.  */
  extend_token (mask | (c >> shift));

  do
    {
      shift -= 6;
      /* A continuation byte carries exactly six payload bits under the
	 0x80 marker.  Without the 0x3f mask the bits belonging to the
	 preceding bytes would be OR-ed in as well, e.g. U+00E9 would be
	 emitted as C3 E9 rather than C3 A9.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
#endif
int
c_lex (value)

591
gcc/cppcharset.c Normal file
View File

@ -0,0 +1,591 @@
/* CPP Library - charsets
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
Free Software Foundation, Inc.
Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "cpplib.h"
#include "cpphash.h"
static int ucn_valid_in_identifier PARAMS ((cpp_reader *, cppchar_t));
/* [lex.charset]: The character designated by the universal character
   name \UNNNNNNNN is that character whose character short name in
   ISO/IEC 10646 is NNNNNNNN; the character designated by the
   universal character name \uNNNN is that character whose character
   short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
   for a universal character name is less than 0x20 or in the range
   0x7F-0x9F (inclusive), or if the universal character name
   designates a character in the basic source character set, then the
   program is ill-formed.

   *PSTR must be preceded by "\u" or "\U" (anything else aborts); it
   is assumed that the buffer end is delimited by a non-hex digit.

   Returns zero, leaving *PSTR untouched, if UCNs are not part of the
   relevant standard (neither C++ nor C99 is selected) or if the
   target is EBCDIC.  Otherwise up to 4 ("\u") or 8 ("\U") hex digits
   are consumed, *PSTR is updated to point one beyond the UCN or to
   the first character that is not a hex digit, and the non-zero
   value of the UCN, whether valid or invalid, is returned; a value
   of zero is returned as 1 so that it cannot be mistaken for the
   "not a UCN" result.  Diagnostics are emitted for incomplete and
   invalid values.

   IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
   an identifier, or 2 otherwise.  */
cppchar_t
_cpp_valid_ucn (pfile, pstr, identifier_pos)
     cpp_reader *pfile;
     const uchar **pstr;
     int identifier_pos;
{
  cppchar_t result, c;
  unsigned int length;
  int spelling_len;
  const uchar *str = *pstr;
  /* BASE addresses the backslash, so diagnostics can quote the whole
     spelling of the UCN.  */
  const uchar *base = str - 2;

  /* Only attempt to interpret a UCN for C++ and C99.  */
  if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
    return 0;

  /* We don't accept UCNs for an EBCDIC target.  */
  if (CPP_OPTION (pfile, EBCDIC))
    return 0;

  if (str[-1] == 'u')
    length = 4;
  else if (str[-1] == 'U')
    length = 8;
  else
    abort ();

  /* Accumulate hex digits; LENGTH is left non-zero if we run out of
     digits early.  */
  result = 0;
  do
    {
      c = *str;
      if (!ISXDIGIT (c))
	break;
      str++;
      result = (result << 4) + hex_value (c);
    }
  while (--length);

  *pstr = str;

  /* The precision consumed by "%.*s" must have type int, whereas a
     pointer difference has type ptrdiff_t, so convert explicitly.
     The spelling is at most ten characters long.  */
  spelling_len = (int) (str - base);

  if (length)
    /* We'll error when we try it out as the start of an identifier.  */
    cpp_error (pfile, DL_ERROR, "incomplete universal character name %.*s",
	       spelling_len, base);
  /* The standard permits $, @ and ` to be specified as UCNs.  We use
     hex escapes so that this also works with EBCDIC hosts.  */
  else if ((result < 0xa0
	    && (result != 0x24 && result != 0x40 && result != 0x60))
	   || (result & 0x80000000)
	   || (result >= 0xD800 && result <= 0xDFFF))
    {
      cpp_error (pfile, DL_ERROR, "%.*s is not a valid universal character",
		 spelling_len, base);
    }
  else if (identifier_pos)
    {
      int validity = ucn_valid_in_identifier (pfile, result);

      if (validity == 0)
	cpp_error (pfile, DL_ERROR,
		   "universal character %.*s is not valid in an identifier",
		   spelling_len, base);
      else if (validity == 2 && identifier_pos == 1)
	cpp_error (pfile, DL_ERROR,
   "universal character %.*s is not valid at the start of an identifier",
		   spelling_len, base);
    }

  /* Zero is reserved for "not a UCN".  */
  if (result == 0)
    result = 1;

  return result;
}
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
   the start of an identifier, and 0 if C is not valid in an
   identifier.  We assume C has already gone through the checks of
   _cpp_valid_ucn.

   The tests are grouped in three parts: ranges accepted when C99 is
   selected or we are not pedantic, ranges accepted when C++ is
   selected or we are not pedantic, and ranges accepted
   unconditionally.  The first matching range decides the result.  */
static int
ucn_valid_in_identifier (pfile, c)
     cpp_reader *pfile;
     cppchar_t c;
{
  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    return 0;

  if (CPP_OPTION (pfile, c99) || !CPP_PEDANTIC (pfile))
    {
      /* Latin.  */
      if (c == 0x00aa || c == 0x00ba || c == 0x207f || c == 0x1e9b)
	return 1;

      /* Greek.  */
      if (c == 0x0386)
	return 1;

      /* Cyrillic.  0x040e is the one code point in 0x0401-0x044f that
	 neither the unconditional Cyrillic ranges nor the C++ list
	 below accepts.  */
      if (c == 0x040e)
	return 1;

      /* Hebrew.  */
      if ((c >= 0x05b0 && c <= 0x05b9)
	  || (c >= 0x05bb && c <= 0x05bd)
	  || c == 0x05bf
	  || (c >= 0x05c1 && c <= 0x05c2))
	return 1;

      /* Arabic.  */
      if ((c >= 0x06d0 && c <= 0x06dc)
	  || c == 0x06e8
	  || (c >= 0x06ea && c <= 0x06ed))
	return 1;

      /* Devanagari */
      if ((c >= 0x0901 && c <= 0x0903)
	  || (c >= 0x093e && c <= 0x094d)
	  || (c >= 0x0950 && c <= 0x0952)
	  || c == 0x0963)
	return 1;

      /* Bengali */
      if ((c >= 0x0981 && c <= 0x0983)
	  || (c >= 0x09be && c <= 0x09c4)
	  || (c >= 0x09c7 && c <= 0x09c8)
	  || (c >= 0x09cb && c <= 0x09cd)
	  || (c >= 0x09e2 && c <= 0x09e3))
	return 1;

      /* Gurmukhi */
      if (c == 0x0a02
	  || (c >= 0x0a3e && c <= 0x0a42)
	  || (c >= 0x0a47 && c <= 0x0a48)
	  || (c >= 0x0a4b && c <= 0x0a4d)
	  || (c == 0x0a74))
	return 1;

      /* Gujarati */
      if ((c >= 0x0a81 && c <= 0x0a83)
	  || (c >= 0x0abd && c <= 0x0ac5)
	  || (c >= 0x0ac7 && c <= 0x0ac9)
	  || (c >= 0x0acb && c <= 0x0acd)
	  || (c == 0x0ad0))
	return 1;

      /* Oriya */
      if ((c >= 0x0b01 && c <= 0x0b03)
	  || (c >= 0x0b3e && c <= 0x0b43)
	  || (c >= 0x0b47 && c <= 0x0b48)
	  || (c >= 0x0b4b && c <= 0x0b4d))
	return 1;

      /* Tamil.  The last range starts at 0x0bca; 0x0bc9 is not a
	 permitted character.  */
      if ((c >= 0x0b82 && c <= 0x0b83)
	  || (c >= 0x0bbe && c <= 0x0bc2)
	  || (c >= 0x0bc6 && c <= 0x0bc8)
	  || (c >= 0x0bca && c <= 0x0bcd))
	return 1;

      /* Telugu */
      if ((c >= 0x0c01 && c <= 0x0c03)
	  || (c >= 0x0c3e && c <= 0x0c44)
	  || (c >= 0x0c46 && c <= 0x0c48)
	  || (c >= 0x0c4a && c <= 0x0c4d))
	return 1;

      /* Kannada */
      if ((c >= 0x0c82 && c <= 0x0c83)
	  || (c >= 0x0cbe && c <= 0x0cc4)
	  || (c >= 0x0cc6 && c <= 0x0cc8)
	  || (c >= 0x0cca && c <= 0x0ccd)
	  || c == 0x0cde)
	return 1;

      /* Malayalam */
      if ((c >= 0x0d02 && c <= 0x0d03)
	  || (c >= 0x0d3e && c <= 0x0d43)
	  || (c >= 0x0d46 && c <= 0x0d48)
	  || (c >= 0x0d4a && c <= 0x0d4d))
	return 1;

      /* Thai */
      if ((c >= 0x0e01 && c <= 0x0e3a)
	  || (c >= 0x0e40 && c <= 0x0e5b))
	return 1;

      /* Lao */
      if ((c >= 0x0ead && c <= 0x0eae)
	  || (c >= 0x0eb0 && c <= 0x0eb9)
	  || (c >= 0x0ebb && c <= 0x0ebd)
	  || (c >= 0x0ec0 && c <= 0x0ec4)
	  || c == 0x0ec6
	  || (c >= 0x0ec8 && c <= 0x0ecd)
	  || (c >= 0x0edc && c <= 0x0edd))
	return 1;

      /* Tibetan.  */
      if (c == 0x0f00
	  || (c >= 0x0f18 && c <= 0x0f19)
	  || c == 0x0f35
	  || c == 0x0f37
	  || c == 0x0f39
	  || (c >= 0x0f3e && c <= 0x0f47)
	  || (c >= 0x0f49 && c <= 0x0f69)
	  || (c >= 0x0f71 && c <= 0x0f84)
	  || (c >= 0x0f86 && c <= 0x0f8b)
	  || (c >= 0x0f90 && c <= 0x0f95)
	  || c == 0x0f97
	  || (c >= 0x0f99 && c <= 0x0fad)
	  || (c >= 0x0fb1 && c <= 0x0fb7)
	  || c == 0x0fb9)
	return 1;

      /* Katakana */
      if ((c >= 0x30a1 && c <= 0x30f6)
	  || (c >= 0x30fb && c <= 0x30fc))
	return 1;

      /* CJK Unified Ideographs.  */
      if (c >= 0x4e00 && c <= 0x9fa5)
	return 1;

      /* Hangul.  */
      if (c >= 0xac00 && c <= 0xd7a3)
	return 1;

      /* Digits.  These are valid except at the start of an identifier.
	 NOTE(review): the Thai test above (0x0e40-0x0e5b) has already
	 returned 1 for the Thai digits 0x0e50-0x0e59, so that range
	 never yields 2 here -- confirm whether this is intended.  */
      if ((c >= 0x0660 && c <= 0x0669)
	  || (c >= 0x06f0 && c <= 0x06f9)
	  || (c >= 0x0966 && c <= 0x096f)
	  || (c >= 0x09e6 && c <= 0x09ef)
	  || (c >= 0x0a66 && c <= 0x0a6f)
	  || (c >= 0x0ae6 && c <= 0x0aef)
	  || (c >= 0x0b66 && c <= 0x0b6f)
	  || (c >= 0x0be7 && c <= 0x0bef)
	  || (c >= 0x0c66 && c <= 0x0c6f)
	  || (c >= 0x0ce6 && c <= 0x0cef)
	  || (c >= 0x0d66 && c <= 0x0d6f)
	  || (c >= 0x0e50 && c <= 0x0e59)
	  || (c >= 0x0ed0 && c <= 0x0ed9)
	  || (c >= 0x0f20 && c <= 0x0f33))
	return 2;

      /* Special characters.  */
      if (c == 0x00b5
	  || c == 0x00b7
	  || (c >= 0x02b0 && c <= 0x02b8)
	  || c == 0x02bb
	  || (c >= 0x02bd && c <= 0x02c1)
	  || (c >= 0x02d0 && c <= 0x02d1)
	  || (c >= 0x02e0 && c <= 0x02e4)
	  || c == 0x037a
	  || c == 0x0559
	  || c == 0x093d
	  || c == 0x0b3d
	  || c == 0x1fbe
	  || (c >= 0x203f && c <= 0x2040)
	  || c == 0x2102
	  || c == 0x2107
	  || (c >= 0x210a && c <= 0x2113)
	  || c == 0x2115
	  || (c >= 0x2118 && c <= 0x211d)
	  || c == 0x2124
	  || c == 0x2126
	  || c == 0x2128
	  || (c >= 0x212a && c <= 0x2131)
	  || (c >= 0x2133 && c <= 0x2138)
	  || (c >= 0x2160 && c <= 0x2182)
	  || (c >= 0x3005 && c <= 0x3007)
	  || (c >= 0x3021 && c <= 0x3029))
	return 1;
    }

  if (CPP_OPTION (pfile, cplusplus) || !CPP_PEDANTIC (pfile))
    {
      /* Greek.  */
      if (c == 0x0384)
	return 1;

      /* Cyrillic.  */
      if (c == 0x040d)
	return 1;

      /* Hebrew.  */
      if (c >= 0x05f3 && c <= 0x05f4)
	return 1;

      /* Lao.  */
      if ((c >= 0x0ead && c <= 0x0eb0)
	  || (c == 0x0eb2)
	  || (c == 0x0eb3)
	  || (c == 0x0ebd)
	  || (c >= 0x0ec0 && c <= 0x0ec4)
	  || (c == 0x0ec6))
	return 1;

      /* Hiragana */
      if (c == 0x3094
	  || (c >= 0x309d && c <= 0x309e))
	return 1;

      /* Katakana */
      if ((c >= 0x30a1 && c <= 0x30fe))
	return 1;

      /* Hangul */
      if ((c >= 0x1100 && c <= 0x1159)
	  || (c >= 0x1161 && c <= 0x11a2)
	  || (c >= 0x11a8 && c <= 0x11f9))
	return 1;

      /* CJK Unified Ideographs */
      if ((c >= 0xf900 && c <= 0xfa2d)
	  || (c >= 0xfb1f && c <= 0xfb36)
	  || (c >= 0xfb38 && c <= 0xfb3c)
	  || (c == 0xfb3e)
	  || (c >= 0xfb40 && c <= 0xfb41)
	  || (c >= 0xfb42 && c <= 0xfb44)
	  || (c >= 0xfb46 && c <= 0xfbb1)
	  || (c >= 0xfbd3 && c <= 0xfd3f)
	  || (c >= 0xfd50 && c <= 0xfd8f)
	  || (c >= 0xfd92 && c <= 0xfdc7)
	  || (c >= 0xfdf0 && c <= 0xfdfb)
	  || (c >= 0xfe70 && c <= 0xfe72)
	  || (c == 0xfe74)
	  || (c >= 0xfe76 && c <= 0xfefc)
	  || (c >= 0xff21 && c <= 0xff3a)
	  || (c >= 0xff41 && c <= 0xff5a)
	  || (c >= 0xff66 && c <= 0xffbe)
	  || (c >= 0xffc2 && c <= 0xffc7)
	  || (c >= 0xffca && c <= 0xffcf)
	  || (c >= 0xffd2 && c <= 0xffd7)
	  || (c >= 0xffda && c <= 0xffdc)
	  || (c >= 0x4e00 && c <= 0x9fa5))
	return 1;
    }

  /* Everything below is accepted regardless of language or pedantry.  */

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040c)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f2))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e8d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3093)
      || (c >= 0x309b && c <= 0x309c))
    return 1;

  /* Bopomofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  return 0;
}

View File

@ -555,6 +555,10 @@ extern bool _cpp_expansions_different_trad PARAMS ((const cpp_macro *,
extern uchar *_cpp_copy_replacement_text PARAMS ((const cpp_macro *, uchar *));
extern size_t _cpp_replacement_text_len PARAMS ((const cpp_macro *));
/* In cppcharset.c.  The int argument is 0 when not in an identifier,
   1 for the start of an identifier, or 2 otherwise.  */
extern cppchar_t _cpp_valid_ucn PARAMS ((cpp_reader *, const uchar **,
					 int identifier_pos));
/* Utility routines and macros. */
#define DSC(str) (const uchar *)str, sizeof str - 1
#define xnew(T) (T *) xmalloc (sizeof(T))

View File

@ -59,15 +59,14 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
static int skip_line_comment PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *));
static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
static void lex_number PARAMS ((cpp_reader *, cpp_string *));
static bool continues_identifier_p PARAMS ((cpp_reader *));
static bool forms_identifier_p PARAMS ((cpp_reader *, int));
static void lex_string PARAMS ((cpp_reader *, cpp_token *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
cppchar_t));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
const unsigned char *, cppchar_t *));
static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
static tokenrun *next_tokenrun PARAMS ((tokenrun *));
static unsigned int hex_digit_value PARAMS ((unsigned int));
@ -361,33 +360,53 @@ name_p (pfile, string)
}
/* Returns TRUE if the sequence starting at buffer->cur is valid in
an identifier. */
an identifier. FIRST is TRUE if this starts an identifier. */
static bool
continues_identifier_p (pfile)
forms_identifier_p (pfile, first)
cpp_reader *pfile;
int first;
{
if (*pfile->buffer->cur != '$' || !CPP_OPTION (pfile, dollars_in_ident))
return false;
cpp_buffer *buffer = pfile->buffer;
if (CPP_PEDANTIC (pfile) && !pfile->state.skipping && !pfile->warned_dollar)
if (*buffer->cur == '$')
{
pfile->warned_dollar = true;
cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
}
pfile->buffer->cur++;
if (!CPP_OPTION (pfile, dollars_in_ident))
return false;
return true;
buffer->cur++;
if (CPP_PEDANTIC (pfile)
&& !pfile->state.skipping
&& !pfile->warned_dollar)
{
pfile->warned_dollar = true;
cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
}
return true;
}
/* Is this a syntactically valid UCN? */
if (0 && *buffer->cur == '\\'
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
return true;
buffer->cur -= 2;
}
return false;
}
/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
lex_identifier (pfile)
lex_identifier (pfile, base)
cpp_reader *pfile;
const uchar *base;
{
cpp_hashnode *result;
const uchar *cur, *base;
const uchar *cur;
base = pfile->buffer->cur - 1;
do
{
cur = pfile->buffer->cur;
@ -398,7 +417,7 @@ lex_identifier (pfile)
pfile->buffer->cur = cur;
}
while (continues_identifier_p (pfile));
while (forms_identifier_p (pfile, false));
result = (cpp_hashnode *)
ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
@ -444,7 +463,7 @@ lex_number (pfile, number)
pfile->buffer->cur = cur;
}
while (continues_identifier_p (pfile));
while (forms_identifier_p (pfile, false));
number->len = cur - base;
dest = _cpp_unaligned_alloc (pfile, number->len + 1);
@ -803,7 +822,6 @@ _cpp_lex_direct (pfile)
}
/* Fall through. */
start_ident:
case '_':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
@ -816,7 +834,7 @@ _cpp_lex_direct (pfile)
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;
result->val.node = lex_identifier (pfile);
result->val.node = lex_identifier (pfile, buffer->cur - 1);
/* Convert named operators to their proper types. */
if (result->val.node->flags & NODE_OPERATOR)
@ -1044,14 +1062,23 @@ _cpp_lex_direct (pfile)
case '@': result->type = CPP_ATSIGN; break;
case '$':
if (CPP_OPTION (pfile, dollars_in_ident))
goto start_ident;
/* Fall through... */
case '\\':
{
const uchar *base = --buffer->cur;
default:
result->type = CPP_OTHER;
result->val.c = c;
break;
if (forms_identifier_p (pfile, true))
{
result->type = CPP_NAME;
result->val.node = lex_identifier (pfile, base);
break;
}
buffer->cur++;
default:
result->type = CPP_OTHER;
result->val.c = c;
break;
}
}
return result;
@ -1321,9 +1348,11 @@ cpp_avoid_paste (pfile, token1, token2)
|| b == CPP_CHAR || b == CPP_STRING); /* L */
case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
|| c == '.' || c == '+' || c == '-');
case CPP_OTHER: return (CPP_OPTION (pfile, objc)
&& token1->val.c == '@'
&& (b == CPP_NAME || b == CPP_STRING));
/* UCNs */
case CPP_OTHER: return ((token1->val.c == '\\' && b == CPP_NAME)
|| (CPP_OPTION (pfile, objc)
&& token1->val.c == '@'
&& (b == CPP_NAME || b == CPP_STRING)));
default: break;
}
@ -1363,93 +1392,31 @@ hex_digit_value (c)
abort ();
}
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
failure if cpplib is not parsing C++ or C99. Such failure is
silent, and no variables are updated. Otherwise returns 0, and
warns if -Wtraditional.
[lex.charset]: The character designated by the universal character
name \UNNNNNNNN is that character whose character short name in
ISO/IEC 10646 is NNNNNNNN; the character designated by the
universal character name \uNNNN is that character whose character
short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
for a universal character name is less than 0x20 or in the range
0x7F-0x9F (inclusive), or if the universal character name
designates a character in the basic source character set, then the
program is ill-formed.
We assume that wchar_t is Unicode, so we don't need to do any
mapping. Is this ever wrong?
PC points to the 'u' or 'U', PSTR is points to the byte after PC,
LIMIT is the end of the string or charconst. PSTR is updated to
point after the UCS on return, and the UCS is written into PC. */
static int
maybe_read_ucs (pfile, pstr, limit, pc)
/* Read a possible universal character name starting at *PSTR. */
static cppchar_t
maybe_read_ucn (pfile, pstr)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
cppchar_t *pc;
const uchar **pstr;
{
const unsigned char *p = *pstr;
unsigned int code = 0;
unsigned int c = *pc, length;
cppchar_t result, c = (*pstr)[-1];
/* Only attempt to interpret a UCS for C++ and C99. */
if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
return 1;
if (CPP_WTRADITIONAL (pfile))
cpp_error (pfile, DL_WARNING,
"the meaning of '\\%c' is different in traditional C", c);
length = (c == 'u' ? 4: 8);
if ((size_t) (limit - p) < length)
result = _cpp_valid_ucn (pfile, pstr, false);
if (result)
{
cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
/* Skip to the end to avoid more diagnostics. */
p = limit;
}
else
{
for (; length; length--, p++)
if (CPP_WTRADITIONAL (pfile))
cpp_error (pfile, DL_WARNING,
"the meaning of '\\%c' is different in traditional C",
(int) c);
if (CPP_OPTION (pfile, EBCDIC))
{
c = *p;
if (ISXDIGIT (c))
code = (code << 4) + hex_digit_value (c);
else
{
cpp_error (pfile, DL_ERROR,
"non-hex digit '%c' in universal-character-name", c);
/* We shouldn't skip in case there are multibyte chars. */
break;
}
cpp_error (pfile, DL_ERROR,
"universal character with an EBCDIC target");
result = 0x3f; /* EBCDIC invalid character */
}
}
if (CPP_OPTION (pfile, EBCDIC))
{
cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
code = 0x3f; /* EBCDIC invalid character */
}
/* True extended characters are OK. */
else if (code >= 0xa0
&& !(code & 0x80000000)
&& !(code >= 0xD800 && code <= 0xDFFF))
;
/* The standard permits $, @ and ` to be specified as UCNs. We use
hex escapes so that this also works with EBCDIC hosts. */
else if (code == 0x24 || code == 0x40 || code == 0x60)
;
/* Don't give another error if one occurred above. */
else if (length == 0)
cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
*pstr = p;
*pc = code;
return 0;
return result;
}
/* Returns the value of an escape sequence, truncated to the correct
@ -1470,7 +1437,7 @@ cpp_parse_escape (pfile, pstr, limit, wide)
int unknown = 0;
const unsigned char *str = *pstr, *charconsts;
cppchar_t c, mask;
cppchar_t c, ucn, mask;
unsigned int width;
if (CPP_OPTION (pfile, EBCDIC))
@ -1519,7 +1486,11 @@ cpp_parse_escape (pfile, pstr, limit, wide)
break;
case 'u': case 'U':
unknown = maybe_read_ucs (pfile, &str, limit, &c);
ucn = maybe_read_ucn (pfile, &str);
if (ucn)
c = ucn;
else
unknown = true;
break;
case 'x':

View File

@ -1,3 +1,7 @@
2003-04-20 Neil Booth <neil@daikokuya.co.uk>
* ucs.c: Update diagnostic messages.
2003-04-19 Neil Booth <neil@daikokuya.co.uk>
* gcc.dg/cpp/truefalse.cpp: New test.

View File

@ -51,7 +51,7 @@ void foo ()
c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */
c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */
c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */
c = L'\u000x'; /* { dg-error "incomplete" "non-hex digit in UCN" } */
/* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character
constant warning even for wide characters. */
/* { dg-warning "too long|multi-character" "" { target *-*-* } 54 } */
@ -61,7 +61,7 @@ void foo ()
c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */
c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */
c = '\u0025'; /* { dg-error "not a valid" "0025 invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "not a valid" "D800 invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "not a valid" "DFFF invalid UCN" } */
}