charset.c (cpp_init_iconv): Initialize utf8_cset_desc.

* charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
	(_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc
	and char32_cset_desc.
	(converter_for_type): Handle CPP_UTF8STRING.
	(cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings.
	* directives.c (get__Pragma_string): Handle CPP_UTF8STRING.
	(parse_include): Reject raw strings.
	* include/cpplib.h (CPP_UTF8STRING): New token type.
	* internal.h (struct cpp_reader): Add utf8_cset_desc field.
	* lex.c (lex_raw_string): New function.
	(lex_string): Handle u8 string literals, call lex_raw_string
	for raw string literals.
	(_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R"
	sequences.
	* macro.c (stringify_arg): Handle CPP_UTF8STRING.

	* c-common.c (c_parse_error): Handle CPP_UTF8STRING.
	* c-lex.c (c_lex_with_flags): Likewise.  Test C_LEX_STRING_NO_JOIN
	instead of C_LEX_RAW_STRINGS.
	(lex_string): Handle CPP_UTF8STRING.
	* c-parser.c (c_parser_postfix_expression): Likewise.
	* c-pragma.h (C_LEX_RAW_STRINGS): Rename to ...
	(C_LEX_STRING_NO_JOIN): ... this.

	* parser.c (cp_lexer_print_token, cp_parser_is_string_literal,
	cp_parser_string_literal, cp_parser_primary_expression): Handle
	CPP_UTF8STRING.
	(cp_lexer_get_preprocessor_token): Use C_LEX_STRING_NO_JOIN instead
	of C_LEX_RAW_STRINGS.

	* gcc.dg/raw-string-1.c: New test.
	* gcc.dg/raw-string-2.c: New test.
	* gcc.dg/raw-string-3.c: New test.
	* gcc.dg/raw-string-4.c: New test.
	* gcc.dg/raw-string-5.c: New test.
	* gcc.dg/raw-string-6.c: New test.
	* gcc.dg/raw-string-7.c: New test.
	* gcc.dg/utf8-1.c: New test.
	* gcc.dg/utf8-2.c: New test.
	* gcc.dg/utf-badconcat2.c: New test.
	* gcc.dg/utf-dflt2.c: New test.
	* gcc.dg/cpp/include6.c: New test.
	* g++.dg/ext/raw-string-1.C: New test.
	* g++.dg/ext/raw-string-2.C: New test.
	* g++.dg/ext/raw-string-3.C: New test.
	* g++.dg/ext/raw-string-4.C: New test.
	* g++.dg/ext/raw-string-5.C: New test.
	* g++.dg/ext/raw-string-6.C: New test.
	* g++.dg/ext/raw-string-7.C: New test.
	* g++.dg/ext/utf8-1.C: New test.
	* g++.dg/ext/utf8-2.C: New test.
	* g++.dg/ext/utf-badconcat2.C: New test.
	* g++.dg/ext/utf-dflt2.C: New test.

From-SVN: r152995
This commit is contained in:
Jakub Jelinek 2009-10-19 23:41:15 +02:00 committed by Jakub Jelinek
parent 4d696ad011
commit 2c6e3f5540
38 changed files with 1244 additions and 28 deletions

View File

@ -1,3 +1,13 @@
2009-10-19 Jakub Jelinek <jakub@redhat.com>
* c-common.c (c_parse_error): Handle CPP_UTF8STRING.
* c-lex.c (c_lex_with_flags): Likewise. Test C_LEX_STRING_NO_JOIN
instead of C_LEX_RAW_STRINGS.
(lex_string): Handle CPP_UTF8STRING.
* c-parser.c (c_parser_postfix_expression): Likewise.
* c-pragma.h (C_LEX_RAW_STRINGS): Rename to ...
(C_LEX_STRING_NO_JOIN): ... this.
2009-10-19 Anatoly Sokolov <aesok@post.ru>
* config/cris/cris.c (cris_function_value, cris_libcall_value,

View File

@ -8181,7 +8181,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
else if (token_type == CPP_STRING
|| token_type == CPP_WSTRING
|| token_type == CPP_STRING16
|| token_type == CPP_STRING32)
|| token_type == CPP_STRING32
|| token_type == CPP_UTF8STRING)
message = catenate_messages (gmsgid, " before string constant");
else if (token_type == CPP_NUMBER)
message = catenate_messages (gmsgid, " before numeric constant");

View File

@ -365,6 +365,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
case CPP_WSTRING:
case CPP_STRING16:
case CPP_STRING32:
case CPP_UTF8STRING:
type = lex_string (tok, value, true, true);
break;
@ -423,7 +424,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
case CPP_WSTRING:
case CPP_STRING16:
case CPP_STRING32:
if ((lex_flags & C_LEX_RAW_STRINGS) == 0)
case CPP_UTF8STRING:
if ((lex_flags & C_LEX_STRING_NO_JOIN) == 0)
{
type = lex_string (tok, value, false,
(lex_flags & C_LEX_STRING_NO_TRANSLATE) == 0);
@ -871,12 +873,13 @@ interpret_fixed (const cpp_token *token, unsigned int flags)
return value;
}
/* Convert a series of STRING, WSTRING, STRING16 and/or STRING32 tokens
into a tree, performing string constant concatenation. TOK is the
first of these. VALP is the location to write the string into.
OBJC_STRING indicates whether an '@' token preceded the incoming token.
/* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or
UTF8STRING tokens into a tree, performing string constant
concatenation. TOK is the first of these. VALP is the location
to write the string into. OBJC_STRING indicates whether an '@' token
preceded the incoming token.
Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
CPP_STRING32, CPP_STRING16, or CPP_OBJC_STRING).
CPP_STRING32, CPP_STRING16, CPP_UTF8STRING, or CPP_OBJC_STRING).
This is unfortunately more work than it should be. If any of the
strings in the series has an L prefix, the result is a wide string
@ -921,6 +924,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
case CPP_WSTRING:
case CPP_STRING16:
case CPP_STRING32:
case CPP_UTF8STRING:
if (type != tok->type)
{
if (type == CPP_STRING)
@ -966,6 +970,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
{
default:
case CPP_STRING:
case CPP_UTF8STRING:
value = build_string (1, "");
break;
case CPP_STRING16:
@ -991,6 +996,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
{
default:
case CPP_STRING:
case CPP_UTF8STRING:
TREE_TYPE (value) = char_array_type_node;
break;
case CPP_STRING16:

View File

@ -5349,6 +5349,7 @@ c_parser_postfix_expression (c_parser *parser)
case CPP_STRING16:
case CPP_STRING32:
case CPP_WSTRING:
case CPP_UTF8STRING:
expr.value = c_parser_peek_token (parser)->value;
expr.original_code = STRING_CST;
c_parser_consume_token (parser);

View File

@ -118,9 +118,9 @@ extern enum cpp_ttype pragma_lex (tree *);
so that 0 means to translate and join strings. */
#define C_LEX_STRING_NO_TRANSLATE 1 /* Do not lex strings into
execution character set. */
#define C_LEX_RAW_STRINGS 2 /* Return raw strings -- no
concatenation, no
translation. */
#define C_LEX_STRING_NO_JOIN 2 /* Do not concatenate strings
nor translate them into execution
character set. */
/* This is not actually available to pragma parsers. It's merely a
convenient location to declare this function for c-lex, after

View File

@ -1,3 +1,10 @@
2009-10-19 Jakub Jelinek <jakub@redhat.com>
* parser.c (cp_lexer_print_token, cp_parser_is_string_literal,
cp_parser_string_literal, cp_parser_primary_expression): Handle
CPP_UTF8STRING.
(cp_lexer_get_preprocessor_token): Use C_LEX_STRING_NO_JOIN instead
of C_LEX_RAW_STRINGS.
2009-10-15 Jason Merrill <jason@redhat.com>
PR c++/38888

View File

@ -402,7 +402,7 @@ cp_lexer_get_preprocessor_token (cp_lexer *lexer, cp_token *token)
/* Get a new token from the preprocessor. */
token->type
= c_lex_with_flags (&token->u.value, &token->location, &token->flags,
lexer == NULL ? 0 : C_LEX_RAW_STRINGS);
lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN);
token->keyword = RID_MAX;
token->pragma_kind = PRAGMA_NONE;
@ -792,6 +792,7 @@ cp_lexer_print_token (FILE * stream, cp_token *token)
case CPP_STRING16:
case CPP_STRING32:
case CPP_WSTRING:
case CPP_UTF8STRING:
fprintf (stream, " \"%s\"", TREE_STRING_POINTER (token->u.value));
break;
@ -2065,7 +2066,8 @@ cp_parser_is_string_literal (cp_token* token)
return (token->type == CPP_STRING ||
token->type == CPP_STRING16 ||
token->type == CPP_STRING32 ||
token->type == CPP_WSTRING);
token->type == CPP_WSTRING ||
token->type == CPP_UTF8STRING);
}
/* Returns nonzero if TOKEN is the indicated KEYWORD. */
@ -3004,6 +3006,7 @@ cp_parser_string_literal (cp_parser *parser, bool translate, bool wide_ok)
{
default:
case CPP_STRING:
case CPP_UTF8STRING:
TREE_TYPE (value) = char_array_type_node;
break;
case CPP_STRING16:
@ -3233,6 +3236,7 @@ cp_parser_primary_expression (cp_parser *parser,
case CPP_STRING16:
case CPP_STRING32:
case CPP_WSTRING:
case CPP_UTF8STRING:
/* ??? Should wide strings be allowed when parser->translate_strings_p
is false (i.e. in attributes)? If not, we can kill the third
argument to cp_parser_string_literal. */

View File

@ -1,5 +1,29 @@
2009-10-19 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/raw-string-1.c: New test.
* gcc.dg/raw-string-2.c: New test.
* gcc.dg/raw-string-3.c: New test.
* gcc.dg/raw-string-4.c: New test.
* gcc.dg/raw-string-5.c: New test.
* gcc.dg/raw-string-6.c: New test.
* gcc.dg/raw-string-7.c: New test.
* gcc.dg/utf8-1.c: New test.
* gcc.dg/utf8-2.c: New test.
* gcc.dg/utf-badconcat2.c: New test.
* gcc.dg/utf-dflt2.c: New test.
* gcc.dg/cpp/include6.c: New test.
* g++.dg/ext/raw-string-1.C: New test.
* g++.dg/ext/raw-string-2.C: New test.
* g++.dg/ext/raw-string-3.C: New test.
* g++.dg/ext/raw-string-4.C: New test.
* g++.dg/ext/raw-string-5.C: New test.
* g++.dg/ext/raw-string-6.C: New test.
* g++.dg/ext/raw-string-7.C: New test.
* g++.dg/ext/utf8-1.C: New test.
* g++.dg/ext/utf8-2.C: New test.
* g++.dg/ext/utf-badconcat2.C: New test.
* g++.dg/ext/utf-dflt2.C: New test.
* gcc.dg/cleanup-13.c: New test.
2009-10-19 Janus Weil <janus@gcc.gnu.org>

View File

@ -0,0 +1,96 @@
// { dg-do run }
// { dg-options "-std=c++0x" }
const char s0[] = R"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char s1[] = "a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char s2[] = R"*|*[a\
b
c]"
c]*|"
c]*|*";
const char s3[] = "ab\nc]\"\nc]*|\"\nc";
const char t0[] = u8R"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char t1[] = u8"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char t2[] = u8R"*|*[a\
b
c]"
c]*|"
c]*|*";
const char t3[] = u8"ab\nc]\"\nc]*|\"\nc";
const char16_t u0[] = uR"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char16_t u1[] = u"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char16_t u2[] = uR"*|*[a\
b
c]"
c]*|"
c]*|*";
const char16_t u3[] = u"ab\nc]\"\nc]*|\"\nc";
const char32_t U0[] = UR"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char32_t U1[] = U"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char32_t U2[] = UR"*|*[a\
b
c]"
c]*|"
c]*|*";
const char32_t U3[] = U"ab\nc]\"\nc]*|\"\nc";
const wchar_t L0[] = LR"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const wchar_t L1[] = L"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const wchar_t L2[] = LR"*|*[a\
b
c]"
c]*|"
c]*|*";
const wchar_t L3[] = L"ab\nc]\"\nc]*|\"\nc";
// Check that every raw string literal declared above has the same size and
// the same bytes as its counterpart written as an ordinary literal.
// Aborts on the first mismatch; returns 0 when all comparisons succeed.
int
main (void)
{
// Unprefixed (char) literals.
if (sizeof (s0) != sizeof (s1)
|| __builtin_memcmp (s0, s1, sizeof (s0)) != 0)
__builtin_abort ();
if (sizeof (s2) != sizeof (s3)
|| __builtin_memcmp (s2, s3, sizeof (s2)) != 0)
__builtin_abort ();
// u8-prefixed literals.
if (sizeof (t0) != sizeof (t1)
|| __builtin_memcmp (t0, t1, sizeof (t0)) != 0)
__builtin_abort ();
if (sizeof (t2) != sizeof (t3)
|| __builtin_memcmp (t2, t3, sizeof (t2)) != 0)
__builtin_abort ();
// u-prefixed (char16_t) literals.
if (sizeof (u0) != sizeof (u1)
|| __builtin_memcmp (u0, u1, sizeof (u0)) != 0)
__builtin_abort ();
if (sizeof (u2) != sizeof (u3)
|| __builtin_memcmp (u2, u3, sizeof (u2)) != 0)
__builtin_abort ();
// U-prefixed (char32_t) literals.
if (sizeof (U0) != sizeof (U1)
|| __builtin_memcmp (U0, U1, sizeof (U0)) != 0)
__builtin_abort ();
if (sizeof (U2) != sizeof (U3)
|| __builtin_memcmp (U2, U3, sizeof (U2)) != 0)
__builtin_abort ();
// L-prefixed (wchar_t) literals.
if (sizeof (L0) != sizeof (L1)
|| __builtin_memcmp (L0, L1, sizeof (L0)) != 0)
__builtin_abort ();
if (sizeof (L2) != sizeof (L3)
|| __builtin_memcmp (L2, L3, sizeof (L2)) != 0)
__builtin_abort ();
// A raw string with an empty body must consist of the terminating NUL only.
if (sizeof (R"*[]*") != 1
|| __builtin_memcmp (R"*[]*", "", 1) != 0)
__builtin_abort ();
return 0;
}

View File

@ -0,0 +1,104 @@
// { dg-do run }
// { dg-options "-std=c++0x" }
#define R
#define u
#define uR
#define U
#define UR
#define u8
#define u8R
#define L
#define LR
const char s00[] = R"[a]" "[b]";
const char s01[] = "[a]" R"*[b]*";
const char s02[] = R"[a]" R"[b]";
const char s03[] = R"-[a]-" u8"[b]";
const char s04[] = "[a]" u8R"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const char s05[] = R"[a]" u8R"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const char s06[] = u8R";([a];(" "[b]";
const char s07[] = u8"[a]" R"[b]";
const char s08[] = u8R"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const char s09[] = u8R"/^&|~!=,"'\[a]/^&|~!=,"'\" u8"[b]";
const char s10[] = u8"[a]" u8R"0123456789abcdef[b]0123456789abcdef";
const char s11[] = u8R"ghijklmnopqrstuv[a]ghijklmnopqrstuv" u8R"w[b]w";
const char16_t u03[] = R"-[a]-" u"[b]";
const char16_t u04[] = "[a]" uR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const char16_t u05[] = R"[a]" uR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const char16_t u06[] = uR";([a];(" "[b]";
const char16_t u07[] = u"[a]" R"[b]";
const char16_t u08[] = uR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const char16_t u09[] = uR"/^&|~!=,"'\[a]/^&|~!=,"'\" u"[b]";
const char16_t u10[] = u"[a]" uR"0123456789abcdef[b]0123456789abcdef";
const char16_t u11[] = uR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" uR"w[b]w";
const char32_t U03[] = R"-[a]-" U"[b]";
const char32_t U04[] = "[a]" UR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const char32_t U05[] = R"[a]" UR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const char32_t U06[] = UR";([a];(" "[b]";
const char32_t U07[] = U"[a]" R"[b]";
const char32_t U08[] = UR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const char32_t U09[] = UR"/^&|~!=,"'\[a]/^&|~!=,"'\" U"[b]";
const char32_t U10[] = U"[a]" UR"0123456789abcdef[b]0123456789abcdef";
const char32_t U11[] = UR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" UR"w[b]w";
const wchar_t L03[] = R"-[a]-" L"[b]";
const wchar_t L04[] = "[a]" LR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const wchar_t L05[] = R"[a]" LR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const wchar_t L06[] = LR";([a];(" "[b]";
const wchar_t L07[] = L"[a]" R"[b]";
const wchar_t L08[] = LR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const wchar_t L09[] = LR"/^&|~!=,"'\[a]/^&|~!=,"'\" L"[b]";
const wchar_t L10[] = L"[a]" LR"0123456789abcdef[b]0123456789abcdef";
const wchar_t L11[] = LR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" LR"w[b]w";
// Check the result of concatenating raw and ordinary string literals for
// each encoding prefix: every array declared above must match the expected
// literal in both size and contents.  Aborts on the first mismatch.
int
main (void)
{
// TEST aborts unless STR and VAL have the same size and identical bytes.
#define TEST(str, val) \
if (sizeof (str) != sizeof (val) \
|| __builtin_memcmp (str, val, sizeof (str)) != 0) \
__builtin_abort ()
// char arrays (unprefixed and u8 combinations).
TEST (s00, "a[b]");
TEST (s01, "[a]b");
TEST (s02, "ab");
TEST (s03, "a[b]");
TEST (s04, "[a]b");
TEST (s05, "ab");
TEST (s06, "a[b]");
TEST (s07, "[a]b");
TEST (s08, "ab");
TEST (s09, "a[b]");
TEST (s10, "[a]b");
TEST (s11, "ab");
// char16_t arrays.
TEST (u03, u"a[b]");
TEST (u04, u"[a]b");
TEST (u05, u"ab");
TEST (u06, u"a[b]");
TEST (u07, u"[a]b");
TEST (u08, u"ab");
TEST (u09, u"a[b]");
TEST (u10, u"[a]b");
TEST (u11, u"ab");
// char32_t arrays.
TEST (U03, U"a[b]");
TEST (U04, U"[a]b");
TEST (U05, U"ab");
TEST (U06, U"a[b]");
TEST (U07, U"[a]b");
TEST (U08, U"ab");
TEST (U09, U"a[b]");
TEST (U10, U"[a]b");
TEST (U11, U"ab");
// wchar_t arrays.
TEST (L03, L"a[b]");
TEST (L04, L"[a]b");
TEST (L05, L"ab");
TEST (L06, L"a[b]");
TEST (L07, L"[a]b");
TEST (L08, L"ab");
TEST (L09, L"a[b]");
TEST (L10, L"[a]b");
TEST (L11, L"ab");
return 0;
}

View File

@ -0,0 +1,58 @@
// In C++98, the {,u,u8,U,L}R prefix should be parsed as a separate
// token.
// { dg-do compile }
// { dg-options "-std=c++98" }
const void *s0 = R"[a]"; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 6 }
const void *s1 = uR"[a]"; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 8 }
const void *s2 = UR"[a]"; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 10 }
const void *s3 = u8R"[a]"; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 12 }
const void *s4 = LR"[a]"; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 14 }
const int i0 = R'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 17 }
const int i1 = uR'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 19 }
const int i2 = UR'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 21 }
const int i3 = u8R'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 23 }
const int i4 = LR'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 25 }
#define R "a"
#define uR "b"
#define UR "c"
#define u8R "d"
#define LR "e"
const void *s5 = R"[a]";
const void *s6 = uR"[a]";
const void *s7 = UR"[a]";
const void *s8 = u8R"[a]";
const void *s9 = LR"[a]";
#undef R
#undef uR
#undef UR
#undef u8R
#undef LR
#define R 1 +
#define uR 2 +
#define UR 3 +
#define u8R 4 +
#define LR 5 +
const int i5 = R'a';
const int i6 = uR'a';
const int i7 = UR'a';
const int i8 = u8R'a';
const int i9 = LR'a';
int main () {}

View File

@ -0,0 +1,28 @@
// R is not applicable for character literals.
// { dg-do compile }
// { dg-options "-std=c++0x" }
const int i0 = R'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 5 }
const int i1 = uR'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 7 }
const int i2 = UR'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 9 }
const int i3 = u8R'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 11 }
const int i4 = LR'a'; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 13 }
#define R 1 +
#define uR 2 +
#define UR 3 +
#define u8R 4 +
#define LR 5 +
const int i5 = R'a';
const int i6 = uR'a';
const int i7 = UR'a';
const int i8 = u8R'a';
const int i9 = LR'a';
int main () {}

View File

@ -0,0 +1,23 @@
// { dg-do compile }
// { dg-options "-std=c++0x" }
const void *s0 = R"0123456789abcdefg[]0123456789abcdefg";
// { dg-error "raw string delimiter longer" "" { target *-*-* } 4 }
// { dg-error "stray" "" { target *-*-* } 4 }
const void *s1 = R" [] ";
// { dg-error "invalid character" "" { target *-*-* } 7 }
// { dg-error "stray" "" { target *-*-* } 7 }
const void *s2 = R" [] ";
// { dg-error "invalid character" "" { target *-*-* } 10 }
// { dg-error "stray" "" { target *-*-* } 10 }
const void *s3 = R"][]]";
// { dg-error "invalid character" "" { target *-*-* } 13 }
// { dg-error "stray" "" { target *-*-* } 13 }
const void *s4 = R"@[]@";
// { dg-error "invalid character" "" { target *-*-* } 16 }
// { dg-error "stray" "" { target *-*-* } 16 }
const void *s5 = R"$[]$";
// { dg-error "invalid character" "" { target *-*-* } 19 }
// { dg-error "stray" "" { target *-*-* } 19 }
int main () {}

View File

@ -0,0 +1,5 @@
// { dg-do compile }
// { dg-options "-std=c++0x" }
const void *s0 = R"ouch[]ouCh"; // { dg-error "at end of input" }
// { dg-error "unterminated raw string" "" { target *-*-* } 4 }

View File

@ -0,0 +1,23 @@
// The trailing whitespace after \ and before newline extension
// breaks full compliance for raw strings.
// { dg-do run { xfail *-*-* } }
// { dg-options "-std=c++0x" }
// Note, there is a single space after \ on the following line.
const char *s0 = R"[\
]";
// { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 7 }
// Note, there is a single tab after \ on the following line.
const char *s1 = R"[\
]";
// { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 12 }
// Expect s0 and s1 to contain the backslash, the whitespace character that
// follows it (a space for s0, a tab for s1) and the newline verbatim.
// Aborts if either string differs.
int
main (void)
{
if (__builtin_strcmp (s0, "\\ \n") != 0
|| __builtin_strcmp (s1, "\\\t\n") != 0)
__builtin_abort ();
return 0;
}

View File

@ -0,0 +1,15 @@
// Test unsupported concatenation of UTF-8 string literals.
// { dg-do compile }
// { dg-options "-std=c++0x" }
const void *s0 = u8"a" "b";
const void *s1 = "a" u8"b";
const void *s2 = u8"a" u8"b";
const void *s3 = u8"a" u"b"; // { dg-error "non-standard concatenation" }
const void *s4 = u"a" u8"b"; // { dg-error "non-standard concatenation" }
const void *s5 = u8"a" U"b"; // { dg-error "non-standard concatenation" }
const void *s6 = U"a" u8"b"; // { dg-error "non-standard concatenation" }
const void *s7 = u8"a" L"b"; // { dg-error "non-standard concatenation" }
const void *s8 = L"a" u8"b"; // { dg-error "non-standard concatenation" }
int main () {}

View File

@ -0,0 +1,12 @@
// In C++98, the u8 prefix should be parsed as a separate token.
// { dg-do compile }
// { dg-options "-std=c++98" }
const void *s0 = u8"a"; // { dg-error "was not declared" }
// { dg-error "expected ',' or ';'" "" { target *-*-* } 5 }
#define u8 "a"
const void *s1 = u8"a";
int main () {}

View File

@ -0,0 +1,45 @@
// { dg-do run }
// { dg-require-iconv "ISO-8859-2" }
// { dg-options "-std=c++0x -fexec-charset=ISO-8859-2" }
const char *str1 = "h\u00e1\U0000010Dky ";
const char *str2 = "\u010d\u00E1rky\n";
const char *str3 = u8"h\u00e1\U0000010Dky ";
const char *str4 = u8"\u010d\u00E1rky\n";
const char *str5 = "h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
const char *str6 = u8"h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
const char *str7 = "h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
#define u8
const char *str8 = u8"h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
const char latin2_1[] = "\x68\xe1\xe8\x6b\x79\x20";
const char latin2_2[] = "\xe8\xe1\x72\x6b\x79\n";
const char utf8_1[] = "\x68\xc3\xa1\xc4\x8d\x6b\x79\x20";
const char utf8_2[] = "\xc4\x8d\xc3\xa1\x72\x6b\x79\n";
// Check the byte encoding of ordinary and u8 string literals when the
// execution character set is ISO-8859-2 (see dg-options): ordinary literals
// must compare equal to the Latin-2 reference bytes, while u8 literals and
// any concatenation involving a u8 literal must compare equal to the UTF-8
// reference bytes.  Aborts on the first mismatch; returns 0 on success.
int
main (void)
{
  if (__builtin_strcmp (str1, latin2_1) != 0
      || __builtin_strcmp (str2, latin2_2) != 0
      || __builtin_strcmp (str3, utf8_1) != 0
      || __builtin_strcmp (str4, utf8_2) != 0
      || __builtin_strncmp (str5, latin2_1, sizeof (latin2_1) - 1) != 0
      || __builtin_strcmp (str5 + sizeof (latin2_1) - 1, latin2_2) != 0
      || __builtin_strncmp (str6, utf8_1, sizeof (utf8_1) - 1) != 0
      || __builtin_strcmp (str6 + sizeof (utf8_1) - 1, utf8_2) != 0
      || __builtin_strncmp (str7, utf8_1, sizeof (utf8_1) - 1) != 0
      || __builtin_strcmp (str7 + sizeof (utf8_1) - 1, utf8_2) != 0
      || __builtin_strncmp (str8, utf8_1, sizeof (utf8_1) - 1) != 0
      || __builtin_strcmp (str8 + sizeof (utf8_1) - 1, utf8_2) != 0)
    __builtin_abort ();
  // The element size is 1 for every combination.  The array size reveals
  // the encoding: the test expects "a" plus U+010D to occupy 3 bytes as an
  // ordinary literal and 4 bytes once a u8 literal is involved.  All four
  // prefix combinations are covered, including u8 followed by u8.
  if (sizeof ("a" u8"b"[0]) != 1
      || sizeof (u8"a" "b"[0]) != 1
      || sizeof (u8"a" u8"b"[0]) != 1
      || sizeof ("a" "\u010d") != 3
      || sizeof ("a" u8"\u010d") != 4
      || sizeof (u8"a" "\u010d") != 4
      || sizeof (u8"a" u8"\u010d") != 4)
    __builtin_abort ();
  return 0;
}

View File

@ -0,0 +1,21 @@
// { dg-do compile }
// { dg-options "-std=c++0x" }
const char s0[] = u8"ab";
const char16_t s1[] = u8"ab"; // { dg-error "from non-wide" }
const char32_t s2[] = u8"ab"; // { dg-error "from non-wide" }
const wchar_t s3[] = u8"ab"; // { dg-error "from non-wide" }
const char t0[0] = u8"ab"; // { dg-error "chars is too long" }
const char t1[1] = u8"ab"; // { dg-error "chars is too long" }
const char t2[2] = u8"ab"; // { dg-error "chars is too long" }
const char t3[3] = u8"ab";
const char t4[4] = u8"ab";
const char u0[0] = u8"\u2160."; // { dg-error "chars is too long" }
const char u1[1] = u8"\u2160."; // { dg-error "chars is too long" }
const char u2[2] = u8"\u2160."; // { dg-error "chars is too long" }
const char u3[3] = u8"\u2160."; // { dg-error "chars is too long" }
const char u4[4] = u8"\u2160."; // { dg-error "chars is too long" }
const char u5[5] = u8"\u2160.";
const char u6[6] = u8"\u2160.";

View File

@ -0,0 +1,14 @@
/* { dg-do preprocess } */
/* { dg-options "-std=gnu99" } */
#include <stddef.h>
#include "stddef.h"
#include L"stddef.h" /* { dg-error "include expects" } */
#include u"stddef.h" /* { dg-error "include expects" } */
#include U"stddef.h" /* { dg-error "include expects" } */
#include u8"stddef.h" /* { dg-error "include expects" } */
#include R"[stddef.h]" /* { dg-error "include expects" } */
#include LR"[stddef.h]" /* { dg-error "include expects" } */
#include uR"[stddef.h]" /* { dg-error "include expects" } */
#include UR"[stddef.h]" /* { dg-error "include expects" } */
#include u8R"[stddef.h]" /* { dg-error "include expects" } */

View File

@ -0,0 +1,101 @@
/* { dg-do run } */
/* { dg-options "-std=gnu99" } */
#include <wchar.h>
typedef __CHAR16_TYPE__ char16_t;
typedef __CHAR32_TYPE__ char32_t;
const char s0[] = R"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char s1[] = "a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char s2[] = R"*|*[a\
b
c]"
c]*|"
c]*|*";
const char s3[] = "ab\nc]\"\nc]*|\"\nc";
const char t0[] = u8R"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char t1[] = u8"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char t2[] = u8R"*|*[a\
b
c]"
c]*|"
c]*|*";
const char t3[] = u8"ab\nc]\"\nc]*|\"\nc";
const char16_t u0[] = uR"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char16_t u1[] = u"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char16_t u2[] = uR"*|*[a\
b
c]"
c]*|"
c]*|*";
const char16_t u3[] = u"ab\nc]\"\nc]*|\"\nc";
const char32_t U0[] = UR"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const char32_t U1[] = U"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char32_t U2[] = UR"*|*[a\
b
c]"
c]*|"
c]*|*";
const char32_t U3[] = U"ab\nc]\"\nc]*|\"\nc";
const wchar_t L0[] = LR"[a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c]";
const wchar_t L1[] = L"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const wchar_t L2[] = LR"*|*[a\
b
c]"
c]*|"
c]*|*";
const wchar_t L3[] = L"ab\nc]\"\nc]*|\"\nc";
/* Check that every raw string literal declared above has the same size and
   the same bytes as its counterpart written as an ordinary literal.
   Aborts on the first mismatch; returns 0 when all comparisons succeed.  */
int
main (void)
{
/* Unprefixed (char) literals.  */
if (sizeof (s0) != sizeof (s1)
|| __builtin_memcmp (s0, s1, sizeof (s0)) != 0)
__builtin_abort ();
if (sizeof (s2) != sizeof (s3)
|| __builtin_memcmp (s2, s3, sizeof (s2)) != 0)
__builtin_abort ();
/* u8-prefixed literals.  */
if (sizeof (t0) != sizeof (t1)
|| __builtin_memcmp (t0, t1, sizeof (t0)) != 0)
__builtin_abort ();
if (sizeof (t2) != sizeof (t3)
|| __builtin_memcmp (t2, t3, sizeof (t2)) != 0)
__builtin_abort ();
/* u-prefixed (char16_t) literals.  */
if (sizeof (u0) != sizeof (u1)
|| __builtin_memcmp (u0, u1, sizeof (u0)) != 0)
__builtin_abort ();
if (sizeof (u2) != sizeof (u3)
|| __builtin_memcmp (u2, u3, sizeof (u2)) != 0)
__builtin_abort ();
/* U-prefixed (char32_t) literals.  */
if (sizeof (U0) != sizeof (U1)
|| __builtin_memcmp (U0, U1, sizeof (U0)) != 0)
__builtin_abort ();
if (sizeof (U2) != sizeof (U3)
|| __builtin_memcmp (U2, U3, sizeof (U2)) != 0)
__builtin_abort ();
/* L-prefixed (wchar_t) literals.  */
if (sizeof (L0) != sizeof (L1)
|| __builtin_memcmp (L0, L1, sizeof (L0)) != 0)
__builtin_abort ();
if (sizeof (L2) != sizeof (L3)
|| __builtin_memcmp (L2, L3, sizeof (L2)) != 0)
__builtin_abort ();
/* A raw string with an empty body must consist of the terminating NUL
   only.  */
if (sizeof (R"*[]*") != 1
|| __builtin_memcmp (R"*[]*", "", 1) != 0)
__builtin_abort ();
return 0;
}

View File

@ -0,0 +1,109 @@
/* { dg-do run } */
/* { dg-options "-std=gnu99" } */
#include <wchar.h>
typedef __CHAR16_TYPE__ char16_t;
typedef __CHAR32_TYPE__ char32_t;
#define R
#define u
#define uR
#define U
#define UR
#define u8
#define u8R
#define L
#define LR
const char s00[] = R"[a]" "[b]";
const char s01[] = "[a]" R"*[b]*";
const char s02[] = R"[a]" R"[b]";
const char s03[] = R"-[a]-" u8"[b]";
const char s04[] = "[a]" u8R"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const char s05[] = R"[a]" u8R"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const char s06[] = u8R";([a];(" "[b]";
const char s07[] = u8"[a]" R"[b]";
const char s08[] = u8R"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const char s09[] = u8R"/^&|~!=,"'\[a]/^&|~!=,"'\" u8"[b]";
const char s10[] = u8"[a]" u8R"0123456789abcdef[b]0123456789abcdef";
const char s11[] = u8R"ghijklmnopqrstuv[a]ghijklmnopqrstuv" u8R"w[b]w";
const char16_t u03[] = R"-[a]-" u"[b]";
const char16_t u04[] = "[a]" uR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const char16_t u05[] = R"[a]" uR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const char16_t u06[] = uR";([a];(" "[b]";
const char16_t u07[] = u"[a]" R"[b]";
const char16_t u08[] = uR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const char16_t u09[] = uR"/^&|~!=,"'\[a]/^&|~!=,"'\" u"[b]";
const char16_t u10[] = u"[a]" uR"0123456789abcdef[b]0123456789abcdef";
const char16_t u11[] = uR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" uR"w[b]w";
const char32_t U03[] = R"-[a]-" U"[b]";
const char32_t U04[] = "[a]" UR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const char32_t U05[] = R"[a]" UR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const char32_t U06[] = UR";([a];(" "[b]";
const char32_t U07[] = U"[a]" R"[b]";
const char32_t U08[] = UR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const char32_t U09[] = UR"/^&|~!=,"'\[a]/^&|~!=,"'\" U"[b]";
const char32_t U10[] = U"[a]" UR"0123456789abcdef[b]0123456789abcdef";
const char32_t U11[] = UR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" UR"w[b]w";
const wchar_t L03[] = R"-[a]-" L"[b]";
const wchar_t L04[] = "[a]" LR"MNOPQRSTUVWXYZ[b]MNOPQRSTUVWXYZ";
const wchar_t L05[] = R"[a]" LR"wxyzABCDEFGHIJKL[b]wxyzABCDEFGHIJKL";
const wchar_t L06[] = LR";([a];(" "[b]";
const wchar_t L07[] = L"[a]" R"[b]";
const wchar_t L08[] = LR"[a]" R"_{}#()<>%:;.?*+-[b]_{}#()<>%:;.?*+-";
const wchar_t L09[] = LR"/^&|~!=,"'\[a]/^&|~!=,"'\" L"[b]";
const wchar_t L10[] = L"[a]" LR"0123456789abcdef[b]0123456789abcdef";
const wchar_t L11[] = LR"ghijklmnopqrstuv[a]ghijklmnopqrstuv" LR"w[b]w";
/* Check the result of concatenating raw and ordinary string literals for
   each encoding prefix: every array declared above must match the expected
   literal in both size and contents.  Aborts on the first mismatch.  */
int
main (void)
{
/* TEST aborts unless STR and VAL have the same size and identical bytes.  */
#define TEST(str, val) \
if (sizeof (str) != sizeof (val) \
|| __builtin_memcmp (str, val, sizeof (str)) != 0) \
__builtin_abort ()
/* char arrays (unprefixed and u8 combinations).  */
TEST (s00, "a[b]");
TEST (s01, "[a]b");
TEST (s02, "ab");
TEST (s03, "a[b]");
TEST (s04, "[a]b");
TEST (s05, "ab");
TEST (s06, "a[b]");
TEST (s07, "[a]b");
TEST (s08, "ab");
TEST (s09, "a[b]");
TEST (s10, "[a]b");
TEST (s11, "ab");
/* char16_t arrays.  */
TEST (u03, u"a[b]");
TEST (u04, u"[a]b");
TEST (u05, u"ab");
TEST (u06, u"a[b]");
TEST (u07, u"[a]b");
TEST (u08, u"ab");
TEST (u09, u"a[b]");
TEST (u10, u"[a]b");
TEST (u11, u"ab");
/* char32_t arrays.  */
TEST (U03, U"a[b]");
TEST (U04, U"[a]b");
TEST (U05, U"ab");
TEST (U06, U"a[b]");
TEST (U07, U"[a]b");
TEST (U08, U"ab");
TEST (U09, U"a[b]");
TEST (U10, U"[a]b");
TEST (U11, U"ab");
/* wchar_t arrays.  */
TEST (L03, L"a[b]");
TEST (L04, L"[a]b");
TEST (L05, L"ab");
TEST (L06, L"a[b]");
TEST (L07, L"[a]b");
TEST (L08, L"ab");
TEST (L09, L"a[b]");
TEST (L10, L"[a]b");
TEST (L11, L"ab");
return 0;
}

View File

@ -0,0 +1,53 @@
/* Outside gnu99 mode, the {,u,u8,U,L}R prefix should be parsed as a
separate token. */
/* { dg-do compile } */
/* { dg-options "" } */
const void *s0 = R"[a]"; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 6 } */
const void *s1 = uR"[a]"; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 8 } */
const void *s2 = UR"[a]"; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 10 } */
const void *s3 = u8R"[a]"; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 12 } */
const void *s4 = LR"[a]"; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 14 } */
const int i0 = R'a'; /* { dg-error "expected ',' or ';'" } */
const int i1 = uR'a'; /* { dg-error "expected ',' or ';'" } */
const int i2 = UR'a'; /* { dg-error "expected ',' or ';'" } */
const int i3 = u8R'a'; /* { dg-error "expected ',' or ';'" } */
const int i4 = LR'a'; /* { dg-error "expected ',' or ';'" } */
#define R "a"
#define uR "b"
#define UR "c"
#define u8R "d"
#define LR "e"
const void *s5 = R"[a]";
const void *s6 = uR"[a]";
const void *s7 = UR"[a]";
const void *s8 = u8R"[a]";
const void *s9 = LR"[a]";
#undef R
#undef uR
#undef UR
#undef u8R
#undef LR
#define R 1 +
#define uR 2 +
#define UR 3 +
#define u8R 4 +
#define LR 5 +
const int i5 = R'a';
const int i6 = uR'a';
const int i7 = UR'a';
const int i8 = u8R'a';
const int i9 = LR'a';
int main () {}

View File

@ -0,0 +1,28 @@
/* R is not applicable for character literals. */
/* { dg-do compile } */
/* { dg-options "-std=gnu99" } */
const int i0 = R'a'; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 5 } */
const int i1 = uR'a'; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 7 } */
const int i2 = UR'a'; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 9 } */
const int i3 = u8R'a'; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 11 } */
const int i4 = LR'a'; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 13 } */
#define R 1 +
#define uR 2 +
#define UR 3 +
#define u8R 4 +
#define LR 5 +
const int i5 = R'a';
const int i6 = uR'a';
const int i7 = UR'a';
const int i8 = u8R'a';
const int i9 = LR'a';
/* Nothing to run: this is a compile-only test (dg-do compile). */
int main () {}

View File

@ -0,0 +1,23 @@
/* { dg-do compile } */
/* { dg-options "-std=gnu99" } */
const void *s0 = R"0123456789abcdefg[]0123456789abcdefg";
/* { dg-error "raw string delimiter longer" "" { target *-*-* } 4 } */
/* { dg-error "stray" "" { target *-*-* } 4 } */
const void *s1 = R" [] ";
/* { dg-error "invalid character" "" { target *-*-* } 7 } */
/* { dg-error "stray" "" { target *-*-* } 7 } */
const void *s2 = R" [] ";
/* { dg-error "invalid character" "" { target *-*-* } 10 } */
/* { dg-error "stray" "" { target *-*-* } 10 } */
const void *s3 = R"][]]";
/* { dg-error "invalid character" "" { target *-*-* } 13 } */
/* { dg-error "stray" "" { target *-*-* } 13 } */
const void *s4 = R"@[]@";
/* { dg-error "invalid character" "" { target *-*-* } 16 } */
/* { dg-error "stray" "" { target *-*-* } 16 } */
const void *s5 = R"$[]$";
/* { dg-error "invalid character" "" { target *-*-* } 19 } */
/* { dg-error "stray" "" { target *-*-* } 19 } */
/* Nothing to run: this is a compile-only test (dg-do compile). */
int main () {}

View File

@ -0,0 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-std=gnu99" } */
const void *s0 = R"ouch[]ouCh"; /* { dg-error "expected expression at end of input" } */
/* { dg-error "unterminated raw string" "" { target *-*-* } 4 } */

View File

@ -0,0 +1,23 @@
/* The trailing whitespace after \ and before newline extension
breaks full compliance for raw strings. */
/* { dg-do run { xfail *-*-* } } */
/* { dg-options "-std=gnu99" } */
/* Note, there is a single space after \ on the following line. */
const void *s0 = R"[\
]";
/* { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 7 } */
/* Note, there is a single tab after \ on the following line. */
const void *s1 = R"[\
]";
/* { dg-bogus "backslash and newline separated by space" "" { xfail *-*-* } 12 } */
/* Checks the contents of the two raw strings defined above: each must
   consist of a backslash, the whitespace character that followed it in
   the source (space for s0, tab for s1) and a newline.  Aborts on the
   first mismatch, returns 0 otherwise.  */
int
main (void)
{
  /* Backslash, space, newline.  */
  if (__builtin_strcmp (s0, "\\ \n") != 0)
    __builtin_abort ();

  /* Backslash, tab, newline.  */
  if (__builtin_strcmp (s1, "\\\t\n") != 0)
    __builtin_abort ();

  return 0;
}

View File

@ -0,0 +1,15 @@
/* Test unsupported concatenation of UTF-8 string literals. */
/* { dg-do compile } */
/* { dg-options "-std=gnu99" } */
void *s0 = u8"a" "b";
void *s1 = "a" u8"b";
void *s2 = u8"a" u8"b";
void *s3 = u8"a" u"b"; /* { dg-error "non-standard concatenation" } */
void *s4 = u"a" u8"b"; /* { dg-error "non-standard concatenation" } */
void *s5 = u8"a" U"b"; /* { dg-error "non-standard concatenation" } */
void *s6 = U"a" u8"b"; /* { dg-error "non-standard concatenation" } */
void *s7 = u8"a" L"b"; /* { dg-error "non-standard concatenation" } */
void *s8 = L"a" u8"b"; /* { dg-error "non-standard concatenation" } */
/* Nothing to run: this is a compile-only test (dg-do compile). */
int main () {}

View File

@ -0,0 +1,12 @@
/* If not gnu99, the u8 prefix should be parsed as separate tokens. */
/* { dg-do compile } */
/* { dg-options "" } */
const void *s0 = u8"a"; /* { dg-error "undeclared" } */
/* { dg-error "expected ',' or ';'" "" { target *-*-* } 5 } */
#define u8 "a"
const void *s1 = u8"a";
/* Nothing to run: this is a compile-only test (dg-do compile). */
int main () {}

View File

@ -0,0 +1,45 @@
/* { dg-do run } */
/* { dg-require-iconv "ISO-8859-2" } */
/* { dg-options "-std=gnu99 -fexec-charset=ISO-8859-2" } */
const char *str1 = "h\u00e1\U0000010Dky ";
const char *str2 = "\u010d\u00E1rky\n";
const char *str3 = u8"h\u00e1\U0000010Dky ";
const char *str4 = u8"\u010d\u00E1rky\n";
const char *str5 = "h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
const char *str6 = u8"h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
const char *str7 = "h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
#define u8
const char *str8 = u8"h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
const char latin2_1[] = "\x68\xe1\xe8\x6b\x79\x20";
const char latin2_2[] = "\xe8\xe1\x72\x6b\x79\n";
const char utf8_1[] = "\x68\xc3\xa1\xc4\x8d\x6b\x79\x20";
const char utf8_2[] = "\xc4\x8d\xc3\xa1\x72\x6b\x79\n";
/* Verifies how narrow and u8 string literals are encoded.  Plain
   literals (str1, str2, str5) are compared with the ISO-8859-2 byte
   sequences in latin2_*; u8 literals and every concatenation that
   involves one (str3, str4, str6, str7, str8) are compared with the
   UTF-8 byte sequences in utf8_*.  The element size and the total size
   of several concatenations are then checked.  Aborts on the first
   mismatch, returns 0 otherwise.  */
int
main (void)
{
  /* Concatenated strings are checked in two halves: the first part with
     strncmp over the length of the expected prefix, the rest with
     strcmp starting right after that prefix.  */
  if (__builtin_strcmp (str1, latin2_1) != 0
      || __builtin_strcmp (str2, latin2_2) != 0
      || __builtin_strcmp (str3, utf8_1) != 0
      || __builtin_strcmp (str4, utf8_2) != 0
      || __builtin_strncmp (str5, latin2_1, sizeof (latin2_1) - 1) != 0
      || __builtin_strcmp (str5 + sizeof (latin2_1) - 1, latin2_2) != 0
      || __builtin_strncmp (str6, utf8_1, sizeof (utf8_1) - 1) != 0
      || __builtin_strcmp (str6 + sizeof (utf8_1) - 1, utf8_2) != 0
      || __builtin_strncmp (str7, utf8_1, sizeof (utf8_1) - 1) != 0
      || __builtin_strcmp (str7 + sizeof (utf8_1) - 1, utf8_2) != 0
      || __builtin_strncmp (str8, utf8_1, sizeof (utf8_1) - 1) != 0
      || __builtin_strcmp (str8 + sizeof (utf8_1) - 1, utf8_2) != 0)
    __builtin_abort ();

  /* Elements are always one byte wide.  U+010D is a single byte in
     ISO-8859-2 but two bytes in UTF-8, so with the leading 'a' and the
     terminating NUL the sizes are 3 and 4 respectively.  Every
     combination of narrow and u8 operands is covered once, including
     u8 on both sides.  */
  if (sizeof ("a" u8"b"[0]) != 1
      || sizeof (u8"a" "b"[0]) != 1
      || sizeof (u8"a" u8"b"[0]) != 1
      || sizeof ("a" "\u010d") != 3
      || sizeof ("a" u8"\u010d") != 4
      || sizeof (u8"a" "\u010d") != 4
      || sizeof (u8"a" u8"\u010d") != 4)
    __builtin_abort ();

  return 0;
}

View File

@ -0,0 +1,26 @@
/* { dg-do compile } */
/* { dg-options "-std=gnu99" } */
#include <wchar.h>
typedef __CHAR16_TYPE__ char16_t;
typedef __CHAR32_TYPE__ char32_t;
const char s0[] = u8"ab";
const char16_t s1[] = u8"ab"; /* { dg-error "from non-wide" } */
const char32_t s2[] = u8"ab"; /* { dg-error "from non-wide" } */
const wchar_t s3[] = u8"ab"; /* { dg-error "from non-wide" } */
const char t0[0] = u8"ab"; /* { dg-warning "chars is too long" } */
const char t1[1] = u8"ab"; /* { dg-warning "chars is too long" } */
const char t2[2] = u8"ab";
const char t3[3] = u8"ab";
const char t4[4] = u8"ab";
const char u0[0] = u8"\u2160."; /* { dg-warning "chars is too long" } */
const char u1[1] = u8"\u2160."; /* { dg-warning "chars is too long" } */
const char u2[2] = u8"\u2160."; /* { dg-warning "chars is too long" } */
const char u3[3] = u8"\u2160."; /* { dg-warning "chars is too long" } */
const char u4[4] = u8"\u2160.";
const char u5[5] = u8"\u2160.";
const char u6[6] = u8"\u2160.";

View File

@ -1,3 +1,21 @@
2009-10-19 Jakub Jelinek <jakub@redhat.com>
* charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
(_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc
and char32_cset_desc.
(converter_for_type): Handle CPP_UTF8STRING.
(cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings.
* directives.c (get__Pragma_string): Handle CPP_UTF8STRING.
(parse_include): Reject raw strings.
* include/cpplib.h (CPP_UTF8STRING): New token type.
* internal.h (struct cpp_reader): Add utf8_cset_desc field.
* lex.c (lex_raw_string): New function.
(lex_string): Handle u8 string literals, call lex_raw_string
for raw string literals.
(_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R"
sequences.
* macro.c (stringify_arg): Handle CPP_UTF8STRING.
2009-10-14 Jakub Jelinek <jakub@redhat.com>
PR preprocessor/41543

View File

@ -721,6 +721,8 @@ cpp_init_iconv (cpp_reader *pfile)
pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
pfile->char16_cset_desc = init_iconv_desc (pfile,
be ? "UTF-16BE" : "UTF-16LE",
SOURCE_CHARSET);
@ -741,6 +743,12 @@ _cpp_destroy_iconv (cpp_reader *pfile)
{
if (pfile->narrow_cset_desc.func == convert_using_iconv)
iconv_close (pfile->narrow_cset_desc.cd);
if (pfile->utf8_cset_desc.func == convert_using_iconv)
iconv_close (pfile->utf8_cset_desc.cd);
if (pfile->char16_cset_desc.func == convert_using_iconv)
iconv_close (pfile->char16_cset_desc.cd);
if (pfile->char32_cset_desc.func == convert_using_iconv)
iconv_close (pfile->char32_cset_desc.cd);
if (pfile->wide_cset_desc.func == convert_using_iconv)
iconv_close (pfile->wide_cset_desc.cd);
}
@ -1339,6 +1347,8 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
{
default:
return pfile->narrow_cset_desc;
case CPP_UTF8STRING:
return pfile->utf8_cset_desc;
case CPP_CHAR16:
case CPP_STRING16:
return pfile->char16_cset_desc;
@ -1373,7 +1383,47 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
for (i = 0; i < count; i++)
{
p = from[i].text;
if (*p == 'L' || *p == 'u' || *p == 'U') p++;
if (*p == 'u')
{
if (*++p == '8')
p++;
}
else if (*p == 'L' || *p == 'U') p++;
if (*p == 'R')
{
const uchar *prefix;
/* Skip over 'R"'. */
p += 2;
prefix = p;
while (*p != '[')
p++;
p++;
limit = from[i].text + from[i].len;
if (limit >= p + (p - prefix) + 1)
limit -= (p - prefix) + 1;
for (;;)
{
base = p;
while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
p++;
if (p > base)
{
/* We have a run of normal characters; these can be fed
directly to convert_cset. */
if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
goto fail;
}
if (p == limit)
break;
p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
}
continue;
}
p++; /* Skip leading quote. */
limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */

View File

@ -697,7 +697,8 @@ parse_include (cpp_reader *pfile, int *pangle_brackets,
/* Allow macro expansion. */
header = get_token_no_padding (pfile);
*location = header->src_loc;
if (header->type == CPP_STRING || header->type == CPP_HEADER_NAME)
if ((header->type == CPP_STRING && header->val.str.text[0] != 'R')
|| header->type == CPP_HEADER_NAME)
{
fname = XNEWVEC (char, header->val.str.len - 1);
memcpy (fname, header->val.str.text + 1, header->val.str.len - 2);
@ -1537,7 +1538,8 @@ get__Pragma_string (cpp_reader *pfile)
if (string->type == CPP_EOF)
_cpp_backup_tokens (pfile, 1);
if (string->type != CPP_STRING && string->type != CPP_WSTRING
&& string->type != CPP_STRING32 && string->type != CPP_STRING16)
&& string->type != CPP_STRING32 && string->type != CPP_STRING16
&& string->type != CPP_UTF8STRING)
return NULL;
paren = get_token_no_padding (pfile);

View File

@ -127,6 +127,7 @@ struct _cpp_file;
TK(WSTRING, LITERAL) /* L"string" */ \
TK(STRING16, LITERAL) /* u"string" */ \
TK(STRING32, LITERAL) /* U"string" */ \
TK(UTF8STRING, LITERAL) /* u8"string" */ \
TK(OBJC_STRING, LITERAL) /* @"string" - Objective-C */ \
TK(HEADER_NAME, LITERAL) /* <stdio.h> in #include */ \
\
@ -728,10 +729,10 @@ extern const unsigned char *cpp_macro_definition (cpp_reader *,
extern void _cpp_backup_tokens (cpp_reader *, unsigned int);
extern const cpp_token *cpp_peek_token (cpp_reader *, int);
/* Evaluate a CPP_CHAR or CPP_WCHAR token. */
/* Evaluate a CPP_*CHAR* token. */
extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
unsigned int *, int *);
/* Evaluate a vector of CPP_STRING or CPP_WSTRING tokens. */
/* Evaluate a vector of CPP_*STRING* tokens. */
extern bool cpp_interpret_string (cpp_reader *,
const cpp_string *, size_t,
cpp_string *, enum cpp_ttype);

View File

@ -396,6 +396,10 @@ struct cpp_reader
execution character set. */
struct cset_converter narrow_cset_desc;
/* Descriptor for converting from the source character set to the
UTF-8 execution character set. */
struct cset_converter utf8_cset_desc;
/* Descriptor for converting from the source character set to the
UTF-16 execution character set. */
struct cset_converter char16_cset_desc;

View File

@ -617,12 +617,192 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
token->val.str.text = dest;
}
/* Lexes a raw string literal and stores the result in TOKEN.  BASE
points at the start of the spelling (its leading 'L', 'U', 'u' or
'u8' selects the string type) and the delimiter is read starting at
CUR + 1.  The stored string contains the spelling, including
double quotes, delimiter string, '[' and ']', any leading
'L', 'u', 'U' or 'u8' and 'R' modifier.  Nothing is returned: the
type of the literal is recorded in TOKEN.  It is CPP_OTHER if the
delimiter is malformed, or if a newline is met while in a directive,
while parsing macro arguments or in a deferred pragma; TOKEN becomes
CPP_EOF if no further input line is available before the closing
delimiter.
The spelling is NUL-terminated, but it is not guaranteed that this
is the first NUL since embedded NULs are preserved. */
static void
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
const uchar *cur)
{
/* Location of the first NUL seen in the body, or 0 if none. */
source_location saw_NUL = 0;
/* Start and length of the delimiter between the opening quote and '['. */
const uchar *raw_prefix;
unsigned int raw_prefix_len = 0;
enum cpp_ttype type;
/* Bytes already copied into the buffer chain below; the chain is only
used once the literal spans more than one input line. */
size_t total_len = 0;
_cpp_buff *first_buff = NULL, *last_buff = NULL;
/* The encoding prefix at BASE decides the token type; anything else
means a plain narrow string. */
type = (*base == 'L' ? CPP_WSTRING :
*base == 'U' ? CPP_STRING32 :
*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
: CPP_STRING);
/* Step over one character at CUR (presumably the opening double
quote -- confirm against the caller) to reach the delimiter. */
raw_prefix = cur + 1;
/* Measure the delimiter: count characters from the permitted set,
stopping at the first other character or after 16 of them. */
while (raw_prefix_len < 16)
{
switch (raw_prefix[raw_prefix_len])
{
case ' ': case '[': case ']': case '\t':
case '\v': case '\f': case '\n': default:
break;
/* Basic source charset except the above chars. */
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
case '_': case '{': case '}': case '#': case '(': case ')':
case '<': case '>': case '%': case ':': case ';': case '.':
case '?': case '*': case '+': case '-': case '/': case '^':
case '&': case '|': case '~': case '!': case '=': case ',':
case '\\': case '"': case '\'':
raw_prefix_len++;
continue;
}
/* Reached only for a character outside the permitted set: end the
scan. */
break;
}
/* The delimiter must be followed by '['.  Otherwise diagnose, emit the
text before RAW_PREFIX - 1 as a CPP_OTHER token and let lexing resume
from that position. */
if (raw_prefix[raw_prefix_len] != '[')
{
int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
+ 1;
/* The scan only stops at 16 without finding '[' when the delimiter
is too long; any earlier stop is due to a disallowed character. */
if (raw_prefix_len == 16)
cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
"raw string delimiter longer than 16 characters");
else
cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
"invalid character '%c' in raw string delimiter",
(int) raw_prefix[raw_prefix_len]);
pfile->buffer->cur = raw_prefix - 1;
create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
return;
}
/* Scan the body, starting just past the '['. */
cur = raw_prefix + raw_prefix_len + 1;
for (;;)
{
cppchar_t c = *cur++;
/* A ']' followed by the same delimiter and a double quote closes the
literal; move CUR past the closing quote. */
if (c == ']'
&& strncmp ((const char *) cur, (const char *) raw_prefix,
raw_prefix_len) == 0
&& cur[raw_prefix_len] == '"')
{
cur += raw_prefix_len + 1;
break;
}
else if (c == '\n')
{
/* In these states the literal is not continued onto another line:
back up to the newline, report the error and finish with a
CPP_OTHER token holding what was read so far. */
if (pfile->state.in_directive
|| pfile->state.parsing_args
|| pfile->state.in_deferred_pragma)
{
cur--;
type = CPP_OTHER;
cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
"unterminated raw string");
break;
}
/* Raw strings allow embedded non-escaped newlines, which
complicates this routine a lot. */
if (first_buff == NULL)
{
/* First line break: start a buffer chain with the text seen so
far and repoint RAW_PREFIX at the copy, presumably because
fetching a fresh line below invalidates pointers into the
current one. */
total_len = cur - base;
first_buff = last_buff = _cpp_get_buff (pfile, total_len);
memcpy (BUFF_FRONT (last_buff), base, total_len);
raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
BUFF_FRONT (last_buff) += total_len;
}
else
{
/* Later line breaks: append this line, first filling the room
left in the last buffer and then chaining a new buffer for
whatever did not fit. */
size_t len = cur - base;
size_t cur_len = len > BUFF_ROOM (last_buff)
? BUFF_ROOM (last_buff) : len;
total_len += len;
memcpy (BUFF_FRONT (last_buff), base, cur_len);
BUFF_FRONT (last_buff) += cur_len;
if (len > cur_len)
{
last_buff = _cpp_append_extend_buff (pfile, last_buff,
len - cur_len);
memcpy (BUFF_FRONT (last_buff), base + cur_len,
len - cur_len);
BUFF_FRONT (last_buff) += len - cur_len;
}
}
/* Advance the line count unless the buffer's cursor has already
reached its limit, then request the next input line. */
if (pfile->buffer->cur < pfile->buffer->rlimit)
CPP_INCREMENT_LINE (pfile, 0);
pfile->buffer->need_line = true;
if (!_cpp_get_fresh_line (pfile))
{
/* No further line is available: turn TOKEN into CPP_EOF, drop the
partial copy and report the error at the location the literal
started at. */
source_location src_loc = token->src_loc;
token->type = CPP_EOF;
/* Tell the compiler the line number of the EOF token. */
token->src_loc = pfile->line_table->highest_line;
token->flags = BOL;
if (first_buff != NULL)
_cpp_release_buff (pfile, first_buff);
cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
"unterminated raw string");
return;
}
/* Continue at the buffer's new position; BASE now marks the start
of the text that has not been copied into the chain yet. */
cur = base = pfile->buffer->cur;
}
/* Record the position of the first NUL only; later ones are ignored
here. */
else if (c == '\0' && !saw_NUL)
LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, cur));
}
if (saw_NUL && !pfile->state.skipping)
cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
"null character(s) preserved in literal");
pfile->buffer->cur = cur;
/* No line break was crossed: the spelling is the contiguous range
BASE..CUR and can be handed to create_literal directly. */
if (first_buff == NULL)
create_literal (pfile, token, base, cur - base, type);
else
{
/* Otherwise build the spelling from the buffer chain followed by the
not yet copied tail BASE..CUR, plus a terminating NUL. */
uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
token->type = type;
token->val.str.len = total_len + (cur - base);
token->val.str.text = dest;
last_buff = first_buff;
while (last_buff != NULL)
{
memcpy (dest, last_buff->base,
BUFF_FRONT (last_buff) - last_buff->base);
dest += BUFF_FRONT (last_buff) - last_buff->base;
last_buff = last_buff->next;
}
_cpp_release_buff (pfile, first_buff);
memcpy (dest, base, cur - base);
dest[cur - base] = '\0';
}
}
/* Lexes a string, character constant, or angle-bracketed header file
name. The stored string contains the spelling, including opening
quote and leading any leading 'L', 'u' or 'U'. It returns the type
of the literal, or CPP_OTHER if it was not properly terminated, or
CPP_LESS for an unterminated header name which must be relexed as
normal tokens.
quote and any leading 'L', 'u', 'U' or 'u8' and optional
'R' modifier. It returns the type of the literal, or CPP_OTHER
if it was not properly terminated, or CPP_LESS for an unterminated
header name which must be relexed as normal tokens.
The spelling is NUL-terminated, but it is not guaranteed that this
is the first NUL since embedded NULs are preserved. */
@ -636,12 +816,24 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
cur = base;
terminator = *cur++;
if (terminator == 'L' || terminator == 'u' || terminator == 'U')
if (terminator == 'L' || terminator == 'U')
terminator = *cur++;
if (terminator == '\"')
else if (terminator == 'u')
{
terminator = *cur++;
if (terminator == '8')
terminator = *cur++;
}
if (terminator == 'R')
{
lex_raw_string (pfile, token, base, cur);
return;
}
if (terminator == '"')
type = (*base == 'L' ? CPP_WSTRING :
*base == 'U' ? CPP_STRING32 :
*base == 'u' ? CPP_STRING16 : CPP_STRING);
*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
: CPP_STRING);
else if (terminator == '\'')
type = (*base == 'L' ? CPP_WCHAR :
*base == 'U' ? CPP_CHAR32 :
@ -1101,10 +1293,21 @@ _cpp_lex_direct (cpp_reader *pfile)
case 'L':
case 'u':
case 'U':
/* 'L', 'u' or 'U' may introduce wide characters or strings. */
case 'R':
/* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
wide strings or raw strings. */
if (c == 'L' || CPP_OPTION (pfile, uliterals))
{
if (*buffer->cur == '\'' || *buffer->cur == '"')
if ((*buffer->cur == '\'' && c != 'R')
|| *buffer->cur == '"'
|| (*buffer->cur == 'R'
&& c != 'R'
&& buffer->cur[1] == '"'
&& CPP_OPTION (pfile, uliterals))
|| (*buffer->cur == '8'
&& c == 'u'
&& (buffer->cur[1] == '"'
|| (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
{
lex_string (pfile, result, buffer->cur - 1);
break;
@ -1120,7 +1323,7 @@ _cpp_lex_direct (cpp_reader *pfile)
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'M': case 'N': case 'O': case 'P': case 'Q':
case 'S': case 'T': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;

View File

@ -379,7 +379,8 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
escape_it = (token->type == CPP_STRING || token->type == CPP_CHAR
|| token->type == CPP_WSTRING || token->type == CPP_WCHAR
|| token->type == CPP_STRING32 || token->type == CPP_CHAR32
|| token->type == CPP_STRING16 || token->type == CPP_CHAR16);
|| token->type == CPP_STRING16 || token->type == CPP_CHAR16
|| token->type == CPP_UTF8STRING);
/* Room for each char being written in octal, initial space and
final quote and NUL. */