From 2c03d73667df53165834e9bcb5d09243db414ec3 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 7 Nov 2019 21:24:38 +0100 Subject: [PATCH] PR c++/91370 - Implement P1041R4 and P1139R2 - Stronger Unicode reqs PR c++/91370 - Implement P1041R4 and P1139R2 - Stronger Unicode reqs * charset.c (narrow_str_to_charconst): Add TYPE argument. For CPP_UTF8CHAR diagnose whenever number of chars is > 1, using CPP_DL_ERROR instead of CPP_DL_WARNING. (wide_str_to_charconst): For CPP_CHAR16 or CPP_CHAR32, use CPP_DL_ERROR instead of CPP_DL_WARNING when multiple char16_t or char32_t chars are needed. (cpp_interpret_charconst): Adjust narrow_str_to_charconst caller. * g++.dg/cpp1z/utf8-neg.C: Expect errors rather than -Wmultichar warnings. * g++.dg/ext/utf16-4.C: Expect errors rather than warnings. * g++.dg/ext/utf32-4.C: Likewise. * g++.dg/cpp2a/ucn2.C: New test. From-SVN: r277929 --- gcc/testsuite/ChangeLog | 9 ++++++++ gcc/testsuite/g++.dg/cpp1z/utf8-neg.C | 6 +++--- gcc/testsuite/g++.dg/cpp2a/ucn2.C | 30 +++++++++++++++++++++++++++ gcc/testsuite/g++.dg/ext/utf16-4.C | 4 ++-- gcc/testsuite/g++.dg/ext/utf32-4.C | 4 ++-- libcpp/ChangeLog | 11 ++++++++++ libcpp/charset.c | 16 +++++++++----- 7 files changed, 68 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/ucn2.C diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b4d865c5389..5b3f9d5c8b0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2019-11-07 Jakub Jelinek + + PR c++/91370 - Implement P1041R4 and P1139R2 - Stronger Unicode reqs + * g++.dg/cpp1z/utf8-neg.C: Expect errors rather than -Wmultichar + warnings. + * g++.dg/ext/utf16-4.C: Expect errors rather than warnings. + * g++.dg/ext/utf32-4.C: Likewise. + * g++.dg/cpp2a/ucn2.C: New test. + 2019-11-07 Peter Bergner PR other/92090 diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C index 0a94f1ac650..7bca1248955 100644 --- a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C +++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C @@ -1,6 +1,6 @@ /* { dg-do compile { target c++17 } } */ const static char c0 = u8''; // { dg-error "empty character" } -const static char c1 = u8'ab'; // { dg-warning "multi-character character constant" } -const static char c2 = u8'\u0124'; // { dg-warning "multi-character character constant" } -const static char c3 = u8'\U00064321'; // { dg-warning "multi-character character constant" } +const static char c1 = u8'ab'; // { dg-error "character constant too long for its type" } +const static char c2 = u8'\u0124'; // { dg-error "character constant too long for its type" } +const static char c3 = u8'\U00064321'; // { dg-error "character constant too long for its type" } diff --git a/gcc/testsuite/g++.dg/cpp2a/ucn2.C b/gcc/testsuite/g++.dg/cpp2a/ucn2.C new file mode 100644 index 00000000000..2e6ce11d2e7 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/ucn2.C @@ -0,0 +1,30 @@ +// P1139R2 +// { dg-do compile { target c++11 } } +// { dg-additional-options "-fchar8_t" { target c++17_down } } + +const char16_t *a = u"\U0001F914\u2753"; +const char32_t *b = U"\U0001F914\u2753"; +const char16_t *c = u"\uD802"; // { dg-error "is not a valid universal character" } +const char16_t *d = u"\U0000DFF0"; // { dg-error "is not a valid universal character" } +const char16_t *e = u"\U00110000"; // { dg-error "is outside the UCS codespace" "" { target c++2a } } + // { dg-error "converting UCN to execution character set" "" { target *-*-* } .-1 } +const char32_t *f = U"\uD802"; // { dg-error "is not a valid universal character" } +const char32_t *g = U"\U0000DFF0"; // { dg-error "is not a valid universal character" } +const char32_t *h = U"\U00110001"; // { dg-error "is outside the UCS codespace" "" { target c++2a } } +#if __cpp_unicode_characters >= 201411 +const char8_t i = u8'\u00C0'; // { dg-error "character constant too long for its type" "" { target c++17 } } +#endif +const char16_t j = u'\U0001F914'; // { dg-error "character constant too long for its type" } +const char32_t k = U'\U0001F914'; +#if __cpp_unicode_characters >= 201411 +const char8_t l = u8'ab'; // { dg-error "character constant too long for its type" "" { target c++17 } } +#endif +const char16_t m = u'ab'; // { dg-error "character constant too long for its type" } +const char32_t n = U'ab'; // { dg-error "character constant too long for its type" } +#if __cpp_unicode_characters >= 201411 +const char8_t o = u8'\U00110002'; // { dg-error "is outside the UCS codespace" "" { target c++2a } } + // { dg-error "character constant too long for its type" "" { target c++17 } .-1 } +#endif +const char16_t p = u'\U00110003'; // { dg-error "is outside the UCS codespace" "" { target c++2a } } + // { dg-error "converting UCN to execution character set" "" { target *-*-* } .-1 } +const char32_t q = U'\U00110004'; // { dg-error "is outside the UCS codespace" "" { target c++2a } } diff --git a/gcc/testsuite/g++.dg/ext/utf16-4.C b/gcc/testsuite/g++.dg/ext/utf16-4.C index e32c5edb573..030e085a82e 100644 --- a/gcc/testsuite/g++.dg/ext/utf16-4.C +++ b/gcc/testsuite/g++.dg/ext/utf16-4.C @@ -4,8 +4,8 @@ const static char16_t c0 = u''; /* { dg-error "empty character" } */ -const static char16_t c1 = u'ab'; /* { dg-warning "constant too long" } */ -const static char16_t c2 = u'\U00064321'; /* { dg-warning "constant too long" } */ +const static char16_t c1 = u'ab'; /* { dg-error "constant too long" } */ +const static char16_t c2 = u'\U00064321'; /* { dg-error "constant too long" } */ const static char16_t c3 = 'a'; const static char16_t c4 = U'a'; diff --git a/gcc/testsuite/g++.dg/ext/utf32-4.C b/gcc/testsuite/g++.dg/ext/utf32-4.C index 0b270304a04..96bf0bb2b4a 100644 --- a/gcc/testsuite/g++.dg/ext/utf32-4.C +++ b/gcc/testsuite/g++.dg/ext/utf32-4.C @@ -3,13 +3,13 @@ /* { dg-do compile { target c++11 } } */ const static char32_t c0 = U''; /* { dg-error "empty character" } */ -const static char32_t c1 = U'ab'; /* { dg-warning "constant too long" } */ +const static char32_t c1 = U'ab'; /* { dg-error "constant too long" } */ const static char32_t c2 = U'\U00064321'; const static char32_t c3 = 'a'; const static char32_t c4 = u'a'; const static char32_t c5 = u'\u2029'; -const static char32_t c6 = u'\U00064321'; /* { dg-warning "constant too long" } */ +const static char32_t c6 = u'\U00064321'; /* { dg-error "constant too long" } */ const static char32_t c7 = L'a'; const static char32_t c8 = L'\u2029'; const static char32_t c9 = L'\U00064321'; /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } } */ diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 8be84386d6c..b57f26584a1 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,14 @@ +2019-11-07 Jakub Jelinek + + PR c++/91370 - Implement P1041R4 and P1139R2 - Stronger Unicode reqs + * charset.c (narrow_str_to_charconst): Add TYPE argument. For + CPP_UTF8CHAR diagnose whenever number of chars is > 1, using + CPP_DL_ERROR instead of CPP_DL_WARNING. + (wide_str_to_charconst): For CPP_CHAR16 or CPP_CHAR32, use + CPP_DL_ERROR instead of CPP_DL_WARNING when multiple char16_t + or char32_t chars are needed. + (cpp_interpret_charconst): Adjust narrow_str_to_charconst caller. + 2019-11-05 Tim van Deurzen * cpplib.h: Add spaceship operator for C++. diff --git a/libcpp/charset.c b/libcpp/charset.c index 39af77a554a..0b8815af46b 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -1881,10 +1881,11 @@ cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from, /* Subroutine of cpp_interpret_charconst which performs the conversion to a number, for narrow strings. STR is the string structure returned by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for - cpp_interpret_charconst. */ + cpp_interpret_charconst. TYPE is the token type. */ static cppchar_t narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, - unsigned int *pchars_seen, int *unsignedp) + unsigned int *pchars_seen, int *unsignedp, + enum cpp_ttype type) { size_t width = CPP_OPTION (pfile, char_precision); size_t max_chars = CPP_OPTION (pfile, int_precision) / width; @@ -1913,10 +1914,12 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, result = c; } + if (type == CPP_UTF8CHAR) + max_chars = 1; if (i > max_chars) { i = max_chars; - cpp_error (pfile, CPP_DL_WARNING, + cpp_error (pfile, type == CPP_UTF8CHAR ? CPP_DL_ERROR : CPP_DL_WARNING, "character constant too long for its type"); } else if (i > 1 && CPP_OPTION (pfile, warn_multichar)) @@ -1980,7 +1983,9 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str, character exactly fills a wchar_t, so a multi-character wide character constant is guaranteed to overflow. */ if (str.len > nbwc * 2) - cpp_error (pfile, CPP_DL_WARNING, + cpp_error (pfile, (CPP_OPTION (pfile, cplusplus) + && (type == CPP_CHAR16 || type == CPP_CHAR32)) + ? CPP_DL_ERROR : CPP_DL_WARNING, "character constant too long for its type"); /* Truncate the constant to its natural width, and simultaneously @@ -2038,7 +2043,8 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp, token->type); else - result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp); + result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp, + token->type); if (str.text != token->val.str.text) free ((void *)str.text);