From a3af5087d9b0e120764cb3852da73149be17dfac Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Mon, 4 May 2009 13:23:50 +0100 Subject: [PATCH] intl.c (locale_encoding, [...]): New. * intl.c (locale_encoding, locale_utf8): New. (gcc_init_libintl): Initialize locale_encoding and locale_utf8. * intl.h (locale_encoding, locale_utf8): Declare. * pretty-print.c: Include ggc.h. Include iconv.h if HAVE_ICONV. (pp_base_tree_identifier, decode_utf8_char, identifier_to_locale): New. * pretty-print.h (pp_identifier): Call identifier_to_locale on ID argument. (pp_tree_identifier): Define to call pp_base_tree_identifier. (pp_base_tree_identifier): Declare as function. (identifier_to_locale): Declare. * Makefile.in (pretty-print.o): Update dependencies. * varasm.c (finish_aliases_1): Use %qE for identifiers in diagnostics. testsuite: * gcc.dg/attr-alias-5.c, gcc.dg/ucnid-7.c: New tests. From-SVN: r147096 --- gcc/ChangeLog | 17 ++ gcc/Makefile.in | 2 +- gcc/intl.c | 24 ++- gcc/intl.h | 4 +- gcc/pretty-print.c | 230 ++++++++++++++++++++++++++++ gcc/pretty-print.h | 8 +- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/attr-alias-5.c | 12 ++ gcc/testsuite/gcc.dg/ucnid-7.c | 7 + gcc/varasm.c | 8 +- 10 files changed, 299 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/attr-alias-5.c create mode 100644 gcc/testsuite/gcc.dg/ucnid-7.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8b4433fd5e8..0897c6f637f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2009-05-04 Joseph Myers + + * intl.c (locale_encoding, locale_utf8): New. + (gcc_init_libintl): Initialize locale_encoding and locale_utf8. + * intl.h (locale_encoding, locale_utf8): Declare. + * pretty-print.c: Include ggc.h. Include iconv.h if HAVE_ICONV. + (pp_base_tree_identifier, decode_utf8_char, identifier_to_locale): + New. + * pretty-print.h (pp_identifier): Call identifier_to_locale on ID + argument. + (pp_tree_identifier): Define to call pp_base_tree_identifier. 
+ (pp_base_tree_identifier): Declare as function. + (identifier_to_locale): Declare. + * Makefile.in (pretty-print.o): Update dependencies. + * varasm.c (finish_aliases_1): Use %qE for identifiers in + diagnostics. + 2009-05-04 Richard Guenther PR middle-end/40015 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index c578d21266f..0e13e49eeec 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3068,7 +3068,7 @@ params.o : params.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(PARAMS_H) $(TO pointer-set.o: pointer-set.c pointer-set.h $(CONFIG_H) $(SYSTEM_H) hooks.o: hooks.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(HOOKS_H) pretty-print.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h intl.h $(PRETTY_PRINT_H) \ - $(TREE_H) + $(TREE_H) $(GGC_H) errors.o : errors.c $(CONFIG_H) $(SYSTEM_H) errors.h $(BCONFIG_H) dbgcnt.o: dbgcnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h errors.h $(DBGCNT_H) \ $(TM_H) $(RTL_H) output.h diff --git a/gcc/intl.c b/gcc/intl.c index 4c8943a43a4..5b486151dcd 100644 --- a/gcc/intl.c +++ b/gcc/intl.c @@ -1,5 +1,5 @@ /* Message translation utilities. - Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008 + Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GCC. @@ -34,6 +34,12 @@ const char *open_quote = "'"; /* Closing quotation mark for diagnostics. */ const char *close_quote = "'"; +/* The name of the locale encoding. */ +const char *locale_encoding = NULL; + +/* Whether the locale is using UTF-8. */ +bool locale_utf8 = false; + #ifdef ENABLE_NLS /* Initialize the translation library for GCC. This performs the @@ -60,20 +66,22 @@ gcc_init_libintl (void) /* Closing quotation mark. 
*/ close_quote = _("'"); +#if defined HAVE_LANGINFO_CODESET + locale_encoding = nl_langinfo (CODESET); + if (locale_encoding != NULL + && (!strcasecmp (locale_encoding, "utf-8") + || !strcasecmp (locale_encoding, "utf8"))) + locale_utf8 = true; +#endif + if (!strcmp (open_quote, "`") && !strcmp (close_quote, "'")) { -#if defined HAVE_LANGINFO_CODESET - const char *encoding; -#endif /* Untranslated quotes that it may be possible to replace with U+2018 and U+2019; but otherwise use "'" instead of "`" as opening quote. */ open_quote = "'"; #if defined HAVE_LANGINFO_CODESET - encoding = nl_langinfo (CODESET); - if (encoding != NULL - && (!strcasecmp (encoding, "utf-8") - || !strcasecmp (encoding, "utf8"))) + if (locale_utf8) { open_quote = "\xe2\x80\x98"; close_quote = "\xe2\x80\x99"; diff --git a/gcc/intl.h b/gcc/intl.h index 3b6395f367d..902e7ae423c 100644 --- a/gcc/intl.h +++ b/gcc/intl.h @@ -1,5 +1,5 @@ /* intl.h - internationalization - Copyright 1998, 2001, 2003, 2004, 2007 Free Software Foundation, Inc. + Copyright 1998, 2001, 2003, 2004, 2007, 2009 Free Software Foundation, Inc. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,5 +56,7 @@ extern size_t gcc_gettext_width (const char *); extern const char *open_quote; extern const char *close_quote; +extern const char *locale_encoding; +extern bool locale_utf8; #endif /* intl.h */ diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c index b611ec98516..d531075c933 100644 --- a/gcc/pretty-print.c +++ b/gcc/pretty-print.c @@ -26,6 +26,11 @@ along with GCC; see the file COPYING3. If not see #include "intl.h" #include "pretty-print.h" #include "tree.h" +#include "ggc.h" + +#if HAVE_ICONV +#include <iconv.h> +#endif #define obstack_chunk_alloc xmalloc #define obstack_chunk_free free @@ -844,3 +849,228 @@ pp_base_maybe_space (pretty_printer *pp) pp_base (pp)->padding = pp_none; } } + +/* Print the identifier ID to PRETTY-PRINTER. 
*/ + +void +pp_base_tree_identifier (pretty_printer *pp, tree id) +{ + const char *text = identifier_to_locale (IDENTIFIER_POINTER (id)); + pp_append_text (pp, text, text + strlen (text)); +} + +/* The string starting at P has LEN (at least 1) bytes left; if they + start with a valid UTF-8 sequence, return the length of that + sequence and set *VALUE to the value of that sequence, and + otherwise return 0 and set *VALUE to (unsigned int) -1. */ + +static int +decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value) +{ + unsigned int t = *p; + + if (len == 0) + abort (); + if (t & 0x80) + { + size_t utf8_len = 0; + unsigned int ch; + size_t i; + for (t = *p; t & 0x80; t <<= 1) + utf8_len++; + + if (utf8_len > len || utf8_len < 2 || utf8_len > 6) + { + *value = (unsigned int) -1; + return 0; + } + ch = *p & ((1 << (7 - utf8_len)) - 1); + for (i = 1; i < utf8_len; i++) + { + unsigned int u = p[i]; + if ((u & 0xC0) != 0x80) + { + *value = (unsigned int) -1; + return 0; + } + ch = (ch << 6) | (u & 0x3F); + } + if ( (ch <= 0x7F && utf8_len > 1) + || (ch <= 0x7FF && utf8_len > 2) + || (ch <= 0xFFFF && utf8_len > 3) + || (ch <= 0x1FFFFF && utf8_len > 4) + || (ch <= 0x3FFFFFF && utf8_len > 5) + || (ch >= 0xD800 && ch <= 0xDFFF)) + { + *value = (unsigned int) -1; + return 0; + } + *value = ch; + return utf8_len; + } + else + { + *value = t; + return 1; + } +} + +/* Given IDENT, an identifier in the internal encoding, return a + version of IDENT suitable for diagnostics in the locale character + set: either IDENT itself, or a garbage-collected string converted + to the locale character set and using escape sequences if not + representable in the locale character set or containing control + characters or invalid byte sequences. Existing backslashes in + IDENT are not doubled, so the result may not uniquely specify the + contents of an arbitrary byte sequence identifier. 
*/ + +const char * +identifier_to_locale (const char *ident) +{ + const unsigned char *uid = (const unsigned char *) ident; + size_t idlen = strlen (ident); + bool valid_printable_utf8 = true; + bool all_ascii = true; + size_t i; + + for (i = 0; i < idlen;) + { + unsigned int c; + size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c); + if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F)) + { + valid_printable_utf8 = false; + break; + } + if (utf8_len > 1) + all_ascii = false; + i += utf8_len; + } + + /* If IDENT contains invalid UTF-8 sequences (which may occur with + attributes putting arbitrary byte sequences in identifiers), or + control characters, we use octal escape sequences for all bytes + outside printable ASCII. */ + if (!valid_printable_utf8) + { + char *ret = GGC_NEWVEC (char, 4 * idlen + 1); + char *p = ret; + for (i = 0; i < idlen; i++) + { + if (uid[i] > 0x1F && uid[i] < 0x7F) + *p++ = uid[i]; + else + { + sprintf (p, "\\%03o", uid[i]); + p += 4; + } + } + *p = 0; + return ret; + } + + /* Otherwise, if it is valid printable ASCII, or printable UTF-8 + with the locale character set being UTF-8, IDENT is used. */ + if (all_ascii || locale_utf8) + return ident; + + /* Otherwise IDENT is converted to the locale character set if + possible. */ +#if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV + if (locale_encoding != NULL) + { + iconv_t cd = iconv_open (locale_encoding, "UTF-8"); + bool conversion_ok = true; + char *ret = NULL; + if (cd != (iconv_t) -1) + { + size_t ret_alloc = 4 * idlen + 1; + for (;;) + { + /* Repeat the whole conversion process as needed with + larger buffers so non-reversible transformations can + always be detected. 
*/ + ICONV_CONST char *inbuf = CONST_CAST (char *, ident); + char *outbuf; + size_t inbytesleft = idlen; + size_t outbytesleft = ret_alloc - 1; + size_t iconv_ret; + + ret = GGC_NEWVEC (char, ret_alloc); + outbuf = ret; + + if (iconv (cd, 0, 0, 0, 0) == (size_t) -1) + { + conversion_ok = false; + break; + } + + iconv_ret = iconv (cd, &inbuf, &inbytesleft, + &outbuf, &outbytesleft); + if (iconv_ret == (size_t) -1 || inbytesleft != 0) + { + if (errno == E2BIG) + { + ret_alloc *= 2; + ggc_free (ret); + ret = NULL; + continue; + } + else + { + conversion_ok = false; + break; + } + } + else if (iconv_ret != 0) + { + conversion_ok = false; + break; + } + /* Return to initial shift state. */ + if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1) + { + if (errno == E2BIG) + { + ret_alloc *= 2; + ggc_free (ret); + ret = NULL; + continue; + } + else + { + conversion_ok = false; + break; + } + } + *outbuf = 0; + break; + } + iconv_close (cd); + if (conversion_ok) + return ret; + } + } +#endif + + /* Otherwise, convert non-ASCII characters in IDENT to UCNs. 
*/ + { + char *ret = GGC_NEWVEC (char, 10 * idlen + 1); + char *p = ret; + for (i = 0; i < idlen;) + { + unsigned int c; + size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c); + if (utf8_len == 1) + *p++ = uid[i]; + else + { + sprintf (p, "\\U%08x", c); + p += 10; + } + i += utf8_len; + } + *p = 0; + return ret; + } +} diff --git a/gcc/pretty-print.h b/gcc/pretty-print.h index dd3f0c0ad39..58292259604 100644 --- a/gcc/pretty-print.h +++ b/gcc/pretty-print.h @@ -273,10 +273,9 @@ struct pretty_print_info pp_scalar (PP, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) I) #define pp_pointer(PP, P) pp_scalar (PP, "%p", P) -#define pp_identifier(PP, ID) pp_string (PP, ID) +#define pp_identifier(PP, ID) pp_string (PP, identifier_to_locale (ID)) #define pp_tree_identifier(PP, T) \ - pp_append_text(PP, IDENTIFIER_POINTER (T), \ - IDENTIFIER_POINTER (T) + IDENTIFIER_LENGTH (T)) + pp_base_tree_identifier (pp_base (PP), T) #define pp_unsupported_tree(PP, T) \ pp_verbatim (pp_base (PP), "#%qs not supported by %s#", \ @@ -322,6 +321,7 @@ extern void pp_base_character (pretty_printer *, int); extern void pp_base_string (pretty_printer *, const char *); extern void pp_write_text_to_stream (pretty_printer *pp); extern void pp_base_maybe_space (pretty_printer *); +extern void pp_base_tree_identifier (pretty_printer *, tree); /* Switch into verbatim mode and return the old mode. */ static inline pp_wrapping_mode_t @@ -334,4 +334,6 @@ pp_set_verbatim_wrapping_ (pretty_printer *pp) } #define pp_set_verbatim_wrapping(PP) pp_set_verbatim_wrapping_ (pp_base (PP)) +extern const char *identifier_to_locale (const char *); + #endif /* GCC_PRETTY_PRINT_H */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8e85e53f509..9fa40cb4807 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2009-05-04 Joseph Myers + + * gcc.dg/attr-alias-5.c, gcc.dg/ucnid-7.c: New tests. 
+ 2009-05-03 Eric Botcazou * gcc.target/sparc/fpmul-2.c: Replace final_cleanup with optimized. diff --git a/gcc/testsuite/gcc.dg/attr-alias-5.c b/gcc/testsuite/gcc.dg/attr-alias-5.c new file mode 100644 index 00000000000..554668d212f --- /dev/null +++ b/gcc/testsuite/gcc.dg/attr-alias-5.c @@ -0,0 +1,12 @@ +/* Verify diagnostics for aliases to strings containing extended + identifiers or bad characters. */ +/* { dg-do compile } */ +/* { dg-options "-std=gnu99" } */ +/* { dg-require-alias "" } */ + +void f0 (void) __attribute__((alias("\xa1"))); /* { dg-error "undefined symbol '\\\\241'" } */ +void f1 (void) __attribute__((alias("\u00e9"))); /* { dg-error "undefined symbol '\\\\U000000e9'" } */ +void f2 (void) __attribute__((alias("\uffff"))); /* { dg-error "undefined symbol '\\\\U0000ffff'" } */ +void f3 (void) __attribute__((alias("\U000fffff"))); /* { dg-error "undefined symbol '\\\\U000fffff'" } */ +void f4 (void) __attribute__((alias("\U00ffffff"))); /* { dg-error "undefined symbol '\\\\U00ffffff'" } */ +void f5 (void) __attribute__((alias("\U0fffffff"))); /* { dg-error "undefined symbol '\\\\U0fffffff'" } */ diff --git a/gcc/testsuite/gcc.dg/ucnid-7.c b/gcc/testsuite/gcc.dg/ucnid-7.c new file mode 100644 index 00000000000..fe53a497ffa --- /dev/null +++ b/gcc/testsuite/gcc.dg/ucnid-7.c @@ -0,0 +1,7 @@ +/* Verify diagnostics for extended identifiers refer to UCNs (in the C + locale). */ +/* { dg-do compile } */ +/* { dg-options "-std=c99 -fextended-identifiers" } */ + +void *p = &\u00e9; /* { dg-error "'\\\\U000000e9' undeclared" } */ +void *q = &\u1e00; /* { dg-error "'\\\\U00001e00' undeclared" } */ diff --git a/gcc/varasm.c b/gcc/varasm.c index 061ff06dd76..0a7b4808b34 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -5344,13 +5344,13 @@ finish_aliases_1 (void) if (target_decl == NULL) { if (! 
lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl))) - error ("%q+D aliased to undefined symbol %qs", - p->decl, IDENTIFIER_POINTER (p->target)); + error ("%q+D aliased to undefined symbol %qE", + p->decl, p->target); } else if (DECL_EXTERNAL (target_decl) && ! lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl))) - error ("%q+D aliased to external symbol %qs", - p->decl, IDENTIFIER_POINTER (p->target)); + error ("%q+D aliased to external symbol %qE", + p->decl, p->target); } }