intl.c (locale_encoding, locale_utf8): New.

* intl.c (locale_encoding, locale_utf8): New.
	(gcc_init_libintl): Initialize locale_encoding and locale_utf8.
	* intl.h (locale_encoding, locale_utf8): Declare.
	* pretty-print.c: Include ggc.h.  Include iconv.h if HAVE_ICONV.
	(pp_base_tree_identifier, decode_utf8_char, identifier_to_locale):
	New.
	* pretty-print.h (pp_identifier): Call identifier_to_locale on ID
	argument.
	(pp_tree_identifier): Define to call pp_base_tree_identifier.
	(pp_base_tree_identifier): Declare as function.
	(identifier_to_locale): Declare.
	* Makefile.in (pretty-print.o): Update dependencies.
	* varasm.c (finish_aliases_1): Use %qE for identifiers in
	diagnostics.

testsuite:
	* gcc.dg/attr-alias-5.c, gcc.dg/ucnid-7.c: New tests.

From-SVN: r147096
This commit is contained in:
Joseph Myers 2009-05-04 13:23:50 +01:00 committed by Joseph Myers
parent ea5cd5f17f
commit a3af5087d9
10 changed files with 299 additions and 17 deletions

View File

@ -1,3 +1,20 @@
2009-05-04 Joseph Myers <joseph@codesourcery.com>
* intl.c (locale_encoding, locale_utf8): New.
(gcc_init_libintl): Initialize locale_encoding and locale_utf8.
* intl.h (locale_encoding, locale_utf8): Declare.
* pretty-print.c: Include ggc.h. Include iconv.h if HAVE_ICONV.
(pp_base_tree_identifier, decode_utf8_char, identifier_to_locale):
New.
* pretty-print.h (pp_identifier): Call identifier_to_locale on ID
argument.
(pp_tree_identifier): Define to call pp_base_tree_identifier.
(pp_base_tree_identifier): Declare as function.
(identifier_to_locale): Declare.
* Makefile.in (pretty-print.o): Update dependencies.
* varasm.c (finish_aliases_1): Use %qE for identifiers in
diagnostics.
2009-05-04 Richard Guenther <rguenther@suse.de>
PR middle-end/40015

View File

@ -3068,7 +3068,7 @@ params.o : params.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(PARAMS_H) $(TO
pointer-set.o: pointer-set.c pointer-set.h $(CONFIG_H) $(SYSTEM_H)
hooks.o: hooks.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(HOOKS_H)
pretty-print.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h intl.h $(PRETTY_PRINT_H) \
$(TREE_H)
$(TREE_H) $(GGC_H)
errors.o : errors.c $(CONFIG_H) $(SYSTEM_H) errors.h $(BCONFIG_H)
dbgcnt.o: dbgcnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h errors.h $(DBGCNT_H) \
$(TM_H) $(RTL_H) output.h

View File

@ -1,5 +1,5 @@
/* Message translation utilities.
Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008
Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008, 2009
Free Software Foundation, Inc.
This file is part of GCC.
@ -34,6 +34,12 @@ const char *open_quote = "'";
/* Closing quotation mark for diagnostics. */
const char *close_quote = "'";
/* The name of the locale encoding. */
const char *locale_encoding = NULL;
/* Whether the locale is using UTF-8. */
bool locale_utf8 = false;
#ifdef ENABLE_NLS
/* Initialize the translation library for GCC. This performs the
@ -60,20 +66,22 @@ gcc_init_libintl (void)
/* Closing quotation mark. */
close_quote = _("'");
#if defined HAVE_LANGINFO_CODESET
locale_encoding = nl_langinfo (CODESET);
if (locale_encoding != NULL
&& (!strcasecmp (locale_encoding, "utf-8")
|| !strcasecmp (locale_encoding, "utf8")))
locale_utf8 = true;
#endif
if (!strcmp (open_quote, "`") && !strcmp (close_quote, "'"))
{
#if defined HAVE_LANGINFO_CODESET
const char *encoding;
#endif
/* Untranslated quotes that it may be possible to replace with
U+2018 and U+2019; but otherwise use "'" instead of "`" as
opening quote. */
open_quote = "'";
#if defined HAVE_LANGINFO_CODESET
encoding = nl_langinfo (CODESET);
if (encoding != NULL
&& (!strcasecmp (encoding, "utf-8")
|| !strcasecmp (encoding, "utf8")))
if (locale_utf8)
{
open_quote = "\xe2\x80\x98";
close_quote = "\xe2\x80\x99";

View File

@ -1,5 +1,5 @@
/* intl.h - internationalization
Copyright 1998, 2001, 2003, 2004, 2007 Free Software Foundation, Inc.
Copyright 1998, 2001, 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -56,5 +56,7 @@ extern size_t gcc_gettext_width (const char *);
extern const char *open_quote;
extern const char *close_quote;
extern const char *locale_encoding;
extern bool locale_utf8;
#endif /* intl.h */

View File

@ -26,6 +26,11 @@ along with GCC; see the file COPYING3. If not see
#include "intl.h"
#include "pretty-print.h"
#include "tree.h"
#include "ggc.h"
#if HAVE_ICONV
#include <iconv.h>
#endif
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
@ -844,3 +849,228 @@ pp_base_maybe_space (pretty_printer *pp)
pp_base (pp)->padding = pp_none;
}
}
/* Append the name of the identifier node ID to PP, after passing it
   through identifier_to_locale.  */

void
pp_base_tree_identifier (pretty_printer *pp, tree id)
{
  const char *name = identifier_to_locale (IDENTIFIER_POINTER (id));
  const char *name_end = name + strlen (name);

  pp_append_text (pp, name, name_end);
}
/* Decode one UTF-8 sequence from the start of the LEN-byte buffer P
   (LEN must be at least 1; a LEN of 0 aborts).

   On success, store the decoded value in *VALUE and return the number
   of bytes the sequence occupies (1 to 6).  On failure, store
   (unsigned int) -1 in *VALUE and return 0.  A sequence is rejected
   when its lead byte is a stray continuation byte or has more than six
   leading one bits, when it does not fit in LEN bytes, when a trailing
   byte is not of the form 10xxxxxx, when a shorter encoding of the
   same value exists (overlong form), or when the value lies in the
   surrogate range 0xD800..0xDFFF.  */

static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  unsigned int lead;
  unsigned int t;
  unsigned int ch;
  size_t utf8_len = 0;
  size_t i;

  /* Check the precondition before touching *P: with no bytes left,
     P need not point at readable storage.  */
  if (len == 0)
    abort ();

  lead = *p;

  /* Plain ASCII: the byte is its own value.  */
  if (!(lead & 0x80))
    {
      *value = lead;
      return 1;
    }

  /* The number of leading one bits in the lead byte gives the total
     length of the sequence.  */
  for (t = lead; t & 0x80; t <<= 1)
    utf8_len++;

  if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
    goto invalid;

  /* The payload bits of the lead byte are those below the length
     marker and its terminating zero bit.  */
  ch = lead & ((1u << (7 - utf8_len)) - 1);

  /* Each continuation byte contributes six more bits.  */
  for (i = 1; i < utf8_len; i++)
    {
      unsigned int u = p[i];

      if ((u & 0xC0) != 0x80)
        goto invalid;
      ch = (ch << 6) | (u & 0x3F);
    }

  /* Reject overlong encodings and surrogates.  */
  if ((ch <= 0x7F && utf8_len > 1)
      || (ch <= 0x7FF && utf8_len > 2)
      || (ch <= 0xFFFF && utf8_len > 3)
      || (ch <= 0x1FFFFF && utf8_len > 4)
      || (ch <= 0x3FFFFFF && utf8_len > 5)
      || (ch >= 0xD800 && ch <= 0xDFFF))
    goto invalid;

  *value = ch;
  return (int) utf8_len;

 invalid:
  *value = (unsigned int) -1;
  return 0;
}
/* Return a spelling of IDENT, an identifier in the internal encoding,
   that is suitable for printing in diagnostics under the locale
   character set.  The result is either IDENT itself or a
   garbage-collected string.  In order of preference:

   - if IDENT is not valid UTF-8, or contains control characters,
     every byte outside printable ASCII becomes an octal escape;
   - if IDENT is all ASCII, or the locale uses UTF-8, IDENT is
     returned unchanged;
   - if iconv can convert IDENT to the locale encoding without any
     non-reversible substitution, the converted string is returned;
   - otherwise each non-ASCII character becomes a \UXXXXXXXX escape.

   Backslashes already present in IDENT are not doubled, so the result
   does not necessarily identify an arbitrary byte sequence
   uniquely.  */

const char *
identifier_to_locale (const char *ident)
{
  const unsigned char *bytes = (const unsigned char *) ident;
  size_t nbytes = strlen (ident);
  bool printable_utf8 = true;
  bool ascii_only = true;
  size_t pos;

  /* Classify IDENT: stop at the first invalid sequence or control
     character, and note whether any multibyte sequence appears.  */
  pos = 0;
  while (pos < nbytes)
    {
      unsigned int cp;
      size_t seq_len = decode_utf8_char (&bytes[pos], nbytes - pos, &cp);

      if (seq_len == 0 || cp <= 0x1F || (cp >= 0x7F && cp <= 0x9F))
        {
          printable_utf8 = false;
          break;
        }
      if (seq_len > 1)
        ascii_only = false;
      pos += seq_len;
    }

  /* Invalid UTF-8 (attributes can put arbitrary byte sequences in
     identifiers) or control characters: fall back to a byte-wise
     rendering with octal escapes for everything that is not printable
     ASCII.  Each input byte yields at most four output bytes.  */
  if (!printable_utf8)
    {
      char *escaped = GGC_NEWVEC (char, 4 * nbytes + 1);
      char *out = escaped;

      for (pos = 0; pos < nbytes; pos++)
        {
          if (bytes[pos] <= 0x1F || bytes[pos] >= 0x7F)
            {
              sprintf (out, "\\%03o", bytes[pos]);
              out += 4;
            }
          else
            *out++ = bytes[pos];
        }
      *out = 0;
      return escaped;
    }

  /* Printable ASCII, or printable UTF-8 in a UTF-8 locale, needs no
     conversion at all.  */
  if (ascii_only || locale_utf8)
    return ident;

  /* Try to express IDENT in the locale character set.  */
#if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
  if (locale_encoding != NULL)
    {
      iconv_t cd = iconv_open (locale_encoding, "UTF-8");
      bool conversion_ok = true;
      char *converted = NULL;

      if (cd != (iconv_t) -1)
        {
          size_t capacity = 4 * nbytes + 1;

          for (;;)
            {
              /* Every attempt restarts from the beginning of IDENT
                 with a fresh buffer, so that the count of
                 non-reversible conversions reported by iconv always
                 covers the whole identifier.  */
              ICONV_CONST char *inbuf = CONST_CAST (char *, ident);
              char *outbuf;
              size_t inbytesleft = nbytes;
              size_t outbytesleft = capacity - 1;
              size_t iconv_ret;

              converted = GGC_NEWVEC (char, capacity);
              outbuf = converted;

              /* Put the descriptor back into its initial state.  */
              if (iconv (cd, 0, 0, 0, 0) == (size_t) -1)
                {
                  conversion_ok = false;
                  break;
                }

              iconv_ret = iconv (cd, &inbuf, &inbytesleft,
                                 &outbuf, &outbytesleft);
              if (iconv_ret == (size_t) -1 || inbytesleft != 0)
                {
                  if (errno != E2BIG)
                    {
                      conversion_ok = false;
                      break;
                    }
                  /* Output buffer too small: double it and retry.  */
                  capacity *= 2;
                  ggc_free (converted);
                  converted = NULL;
                  continue;
                }
              if (iconv_ret != 0)
                {
                  /* Some characters were converted non-reversibly;
                     treat that as a failed conversion.  */
                  conversion_ok = false;
                  break;
                }

              /* Emit whatever is needed to return to the initial
                 shift state.  */
              if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1)
                {
                  if (errno != E2BIG)
                    {
                      conversion_ok = false;
                      break;
                    }
                  capacity *= 2;
                  ggc_free (converted);
                  converted = NULL;
                  continue;
                }

              *outbuf = 0;
              break;
            }

          iconv_close (cd);
          if (conversion_ok)
            return converted;
        }
    }
#endif

  /* Last resort: spell each non-ASCII character as a UCN.  A
     multibyte sequence is at least two bytes long and expands to ten
     output bytes, so 10 * NBYTES + 1 is always enough room.  */
  {
    char *ucn_text = GGC_NEWVEC (char, 10 * nbytes + 1);
    char *out = ucn_text;

    pos = 0;
    while (pos < nbytes)
      {
        unsigned int cp;
        size_t seq_len = decode_utf8_char (&bytes[pos], nbytes - pos, &cp);

        if (seq_len != 1)
          {
            sprintf (out, "\\U%08x", cp);
            out += 10;
          }
        else
          *out++ = bytes[pos];
        pos += seq_len;
      }
    *out = 0;
    return ucn_text;
  }
}

View File

@ -273,10 +273,9 @@ struct pretty_print_info
pp_scalar (PP, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) I)
#define pp_pointer(PP, P) pp_scalar (PP, "%p", P)
#define pp_identifier(PP, ID) pp_string (PP, ID)
#define pp_identifier(PP, ID) pp_string (PP, identifier_to_locale (ID))
#define pp_tree_identifier(PP, T) \
pp_append_text(PP, IDENTIFIER_POINTER (T), \
IDENTIFIER_POINTER (T) + IDENTIFIER_LENGTH (T))
pp_base_tree_identifier (pp_base (PP), T)
#define pp_unsupported_tree(PP, T) \
pp_verbatim (pp_base (PP), "#%qs not supported by %s#", \
@ -322,6 +321,7 @@ extern void pp_base_character (pretty_printer *, int);
extern void pp_base_string (pretty_printer *, const char *);
extern void pp_write_text_to_stream (pretty_printer *pp);
extern void pp_base_maybe_space (pretty_printer *);
extern void pp_base_tree_identifier (pretty_printer *, tree);
/* Switch into verbatim mode and return the old mode. */
static inline pp_wrapping_mode_t
@ -334,4 +334,6 @@ pp_set_verbatim_wrapping_ (pretty_printer *pp)
}
#define pp_set_verbatim_wrapping(PP) pp_set_verbatim_wrapping_ (pp_base (PP))
extern const char *identifier_to_locale (const char *);
#endif /* GCC_PRETTY_PRINT_H */

View File

@ -1,3 +1,7 @@
2009-05-04 Joseph Myers <joseph@codesourcery.com>
* gcc.dg/attr-alias-5.c, gcc.dg/ucnid-7.c: New tests.
2009-05-03 Eric Botcazou <ebotcazou@adacore.com>
* gcc.target/sparc/fpmul-2.c: Replace final_cleanup with optimized.

View File

@ -0,0 +1,12 @@
/* Verify diagnostics for aliases to strings containing extended
identifiers or bad characters. */
/* { dg-do compile } */
/* { dg-options "-std=gnu99" } */
/* { dg-require-alias "" } */
/* None of the alias targets named below is defined in this file, so
   every declaration is expected to draw an "undefined symbol" error.
   The dg-error patterns check how the target name is spelled in that
   message: f0's target is the single byte 0xa1, expected as the octal
   escape \241; the targets of f1 to f5 are written with universal
   character names and are expected as \U followed by eight hex
   digits.  */
void f0 (void) __attribute__((alias("\xa1"))); /* { dg-error "undefined symbol '\\\\241'" } */
void f1 (void) __attribute__((alias("\u00e9"))); /* { dg-error "undefined symbol '\\\\U000000e9'" } */
void f2 (void) __attribute__((alias("\uffff"))); /* { dg-error "undefined symbol '\\\\U0000ffff'" } */
void f3 (void) __attribute__((alias("\U000fffff"))); /* { dg-error "undefined symbol '\\\\U000fffff'" } */
void f4 (void) __attribute__((alias("\U00ffffff"))); /* { dg-error "undefined symbol '\\\\U00ffffff'" } */
void f5 (void) __attribute__((alias("\U0fffffff"))); /* { dg-error "undefined symbol '\\\\U0fffffff'" } */

View File

@ -0,0 +1,7 @@
/* Verify diagnostics for extended identifiers refer to UCNs (in the C
locale). */
/* { dg-do compile } */
/* { dg-options "-std=c99 -fextended-identifiers" } */
/* Neither identifier below is declared, so taking its address is
   expected to draw an "undeclared" error.  The dg-error patterns
   require the identifier to appear in the message as \U followed by
   eight hex digits.  */
void *p = &\u00e9; /* { dg-error "'\\\\U000000e9' undeclared" } */
void *q = &\u1e00; /* { dg-error "'\\\\U00001e00' undeclared" } */

View File

@ -5344,13 +5344,13 @@ finish_aliases_1 (void)
if (target_decl == NULL)
{
if (! lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl)))
error ("%q+D aliased to undefined symbol %qs",
p->decl, IDENTIFIER_POINTER (p->target));
error ("%q+D aliased to undefined symbol %qE",
p->decl, p->target);
}
else if (DECL_EXTERNAL (target_decl)
&& ! lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl)))
error ("%q+D aliased to external symbol %qs",
p->decl, IDENTIFIER_POINTER (p->target));
error ("%q+D aliased to external symbol %qE",
p->decl, p->target);
}
}