2004-06-09 22:10:13 +02:00
|
|
|
/* Structures that hang off cpp_identifier, for PCH.
|
2015-01-05 13:33:28 +01:00
|
|
|
Copyright (C) 1986-2015 Free Software Foundation, Inc.
|
2004-06-09 22:10:13 +02:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
|
|
|
under the terms of the GNU General Public License as published by the
|
2009-04-09 17:00:19 +02:00
|
|
|
Free Software Foundation; either version 3, or (at your option) any
|
2004-06-09 22:10:13 +02:00
|
|
|
later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
2009-04-09 17:00:19 +02:00
|
|
|
along with this program; see the file COPYING3. If not see
|
|
|
|
<http://www.gnu.org/licenses/>. */
|
2004-06-09 22:10:13 +02:00
|
|
|
|
|
|
|
#include "cpplib.h"
|
|
|
|
|
2004-11-27 22:59:38 +01:00
|
|
|
#if !defined (HAVE_UCHAR) && !defined (IN_GCC)
|
2004-06-09 22:10:13 +02:00
|
|
|
typedef unsigned char uchar;
|
|
|
|
#endif
|
2004-11-27 22:59:38 +01:00
|
|
|
|
cpp-id-data.h (UC): Was U, conflicts with U...
libcpp/ChangeLog:
2008-04-14 Kris Van Hees <kris.van.hees@oracle.com>
* include/cpp-id-data.h (UC): Was U, conflicts with U... literal.
* include/cpplib.h (CHAR16, CHAR32, STRING16, STRING32): New tokens.
(struct cpp_options): Added uliterals.
(cpp_interpret_string): Update prototype.
(cpp_interpret_string_notranslate): Idem.
* charset.c (init_iconv_desc): New width member in cset_converter.
(cpp_init_iconv): Add support for char{16,32}_cset_desc.
(convert_ucn): Idem.
(emit_numeric_escape): Idem.
(convert_hex): Idem.
(convert_oct): Idem.
(convert_escape): Idem.
(converter_for_type): New function.
(cpp_interpret_string): Use converter_for_type, support u and U prefix.
(cpp_interpret_string_notranslate): Match changed prototype.
(wide_str_to_charconst): Use converter_for_type.
(cpp_interpret_charconst): Add support for CPP_CHAR{16,32}.
* directives.c (linemarker_dir): Macro U changed to UC.
(parse_include): Idem.
(register_pragma_1): Idem.
(restore_registered_pragmas): Idem.
(get__Pragma_string): Support CPP_STRING{16,32}.
* expr.c (eval_token): Support CPP_CHAR{16,32}.
* init.c (struct lang_flags): Added uliterals.
(lang_defaults): Idem.
* internal.h (struct cset_converter) <width>: New field.
(struct cpp_reader) <char16_cset_desc>: Idem.
(struct cpp_reader) <char32_cset_desc>: Idem.
* lex.c (digraph_spellings): Macro U changed to UC.
(OP, TK): Idem.
(lex_string): Add support for u'...', U'...', u... and U....
(_cpp_lex_direct): Idem.
* macro.c (_cpp_builtin_macro_text): Macro U changed to UC.
(stringify_arg): Support CPP_CHAR{16,32} and CPP_STRING{16,32}.
gcc/ChangeLog:
2008-04-14 Kris Van Hees <kris.van.hees@oracle.com>
* c-common.c (CHAR16_TYPE, CHAR32_TYPE): New macros.
(fname_as_string): Match updated cpp_interpret_string prototype.
(fix_string_type): Support char16_t* and char32_t*.
(c_common_nodes_and_builtins): Add char16_t and char32_t (and
derivative) nodes. Register as builtin if C++0x.
(c_parse_error): Support CPP_CHAR{16,32}.
* c-common.h (RID_CHAR16, RID_CHAR32): New elements.
(enum c_tree_index) <CTI_CHAR16_TYPE, CTI_SIGNED_CHAR16_TYPE,
CTI_UNSIGNED_CHAR16_TYPE, CTI_CHAR32_TYPE, CTI_SIGNED_CHAR32_TYPE,
CTI_UNSIGNED_CHAR32_TYPE, CTI_CHAR16_ARRAY_TYPE,
CTI_CHAR32_ARRAY_TYPE>: New elements.
(char16_type_node, signed_char16_type_node, unsigned_char16_type_node,
char32_type_node, signed_char32_type_node, char16_array_type_node,
char32_array_type_node): New defines.
* c-lex.c (cb_ident): Match updated cpp_interpret_string prototype.
(c_lex_with_flags): Support CPP_CHAR{16,32} and CPP_STRING{16,32}.
(lex_string): Support CPP_STRING{16,32}, match updated
cpp_interpret_string and cpp_interpret_string_notranslate prototypes.
(lex_charconst): Support CPP_CHAR{16,32}.
* c-parser.c (c_parser_postfix_expression): Support CPP_CHAR{16,32}
and CPP_STRING{16,32}.
gcc/cp/ChangeLog:
2008-04-14 Kris Van Hees <kris.van.hees@oracle.com>
* cvt.c (type_promotes_to): Support char16_t and char32_t.
* decl.c (grokdeclarator): Disallow signed/unsigned/short/long on
char16_t and char32_t.
* lex.c (reswords): Add char16_t and char32_t (for c++0x).
* mangle.c (write_builtin_type): Mangle char16_t/char32_t as vendor
extended builtin type u8char32_t.
* parser.c (cp_lexer_next_token_is_decl_specifier_keyword): Support
RID_CHAR{16,32}.
(cp_lexer_print_token): Support CPP_STRING{16,32}.
(cp_parser_is_string_literal): Idem.
(cp_parser_string_literal): Idem.
(cp_parser_primary_expression): Support CPP_CHAR{16,32} and
CPP_STRING{16,32}.
(cp_parser_simple_type_specifier): Support RID_CHAR{16,32}.
* tree.c (char_type_p): Support char16_t and char32_t as char types.
* typeck.c (string_conv_p): Support char16_t and char32_t.
gcc/testsuite/ChangeLog:
2008-04-14 Kris Van Hees <kris.van.hees@oracle.com>
Tests for char16_t and char32_t support.
* g++.dg/ext/utf-cvt.C: New
* g++.dg/ext/utf-cxx0x.C: New
* g++.dg/ext/utf-cxx98.C: New
* g++.dg/ext/utf-dflt.C: New
* g++.dg/ext/utf-gnuxx0x.C: New
* g++.dg/ext/utf-gnuxx98.C: New
* g++.dg/ext/utf-mangle.C: New
* g++.dg/ext/utf-typedef-cxx0x.C: New
* g++.dg/ext/utf-typedef-
* g++.dg/ext/utf-typespec.C: New
* g++.dg/ext/utf16-1.C: New
* g++.dg/ext/utf16-2.C: New
* g++.dg/ext/utf16-3.C: New
* g++.dg/ext/utf16-4.C: New
* g++.dg/ext/utf32-1.C: New
* g++.dg/ext/utf32-2.C: New
* g++.dg/ext/utf32-3.C: New
* g++.dg/ext/utf32-4.C: New
* gcc.dg/utf-cvt.c: New
* gcc.dg/utf-dflt.c: New
* gcc.dg/utf16-1.c: New
* gcc.dg/utf16-2.c: New
* gcc.dg/utf16-3.c: New
* gcc.dg/utf16-4.c: New
* gcc.dg/utf32-1.c: New
* gcc.dg/utf32-2.c: New
* gcc.dg/utf32-3.c: New
* gcc.dg/utf32-4.c: New
libiberty/ChangeLog:
2008-04-14 Kris Van Hees <kris.van.hees@oracle.com>
* testsuite/demangle-expected: Added tests for char16_t and char32_t.
From-SVN: r134438
2008-04-18 15:58:08 +02:00
|
|
|
#define UC (const unsigned char *) /* Intended use: UC"string" */
|
2004-06-09 22:10:13 +02:00
|
|
|
|
|
|
|
/* Chained list of answers to an assertion. */
|
2009-04-22 20:29:36 +02:00
|
|
|
struct GTY(()) answer {
|
2004-06-09 22:10:13 +02:00
|
|
|
struct answer *next;
|
|
|
|
unsigned int count;
|
|
|
|
cpp_token GTY ((length ("%h.count"))) first[1];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Each macro definition is recorded in a cpp_macro structure.
|
|
|
|
Variadic macros cannot occur with traditional cpp. */
|
2009-04-22 20:29:36 +02:00
|
|
|
struct GTY(()) cpp_macro {
|
Preserve original spellings of extended identifiers.
This patch makes cpplib track the original spellings of extended
identifiers, as well as the canonical UTF-8 version, in order to
follow standard semantics properly without needing a convoluted and
undocumented canonicalization in translation phase 1 (see bug 9449
comments 39-46 regarding such a canonicalization).
The spelling is tracked in cpp_identifier and cpp_macro_arg without
making cpp_token any larger. The original spelling is used for checks
of duplicate macro definitions, stringizing (see the C++ tests added;
this case is only an issue for C++ not C because C makes it
implementation-defined whether a \ is inserted before the \ of a UCN
in a string or character constant when stringizing, while C++ does
not), pasting (relevant when the result is then stringized for C++)
and when macro definitions are output as text (e.g. for -d options).
Once a macro has been defined, only the original spelling of the
argument names needs keeping in the argument list. While it is being
defined, however, both spellings are needed: the original one for
subsequent saving for checks of duplicate macro definitions, and the
canonical one which is the node marked specially to generate macro
argument tokens rather than normal identifier tokens. The buffer that
is used to save the original values of the identifier tokens is
changed so that it stores both those original values and a pointer to
the canonical hash nodes, so that those canonical nodes can be found
when their values need restoring after the macro definition has been
parsed.
I believe this covers the known standards issues in extended
identifiers support (the remaining unimplemented C99 areas in GCC all
being floating-point-related), except for C++ translation of extended
characters to UCNs in phase 1 (which I have no plans to work on).
There are however probably issues left with handling of extended
identifiers in other places, as listed in
<https://gcc.gnu.org/ml/gcc-patches/2014-11/msg00337.html> (those
issues are generally the sort of thing that could be addressed as bugs
outside development stage 1). (The bulk of the potential issues Zack
was concerned about in 2003-5, that resulted in extended identifiers
being disabled in the absence of -fextended-identifiers, were
effectively eliminated by the audit and fixes I did in 2009, however;
that todo list reflects what was left over after that audit.)
Bootstrapped with no regressions on x86_64-unknown-linux-gnu.
libcpp:
* include/cpp-id-data.h (struct cpp_macro): Update comment
regarding parameters.
* include/cpplib.h (struct cpp_macro_arg, struct cpp_identifier):
Add spelling fields.
(struct cpp_token): Update comment on macro_arg.
* internal.h (_cpp_save_parameter): Add extra argument.
(_cpp_spell_ident_ucns): New declaration.
* lex.c (lex_identifier): Add SPELLING argument. Set *SPELLING to
original spelling of identifier.
(_cpp_lex_direct): Update calls to lex_identifier.
(_cpp_spell_ident_ucns): New function, factored out of
cpp_spell_token.
(cpp_spell_token): Adjust FORSTRING argument semantics to return
original spelling of identifiers. Use _cpp_spell_ident_ucns in
!FORSTRING case.
(_cpp_equiv_tokens): Check spellings of identifiers and macro
arguments are identical.
* macro.c (macro_arg_saved_data): New structure.
(paste_tokens): Use original spellings of identifiers from
cpp_spell_token.
(_cpp_save_parameter): Add argument SPELLING. Save both canonical
node and its value.
(parse_params): Update calls to _cpp_save_parameter.
(lex_expansion_token): Save spelling of macro argument tokens.
(_cpp_create_definition): Extract canonical node from saved data.
(cpp_macro_definition): Use UCNs in spelling of macro name. Use
original spellings of macro argument tokens and identifiers.
* traditional.c (scan_parameters): Update call to
_cpp_save_parameter.
gcc:
* doc/invoke.texi (-std=c99, -std=c11): Don't refer to corner
cases of extended identifiers.
gcc/testsuite:
* g++.dg/cpp/ucnid-2.C, g++.dg/cpp/ucnid-3.C,
gcc.dg/cpp/ucnid-11.c, gcc.dg/cpp/ucnid-12.c,
gcc.dg/cpp/ucnid-13.c, gcc.dg/cpp/ucnid-14.c,
gcc.dg/cpp/ucnid-15.c: New tests.
From-SVN: r217202
2014-11-06 22:08:52 +01:00
|
|
|
/* Parameters, if any. If parameter names use extended identifiers,
|
|
|
|
the original spelling of those identifiers, not the canonical
|
|
|
|
UTF-8 spelling, goes here. */
|
2004-06-09 22:10:13 +02:00
|
|
|
cpp_hashnode ** GTY ((nested_ptr (union tree_node,
|
|
|
|
"%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
|
|
|
|
"%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"),
|
|
|
|
length ("%h.paramc")))
|
|
|
|
params;
|
|
|
|
|
|
|
|
/* Replacement tokens (ISO) or replacement text (traditional). See
|
|
|
|
comment at top of cpptrad.c for how traditional function-like
|
|
|
|
macros are encoded. */
|
|
|
|
union cpp_macro_u
|
|
|
|
{
|
|
|
|
cpp_token * GTY ((tag ("0"), length ("%0.count"))) tokens;
|
2004-11-27 22:59:38 +01:00
|
|
|
const unsigned char * GTY ((tag ("1"))) text;
|
2004-06-09 22:10:13 +02:00
|
|
|
} GTY ((desc ("%1.traditional"))) exp;
|
|
|
|
|
|
|
|
/* Definition line number. */
|
|
|
|
source_location line;
|
|
|
|
|
|
|
|
/* Number of tokens in expansion, or bytes for traditional macros. */
|
|
|
|
unsigned int count;
|
|
|
|
|
|
|
|
/* Number of parameters. */
|
|
|
|
unsigned short paramc;
|
|
|
|
|
|
|
|
/* If a function-like macro. */
|
|
|
|
unsigned int fun_like : 1;
|
|
|
|
|
|
|
|
/* If a variadic macro. */
|
|
|
|
unsigned int variadic : 1;
|
|
|
|
|
|
|
|
/* If macro defined in system header. */
|
|
|
|
unsigned int syshdr : 1;
|
|
|
|
|
|
|
|
/* Nonzero if it has been expanded or had its existence tested. */
|
|
|
|
unsigned int used : 1;
|
|
|
|
|
|
|
|
/* Indicate which field of 'exp' is in use. */
|
|
|
|
unsigned int traditional : 1;
|
2009-04-19 19:10:56 +02:00
|
|
|
|
|
|
|
/* Indicate whether the tokens include extra CPP_PASTE tokens at the
|
|
|
|
end to track invalid redefinitions with consecutive CPP_PASTE
|
|
|
|
tokens. */
|
|
|
|
unsigned int extra_tokens : 1;
|
2004-06-09 22:10:13 +02:00
|
|
|
};
|