gcc/gcc/cppucnid.pl
Zack Weinberg e6cc3a24c2 cpplib.h (CPP_AT_NAME, [...]): New token types.
* cpplib.h (CPP_AT_NAME, CPP_OBJC_STRING): New token types.
	(struct cpp_options): Add narrow_charset, wide_charset,
	bytes_big_endian fields.  Remove EBCDIC field.
	(cpp_init_iconv, cpp_interpret_string): New external interfaces.

	* cpphash.h: Include <iconv.h> if we have it, otherwise
	provide a dummy definition of iconv_t.
	(struct cpp_reader): Add narrow_cset_desc and wide_cset_desc fields.
	(_cpp_valid_ucn): Update prototype.
	(_cpp_destroy_iconv): New prototype.

	* doc/cpp.texi: Document character set handling.
	* doc/cppopts.texi: Document -fexec-charset= and -fwide-exec-charset=.
	* doc/extend.texi: Delete entire section on multiline strings.
	Rewrite section on __FUNCTION__ etc now that these are
	variables in C.

	* cppucnid.tab, cppucnid.pl: New files.
	* cppucnid.h: New generated file.
	* cppcharset.c: Include cppucnid.h.  Lots of commentary added.
	(iconv_open, iconv, iconv_close): Provide dummy definitions
	if !HAVE_ICONV.
	(SOURCE_CHARSET, struct strbuf, init_iconv_desc, cpp_init_iconv,
	_cpp_destroy_iconv, convert_cset, width_to_mask, convert_ucn,
	emit_numeric_escape, convert_hex, convert_oct, convert_escape,
	cpp_interpret_string, narrow_str_to_charconst,
	wide_str_to_charconst): New.
	(ucn_valid_in_identifier): Use a binary search through the
	ucnranges table defined in cppucnid.h, not a long chain of if
	statements.
	(_cpp_valid_ucn): Add a limit pointer.  Downgrade "universal
	character names are only valid in C++ and C99" to a warning.
	Issue the "meaning of \[uU] is different in traditional C"
	warning here.  Take care not to let iconv see an invalid UCS
	value if we get a malformed UCN.  Issue an error if we don't
	have iconv.
	(cpp_interpret_charconst): Moved here from cpplex.c.  Use
	cpp_interpret_string to do the heavy lifting.

	* cppinit.c (cpp_create_reader): Initialize bytes_big_endian,
	narrow_charset, wide_charset fields of options structure.
	(cpp_destroy): Call _cpp_destroy_iconv.
	* cpplex.c (forms_identifier_p): Adjust call to _cpp_valid_ucn.
	(maybe_read_ucn, hex_digit_value, cpp_parse_escape): Delete.
	(cpp_interpret_charconst): Moved to cppcharset.c.
	* cpplib.c (dequote_string): Delete.
	(interpret_string_notranslate): New.
	(do_line, do_linemarker): Use interpret_string_notranslate.

	* Makefile.in (cppcharset.o): Depend on cppucnid.h.

	* c-common.c (fname_string, combine_strings): Delete.
	* c-common.h (fname_string, combine_strings): Delete prototypes.
	* c-lex.c (ignore_escape_flag): Delete.
	(cb_ident): Use cpp_interpret_string, not lex_string.
	(get_nonpadding_token): New function.
	(c_lex): Handle Objective-C @-prefixed identifiers and strings here.
	Adjust calls to lex_string.  Don't write *value twice.
	(lex_string): Now handles string constant concatenation.
	Most of the work handed off to cpp_interpret_string.
	Call fix_string_type here.
	* c-parse.in (STRING_FUNC_NAME, VAR_FUNC_NAME): Replace with
	FUNC_NAME, throughout.
	(OBJC_STRING): New token type.
	(primary:STRING): No need to call fix_string_type here.
	(primary:objc_string): Make that OBJC_STRING.
	(objc_string nonterminal): Delete.
	(yylexname): Delete code to handle fake string constants.
	(yylexstring): Delete entirely.
	(_yylex): Handle CPP_AT_NAME and CPP_OBJC_STRING.  No need
	to handle CPP_ATSIGN.

	* c.opt (-fexec-charset=, -fwide-exec-charset=): New options.
	* c-opts.c (missing_arg, c_common_handle_option): Handle
	OPT_fexec_charset_ and OPT_fwide_exec_charset_.
	(c_common_init): Set cpp_opts->bytes_big_endian, not
	cpp_opts->EBCDIC.  Call cpp_init_iconv.
	(print_help): Document -fexec-charset= and -fwide-exec-charset=.
	(TARGET_EBCDIC): Delete default definition.

	* objc/objc-act.c (build_objc_string_object): No need to
	handle string constant concatenation.

cp:
	* parser.c (cp_lexer_read_token): No need to handle string
	constant concatenation.

testsuite:
	* gcc.c-torture/execute/wchar_t-1.x: New file; XFAIL wchar_t-1.c
	everywhere.
	* gcc.dg/concat.c: Concatenation of string constants with
	__FUNCTION__ / __PRETTY_FUNCTION__ is now a hard error.
	* gcc.dg/wtr-strcat-1.c: Loosen dg-warning regexp.
	* gcc.dg/cpp/escape-2.c: Use wide character constants where
	necessary to avoid multi-character character constant warning.
	* gcc.dg/cpp/escape.c: Likewise.
	* gcc.dg/cpp/ucs.c: Likewise.
	Remove backslashes from dg-bogus comments, as they confuse Tcl.
	Fix a typo.

libstdc++-v3:
	* testsuite/22_locale/collate/compare/wchar_t/2.cc
	* testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc
	* testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc
	* testsuite/22_locale/collate/hash/wchar_t/2.cc
	* testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc
	* testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc
	* testsuite/22_locale/collate/transform/wchar_t/2.cc
	* testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc
	* testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc:
	XFAIL on all targets.

From-SVN: r68952
2003-07-05 00:24:00 +00:00

131 lines
3.0 KiB
Perl

#! /usr/bin/perl -w
use strict;
# Convert cppucnid.tab to cppucnid.h. We use two arrays of length
# 65536 to represent the table, since this is nice and simple. The
# first array holds the tags indicating which ranges are valid in
# which contexts. The second array holds the language name associated
# with each element.
# Per-code-point tables with 65536 slots each, all starting out empty.
# @tags records the standards tag for a code point; @names records the
# language name associated with it.
our @tags  = ("") x 65536;
our @names = ("") x 65536;
# Array mapping tag numbers to standard #defines.
our @stds;
# Standard and language most recently named by a section header.
our ($curstd, $curlang);
# The template is every input line up to, but not including, the first
# line that consists solely of '%%'.  It is kept for the output phase.
our @template;
while (defined(my $line = <>)) {
    chomp $line;
    last if $line eq '%%';
    push @template, $line;
}
# Second block of the file is the UCN tables.
# The format looks like this:
#
# [std]
#
# ; language
# xxxx-xxxx xxxx xxxx-xxxx ....
#
# with comment lines starting with #.
# Classify each remaining input line: comments and blank lines are
# dropped, a "[std]" line selects the current standard, a "; language"
# line selects the current language, and anything else is treated as a
# whitespace-separated list of ranges for process_range.
while (defined(my $line = <>)) {
    chomp $line;
    next if $line =~ /^#/ || $line =~ /^\s*$/;

    if ($line =~ /^\[(.+)\]$/) {
        $curstd = $1;
    }
    elsif ($line =~ /^; (.+)$/) {
        $curlang = $1;
    }
    else {
        process_range(split ' ', $line);
    }
}
# Emit the saved template line by line.  Two placeholder lines are
# substituted: "[dne]" becomes the do-not-edit banner and "[table]"
# becomes the generated range table; every other line is copied as is.
# Setting $\ makes each print below end its output with a newline.
$\ = "\n";
for my $tline (@template) {
    if ($tline eq "[dne]") {
        print "/* Automatically generated from cppucnid.tab, do not edit */";
    }
    elsif ($tline eq "[table]") {
        print_table();
    }
    else {
        print $tline;
    }
}
# Write the range table to standard output, one initializer row per
# maximal run of consecutive code points that share both the same tag
# and the same language name.  Runs whose tag is empty are not valid
# identifier characters and produce no row.  The language name is added
# as a trailing C comment only on the first row of a group of rows that
# carry the same name.  Takes no arguments; reads @tags and @names.
sub print_table {
    my $last_lang = "";
    my $first     = 0;

    while ($first <= $#tags) {
        my $run_tag  = $tags[$first];
        my $run_lang = $names[$first];

        # Advance $past to one beyond the last code point of this run.
        my $past = $first;
        $past++ while $past <= $#tags
                   && $tags[$past] eq $run_tag
                   && $names[$past] eq $run_lang;

        if ($run_tag ne "") {
            # Tags starting with C99 get a leading space (presumably so
            # the column lines up with the other tags -- TODO confirm).
            my $label = $run_tag =~ /^C99/ ? " " . $run_tag : $run_tag;
            my @fields = ($first, $past - 1, $label);

            # printf is used throughout because it does not append $\.
            if ($run_lang eq $last_lang) {
                printf(" { 0x%04x, 0x%04x, %-11s },\n", @fields);
            }
            else {
                printf(" { 0x%04x, 0x%04x, %-11s }, /* %s */\n",
                       @fields, $run_lang);
            }
            $last_lang = $run_lang;
        }

        $first = $past;
    }
}
# Record the identifier characters listed on one line of the UCN table.
#
# Arguments: the whitespace-separated fields of the line.  Each field is
# either one four-digit lowercase hexadecimal number ("xxxx") or an
# inclusive pair of them joined by a hyphen ("xxxx-xxxx").  Any other
# field is reported with a warning and skipped.
#
# For every code point named, the current standard ($curstd) is stored
# in @tags -- prepended with "|" to any tag already present -- and the
# current language ($curlang) is stored in @names.  If the code point
# already belongs to a different language, a warning is issued and the
# earlier language is kept; the tag is still updated in that case.
#
# The return value is not meaningful; the result is the updated tables.
sub process_range {
    for my $range (@_) {
        my ($start, $end);
        if ($range =~ /^[0-9a-f]{4}$/) {
            # A lone code point is handled as a range of length one.
            $start = $end = hex($range);
        }
        elsif ($range =~ /^ ([0-9a-f]{4}) - ([0-9a-f]{4}) $/x) {
            ($start, $end) = (hex($1), hex($2));
        }
        else {
            warn "malformed range expression $range";
            next;
        }

        for my $i ($start .. $end) {
            $tags[$i] = $tags[$i] eq ""
                      ? $curstd
                      : $curstd . "|" . $tags[$i];

            if ($names[$i] ne "" && $names[$i] ne $curlang) {
                # The tag is a string such as "C99", so it is formatted
                # with %s; %d would print 0 and raise an "isn't numeric"
                # warning under -w.
                warn sprintf("language overlap: %s/%s at %x (tag %s)",
                             $names[$i], $curlang, $i, $tags[$i]);
                next;
            }
            $names[$i] = $curlang;
        }
    }
}