c-lex.c (lex_string): Let cpp_parse_escape handle truncation and sign-extension.

* c-lex.c (lex_string): Let cpp_parse_escape handle truncation
	and sign-extension.
	(lex_charconst): Update for change in prototype of
	cpp_interpret_charconst.  Extend from cppchar_t to HOST_WIDE_INT
	appropriately.
	* cpphash.h (BITS_PER_CPPCHAR_T): New.
	* cppinit.c (cpp_create_reader): Initialize the new precision
	members for no change in semantics.
	(cpp_post_options): Add sanity checks.
	* cpplex.c (cpp_parse_escape): Handle precision, sign-extension
	and truncation issues.  Calculate in type cppchar_t.
	(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove.
	(cpp_interpret_charconst): Calculate in type cppchar_t.  Handle
	run-time dependent precision correctly.  Return whether the
	result is signed or not.
	* cpplib.c (dequote_string): Use cppchar_t; update.
	* cpplib.h (cppchar_signed_t): New.
	(struct cpp_options): New precision members.
	(cpp_interpret_charconst, cpp_parse_escape): Update prototypes.

From-SVN: r53152
This commit is contained in:
Neil Booth 2002-05-04 07:30:32 +00:00 committed by Neil Booth
parent ac5ec76848
commit 4268e8bb94
8 changed files with 149 additions and 104 deletions

View File

@ -1,3 +1,25 @@
2002-05-04 Neil Booth <neil@daikokuya.demon.co.uk>
* c-lex.c (lex_string): Let cpp_parse_escape handle truncation
and sign-extension.
(lex_charconst): Update for change in prototype of
cpp_interpret_charconst. Extend from cppchar_t to HOST_WIDE_INT
appropriately.
* cpphash.h (BITS_PER_CPPCHAR_T): New.
* cppinit.c (cpp_create_reader): Initialize the new precision
members for no change in semantics.
(cpp_post_options): Add sanity checks.
* cpplex.c (cpp_parse_escape): Handle precision, sign-extension
and truncation issues. Calculate in type cppchar_t.
(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove.
(cpp_interpret_charconst): Calculate in type cppchar_t. Handle
run-time dependent precision correctly. Return whether the
result is signed or not.
* cpplib.c (dequote_string): Use cppchar_t; update.
* cpplib.h (cppchar_signed_t): New.
(struct cpp_options): New precision members.
(cpp_interpret_charconst, cpp_parse_escape): Update prototypes.
2002-05-03 David S. Miller <davem@redhat.com>
* config/sparc/sparc-protos.h (sparc_rtx_costs): New.

View File

@ -1238,9 +1238,7 @@ lex_string (str, len, wide)
char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
char *q = buf;
const unsigned char *p = str, *limit = str + len;
unsigned int c;
unsigned width = wide ? WCHAR_TYPE_SIZE
: TYPE_PRECISION (char_type_node);
cppchar_t c;
#ifdef MULTIBYTE_CHARS
/* Reset multibyte conversion state. */
@ -1270,15 +1268,7 @@ lex_string (str, len, wide)
#endif
if (c == '\\' && !ignore_escape_flag)
{
unsigned int mask;
if (width < HOST_BITS_PER_INT)
mask = ((unsigned int) 1 << width) - 1;
else
mask = ~0;
c = cpp_parse_escape (parse_in, &p, limit, mask);
}
c = cpp_parse_escape (parse_in, &p, limit, wide);
/* Add this single character into the buffer either as a wchar_t,
a multibyte sequence, or as a single byte. */
@ -1345,45 +1335,31 @@ static tree
lex_charconst (token)
const cpp_token *token;
{
HOST_WIDE_INT result;
cppchar_t result;
tree type, value;
unsigned int chars_seen;
int unsignedp;
result = cpp_interpret_charconst (parse_in, token, warn_multichar,
&chars_seen);
if (token->type == CPP_WCHAR)
{
value = build_int_2 (result, 0);
type = wchar_type_node;
}
&chars_seen, &unsignedp);
/* Cast to cppchar_signed_t to get correct sign-extension of RESULT
before possibly widening to HOST_WIDE_INT for build_int_2. */
if (unsignedp || (cppchar_signed_t) result >= 0)
value = build_int_2 (result, 0);
else
{
if (result < 0)
value = build_int_2 (result, -1);
else
value = build_int_2 (result, 0);
/* In C, a character constant has type 'int'.
In C++ 'char', but multi-char charconsts have type 'int'. */
if (c_language == clk_cplusplus && chars_seen <= 1)
type = char_type_node;
else
type = integer_type_node;
}
value = build_int_2 ((cppchar_signed_t) result, -1);
/* cpp_interpret_charconst issues a warning if the constant
overflows, but if the number fits in HOST_WIDE_INT anyway, it
will return it un-truncated, which may cause problems down the
line. So set the type to widest_integer_literal_type, call
convert to truncate it to the proper type, then clear
TREE_OVERFLOW so we don't get a second warning.
FIXME: cpplib's assessment of overflow may not be accurate on a
platform where the final type can change at (compiler's) runtime. */
TREE_TYPE (value) = widest_integer_literal_type_node;
value = convert (type, value);
TREE_OVERFLOW (value) = 0;
if (token->type == CPP_WCHAR)
type = wchar_type_node;
/* In C, a character constant has type 'int'.
In C++ 'char', but multi-char charconsts have type 'int'. */
else if ((c_language == clk_c || c_language == clk_objective_c)
|| chars_seen > 1)
type = integer_type_node;
else
type = char_type_node;
TREE_TYPE (value) = type;
return value;
}

View File

@ -283,10 +283,10 @@ eval_token (pfile, token)
const cpp_token *token;
{
unsigned int temp;
int unsignedp = 0;
struct op op;
op.op = CPP_NUMBER;
op.unsignedp = 0;
switch (token->type)
{
@ -294,9 +294,8 @@ eval_token (pfile, token)
return parse_number (pfile, token);
case CPP_WCHAR:
op.unsignedp = WCHAR_UNSIGNED;
case CPP_CHAR: /* Always unsigned. */
op.value = cpp_interpret_charconst (pfile, token, 1, &temp);
case CPP_CHAR:
op.value = cpp_interpret_charconst (pfile, token, 1, &temp, &unsignedp);
break;
case CPP_NAME:
@ -331,6 +330,7 @@ eval_token (pfile, token)
op.value = temp;
}
op.unsignedp = unsignedp;
return op;
}

View File

@ -29,6 +29,8 @@ struct directive; /* Deliberately incomplete. */
struct pending_option;
struct op;
#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))
/* Test if a sign is valid within a preprocessing number. */
#define VALID_SIGN(c, prevc) \
(((c) == '+' || (c) == '-') && \

View File

@ -502,6 +502,18 @@ cpp_create_reader (lang)
CPP_OPTION (pfile, pending) =
(struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending));
/* CPP arithmetic done to existing rules for now. */
#define BITS_PER_HOST_WIDEST_INT (CHAR_BIT * sizeof (HOST_WIDEST_INT))
CPP_OPTION (pfile, precision) = BITS_PER_HOST_WIDEST_INT;
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif
CPP_OPTION (pfile, char_precision) = MAX_CHAR_TYPE_SIZE;
#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
CPP_OPTION (pfile, wchar_precision) = MAX_WCHAR_TYPE_SIZE;
/* It's simplest to just create this struct whether or not it will
be needed. */
pfile->deps = deps_init ();
@ -1796,6 +1808,27 @@ cpp_post_options (pfile)
fputc ('\n', stderr);
}
#if ENABLE_CHECKING
/* Sanity checks for CPP arithmetic. */
if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT)
cpp_error (pfile, DL_FATAL,
"preprocessor arithmetic has maximum precision of %u bits; target requires %u bits",
BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision));
if (CPP_OPTION (pfile, char_precision) > BITS_PER_CPPCHAR_T
|| CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T)
cpp_error (pfile, DL_FATAL,
"CPP cannot handle (wide) character constants over %u bits",
BITS_PER_CPPCHAR_T);
{
cppchar_t test = 0;
test--;
if (test < 1)
cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type");
}
#endif
/* Canonicalize in_fname and out_fname. We guarantee they are not
NULL, and that the empty string represents stdin / stdout. */
if (CPP_OPTION (pfile, in_fname) == NULL

View File

@ -1710,23 +1710,33 @@ maybe_read_ucs (pfile, pstr, limit, pc)
return 0;
}
/* Interpret an escape sequence, and return its value. PSTR points to
the input pointer, which is just after the backslash. LIMIT is how
much text we have. MASK is a bitmask for the precision for the
destination type (char or wchar_t).
Handles all relevant diagnostics. */
unsigned int
cpp_parse_escape (pfile, pstr, limit, mask)
/* Returns the value of an escape sequence, truncated to the correct
target precision. PSTR points to the input pointer, which is just
after the backslash. LIMIT is how much text we have. WIDE is true
if the escape sequence is part of a wide character constant or
string literal. Handles all relevant diagnostics. */
cppchar_t
cpp_parse_escape (pfile, pstr, limit, wide)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
unsigned HOST_WIDE_INT mask;
int wide;
{
int unknown = 0;
const unsigned char *str = *pstr;
unsigned int c = *str++;
cppchar_t c, mask;
unsigned int width;
if (wide)
width = CPP_OPTION (pfile, wchar_precision);
else
width = CPP_OPTION (pfile, char_precision);
if (width < BITS_PER_CPPCHAR_T)
mask = ((cppchar_t) 1 << width) - 1;
else
mask = ~0;
c = *str++;
switch (c)
{
case '\\': case '\'': case '"': case '?': break;
@ -1767,7 +1777,7 @@ cpp_parse_escape (pfile, pstr, limit, mask)
"the meaning of '\\x' is different in traditional C");
{
unsigned int i = 0, overflow = 0;
cppchar_t i = 0, overflow = 0;
int digits_found = 0;
while (str < limit)
@ -1798,8 +1808,8 @@ cpp_parse_escape (pfile, pstr, limit, mask)
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
unsigned int i = c - '0';
int count = 0;
size_t count = 0;
cppchar_t i = c - '0';
while (str < limit && ++count < 3)
{
@ -1834,36 +1844,33 @@ cpp_parse_escape (pfile, pstr, limit, mask)
}
if (c > mask)
cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
{
cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
c &= mask;
}
*pstr = str;
return c;
}
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif
#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
/* Interpret a (possibly wide) character constant in TOKEN.
WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points
to a variable that is filled in with the number of characters seen. */
HOST_WIDE_INT
cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
points to a variable that is filled in with the number of
characters seen, and UNSIGNEDP to a variable that indicates whether
the result has unsigned type. */
cppchar_t
cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen, unsignedp)
cpp_reader *pfile;
const cpp_token *token;
int warn_multi;
unsigned int *pchars_seen;
int *unsignedp;
{
const unsigned char *str = token->val.str.text;
const unsigned char *limit = str + token->val.str.len;
unsigned int chars_seen = 0;
unsigned int width, max_chars, c;
unsigned HOST_WIDE_INT mask;
HOST_WIDE_INT result = 0;
unsigned int width, max_chars;
cppchar_t c, mask, result = 0;
bool unsigned_p;
#ifdef MULTIBYTE_CHARS
@ -1873,20 +1880,20 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
/* Width in bits. */
if (token->type == CPP_CHAR)
{
width = MAX_CHAR_TYPE_SIZE;
width = CPP_OPTION (pfile, char_precision);
unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
}
else
{
width = MAX_WCHAR_TYPE_SIZE;
width = CPP_OPTION (pfile, wchar_precision);
unsigned_p = WCHAR_UNSIGNED;
}
if (width < HOST_BITS_PER_WIDE_INT)
mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
if (width < BITS_PER_CPPCHAR_T)
mask = ((cppchar_t) 1 << width) - 1;
else
mask = ~0;
max_chars = HOST_BITS_PER_WIDE_INT / width;
max_chars = BITS_PER_CPPCHAR_T / width;
while (str < limit)
{
@ -1911,7 +1918,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
#endif
if (c == '\\')
c = cpp_parse_escape (pfile, &str, limit, mask);
c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
#ifdef MAP_CHARACTER
if (ISPRINT (c))
@ -1921,7 +1928,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
/* Merge character into result; ignore excess chars. */
if (++chars_seen <= max_chars)
{
if (width < HOST_BITS_PER_WIDE_INT)
if (width < BITS_PER_CPPCHAR_T)
result = (result << width) | (c & mask);
else
result = c;
@ -1943,7 +1950,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
{
unsigned int nbits = chars_seen * width;
mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
mask = (cppchar_t) ~0 >> (BITS_PER_CPPCHAR_T - nbits);
if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
result &= mask;
else
@ -1951,6 +1958,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
}
*pchars_seen = chars_seen;
*unsignedp = unsigned_p;
return result;
}

View File

@ -726,23 +726,15 @@ dequote_string (pfile, str, len)
uchar *result = _cpp_unaligned_alloc (pfile, len + 1);
uchar *dst = result;
const uchar *limit = str + len;
unsigned int c;
unsigned HOST_WIDE_INT mask;
cppchar_t c;
/* We need the mask to match the host's 'unsigned char', not the
target's. */
if (CHAR_BIT < HOST_BITS_PER_WIDE_INT)
mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1;
else
mask = ~(unsigned HOST_WIDE_INT)0;
while (str < limit)
{
c = *str++;
if (c != '\\')
*dst++ = c;
else
*dst++ = cpp_parse_escape (pfile, (const uchar **)&str, limit, mask);
*dst++ = cpp_parse_escape (pfile, &str, limit, 0);
}
*dst++ = '\0';
return result;

View File

@ -190,9 +190,12 @@ struct cpp_token
} val;
};
/* A standalone character. It is unsigned for the same reason we use
unsigned char - to avoid signedness issues. */
/* A type wide enough to hold any multibyte source character.
cpplib's character constant interpreter uses shifts, and so
requires an unsigned type. */
typedef unsigned int cppchar_t;
/* Its signed equivalent. */
typedef int cppchar_signed_t;
/* Values for opts.dump_macros.
dump_only means inhibit output of the preprocessed text
@ -237,6 +240,10 @@ struct cpp_options
/* -fleading_underscore sets this to "_". */
const char *user_label_prefix;
/* Precision for target CPP arithmetic, target characters and target
wide characters, respectively. */
size_t precision, char_precision, wchar_precision;
/* The language we're preprocessing. */
enum c_lang lang;
@ -535,9 +542,9 @@ extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
extern void _cpp_backup_tokens PARAMS ((cpp_reader *, unsigned int));
/* Evaluate a CPP_CHAR or CPP_WCHAR token. */
extern HOST_WIDE_INT
extern cppchar_t
cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
int, unsigned int *));
int, unsigned int *, int *));
extern void cpp_define PARAMS ((cpp_reader *, const char *));
extern void cpp_assert PARAMS ((cpp_reader *, const char *));
@ -600,10 +607,15 @@ extern int cpp_ideq PARAMS ((const cpp_token *,
extern void cpp_output_line PARAMS ((cpp_reader *, FILE *));
extern void cpp_output_token PARAMS ((const cpp_token *, FILE *));
extern const char *cpp_type2name PARAMS ((enum cpp_ttype));
extern unsigned int cpp_parse_escape PARAMS ((cpp_reader *,
const unsigned char **,
const unsigned char *,
unsigned HOST_WIDE_INT));
/* Returns the value of an escape sequence, truncated to the correct
target precision. PSTR points to the input pointer, which is just
after the backslash. LIMIT is how much text we have. WIDE is true
if the escape sequence is part of a wide character constant or
string literal. Handles all relevant diagnostics. */
extern cppchar_t cpp_parse_escape PARAMS ((cpp_reader *,
const unsigned char ** pstr,
const unsigned char *limit,
int wide));
/* In cpphash.c */