lex.c (mbchar.h): #include it.

1998-07-20  Dave Brolley  <brolley@cygnus.com>
	* lex.c (mbchar.h): #include it.
	(GET_ENVIRONMENT): New macro.
	(init_parse): Set character set based on LANG environment variable.
	(real_yylex): Handle multibyte characters in character literals.
	(real_yylex): Handle multibyte characters in string literals.

From-SVN: r21304
This commit is contained in:
Dave Brolley 1998-07-20 13:36:43 +00:00 committed by Dave Brolley
parent 56f48ce976
commit 0d3ba7398c
3 changed files with 199 additions and 102 deletions

View File

@ -1,3 +1,11 @@
1998-07-20 Dave Brolley <brolley@cygnus.com>
* lex.c (mbchar.h): #include it.
(GET_ENVIRONMENT): New macro.
(init_parse): Set character set based on LANG environment variable.
(real_yylex): Handle multibyte characters in character literals.
(real_yylex): Handle multibyte characters in string literals.
1998-07-19 Jason Merrill <jason@yorick.cygnus.com>
* lex.c (do_identifier): Look for class value even if we don't

View File

@ -244,7 +244,7 @@ spew.o : spew.c $(CONFIG_H) $(CXX_TREE_H) \
lex.o : lex.c $(CONFIG_H) $(CXX_TREE_H) \
$(PARSE_H) input.c $(srcdir)/../flags.h hash.h lex.h \
$(srcdir)/../c-pragma.h $(srcdir)/../system.h $(srcdir)/../toplev.h \
$(srcdir)/../output.h
$(srcdir)/../output.h $(srcdir)/../mbchar.h
decl.o : decl.c $(CONFIG_H) $(CXX_TREE_H) $(srcdir)/../flags.h \
lex.h decl.h $(srcdir)/../stack.h $(srcdir)/../output.h \
$(srcdir)/../except.h $(srcdir)/../system.h $(srcdir)/../toplev.h

View File

@ -39,15 +39,13 @@ Boston, MA 02111-1307, USA. */
#include "toplev.h"
#include "output.h"
/* MULTIBYTE_CHARS support only works for native compilers.
??? Ideally what we want is to model widechar support after
the current floating point support. */
#ifdef CROSS_COMPILE
#undef MULTIBYTE_CHARS
#endif
#ifdef MULTIBYTE_CHARS
#include "mbchar.h"
#include <locale.h>
#ifndef GET_ENVIRONMENT
#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
#endif
#endif
#define obstack_chunk_alloc xmalloc
@ -474,6 +472,12 @@ init_parse (filename)
int i;
#ifdef MULTIBYTE_CHARS
/* Change to the native locale for multibyte conversions. */
setlocale (LC_CTYPE, "");
GET_ENVIRONMENT (literal_codeset, "LANG");
#endif
#if USE_CPPLIB
yy_cur = "\n";
yy_lim = yy_cur + 1;
@ -3922,30 +3926,27 @@ real_yylex ()
{
register int result = 0;
register int num_chars = 0;
int chars_seen = 0;
unsigned width = TYPE_PRECISION (char_type_node);
int max_chars;
if (wide_flag)
{
width = WCHAR_TYPE_SIZE;
#ifdef MULTIBYTE_CHARS
max_chars = MB_CUR_MAX;
#else
max_chars = 1;
int longest_char = local_mb_cur_max ();
(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
#endif
}
else
max_chars = TYPE_PRECISION (integer_type_node) / width;
max_chars = TYPE_PRECISION (integer_type_node) / width;
if (wide_flag)
width = WCHAR_TYPE_SIZE;
while (1)
{
tryagain:
c = getch ();
if (c == '\'' || c == EOF)
break;
++chars_seen;
if (c == '\\')
{
int ignore = 0;
@ -3954,7 +3955,7 @@ real_yylex ()
goto tryagain;
if (width < HOST_BITS_PER_INT
&& (unsigned) c >= (1 << width))
warning ("escape sequence out of range for character");
pedwarn ("escape sequence out of range for character");
#ifdef MAP_CHARACTER
if (ISPRINT (c))
c = MAP_CHARACTER (c);
@ -3963,21 +3964,79 @@ real_yylex ()
else if (c == '\n')
{
if (pedantic)
pedwarn ("ANSI C++ forbids newline in character constant");
pedwarn ("ANSI C forbids newline in character constant");
lineno++;
}
#ifdef MAP_CHARACTER
else
c = MAP_CHARACTER (c);
{
#ifdef MULTIBYTE_CHARS
wchar_t wc;
int i;
int char_len = -1;
for (i = 1; i <= longest_char; ++i)
{
if (i > maxtoken - 4)
extend_token_buffer (token_buffer);
token_buffer[i] = c;
char_len = local_mbtowc (& wc,
token_buffer + 1,
i);
if (char_len != -1)
break;
c = getch ();
}
if (char_len > 1)
{
/* mbtowc sometimes needs an extra char before accepting */
if (char_len < i)
put_back (c);
if (! wide_flag)
{
/* Merge character into result; ignore excess chars. */
for (i = 1; i <= char_len; ++i)
{
if (i > max_chars)
break;
if (width < HOST_BITS_PER_INT)
result = (result << width)
| (token_buffer[i]
& ((1 << width) - 1));
else
result = token_buffer[i];
}
num_chars += char_len;
goto tryagain;
}
c = wc;
}
else
{
if (char_len == -1)
warning ("Ignoring invalid multibyte character");
if (wide_flag)
c = wc;
#ifdef MAP_CHARACTER
else
c = MAP_CHARACTER (c);
#endif
}
#else /* ! MULTIBYTE_CHARS */
#ifdef MAP_CHARACTER
c = MAP_CHARACTER (c);
#endif
#endif /* ! MULTIBYTE_CHARS */
}
num_chars++;
if (num_chars > maxtoken - 4)
extend_token_buffer (token_buffer);
token_buffer[num_chars] = c;
if (wide_flag)
{
if (chars_seen == 1) /* only keep the first one */
result = c;
goto tryagain;
}
/* Merge character into result; ignore excess chars. */
num_chars++;
if (num_chars < max_chars + 1)
{
if (width < HOST_BITS_PER_INT)
@ -3987,19 +4046,16 @@ real_yylex ()
}
}
token_buffer[num_chars + 1] = '\'';
token_buffer[num_chars + 2] = 0;
if (c != '\'')
error ("malformatted character constant");
else if (num_chars == 0)
else if (chars_seen == 0)
error ("empty character constant");
else if (num_chars > max_chars)
{
num_chars = max_chars;
error ("character constant too long");
}
else if (num_chars != 1 && warn_multichar)
else if (chars_seen != 1 && warn_multichar)
warning ("multi-character character constant");
/* If char type is signed, sign-extend the constant. */
@ -4012,37 +4068,21 @@ real_yylex ()
else if (TREE_UNSIGNED (char_type_node)
|| ((result >> (num_bits - 1)) & 1) == 0)
yylval.ttype
= build_int_2 (result & ((unsigned HOST_WIDE_INT) ~0
= build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
>> (HOST_BITS_PER_WIDE_INT - num_bits)),
0);
else
yylval.ttype
= build_int_2 (result | ~((unsigned HOST_WIDE_INT) ~0
= build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
>> (HOST_BITS_PER_WIDE_INT - num_bits)),
-1);
if (num_chars<=1)
if (chars_seen <= 1)
TREE_TYPE (yylval.ttype) = char_type_node;
else
TREE_TYPE (yylval.ttype) = integer_type_node;
}
else
{
#ifdef MULTIBYTE_CHARS
/* Set the initial shift state and convert the next sequence. */
result = 0;
/* In all locales L'\0' is zero and mbtowc will return zero,
so don't use it. */
if (num_chars > 1
|| (num_chars == 1 && token_buffer[1] != '\0'))
{
wchar_t wc;
(void) mbtowc (NULL, NULL, 0);
if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
result = wc;
else
warning ("Ignoring invalid multibyte character");
}
#endif
yylval.ttype = build_int_2 (result, 0);
TREE_TYPE (yylval.ttype) = wchar_type_node;
}
@ -4055,6 +4095,12 @@ real_yylex ()
string_constant:
{
register char *p;
unsigned width = wide_flag ? WCHAR_TYPE_SIZE
: TYPE_PRECISION (char_type_node);
#ifdef MULTIBYTE_CHARS
int longest_char = local_mb_cur_max ();
(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
#endif
c = getch ();
p = token_buffer + 1;
@ -4068,9 +4114,8 @@ real_yylex ()
c = readescape (&ignore);
if (ignore)
goto skipnewline;
if (!wide_flag
&& TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
&& c >= ((unsigned) 1 << TYPE_PRECISION (char_type_node)))
if (width < HOST_BITS_PER_INT
&& (unsigned) c >= (1 << width))
warning ("escape sequence out of range for character");
}
else if (c == '\n')
@ -4079,10 +4124,74 @@ real_yylex ()
pedwarn ("ANSI C++ forbids newline in string constant");
lineno++;
}
else
{
#ifdef MULTIBYTE_CHARS
wchar_t wc;
int i;
int char_len = -1;
for (i = 0; i < longest_char; ++i)
{
if (p + i == token_buffer + maxtoken)
p = extend_token_buffer (p);
p[i] = c;
if (p == token_buffer + maxtoken)
p = extend_token_buffer (p);
*p++ = c;
char_len = local_mbtowc (& wc, p, i + 1);
if (char_len != -1)
break;
c = getch ();
}
if (char_len == -1)
warning ("Ignoring invalid multibyte character");
else
{
/* mbtowc sometimes needs an extra char before accepting */
if (char_len <= i)
put_back (c);
if (wide_flag)
{
*(wchar_t *)p = wc;
p += sizeof (wc);
}
else
p += (i + 1);
c = getch ();
continue;
}
#endif /* MULTIBYTE_CHARS */
}
/* Add this single character into the buffer either as a wchar_t
or as a single byte. */
if (wide_flag)
{
unsigned width = TYPE_PRECISION (char_type_node);
unsigned bytemask = (1 << width) - 1;
int byte;
if (p + WCHAR_BYTES >= token_buffer + maxtoken)
p = extend_token_buffer (p);
for (byte = 0; byte < WCHAR_BYTES; ++byte)
{
int value;
if (byte >= sizeof (c))
value = 0;
else
value = (c >> (byte * width)) & bytemask;
if (BYTES_BIG_ENDIAN)
p[WCHAR_BYTES - byte - 1] = value;
else
p[byte] = value;
}
p += WCHAR_BYTES;
}
else
{
if (p == token_buffer + maxtoken)
p = extend_token_buffer (p);
*p++ = c;
}
skipnewline:
c = getch ();
@ -4091,56 +4200,36 @@ real_yylex ()
break;
}
}
*p = 0;
/* Terminate the string value, either with a single byte zero
or with a wide zero. */
if (wide_flag)
{
if (p + WCHAR_BYTES >= token_buffer + maxtoken)
p = extend_token_buffer (p);
bzero (p, WCHAR_BYTES);
p += WCHAR_BYTES;
}
else
{
if (p == token_buffer + maxtoken)
p = extend_token_buffer (p);
*p++ = 0;
}
/* We have read the entire constant.
Construct a STRING_CST for the result. */
if (processing_template_decl)
push_obstacks (&permanent_obstack, &permanent_obstack);
yylval.ttype = build_string (p - (token_buffer + 1), token_buffer + 1);
if (processing_template_decl)
pop_obstacks ();
if (wide_flag)
{
/* If this is a L"..." wide-string, convert the multibyte string
to a wide character string. */
char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
int len;
#ifdef MULTIBYTE_CHARS
len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
if (len < 0 || len >= (p - token_buffer))
{
warning ("Ignoring invalid multibyte string");
len = 0;
}
bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
#else
{
char *wp, *cp;
wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0);
bzero (widep, (p - token_buffer) * WCHAR_BYTES);
for (cp = token_buffer + 1; cp < p; cp++)
*wp = *cp, wp += WCHAR_BYTES;
len = p - token_buffer - 1;
}
#endif
if (processing_template_decl)
push_obstacks (&permanent_obstack, &permanent_obstack);
yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
if (processing_template_decl)
pop_obstacks ();
TREE_TYPE (yylval.ttype) = wchar_array_type_node;
}
TREE_TYPE (yylval.ttype) = wchar_array_type_node;
else
{
if (processing_template_decl)
push_obstacks (&permanent_obstack, &permanent_obstack);
yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
if (processing_template_decl)
pop_obstacks ();
TREE_TYPE (yylval.ttype) = char_array_type_node;
}
*p++ = '"';
*p = 0;
TREE_TYPE (yylval.ttype) = char_array_type_node;
value = STRING; break;
}