configure.in (enable_c_mbchar): New configure option.
Mon Jul 20 16:16:38 1998 Dave Brolley <brolley@cygnus.com> * configure.in (enable_c_mbchar): New configure option. (extra_cpp_objs): Always available now. * cexp.y (mbchar.h): #include it. (yylex): Handle Multibyte characters in character literals. * cccp.c (mbchar.h): #include it. (main): Set character set based on LANG environment variable. (rescan): Handle multibyte characters in comments. (skip_if_group): See above. (validate_else): See above. (skip_to_end_of_comment): See above. (macarg1): See above. (discard_comments): See above. (rescan): Handle multibyte characters in string and character literals. (collect_expansion): See above. (skip_quoted_string): See above. (macroexpand): See above. (macarg1): See above. (discard_comments): See above. (change_newlines): See above. * c-lex.c (mbchar.h): #include it. (GET_ENVIRONMENT): New macro. (init_lex): Set character set based on LANG environment variable. (yylex): Handle multibyte characters in character literals. (yylex): Handle multibyte characters in string literals. * Makefile.in (mbchar.o): New target. (cccp$(exeext)): @extra_cpp_objs@ is always available. (cppmain$(exeext)): @extra_cpp_objs@ is always available. * mbchar.[ch]: New files for multibyte character handling. From-SVN: r21303
This commit is contained in:
parent
689fcba861
commit
56f48ce976
@ -1,3 +1,39 @@
|
||||
Mon Jul 20 16:16:38 1998 Dave Brolley <brolley@cygnus.com>
|
||||
|
||||
* configure.in (enable_c_mbchar): New configure option.
|
||||
(extra_cpp_objs): Always available now.
|
||||
|
||||
* cexp.y (mbchar.h): #include it.
|
||||
(yylex): Handle Multibyte characters in character literals.
|
||||
|
||||
* cccp.c (mbchar.h): #include it.
|
||||
(main): Set character set based on LANG environment variable.
|
||||
(rescan): Handle multibyte characters in comments.
|
||||
(skip_if_group): See above.
|
||||
(validate_else): See above.
|
||||
(skip_to_end_of_comment): See above.
|
||||
(macarg1): See above.
|
||||
(discard_comments): See above.
|
||||
(rescan): Handle multibyte characters in string and character literals.
|
||||
(collect_expansion): See above.
|
||||
(skip_quoted_string): See above.
|
||||
(macroexpand): See above.
|
||||
(macarg1): See above.
|
||||
(discard_comments): See above.
|
||||
(change_newlines): See above.
|
||||
|
||||
* c-lex.c (mbchar.h): #include it.
|
||||
(GET_ENVIRONMENT): New macro.
|
||||
(init_lex): Set character set based on LANG environment variable.
|
||||
(yylex): Handle multibyte characters in character literals.
|
||||
(yylex): Handle multibyte characters in string literals.
|
||||
|
||||
* Makefile.in (mbchar.o): New target.
|
||||
(cccp$(exeext)): @extra_cpp_objs@ is always available.
|
||||
(cppmain$(exeext)): @extra_cpp_objs@ is always available.
|
||||
|
||||
* mbchar.[ch]: New files for multibyte character handling.
|
||||
|
||||
Mon Jul 20 01:11:11 1998 David S. Miller <davem@pierdol.cobaltmicro.com>
|
||||
|
||||
* jump.c (jump_optimize): When simplifying noop moves and
|
||||
|
@ -641,7 +641,8 @@ OBJS = toplev.o version.o tree.o print-tree.o stor-layout.o fold-const.o \
|
||||
regclass.o local-alloc.o global.o reload.o reload1.o caller-save.o gcse.o \
|
||||
insn-peep.o reorg.o $(SCHED_PREFIX)sched.o final.o recog.o reg-stack.o \
|
||||
insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o \
|
||||
profile.o insn-attrtab.o $(out_object_file) getpwd.o $(EXTRA_OBJS) convert.o
|
||||
profile.o insn-attrtab.o $(out_object_file) getpwd.o $(EXTRA_OBJS) convert.o \
|
||||
mbchar.o
|
||||
|
||||
# GEN files are listed separately, so they can be built before doing parallel
|
||||
# makes for cc1 or cc1plus. Otherwise sequent parallel make attempts to load
|
||||
@ -1275,13 +1276,14 @@ c-lang.o : c-lang.c $(CONFIG_H) system.h $(TREE_H) c-tree.h c-lex.h toplev.h \
|
||||
output.h
|
||||
c-lex.o : c-lex.c $(CONFIG_H) system.h $(TREE_H) $(RTL_H) c-lex.h c-tree.h \
|
||||
$(srcdir)/c-parse.h input.h flags.h $(srcdir)/c-gperf.h c-pragma.h \
|
||||
toplev.h output.h
|
||||
toplev.h output.h mbchar.h
|
||||
c-aux-info.o : c-aux-info.c $(CONFIG_H) system.h $(TREE_H) c-tree.h flags.h
|
||||
c-convert.o : c-convert.c $(CONFIG_H) system.h $(TREE_H) flags.h toplev.h
|
||||
c-pragma.o: c-pragma.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) except.h \
|
||||
function.h defaults.h c-pragma.h toplev.h
|
||||
c-iterate.o: c-iterate.c $(CONFIG_H) system.h $(TREE_H) $(RTL_H) c-tree.h \
|
||||
flags.h toplev.h $(EXPR_H)
|
||||
mbchar.o: $(CONFIG_H) system.h gansidecl.h mbchar.h
|
||||
|
||||
collect2$(exeext): collect2.o tlink.o hash.o cplus-dem.o underscore.o \
|
||||
version.o choose-temp.o mkstemp.o $(LIBDEPS)
|
||||
@ -1816,15 +1818,16 @@ $(HOST_PREFIX_1):
|
||||
cpp$(exeext): $(CCCP)$(exeext)
|
||||
-rm -f cpp$(exeext)
|
||||
$(LN) $(CCCP)$(exeext) cpp$(exeext)
|
||||
cccp$(exeext): cccp.o cexp.o version.o prefix.o $(LIBDEPS)
|
||||
$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cccp.o cexp.o prefix.o \
|
||||
version.o $(LIBS)
|
||||
cccp$(exeext): cccp.o cexp.o version.o prefix.o mbchar.o @extra_cpp_objs@ $(LIBDEPS)
|
||||
$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cccp.o cexp.o prefix.o mbchar.o \
|
||||
version.o @extra_cpp_objs@ $(LIBS)
|
||||
cexp.o: $(srcdir)/cexp.c $(CONFIG_H) system.h gansidecl.h
|
||||
$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) -c $(srcdir)/cexp.c
|
||||
$(srcdir)/cexp.c: $(srcdir)/cexp.y
|
||||
cd $(srcdir); $(BISON) -o cexp.c cexp.y
|
||||
|
||||
cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h
|
||||
cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h \
|
||||
mbchar.h
|
||||
$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
|
||||
-DGCC_INCLUDE_DIR=\"$(libsubdir)/include\" \
|
||||
-DGPLUSPLUS_INCLUDE_DIR=\"$(gxx_include_dir)\" \
|
||||
@ -1835,8 +1838,9 @@ cccp.o: cccp.c $(CONFIG_H) pcp.h version.c config.status system.h gansidecl.h
|
||||
-c `echo $(srcdir)/cccp.c | sed 's,^\./,,'`
|
||||
|
||||
cppmain$(exeext): cppmain.o cpplib.o cpphash.o cppalloc.o cpperror.o cppexp.o \
|
||||
prefix.o version.o $(LIBDEPS)
|
||||
prefix.o version.o mbchar.o @extra_cpp_objs@ $(LIBDEPS)
|
||||
$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ cppmain.o cpplib.o cpphash.o \
|
||||
mbchar.o @extra_cpp_objs@ \
|
||||
cppalloc.o cpperror.o cppexp.o prefix.o version.o $(LIBS)
|
||||
|
||||
cppmain.o: cppmain.c $(CONFIG_H) cpplib.h system.h gansidecl.h
|
||||
|
264
gcc/c-lex.c
264
gcc/c-lex.c
@ -33,16 +33,14 @@ Boston, MA 02111-1307, USA. */
|
||||
#include "c-pragma.h"
|
||||
#include "toplev.h"
|
||||
|
||||
/* MULTIBYTE_CHARS support only works for native compilers.
|
||||
??? Ideally what we want is to model widechar support after
|
||||
the current floating point support. */
|
||||
#ifdef CROSS_COMPILE
|
||||
#undef MULTIBYTE_CHARS
|
||||
#endif
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
#include "mbchar.h"
|
||||
#include <locale.h>
|
||||
|
||||
#ifndef GET_ENVIRONMENT
|
||||
#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
|
||||
#endif
|
||||
#endif /* MULTIBYTE_CHARS */
|
||||
|
||||
#if USE_CPPLIB
|
||||
#include "cpplib.h"
|
||||
@ -232,6 +230,7 @@ init_lex ()
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
/* Change to the native locale for multibyte conversions. */
|
||||
setlocale (LC_CTYPE, "");
|
||||
GET_ENVIRONMENT (literal_codeset, "LANG");
|
||||
#endif
|
||||
|
||||
maxtoken = 40;
|
||||
@ -1795,30 +1794,27 @@ yylex ()
|
||||
{
|
||||
register int result = 0;
|
||||
register int num_chars = 0;
|
||||
int chars_seen = 0;
|
||||
unsigned width = TYPE_PRECISION (char_type_node);
|
||||
int max_chars;
|
||||
|
||||
if (wide_flag)
|
||||
{
|
||||
width = WCHAR_TYPE_SIZE;
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
max_chars = MB_CUR_MAX;
|
||||
#else
|
||||
max_chars = 1;
|
||||
int longest_char = local_mb_cur_max ();
|
||||
(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
max_chars = TYPE_PRECISION (integer_type_node) / width;
|
||||
|
||||
max_chars = TYPE_PRECISION (integer_type_node) / width;
|
||||
if (wide_flag)
|
||||
width = WCHAR_TYPE_SIZE;
|
||||
|
||||
while (1)
|
||||
{
|
||||
tryagain:
|
||||
|
||||
c = GETC();
|
||||
|
||||
if (c == '\'' || c == EOF)
|
||||
break;
|
||||
|
||||
++chars_seen;
|
||||
if (c == '\\')
|
||||
{
|
||||
int ignore = 0;
|
||||
@ -1839,18 +1835,76 @@ yylex ()
|
||||
pedwarn ("ANSI C forbids newline in character constant");
|
||||
lineno++;
|
||||
}
|
||||
#ifdef MAP_CHARACTER
|
||||
else
|
||||
c = MAP_CHARACTER (c);
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
wchar_t wc;
|
||||
int i;
|
||||
int char_len = -1;
|
||||
for (i = 1; i <= longest_char; ++i)
|
||||
{
|
||||
if (i > maxtoken - 4)
|
||||
extend_token_buffer (token_buffer);
|
||||
|
||||
token_buffer[i] = c;
|
||||
char_len = local_mbtowc (& wc,
|
||||
token_buffer + 1,
|
||||
i);
|
||||
if (char_len != -1)
|
||||
break;
|
||||
c = GETC ();
|
||||
}
|
||||
if (char_len > 1)
|
||||
{
|
||||
/* mbtowc sometimes needs an extra char before accepting */
|
||||
if (char_len < i)
|
||||
UNGETC (c);
|
||||
if (! wide_flag)
|
||||
{
|
||||
/* Merge character into result; ignore excess chars. */
|
||||
for (i = 1; i <= char_len; ++i)
|
||||
{
|
||||
if (i > max_chars)
|
||||
break;
|
||||
if (width < HOST_BITS_PER_INT)
|
||||
result = (result << width)
|
||||
| (token_buffer[i]
|
||||
& ((1 << width) - 1));
|
||||
else
|
||||
result = token_buffer[i];
|
||||
}
|
||||
num_chars += char_len;
|
||||
goto tryagain;
|
||||
}
|
||||
c = wc;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (char_len == -1)
|
||||
warning ("Ignoring invalid multibyte character");
|
||||
if (wide_flag)
|
||||
c = wc;
|
||||
#ifdef MAP_CHARACTER
|
||||
else
|
||||
c = MAP_CHARACTER (c);
|
||||
#endif
|
||||
}
|
||||
#else /* ! MULTIBYTE_CHARS */
|
||||
#ifdef MAP_CHARACTER
|
||||
c = MAP_CHARACTER (c);
|
||||
#endif
|
||||
#endif /* ! MULTIBYTE_CHARS */
|
||||
}
|
||||
|
||||
num_chars++;
|
||||
if (num_chars > maxtoken - 4)
|
||||
extend_token_buffer (token_buffer);
|
||||
|
||||
token_buffer[num_chars] = c;
|
||||
if (wide_flag)
|
||||
{
|
||||
if (chars_seen == 1) /* only keep the first one */
|
||||
result = c;
|
||||
goto tryagain;
|
||||
}
|
||||
|
||||
/* Merge character into result; ignore excess chars. */
|
||||
num_chars += (width / TYPE_PRECISION (char_type_node));
|
||||
if (num_chars < max_chars + 1)
|
||||
{
|
||||
if (width < HOST_BITS_PER_INT)
|
||||
@ -1860,19 +1914,16 @@ yylex ()
|
||||
}
|
||||
}
|
||||
|
||||
token_buffer[num_chars + 1] = '\'';
|
||||
token_buffer[num_chars + 2] = 0;
|
||||
|
||||
if (c != '\'')
|
||||
error ("malformatted character constant");
|
||||
else if (num_chars == 0)
|
||||
else if (chars_seen == 0)
|
||||
error ("empty character constant");
|
||||
else if (num_chars > max_chars)
|
||||
{
|
||||
num_chars = max_chars;
|
||||
error ("character constant too long");
|
||||
}
|
||||
else if (num_chars != 1 && ! flag_traditional && warn_multichar)
|
||||
else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
|
||||
warning ("multi-character character constant");
|
||||
|
||||
/* If char type is signed, sign-extend the constant. */
|
||||
@ -1897,22 +1948,6 @@ yylex ()
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
/* Set the initial shift state and convert the next sequence. */
|
||||
result = 0;
|
||||
/* In all locales L'\0' is zero and mbtowc will return zero,
|
||||
so don't use it. */
|
||||
if (num_chars > 1
|
||||
|| (num_chars == 1 && token_buffer[1] != '\0'))
|
||||
{
|
||||
wchar_t wc;
|
||||
(void) mbtowc (NULL_PTR, NULL_PTR, 0);
|
||||
if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
|
||||
result = wc;
|
||||
else
|
||||
warning ("Ignoring invalid multibyte character");
|
||||
}
|
||||
#endif
|
||||
yylval.ttype = build_int_2 (result, 0);
|
||||
TREE_TYPE (yylval.ttype) = wchar_type_node;
|
||||
}
|
||||
@ -1924,7 +1959,13 @@ yylex ()
|
||||
case '"':
|
||||
string_constant:
|
||||
{
|
||||
c = GETC();
|
||||
unsigned width = wide_flag ? WCHAR_TYPE_SIZE
|
||||
: TYPE_PRECISION (char_type_node);
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int longest_char = local_mb_cur_max ();
|
||||
(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
|
||||
#endif
|
||||
c = GETC ();
|
||||
p = token_buffer + 1;
|
||||
|
||||
while (c != '"' && c >= 0)
|
||||
@ -1935,9 +1976,8 @@ yylex ()
|
||||
c = readescape (&ignore);
|
||||
if (ignore)
|
||||
goto skipnewline;
|
||||
if (!wide_flag
|
||||
&& TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
|
||||
&& c >= (1 << TYPE_PRECISION (char_type_node)))
|
||||
if (width < HOST_BITS_PER_INT
|
||||
&& (unsigned) c >= (1 << width))
|
||||
pedwarn ("escape sequence out of range for character");
|
||||
}
|
||||
else if (c == '\n')
|
||||
@ -1946,15 +1986,94 @@ yylex ()
|
||||
pedwarn ("ANSI C forbids newline in string constant");
|
||||
lineno++;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
wchar_t wc;
|
||||
int i;
|
||||
int char_len = -1;
|
||||
for (i = 0; i < longest_char; ++i)
|
||||
{
|
||||
if (p + i == token_buffer + maxtoken)
|
||||
p = extend_token_buffer (p);
|
||||
p[i] = c;
|
||||
|
||||
if (p == token_buffer + maxtoken)
|
||||
p = extend_token_buffer (p);
|
||||
*p++ = c;
|
||||
char_len = local_mbtowc (& wc, p, i + 1);
|
||||
if (char_len != -1)
|
||||
break;
|
||||
c = GETC ();
|
||||
}
|
||||
if (char_len == -1)
|
||||
warning ("Ignoring invalid multibyte character");
|
||||
else
|
||||
{
|
||||
/* mbtowc sometimes needs an extra char before accepting */
|
||||
if (char_len <= i)
|
||||
UNGETC (c);
|
||||
if (wide_flag)
|
||||
{
|
||||
*(wchar_t *)p = wc;
|
||||
p += sizeof (wc);
|
||||
}
|
||||
else
|
||||
p += (i + 1);
|
||||
c = GETC ();
|
||||
continue;
|
||||
}
|
||||
#endif /* MULTIBYTE_CHARS */
|
||||
}
|
||||
|
||||
/* Add this single character into the buffer either as a wchar_t
|
||||
or as a single byte. */
|
||||
if (wide_flag)
|
||||
{
|
||||
unsigned width = TYPE_PRECISION (char_type_node);
|
||||
unsigned bytemask = (1 << width) - 1;
|
||||
int byte;
|
||||
|
||||
if (p + WCHAR_BYTES >= token_buffer + maxtoken)
|
||||
p = extend_token_buffer (p);
|
||||
|
||||
for (byte = 0; byte < WCHAR_BYTES; ++byte)
|
||||
{
|
||||
int value;
|
||||
if (byte >= sizeof (c))
|
||||
value = 0;
|
||||
else
|
||||
value = (c >> (byte * width)) & bytemask;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
p[WCHAR_BYTES - byte - 1] = value;
|
||||
else
|
||||
p[byte] = value;
|
||||
}
|
||||
p += WCHAR_BYTES;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (p == token_buffer + maxtoken)
|
||||
p = extend_token_buffer (p);
|
||||
*p++ = c;
|
||||
}
|
||||
|
||||
skipnewline:
|
||||
c = GETC();
|
||||
c = GETC ();
|
||||
}
|
||||
|
||||
/* Terminate the string value, either with a single byte zero
|
||||
or with a wide zero. */
|
||||
if (wide_flag)
|
||||
{
|
||||
if (p + WCHAR_BYTES >= token_buffer + maxtoken)
|
||||
p = extend_token_buffer (p);
|
||||
bzero (p, WCHAR_BYTES);
|
||||
p += WCHAR_BYTES;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (p == token_buffer + maxtoken)
|
||||
p = extend_token_buffer (p);
|
||||
*p++ = 0;
|
||||
}
|
||||
*p = 0;
|
||||
|
||||
if (c < 0)
|
||||
error ("Unterminated string constant");
|
||||
@ -1964,52 +2083,27 @@ yylex ()
|
||||
|
||||
if (wide_flag)
|
||||
{
|
||||
/* If this is a L"..." wide-string, convert the multibyte string
|
||||
to a wide character string. */
|
||||
char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
|
||||
int len;
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
|
||||
if (len < 0 || len >= (p - token_buffer))
|
||||
{
|
||||
warning ("Ignoring invalid multibyte string");
|
||||
len = 0;
|
||||
}
|
||||
bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
|
||||
#else
|
||||
{
|
||||
char *wp, *cp;
|
||||
|
||||
wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0);
|
||||
bzero (widep, (p - token_buffer) * WCHAR_BYTES);
|
||||
for (cp = token_buffer + 1; cp < p; cp++)
|
||||
*wp = *cp, wp += WCHAR_BYTES;
|
||||
len = p - token_buffer - 1;
|
||||
}
|
||||
#endif
|
||||
yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
|
||||
yylval.ttype = build_string (p - (token_buffer + 1),
|
||||
token_buffer + 1);
|
||||
TREE_TYPE (yylval.ttype) = wchar_array_type_node;
|
||||
value = STRING;
|
||||
}
|
||||
else if (objc_flag)
|
||||
{
|
||||
/* Return an Objective-C @"..." constant string object. */
|
||||
yylval.ttype = build_objc_string (p - token_buffer,
|
||||
yylval.ttype = build_objc_string (p - (token_buffer + 1),
|
||||
token_buffer + 1);
|
||||
TREE_TYPE (yylval.ttype) = char_array_type_node;
|
||||
value = OBJC_STRING;
|
||||
}
|
||||
else
|
||||
{
|
||||
yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
|
||||
yylval.ttype = build_string (p - (token_buffer + 1),
|
||||
token_buffer + 1);
|
||||
TREE_TYPE (yylval.ttype) = char_array_type_node;
|
||||
value = STRING;
|
||||
}
|
||||
|
||||
*p++ = '"';
|
||||
*p = 0;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
359
gcc/cccp.c
359
gcc/cccp.c
@ -45,6 +45,11 @@ typedef unsigned char U_CHAR;
|
||||
#include "gansidecl.h"
|
||||
#include "pcp.h"
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
#include "mbchar.h"
|
||||
#include <locale.h>
|
||||
#endif /* MULTIBYTE_CHARS */
|
||||
|
||||
#ifndef GET_ENVIRONMENT
|
||||
#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ENV_VALUE = getenv (ENV_NAME)
|
||||
#endif
|
||||
@ -1308,6 +1313,12 @@ main (argc, argv)
|
||||
bzero ((char *) pend_assertions, argc * sizeof (char *));
|
||||
bzero ((char *) pend_includes, argc * sizeof (char *));
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
/* Change to the native locale for multibyte conversions. */
|
||||
setlocale (LC_CTYPE, "");
|
||||
GET_ENVIRONMENT (literal_codeset, "LANG");
|
||||
#endif
|
||||
|
||||
/* Process switches and find input file name. */
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
@ -2774,9 +2785,27 @@ do { ip = &instack[indepth]; \
|
||||
bp += 2;
|
||||
else if (*bp == '/' && bp[1] == '*') {
|
||||
bp += 2;
|
||||
while (!(*bp == '*' && bp[1] == '/'))
|
||||
bp++;
|
||||
bp += 2;
|
||||
while (1)
|
||||
{
|
||||
if (*bp == '*')
|
||||
{
|
||||
if (bp[1] == '/')
|
||||
{
|
||||
bp += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, limit - bp);
|
||||
if (length > 1)
|
||||
bp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
bp++;
|
||||
}
|
||||
}
|
||||
/* There is no point in trying to deal with C++ // comments here,
|
||||
because if there is one, then this # must be part of the
|
||||
@ -2937,6 +2966,24 @@ do { ip = &instack[indepth]; \
|
||||
if (ibp[-1] == c)
|
||||
goto while2end;
|
||||
break;
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
default:
|
||||
{
|
||||
int length;
|
||||
--ibp;
|
||||
length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 0)
|
||||
{
|
||||
--obp;
|
||||
bcopy (ibp, obp, length);
|
||||
obp += length;
|
||||
ibp += length;
|
||||
}
|
||||
else
|
||||
++ibp;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
while2end:
|
||||
@ -2983,6 +3030,15 @@ do { ip = &instack[indepth]; \
|
||||
*obp++ = '\n';
|
||||
++op->lineno;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
ibp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -3071,6 +3127,16 @@ do { ip = &instack[indepth]; \
|
||||
goto limit_reached;
|
||||
}
|
||||
break;
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
default:
|
||||
{
|
||||
int length;
|
||||
length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
ibp += (length - 1);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
comment_end:
|
||||
@ -3433,11 +3499,27 @@ randomchar:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*ibp == '\n') {
|
||||
else if (*ibp == '\n') {
|
||||
/* Newline in a file. Count it. */
|
||||
++ip->lineno;
|
||||
++op->lineno;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
{
|
||||
if (put_out_comments)
|
||||
{
|
||||
bcopy (ibp, obp, length - 1);
|
||||
obp += length - 1;
|
||||
}
|
||||
ibp += (length - 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (put_out_comments)
|
||||
*obp++ = *ibp;
|
||||
}
|
||||
@ -3448,9 +3530,32 @@ randomchar:
|
||||
} else if (! traditional) {
|
||||
*obp++ = ' ';
|
||||
}
|
||||
for (ibp += 2; *ibp != '\n' || ibp[-1] == '\\'; ibp++)
|
||||
if (put_out_comments)
|
||||
*obp++ = *ibp;
|
||||
for (ibp += 2; ; ibp++)
|
||||
{
|
||||
if (*ibp == '\n')
|
||||
{
|
||||
if (ibp[-1] != '\\')
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
{
|
||||
if (put_out_comments)
|
||||
{
|
||||
bcopy (ibp, obp, length - 1);
|
||||
obp += length - 1;
|
||||
}
|
||||
ibp += (length - 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (put_out_comments)
|
||||
*obp++ = *ibp;
|
||||
}
|
||||
} else
|
||||
break;
|
||||
}
|
||||
@ -6186,6 +6291,25 @@ collect_expansion (buf, end, nargs, arglist)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
/* Handle multibyte characters inside string and character literals. */
|
||||
if (expected_delimiter != '\0')
|
||||
{
|
||||
int length;
|
||||
--p;
|
||||
length = local_mblen (p, limit - p);
|
||||
if (length > 1)
|
||||
{
|
||||
--exp_p;
|
||||
bcopy (p, exp_p, length);
|
||||
p += length;
|
||||
exp_p += length;
|
||||
continue;
|
||||
}
|
||||
++p;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle the start of a symbol. */
|
||||
if (is_idchar[c] && nargs > 0) {
|
||||
U_CHAR *id_beg = p - 1;
|
||||
@ -7412,9 +7536,27 @@ skip_if_group (ip, any, op)
|
||||
bp += 2;
|
||||
else if (*bp == '/' && bp[1] == '*') {
|
||||
bp += 2;
|
||||
while (!(*bp == '*' && bp[1] == '/'))
|
||||
bp++;
|
||||
bp += 2;
|
||||
while (1)
|
||||
{
|
||||
if (*bp == '*')
|
||||
{
|
||||
if (bp[1] == '/')
|
||||
{
|
||||
bp += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, endb - bp);
|
||||
if (length > 1)
|
||||
bp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
bp++;
|
||||
}
|
||||
}
|
||||
/* There is no point in trying to deal with C++ // comments here,
|
||||
because if there is one, then this # must be part of the
|
||||
@ -7458,6 +7600,15 @@ skip_if_group (ip, any, op)
|
||||
if (bp[1] == '/')
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, endb - bp);
|
||||
if (length > 1)
|
||||
bp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
bp += 2;
|
||||
} else if (bp[1] == '/' && cplusplus_comments) {
|
||||
@ -7469,6 +7620,15 @@ skip_if_group (ip, any, op)
|
||||
warning ("multiline `//' comment");
|
||||
ip->lineno++;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, endb - bp);
|
||||
if (length > 1)
|
||||
bp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} else
|
||||
break;
|
||||
@ -7764,6 +7924,15 @@ validate_else (p, limit)
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (p, limit - p);
|
||||
if (length > 1)
|
||||
p += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (cplusplus_comments && p[1] == '/')
|
||||
@ -7817,6 +7986,22 @@ skip_to_end_of_comment (ip, line_counter, nowarn)
|
||||
if (op)
|
||||
++op->lineno;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, limit - bp);
|
||||
if (length > 1)
|
||||
{
|
||||
if (op)
|
||||
{
|
||||
bcopy (bp, op->bufp, length - 1);
|
||||
op->bufp += (length - 1);
|
||||
}
|
||||
bp += (length - 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (op)
|
||||
*op->bufp++ = *bp;
|
||||
}
|
||||
@ -7854,6 +8039,23 @@ skip_to_end_of_comment (ip, line_counter, nowarn)
|
||||
return bp;
|
||||
}
|
||||
break;
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
default:
|
||||
{
|
||||
int length;
|
||||
bp--;
|
||||
length = local_mblen (bp, limit - bp);
|
||||
if (length <= 0)
|
||||
length = 1;
|
||||
if (op)
|
||||
{
|
||||
op->bufp--;
|
||||
bcopy (bp, op->bufp, length);
|
||||
op->bufp += length;
|
||||
}
|
||||
bp += length;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -7944,6 +8146,16 @@ skip_quoted_string (bp, limit, start_line, count_newlines, backslash_newlines_p,
|
||||
}
|
||||
} else if (c == match)
|
||||
break;
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
{
|
||||
int length;
|
||||
--bp;
|
||||
length = local_mblen (bp, limit - bp);
|
||||
if (length <= 0)
|
||||
length = 1;
|
||||
bp += length;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return bp;
|
||||
}
|
||||
@ -8381,9 +8593,23 @@ macroexpand (hp, op)
|
||||
else {
|
||||
if (c == '\\')
|
||||
escaped = 1;
|
||||
if (in_string) {
|
||||
else if (in_string) {
|
||||
if (c == in_string)
|
||||
in_string = 0;
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (arg->raw + i, arglen - i);
|
||||
if (length > 1)
|
||||
{
|
||||
bcopy (arg->raw + i, xbuf + totlen, length);
|
||||
i += length - 1;
|
||||
totlen += length;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else if (c == '\"' || c == '\'')
|
||||
in_string = c;
|
||||
}
|
||||
@ -8717,6 +8943,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, limit - bp);
|
||||
if (length > 1)
|
||||
bp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} else if (bp[1] == '/' && cplusplus_comments) {
|
||||
*comments = 1;
|
||||
@ -8728,6 +8963,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
|
||||
if (warn_comments)
|
||||
warning ("multiline `//' comment");
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, limit - bp);
|
||||
if (length > 1)
|
||||
bp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -8751,6 +8995,15 @@ macarg1 (start, limit, macro, depthptr, newlines, comments, rest_args)
|
||||
if (quotec == '\'')
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
length = local_mblen (bp, limit - bp);
|
||||
if (length > 1)
|
||||
bp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -8828,8 +9081,23 @@ discard_comments (start, length, newlines)
|
||||
/* Comments are equivalent to spaces. */
|
||||
obp[-1] = ' ';
|
||||
ibp++;
|
||||
while (ibp < limit && (*ibp != '\n' || ibp[-1] == '\\'))
|
||||
ibp++;
|
||||
while (ibp < limit)
|
||||
{
|
||||
if (*ibp == '\n')
|
||||
{
|
||||
if (ibp[-1] != '\\')
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
ibp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
ibp++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (ibp[0] != '*' || ibp + 1 >= limit)
|
||||
@ -8849,6 +9117,14 @@ discard_comments (start, length, newlines)
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
ibp += (length - 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@ -8863,9 +9139,12 @@ discard_comments (start, length, newlines)
|
||||
*obp++ = c = *ibp++;
|
||||
if (c == quotec)
|
||||
break;
|
||||
if (c == '\n' && quotec == '\'')
|
||||
break;
|
||||
if (c == '\\') {
|
||||
if (c == '\n')
|
||||
{
|
||||
if (quotec == '\'')
|
||||
break;
|
||||
}
|
||||
else if (c == '\\') {
|
||||
if (ibp < limit && *ibp == '\n') {
|
||||
ibp++;
|
||||
obp--;
|
||||
@ -8876,6 +9155,23 @@ discard_comments (start, length, newlines)
|
||||
*obp++ = *ibp++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
ibp--;
|
||||
length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
{
|
||||
obp--;
|
||||
bcopy (ibp, obp, length);
|
||||
ibp += length;
|
||||
obp += length;
|
||||
}
|
||||
else
|
||||
ibp++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -8925,10 +9221,33 @@ change_newlines (start, length)
|
||||
int quotec = c;
|
||||
while (ibp < limit) {
|
||||
*obp++ = c = *ibp++;
|
||||
if (c == quotec && ibp[-2] != '\\')
|
||||
break;
|
||||
if (c == '\n' && quotec == '\'')
|
||||
break;
|
||||
if (c == quotec)
|
||||
{
|
||||
if (ibp[-2] != '\\')
|
||||
break;
|
||||
}
|
||||
else if (c == '\n')
|
||||
{
|
||||
if (quotec == '\'')
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int length;
|
||||
ibp--;
|
||||
length = local_mblen (ibp, limit - ibp);
|
||||
if (length > 1)
|
||||
{
|
||||
obp--;
|
||||
bcopy (ibp, obp, length);
|
||||
ibp += length;
|
||||
obp += length;
|
||||
}
|
||||
else
|
||||
ibp++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
113
gcc/cexp.y
113
gcc/cexp.y
@ -39,12 +39,12 @@ Boston, MA 02111-1307, USA.
|
||||
#include "system.h"
|
||||
#include <setjmp.h>
|
||||
/* #define YYDEBUG 1 */
|
||||
#include "gansidecl.h"
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
#include "mbchar.h"
|
||||
#include <locale.h>
|
||||
#endif
|
||||
|
||||
#include "gansidecl.h"
|
||||
#endif /* MULTIBYTE_CHARS */
|
||||
|
||||
typedef unsigned char U_CHAR;
|
||||
|
||||
@ -641,23 +641,18 @@ yylex ()
|
||||
{
|
||||
register HOST_WIDE_INT result = 0;
|
||||
register int num_chars = 0;
|
||||
int chars_seen = 0;
|
||||
unsigned width = MAX_CHAR_TYPE_SIZE;
|
||||
int max_chars;
|
||||
char *token_buffer;
|
||||
|
||||
if (wide_flag)
|
||||
{
|
||||
width = MAX_WCHAR_TYPE_SIZE;
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
max_chars = MB_CUR_MAX;
|
||||
#else
|
||||
max_chars = 1;
|
||||
int longest_char = local_mb_cur_max ();
|
||||
char *token_buffer = (char *) alloca (longest_char);
|
||||
(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
max_chars = MAX_LONG_TYPE_SIZE / width;
|
||||
|
||||
token_buffer = (char *) alloca (max_chars + 1);
|
||||
max_chars = MAX_LONG_TYPE_SIZE / width;
|
||||
if (wide_flag)
|
||||
width = MAX_WCHAR_TYPE_SIZE;
|
||||
|
||||
while (1)
|
||||
{
|
||||
@ -666,44 +661,96 @@ yylex ()
|
||||
if (c == '\'' || c == EOF)
|
||||
break;
|
||||
|
||||
++chars_seen;
|
||||
if (c == '\\')
|
||||
{
|
||||
c = parse_escape (&lexptr, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
wchar_t wc;
|
||||
int i;
|
||||
int char_len = -1;
|
||||
for (i = 1; i <= longest_char; ++i)
|
||||
{
|
||||
token_buffer[i - 1] = c;
|
||||
char_len = local_mbtowc (& wc, token_buffer, i);
|
||||
if (char_len != -1)
|
||||
break;
|
||||
c = *lexptr++;
|
||||
}
|
||||
if (char_len > 1)
|
||||
{
|
||||
/* mbtowc sometimes needs an extra char before accepting */
|
||||
if (char_len < i)
|
||||
lexptr--;
|
||||
if (! wide_flag)
|
||||
{
|
||||
/* Merge character into result; ignore excess chars. */
|
||||
for (i = 1; i <= char_len; ++i)
|
||||
{
|
||||
if (i > max_chars)
|
||||
break;
|
||||
if (width < HOST_BITS_PER_INT)
|
||||
result = (result << width)
|
||||
| (token_buffer[i - 1]
|
||||
& ((1 << width) - 1));
|
||||
else
|
||||
result = token_buffer[i - 1];
|
||||
}
|
||||
num_chars += char_len;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (char_len == -1)
|
||||
warning ("Ignoring invalid multibyte character");
|
||||
}
|
||||
if (wide_flag)
|
||||
c = wc;
|
||||
#endif /* ! MULTIBYTE_CHARS */
|
||||
}
|
||||
|
||||
num_chars++;
|
||||
if (wide_flag)
|
||||
{
|
||||
if (chars_seen == 1) /* only keep the first one */
|
||||
result = c;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Merge character into result; ignore excess chars. */
|
||||
num_chars++;
|
||||
if (num_chars <= max_chars)
|
||||
{
|
||||
if (width < HOST_BITS_PER_WIDE_INT)
|
||||
result = (result << width) | c;
|
||||
if (width < HOST_BITS_PER_INT)
|
||||
result = (result << width) | (c & ((1 << width) - 1));
|
||||
else
|
||||
result = c;
|
||||
token_buffer[num_chars - 1] = c;
|
||||
}
|
||||
}
|
||||
|
||||
token_buffer[num_chars] = 0;
|
||||
|
||||
if (c != '\'')
|
||||
error ("malformatted character constant");
|
||||
else if (num_chars == 0)
|
||||
else if (chars_seen == 0)
|
||||
error ("empty character constant");
|
||||
else if (num_chars > max_chars)
|
||||
{
|
||||
num_chars = max_chars;
|
||||
error ("character constant too long");
|
||||
}
|
||||
else if (num_chars != 1 && ! traditional)
|
||||
else if (chars_seen != 1 && ! traditional)
|
||||
warning ("multi-character character constant");
|
||||
|
||||
/* If char type is signed, sign-extend the constant. */
|
||||
if (! wide_flag)
|
||||
{
|
||||
int num_bits = num_chars * width;
|
||||
|
||||
if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
|
||||
if (num_bits == 0)
|
||||
/* We already got an error; avoid invalid shift. */
|
||||
yylval.integer.value = 0;
|
||||
else if (lookup ((U_CHAR *) "__CHAR_UNSIGNED__",
|
||||
sizeof ("__CHAR_UNSIGNED__") - 1, -1)
|
||||
|| ((result >> (num_bits - 1)) & 1) == 0)
|
||||
yylval.integer.value
|
||||
@ -716,22 +763,6 @@ yylex ()
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
/* Set the initial shift state and convert the next sequence. */
|
||||
result = 0;
|
||||
/* In all locales L'\0' is zero and mbtowc will return zero,
|
||||
so don't use it. */
|
||||
if (num_chars > 1
|
||||
|| (num_chars == 1 && token_buffer[0] != '\0'))
|
||||
{
|
||||
wchar_t wc;
|
||||
(void) mbtowc (NULL_PTR, NULL_PTR, 0);
|
||||
if (mbtowc (& wc, token_buffer, num_chars) == num_chars)
|
||||
result = wc;
|
||||
else
|
||||
pedwarn ("Ignoring invalid multibyte character");
|
||||
}
|
||||
#endif
|
||||
yylval.integer.value = result;
|
||||
}
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ AC_DEFINE(ENABLE_CHECKING)
|
||||
# Enable use of cpplib for C.
|
||||
cpp_main=cccp
|
||||
AC_ARG_ENABLE(c-cpplib,
|
||||
[ --enable-c-cpplib Use cpplib for C.],
|
||||
[ --enable-c-cpplib Use cpplib for C and C++.],
|
||||
if [[[ x$enable_c_cpplib != xno ]]]; then
|
||||
extra_c_objs="${extra_c_objs} cpplib.o cppexp.o cpphash.o cpperror.o"
|
||||
extra_c_objs="${extra_c_objs} prefix.o"
|
||||
@ -93,6 +93,13 @@ if [[[ x$enable_c_cpplib != xno ]]]; then
|
||||
cpp_main=cppmain
|
||||
fi)
|
||||
|
||||
# Enable Multibyte Characters for C/C++
|
||||
AC_ARG_ENABLE(c-mbchar,
|
||||
[ --enable-c-mbchar Enable multibyte characters for C and C++.],
|
||||
if [[[ x$enable_c_mbchar != xno ]]]; then
|
||||
extra_c_flags=-DMULTIBYTE_CHARS=1
|
||||
fi)
|
||||
|
||||
# Enable Haifa scheduler.
|
||||
AC_ARG_ENABLE(haifa,
|
||||
[ --enable-haifa Use the experimental scheduler.
|
||||
@ -193,6 +200,9 @@ AC_CHECK_FUNCS(strtoul bsearch strerror putenv popen bcopy bzero bcmp \
|
||||
index rindex strchr strrchr kill getrlimit setrlimit atoll atoq \
|
||||
sysconf isascii gettimeofday)
|
||||
|
||||
# Make sure wchar_t is available
|
||||
#AC_CHECK_TYPE(wchar_t, unsigned int)
|
||||
|
||||
GCC_FUNC_VFPRINTF_DOPRNT
|
||||
GCC_FUNC_PRINTF_PTR
|
||||
|
||||
@ -3585,6 +3595,7 @@ AC_SUBST(extra_programs)
|
||||
AC_SUBST(extra_parts)
|
||||
AC_SUBST(extra_c_objs)
|
||||
AC_SUBST(extra_cxx_objs)
|
||||
AC_SUBST(extra_cpp_objs)
|
||||
AC_SUBST(extra_c_flags)
|
||||
AC_SUBST(extra_objs)
|
||||
AC_SUBST(host_extra_gcc_objs)
|
||||
|
@ -5964,8 +5964,9 @@ the language standard. You should not need to use these options yourself.
|
||||
@cindex environment variables
|
||||
|
||||
This section describes several environment variables that affect how GNU
|
||||
CC operates. They work by specifying directories or prefixes to use
|
||||
when searching for various kinds of files.
|
||||
CC operates. Some of them work by specifying directories or prefixes to use
|
||||
when searching for various kinds of files. Some are used to specify other
|
||||
aspects of the compilation environment.
|
||||
|
||||
@ifclear INTERNALS
|
||||
Note that you can also specify places to search using options such as
|
||||
@ -6065,6 +6066,28 @@ which case the Make rules are written to that file, guessing the target
|
||||
name from the source file name. Or the value can have the form
|
||||
@samp{@var{file} @var{target}}, in which case the rules are written to
|
||||
file @var{file} using @var{target} as the target name.
|
||||
|
||||
@item LANG
|
||||
@findex LANG
|
||||
@cindex locale definition
|
||||
This variable is used to pass locale information to the compiler. One way in
|
||||
which this information is used is to determine the character set to be used
|
||||
when character literals, string literals and comments are parsed in C and C++.
|
||||
When the compiler is configured to allow multibyte characters,
|
||||
the following values for @code{LANG} are recognized:
|
||||
|
||||
@table @code
|
||||
@item C-JIS
|
||||
Recognize JIS characters.
|
||||
@item C-SJIS
|
||||
Recognize SJIS characters.
|
||||
@item C-EUCJP
|
||||
Recognize EUCJP characters.
|
||||
@end table
|
||||
|
||||
If @code{LANG} is not defined, or if it has some other value, then the
|
||||
compiler will use mblen and mbtowc as defined by the default locale to
|
||||
recognize and translate multibyte characters.
|
||||
@end table
|
||||
|
||||
@node Running Protoize
|
||||
|
288
gcc/mbchar.c
Normal file
288
gcc/mbchar.c
Normal file
@ -0,0 +1,288 @@
|
||||
/* Multibyte Character Functions.
|
||||
Copyright (C) 1998 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU CC.
|
||||
|
||||
GNU CC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU CC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU CC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* These functions are used to manipulate multibyte characters. */
|
||||
|
||||
/* Note regarding cross compilation:
|
||||
|
||||
In general translation of multibyte characters to wide characters can
|
||||
only work in a native compiler since the translation function (mbtowc)
|
||||
needs to know about both the source and target character encoding. However,
|
||||
this particular implementation for JIS, SJIS and EUCJP source characters
|
||||
will work for any compiler with a newlib target. Other targets may also
|
||||
work provided that their wchar_t implementation is 2 bytes and the encoding
|
||||
leaves the source character values unchanged (except for removing the
|
||||
state shifting markers). */
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "gansidecl.h"
|
||||
#include "mbchar.h"
|
||||
#include <locale.h>
|
||||
|
||||
/* Input classes used to index the columns of the JIS tables.  The
   enumerator order must match the column order of those tables.  */
typedef enum
{
  ESCAPE,	/* JIS_ESC_CHAR */
  DOLLAR,	/* '$' */
  BRACKET,	/* '(' */
  AT,		/* '@' */
  B,		/* 'B' */
  J,		/* 'J' */
  NUL,		/* '\0' */
  JIS_CHAR,	/* any other byte accepted by ISJIS */
  OTHER,	/* everything else */
  JIS_C_NUM	/* number of input classes; not a real class */
} JIS_CHAR_TYPE;

/* Scanner states used to index the rows of the JIS tables.  The
   enumerator order must match the row order of those tables.  */
typedef enum
{
  ASCII,
  A_ESC,
  A_ESC_DL,
  JIS,
  JIS_1,
  JIS_2,
  J_ESC,
  J_ESC_BR,
  J2_ESC,
  J2_ESC_BR,
  INV,		/* reached only together with the ERROR action */
  JIS_S_NUM	/* number of states; not a real state */
} JIS_STATE;

/* What the scanner does after consuming one input byte.  */
typedef enum
{
  COPYA,	/* emit the single byte at the character start */
  COPYJ,	/* emit a two-byte character, length counted to here */
  COPYJ2,	/* emit a two-byte character, excluding the lookahead */
  MAKE_A,	/* move the character start past the bytes read so far */
  MAKE_J,	/* likewise */
  NOOP,		/* keep scanning */
  EMPTY,	/* emit the null wide character */
  ERROR		/* reject the sequence */
} JIS_ACTION;
|
||||
|
||||
/*****************************************************************************
|
||||
* state/action tables for processing JIS encoding
|
||||
* Where possible, switches to JIS are grouped with following JIS characters
|
||||
* and switches to ASCII are grouped with preceding JIS characters.
|
||||
* Thus, maximum returned length is:
|
||||
* 3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
|
||||
*****************************************************************************/
|
||||
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
|
||||
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
|
||||
/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
|
||||
/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
|
||||
/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
|
||||
/*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
|
||||
/*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
|
||||
/*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
|
||||
/*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
|
||||
/*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
|
||||
/*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
|
||||
/*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
|
||||
};
|
||||
|
||||
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
|
||||
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
|
||||
/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
|
||||
/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
|
||||
/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
|
||||
/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
|
||||
/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
|
||||
/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
|
||||
/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
|
||||
/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
|
||||
/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
|
||||
/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
|
||||
};
|
||||
|
||||
|
||||
/* The locale being used for multibyte characters in string/char literals.
   While this is NULL or at most one character long, the local_* functions
   in this file fall back to the host's default conversion.  */
char *literal_codeset = NULL;
|
||||
|
||||
int
|
||||
local_mbtowc (pwc, s, n)
|
||||
wchar_t *pwc;
|
||||
const char *s;
|
||||
size_t n;
|
||||
{
|
||||
static JIS_STATE save_state = ASCII;
|
||||
JIS_STATE curr_state = save_state;
|
||||
unsigned char *t = (unsigned char *)s;
|
||||
|
||||
if (s != NULL && n == 0)
|
||||
return -1;
|
||||
|
||||
if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
|
||||
{
|
||||
/* This must be the "C" locale or unknown locale -- fall thru */
|
||||
}
|
||||
else if (! strcmp (literal_codeset, "C-SJIS"))
|
||||
{
|
||||
int char1;
|
||||
if (s == NULL)
|
||||
return 0; /* not state-dependent */
|
||||
char1 = *t;
|
||||
if (ISSJIS1 (char1))
|
||||
{
|
||||
int char2 = t[1];
|
||||
if (n <= 1)
|
||||
return -1;
|
||||
if (ISSJIS2 (char2))
|
||||
{
|
||||
if (pwc != NULL)
|
||||
*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
|
||||
return 2;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
if (pwc != NULL)
|
||||
*pwc = (wchar_t)*t;
|
||||
if (*t == '\0')
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
else if (! strcmp (literal_codeset, "C-EUCJP"))
|
||||
{
|
||||
int char1;
|
||||
if (s == NULL)
|
||||
return 0; /* not state-dependent */
|
||||
char1 = *t;
|
||||
if (ISEUCJP (char1))
|
||||
{
|
||||
int char2 = t[1];
|
||||
if (n <= 1)
|
||||
return -1;
|
||||
if (ISEUCJP (char2))
|
||||
{
|
||||
if (pwc != NULL)
|
||||
*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
|
||||
return 2;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
if (pwc != NULL)
|
||||
*pwc = (wchar_t)*t;
|
||||
if (*t == '\0')
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
else if (! strcmp (literal_codeset, "C-JIS"))
|
||||
{
|
||||
JIS_ACTION action;
|
||||
JIS_CHAR_TYPE ch;
|
||||
unsigned char *ptr;
|
||||
int i, curr_ch;
|
||||
|
||||
if (s == NULL)
|
||||
{
|
||||
save_state = ASCII;
|
||||
return 1; /* state-dependent */
|
||||
}
|
||||
|
||||
ptr = t;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
curr_ch = t[i];
|
||||
switch (curr_ch)
|
||||
{
|
||||
case JIS_ESC_CHAR:
|
||||
ch = ESCAPE;
|
||||
break;
|
||||
case '$':
|
||||
ch = DOLLAR;
|
||||
break;
|
||||
case '@':
|
||||
ch = AT;
|
||||
break;
|
||||
case '(':
|
||||
ch = BRACKET;
|
||||
break;
|
||||
case 'B':
|
||||
ch = B;
|
||||
break;
|
||||
case 'J':
|
||||
ch = J;
|
||||
break;
|
||||
case '\0':
|
||||
ch = NUL;
|
||||
break;
|
||||
default:
|
||||
if (ISJIS (curr_ch))
|
||||
ch = JIS_CHAR;
|
||||
else
|
||||
ch = OTHER;
|
||||
}
|
||||
|
||||
action = JIS_action_table[curr_state][ch];
|
||||
curr_state = JIS_state_table[curr_state][ch];
|
||||
|
||||
switch (action)
|
||||
{
|
||||
case NOOP:
|
||||
break;
|
||||
case EMPTY:
|
||||
if (pwc != NULL)
|
||||
*pwc = (wchar_t)0;
|
||||
save_state = curr_state;
|
||||
return i;
|
||||
case COPYA:
|
||||
if (pwc != NULL)
|
||||
*pwc = (wchar_t)*ptr;
|
||||
save_state = curr_state;
|
||||
return (i + 1);
|
||||
case COPYJ:
|
||||
if (pwc != NULL)
|
||||
*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
|
||||
save_state = curr_state;
|
||||
return (i + 1);
|
||||
case COPYJ2:
|
||||
if (pwc != NULL)
|
||||
*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
|
||||
save_state = curr_state;
|
||||
return (ptr - t) + 2;
|
||||
case MAKE_A:
|
||||
case MAKE_J:
|
||||
ptr = (char *)(t + i + 1);
|
||||
break;
|
||||
case ERROR:
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return -1; /* n < bytes needed */
|
||||
}
|
||||
|
||||
#ifdef CROSS_COMPILE
|
||||
if (s == NULL)
|
||||
return 0; /* not state-dependent */
|
||||
if (pwc != NULL)
|
||||
*pwc = *s;
|
||||
return 1;
|
||||
#else
|
||||
/* This must be the "C" locale or unknown locale. */
|
||||
return mbtowc (pwc, s, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Return what local_mbtowc returns for the multibyte character at S
   (at most N bytes examined) when no wide character is stored.  */
int
local_mblen (s, n)
     const char *s;
     size_t n;
{
  int length;

  /* A null output pointer asks local_mbtowc to measure only.  */
  length = local_mbtowc (NULL, s, n);
  return length;
}
|
||||
|
||||
int
|
||||
local_mb_cur_max ()
|
||||
{
|
||||
if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
|
||||
;
|
||||
else if (! strcmp (literal_codeset, "C-SJIS"))
|
||||
return 2;
|
||||
else if (! strcmp (literal_codeset, "C-EUCJP"))
|
||||
return 2;
|
||||
else if (! strcmp (literal_codeset, "C-JIS"))
|
||||
return 8; /* 3 + 2 + 3 */
|
||||
|
||||
#ifdef CROSS_COMPILE
|
||||
return 1;
|
||||
#else
|
||||
return MB_CUR_MAX;
|
||||
#endif
|
||||
}
|
||||
#endif /* MULTIBYTE_CHARS */
|
25
gcc/mbchar.h
Normal file
25
gcc/mbchar.h
Normal file
@ -0,0 +1,25 @@
|
||||
/* mbchar.h - Various declarations for functions found in mbchar.c
|
||||
Copyright (C) 1998 Free Software Foundation, Inc.
|
||||
*/
|
||||
|
||||
#ifndef __GCC_MBCHAR_H__
|
||||
#define __GCC_MBCHAR_H__
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
/* escape character used for JIS encoding */
|
||||
#define JIS_ESC_CHAR 0x1b
|
||||
|
||||
/* Byte-range predicates for the supported encodings.  Each macro
   evaluates its argument more than once, so pass an expression without
   side effects; callers are expected to pass an int holding an unsigned
   byte value.  */

/* First byte of a two-byte SJIS character.  */
#define ISSJIS1(c) \
  (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xef))

/* Second byte of a two-byte SJIS character.  */
#define ISSJIS2(c) \
  (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

/* Either byte of a two-byte EUCJP character.  */
#define ISEUCJP(c) ((c) >= 0xa1 && (c) <= 0xfe)

/* Byte that can be part of a two-byte JIS character.  */
#define ISJIS(c) ((c) >= 0x21 && (c) <= 0x7e)
|
||||
|
||||
int local_mbtowc PROTO ((wchar_t *, const char *, size_t));
|
||||
int local_mblen PROTO ((const char *, size_t));
|
||||
int local_mb_cur_max PROTO ((void));
|
||||
|
||||
/* The locale being used for multibyte characters in string/char literals. */
|
||||
extern char *literal_codeset;
|
||||
#endif /* MULTIBYTE_CHARS */
|
||||
|
||||
#endif /* __GCC_MBCHAR_H__ */
|
Loading…
Reference in New Issue
Block a user