re GNATS gcj/33 (gcj mangles composed characters)
Fix for PR gcj/33: * jv-scan.c (help): Document --encoding. (options): Added `encoding' entry. (OPT_ENCODING): New define. (main): Handle --encoding. Include <langinfo.h> if nl_langinfo exists. * lang-options.h: Document --classpath, --CLASSPATH, --main, and --encoding. * jcf-parse.c: Include <langinfo.h> if we have nl_langinfo. (parse_source_file): Correctly call java_init_lex. Added `finput' argument. Use nl_langinfo to determine default encoding. * java-tree.h (current_encoding): Declare. * parse.y (java_parser_context_restore_global): Don't restore `finput'. (java_parser_context_save_global): Don't set `finput' field. (java_pop_parser_context): Don't restore `finput'. Free old lexer if required. * lang.c (current_encoding): New global. (lang_decode_option): Recognize `-fencoding='. (finish_parse): Don't close finput. * parse.h (struct parser_ctxt): Removed `finput' and `unget_utf8_value' fields. Added `lexer' field. (java_init_lex): Fixed declaration. * lex.c (java_new_lexer): New function. (java_destroy_lexer): Likewise. (java_read_char): Added `lex' argument. Handle iconv case. (java_read_unicode): Added `lex' argument. Count backslashes in lexer structure. (java_init_lex): Added `finput' and `encoding' arguments. Set `lexer' field in ctxp. (BAD_UTF8_VALUE): Removed. (java_lex): Handle seeing UEOF in the middle of a string literal. * lex.h: Include <iconv.h> if HAVE_ICONV defined. (java_lexer): New structure. (UNGETC): Removed. (GETC): Removed. (DEFAULT_ENCODING): New define. (java_destroy_lexer): Declare. From-SVN: r36377
This commit is contained in:
parent
ee17a29049
commit
d19cbcb5e3
@ -1,3 +1,44 @@
|
||||
2000-09-12 Tom Tromey <tromey@cygnus.com>
|
||||
|
||||
Fix for PR gcj/33:
|
||||
* jv-scan.c (help): Document --encoding.
|
||||
(options): Added `encoding' entry.
|
||||
(OPT_ENCODING): New define.
|
||||
(main): Handle --encoding.
|
||||
Include <langinfo.h> if nl_langinfo exists.
|
||||
* lang-options.h: Document --classpath, --CLASSPATH, --main, and
|
||||
--encoding.
|
||||
* jcf-parse.c: Include <langinfo.h> if we have nl_langinfo.
|
||||
(parse_source_file): Correctly call java_init_lex. Added `finput'
|
||||
argument. Use nl_langinfo to determine default encoding.
|
||||
* java-tree.h (current_encoding): Declare.
|
||||
* parse.y (java_parser_context_restore_global): Don't restore
|
||||
`finput'.
|
||||
(java_parser_context_save_global): Don't set `finput' field.
|
||||
(java_pop_parser_context): Don't restore `finput'. Free old lexer
|
||||
if required.
|
||||
* lang.c (current_encoding): New global.
|
||||
(lang_decode_option): Recognize `-fencoding='.
|
||||
(finish_parse): Don't close finput.
|
||||
* parse.h (struct parser_ctxt): Removed `finput' and
|
||||
`unget_utf8_value' fields. Added `lexer' field.
|
||||
(java_init_lex): Fixed declaration.
|
||||
* lex.c (java_new_lexer): New function.
|
||||
(java_destroy_lexer): Likewise.
|
||||
(java_read_char): Added `lex' argument. Handle iconv case.
|
||||
(java_read_unicode): Added `lex' argument. Count backslashes in
|
||||
lexer structure.
|
||||
(java_init_lex): Added `finput' and `encoding' arguments. Set
|
||||
`lexer' field in ctxp.
|
||||
(BAD_UTF8_VALUE): Removed.
|
||||
(java_lex): Handle seeing UEOF in the middle of a string literal.
|
||||
* lex.h: Include <iconv.h> if HAVE_ICONV defined.
|
||||
(java_lexer): New structure.
|
||||
(UNGETC): Removed.
|
||||
(GETC): Removed.
|
||||
(DEFAULT_ENCODING): New define.
|
||||
(java_destroy_lexer): Declare.
|
||||
|
||||
2000-09-12 Tom Tromey <tromey@cygnus.com>
|
||||
|
||||
Fix for PR gcj/343:
|
||||
|
@ -169,6 +169,9 @@ extern int flag_use_boehm_gc;
|
||||
object to its synchronization structure. */
|
||||
extern int flag_hash_synchronization;
|
||||
|
||||
/* Encoding used for source files. */
|
||||
extern char *current_encoding;
|
||||
|
||||
/* The Java .class file that provides main_class; the main input file. */
|
||||
extern struct JCF *current_jcf;
|
||||
|
||||
|
@ -35,6 +35,10 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */
|
||||
#include "toplev.h"
|
||||
#include "parse.h"
|
||||
|
||||
#ifdef HAVE_NL_LANGINFO
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
|
||||
/* A CONSTANT_Utf8 element is converted to an IDENTIFIER_NODE at parse time. */
|
||||
#define JPOOL_UTF(JCF, INDEX) CPOOL_UTF(&(JCF)->cpool, INDEX)
|
||||
#define JPOOL_UTF_LENGTH(JCF, INDEX) IDENTIFIER_LENGTH (JPOOL_UTF (JCF, INDEX))
|
||||
@ -83,7 +87,7 @@ static struct JCF main_jcf[1];
|
||||
static tree give_name_to_class PARAMS ((JCF *jcf, int index));
|
||||
static void parse_zip_file_entries PARAMS ((void));
|
||||
static void process_zip_dir PARAMS ((void));
|
||||
static void parse_source_file PARAMS ((tree));
|
||||
static void parse_source_file PARAMS ((tree, FILE *));
|
||||
static void jcf_parse_source PARAMS ((void));
|
||||
static int jcf_figure_file_type PARAMS ((JCF *));
|
||||
static int find_in_current_zip PARAMS ((const char *, struct JCF **));
|
||||
@ -564,6 +568,7 @@ static void
|
||||
jcf_parse_source ()
|
||||
{
|
||||
tree file;
|
||||
FILE *finput;
|
||||
|
||||
java_parser_context_save_global ();
|
||||
java_push_parser_context ();
|
||||
@ -576,7 +581,7 @@ jcf_parse_source ()
|
||||
if (!(finput = fopen (input_filename, "r")))
|
||||
fatal ("input file `%s' just disappeared - jcf_parse_source",
|
||||
input_filename);
|
||||
parse_source_file (file);
|
||||
parse_source_file (file, finput);
|
||||
if (fclose (finput))
|
||||
fatal ("can't close input file `%s' stream - jcf_parse_source",
|
||||
input_filename);
|
||||
@ -754,8 +759,9 @@ parse_class_file ()
|
||||
/* Parse a source file, as pointed to by the current value of INPUT_FILENAME. */
|
||||
|
||||
static void
|
||||
parse_source_file (file)
|
||||
parse_source_file (file, finput)
|
||||
tree file;
|
||||
FILE *finput;
|
||||
{
|
||||
int save_error_count = java_error_count;
|
||||
/* Mark the file as parsed */
|
||||
@ -765,7 +771,21 @@ parse_source_file (file)
|
||||
|
||||
lang_init_source (1); /* Error msgs have no method prototypes */
|
||||
|
||||
java_init_lex (); /* Initialize the parser */
|
||||
/* There's no point in trying to find the current encoding unless we
|
||||
are going to do something intelligent with it -- hence the test
|
||||
for iconv. */
|
||||
#ifdef HAVE_ICONV
|
||||
#ifdef HAVE_NL_LANGINFO
|
||||
setlocale (LC_CTYPE, "");
|
||||
if (current_encoding == NULL)
|
||||
current_encoding = nl_langinfo (CODESET);
|
||||
#endif /* HAVE_NL_LANGINFO */
|
||||
#endif /* HAVE_ICONV */
|
||||
if (current_encoding == NULL || *current_encoding == '\0')
|
||||
current_encoding = DEFAULT_ENCODING;
|
||||
|
||||
/* Initialize the parser */
|
||||
java_init_lex (finput, current_encoding);
|
||||
java_parse_abort_on_error ();
|
||||
|
||||
java_parse (); /* Parse and build partial tree nodes. */
|
||||
@ -796,6 +816,7 @@ yyparse ()
|
||||
int several_files = 0;
|
||||
char *list = xstrdup (input_filename), *next;
|
||||
tree node, current_file_list = NULL_TREE;
|
||||
FILE *finput;
|
||||
|
||||
do
|
||||
{
|
||||
@ -901,7 +922,7 @@ yyparse ()
|
||||
case JCF_SOURCE:
|
||||
java_push_parser_context ();
|
||||
java_parser_context_save_global ();
|
||||
parse_source_file (name);
|
||||
parse_source_file (name, finput);
|
||||
java_parser_context_restore_global ();
|
||||
java_pop_parser_context (1);
|
||||
break;
|
||||
|
@ -26,6 +26,10 @@ Boston, MA 02111-1307, USA. */
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#ifdef HAVE_NL_LANGINFO
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
void fatal PARAMS ((const char *s, ...)) ATTRIBUTE_PRINTF_1 ATTRIBUTE_NORETURN;
|
||||
@ -61,6 +65,7 @@ int flag_list_filename = 0;
|
||||
|
||||
#define OPT_HELP LONG_OPT (0)
|
||||
#define OPT_VERSION LONG_OPT (1)
|
||||
#define OPT_ENCODING LONG_OPT (2)
|
||||
|
||||
static struct option options[] =
|
||||
{
|
||||
@ -69,6 +74,7 @@ static struct option options[] =
|
||||
{ "print-main", no_argument, &flag_find_main, 1 },
|
||||
{ "list-filename", no_argument, &flag_list_filename, 1 },
|
||||
{ "list-class", no_argument, &flag_dump_class, 1 },
|
||||
{ "encoding", required_argument, NULL, OPT_ENCODING },
|
||||
{ NULL, no_argument, NULL, 0 }
|
||||
};
|
||||
|
||||
@ -84,6 +90,7 @@ help ()
|
||||
{
|
||||
printf ("Usage: jv-scan [OPTION]... FILE...\n\n");
|
||||
printf ("Print useful information read from Java source files.\n\n");
|
||||
printf (" --encoding NAME Specify encoding of input file\n");
|
||||
printf (" --print-main Print name of class containing `main'\n");
|
||||
printf (" --list-class List all classes defined in file\n");
|
||||
printf (" --list-filename Print input filename when listing class names\n");
|
||||
@ -114,6 +121,7 @@ DEFUN (main, (argc, argv),
|
||||
{
|
||||
int i = 1;
|
||||
const char *output_file = NULL;
|
||||
const char *encoding = NULL;
|
||||
long ft;
|
||||
int opt;
|
||||
|
||||
@ -144,6 +152,10 @@ DEFUN (main, (argc, argv),
|
||||
version ();
|
||||
break;
|
||||
|
||||
case OPT_ENCODING:
|
||||
encoding = optarg;
|
||||
break;
|
||||
|
||||
default:
|
||||
usage ();
|
||||
break;
|
||||
@ -172,7 +184,20 @@ DEFUN (main, (argc, argv),
|
||||
input_filename = argv [i];
|
||||
if ( (finput = fopen (argv [i], "r")) )
|
||||
{
|
||||
java_init_lex ();
|
||||
/* There's no point in trying to find the current encoding
|
||||
unless we are going to do something intelligent with it
|
||||
-- hence the test for iconv. */
|
||||
#ifdef HAVE_ICONV
|
||||
#ifdef HAVE_NL_LANGINFO
|
||||
setlocale (LC_CTYPE, "");
|
||||
if (encoding == NULL)
|
||||
encoding = nl_langinfo (CODESET);
|
||||
#endif /* HAVE_NL_LANGINFO */
|
||||
#endif /* HAVE_ICONV */
|
||||
if (encoding == NULL || *encoding == '\0')
|
||||
encoding = DEFAULT_ENCODING;
|
||||
|
||||
java_init_lex (finput, encoding);
|
||||
yyparse ();
|
||||
if (ftell (out) != ft)
|
||||
fputc ('\n', out);
|
||||
|
@ -42,8 +42,10 @@ DEFINE_LANG_NAME ("Java")
|
||||
{ "-M", "Print dependencies to stdout" },
|
||||
{ "-MM", "Print dependencies to stdout" },
|
||||
#endif /* ! USE_CPPLIB */
|
||||
{ "-fclasspath", "Set class path and suppress system path" },
|
||||
{ "-fCLASSPATH", "Set class path" },
|
||||
{ "--classpath", "Set class path and suppress system path" },
|
||||
{ "--CLASSPATH", "Set class path" },
|
||||
{ "--main", "Choose class whose main method should be used" },
|
||||
{ "--encoding", "Choose input encoding (default is UTF-8)" },
|
||||
{ "-I", "Add directory to class path" },
|
||||
{ "-foutput-class-dir", "Directory where class files should be written" },
|
||||
{ "-fuse-divide-subroutine", "" },
|
||||
|
@ -121,6 +121,9 @@ int flag_hash_synchronization;
|
||||
JNI, not CNI. */
|
||||
int flag_jni = 0;
|
||||
|
||||
/* The encoding of the source file. */
|
||||
char *current_encoding = NULL;
|
||||
|
||||
/* When non zero, report the now deprecated empty statements. */
|
||||
int flag_extraneous_semicolon;
|
||||
|
||||
@ -222,6 +225,13 @@ lang_decode_option (argc, argv)
|
||||
return 1;
|
||||
}
|
||||
#undef ARG
|
||||
#define ARG "-fencoding="
|
||||
if (strncmp (p, ARG, sizeof (ARG) - 1) == 0)
|
||||
{
|
||||
current_encoding = p + sizeof (ARG) - 1;
|
||||
return 1;
|
||||
}
|
||||
#undef ARG
|
||||
|
||||
if (p[0] == '-' && p[1] == 'f')
|
||||
{
|
||||
@ -309,7 +319,9 @@ lang_decode_option (argc, argv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Global open file. */
|
||||
FILE *finput;
|
||||
|
||||
const char *
|
||||
init_parse (filename)
|
||||
const char *filename;
|
||||
@ -362,6 +374,7 @@ init_parse (filename)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
init_lex ();
|
||||
|
||||
return filename;
|
||||
@ -370,7 +383,6 @@ init_parse (filename)
|
||||
void
|
||||
finish_parse ()
|
||||
{
|
||||
fclose (finput);
|
||||
jcf_dependency_write ();
|
||||
}
|
||||
|
||||
|
294
gcc/java/lex.c
294
gcc/java/lex.c
@ -24,15 +24,15 @@ of Sun Microsystems, Inc. in the United States and other countries.
|
||||
The Free Software Foundation is independent of Sun Microsystems, Inc. */
|
||||
|
||||
/* It defines java_lex (yylex) that reads a Java ASCII source file
|
||||
possibly containing Unicode escape sequence or utf8 encoded characters
|
||||
and returns a token for everything found but comments, white spaces
|
||||
and line terminators. When necessary, it also fills the java_lval
|
||||
(yylval) union. It's implemented to be called by a re-entrant parser
|
||||
generated by Bison.
|
||||
possibly containing Unicode escape sequence or utf8 encoded
|
||||
characters and returns a token for everything found but comments,
|
||||
white spaces and line terminators. When necessary, it also fills
|
||||
the java_lval (yylval) union. It's implemented to be called by a
|
||||
re-entrant parser generated by Bison.
|
||||
|
||||
The lexical analysis conforms to the Java grammar described in "The
|
||||
Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
|
||||
Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
|
||||
The lexical analysis conforms to the Java grammar described in "The
|
||||
Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
|
||||
Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
|
||||
|
||||
#include "keyword.h"
|
||||
|
||||
@ -55,15 +55,18 @@ static int java_letter_or_digit_p PARAMS ((unicode_t));
|
||||
static int java_parse_doc_section PARAMS ((unicode_t));
|
||||
static void java_parse_end_comment PARAMS ((unicode_t));
|
||||
static unicode_t java_get_unicode PARAMS ((void));
|
||||
static unicode_t java_read_unicode PARAMS ((int, int *));
|
||||
static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
|
||||
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
|
||||
static unicode_t java_read_char PARAMS ((void));
|
||||
static unicode_t java_read_char PARAMS ((java_lexer *));
|
||||
static void java_allocate_new_line PARAMS ((void));
|
||||
static void java_unget_unicode PARAMS ((void));
|
||||
static unicode_t java_sneak_unicode PARAMS ((void));
|
||||
java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
|
||||
|
||||
void
|
||||
java_init_lex ()
|
||||
java_init_lex (finput, encoding)
|
||||
FILE *finput;
|
||||
const char *encoding;
|
||||
{
|
||||
#ifndef JC1_LITE
|
||||
int java_lang_imported = 0;
|
||||
@ -114,9 +117,9 @@ java_init_lex ()
|
||||
ctxp->lineno = lineno = 0;
|
||||
ctxp->p_line = NULL;
|
||||
ctxp->c_line = NULL;
|
||||
ctxp->unget_utf8_value = 0;
|
||||
ctxp->minus_seen = 0;
|
||||
ctxp->java_error_flag = 0;
|
||||
ctxp->lexer = java_new_lexer (finput, encoding);
|
||||
}
|
||||
|
||||
static char *
|
||||
@ -194,59 +197,180 @@ java_allocate_new_line ()
|
||||
ctxp->c_line->white_space_only = 1;
|
||||
}
|
||||
|
||||
#define BAD_UTF8_VALUE 0xFFFE
|
||||
/* Create a new lexer object.
   FINPUT is the stream the lexer will read from; it is only stored
   here, not read.  ENCODING names the character encoding of that
   stream.  The object is allocated with xmalloc and returned with
   its backslash count and push-back value cleared.  With iconv, a
   converter from ENCODING to UCS-2 is opened; without iconv, only
   DEFAULT_ENCODING is accepted.  An unusable encoding is reported
   through fatal.  */
|
||||
java_lexer *
|
||||
java_new_lexer (finput, encoding)
|
||||
FILE *finput;
|
||||
const char *encoding;
|
||||
{
|
||||
java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
|
||||
int enc_error = 0;
|
||||
|
||||
lex->finput = finput;
|
||||
lex->bs_count = 0;
|
||||
lex->unget_value = 0;
|
||||
|
||||
#ifdef HAVE_ICONV
|
||||
/* The reader consumes two-byte UCS-2 units, so convert to that.  */
lex->handle = iconv_open ("UCS-2", encoding);
|
||||
if (lex->handle == (iconv_t) -1)
|
||||
{
|
||||
/* FIXME: we should give a nice error based on errno here. */
|
||||
enc_error = 1;
|
||||
}
|
||||
/* -1 in both indices marks the raw input buffer as holding no
   valid bytes yet.  */
lex->first = -1;
|
||||
lex->last = -1;
|
||||
#else /* HAVE_ICONV */
|
||||
/* Without iconv no conversion is possible, so any encoding other
   than the default one is rejected.  */
if (strcmp (encoding, DEFAULT_ENCODING))
|
||||
enc_error = 1;
|
||||
#endif /* HAVE_ICONV */
|
||||
|
||||
if (enc_error)
|
||||
fatal ("unknown encoding: `%s'", encoding);
|
||||
|
||||
return lex;
|
||||
}
|
||||
|
||||
/* Destroy the lexer object LEX.  When iconv is in use the converter
   handle is closed first; then the object itself is freed.  The
   input stream stored in LEX is not closed here.  */
void
|
||||
java_destroy_lexer (lex)
|
||||
java_lexer *lex;
|
||||
{
|
||||
#ifdef HAVE_ICONV
|
||||
iconv_close (lex->handle);
|
||||
#endif
|
||||
free (lex);
|
||||
}
|
||||
|
||||
static unicode_t
|
||||
java_read_char ()
|
||||
java_read_char (lex)
|
||||
java_lexer *lex;
|
||||
{
|
||||
int c;
|
||||
int c1, c2;
|
||||
|
||||
if (ctxp->unget_utf8_value)
|
||||
if (lex->unget_value)
|
||||
{
|
||||
int to_return = ctxp->unget_utf8_value;
|
||||
ctxp->unget_utf8_value = 0;
|
||||
return (to_return);
|
||||
unicode_t r = lex->unget_value;
|
||||
lex->unget_value = 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
c = GETC ();
|
||||
#ifdef HAVE_ICONV
|
||||
{
|
||||
char out[2];
|
||||
size_t ir, inbytesleft, in_save, out_count;
|
||||
char *inp, *outp;
|
||||
|
||||
if (c < 128)
|
||||
return (unicode_t)c;
|
||||
if (c == EOF)
|
||||
return UEOF;
|
||||
else
|
||||
{
|
||||
if ((c & 0xe0) == 0xc0)
|
||||
{
|
||||
c1 = GETC ();
|
||||
if ((c1 & 0xc0) == 0x80)
|
||||
return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
|
||||
c = c1;
|
||||
}
|
||||
else if ((c & 0xf0) == 0xe0)
|
||||
{
|
||||
c1 = GETC ();
|
||||
if ((c1 & 0xc0) == 0x80)
|
||||
{
|
||||
c2 = GETC ();
|
||||
if ((c2 & 0xc0) == 0x80)
|
||||
return (unicode_t)(((c & 0xf) << 12) +
|
||||
(( c1 & 0x3f) << 6) + (c2 & 0x3f));
|
||||
else
|
||||
c = c2;
|
||||
}
|
||||
else
|
||||
while (1)
|
||||
{
|
||||
/* See if we need to read more data. If FIRST == 0 then the
|
||||
previous conversion attempt ended in the middle of a
|
||||
character at the end of the buffer. Otherwise we only have
|
||||
to read if the buffer is empty. */
|
||||
if (lex->first == 0 || lex->first >= lex->last)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (lex->first >= lex->last)
|
||||
{
|
||||
lex->first = 0;
|
||||
lex->last = 0;
|
||||
}
|
||||
if (feof (lex->finput))
|
||||
return UEOF;
|
||||
r = fread (&lex->buffer[lex->last], 1,
|
||||
sizeof (lex->buffer) - lex->last,
|
||||
lex->finput);
|
||||
lex->last += r;
|
||||
}
|
||||
|
||||
inbytesleft = lex->last - lex->first;
|
||||
|
||||
if (inbytesleft == 0)
|
||||
{
|
||||
/* We've tried to read and there is nothing left. */
|
||||
return UEOF;
|
||||
}
|
||||
|
||||
in_save = inbytesleft;
|
||||
out_count = 2;
|
||||
inp = &lex->buffer[lex->first];
|
||||
outp = out;
|
||||
ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
|
||||
&outp, &out_count);
|
||||
lex->first += in_save - inbytesleft;
|
||||
|
||||
if (out_count == 0)
|
||||
{
|
||||
/* Success. We assume that UCS-2 is big-endian. This
|
||||
appears to be an ok assumption. */
|
||||
unicode_t result;
|
||||
result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
|
||||
return result;
|
||||
}
|
||||
|
||||
if (ir == (size_t) -1)
|
||||
{
|
||||
if (errno == EINVAL)
|
||||
{
|
||||
/* This is ok. This means that the end of our buffer
|
||||
is in the middle of a character sequence. We just
|
||||
move the valid part of the buffer to the beginning
|
||||
to force a read. */
|
||||
/* We use bcopy() because it should work for
|
||||
overlapping strings. Use memmove() instead... */
|
||||
bcopy (&lex->buffer[lex->first], &lex->buffer[0],
|
||||
lex->last - lex->first);
|
||||
lex->last -= lex->first;
|
||||
lex->first = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A more serious error. */
|
||||
java_lex_error ("unrecognized character in input stream", 0);
|
||||
return UEOF;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else /* HAVE_ICONV */
|
||||
{
|
||||
int c, c1, c2;
|
||||
c = getc (lex->finput);
|
||||
|
||||
if (c < 128)
|
||||
return (unicode_t)c;
|
||||
if (c == EOF)
|
||||
return UEOF;
|
||||
else
|
||||
{
|
||||
if ((c & 0xe0) == 0xc0)
|
||||
{
|
||||
c1 = getc (lex->finput);
|
||||
if ((c1 & 0xc0) == 0x80)
|
||||
return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
|
||||
c = c1;
|
||||
}
|
||||
/* We looked for a UTF8 multi-byte sequence (since we saw an initial
|
||||
byte with the high bit set), but found invalid bytes instead.
|
||||
If the most recent byte was Ascii (and not EOF), we should
|
||||
unget it, in case it was a comment terminator or other delimiter. */
|
||||
if ((c & 0x80) == 0)
|
||||
UNGETC (c);
|
||||
return BAD_UTF8_VALUE;
|
||||
}
|
||||
}
|
||||
else if ((c & 0xf0) == 0xe0)
|
||||
{
|
||||
c1 = getc (lex->finput);
|
||||
if ((c1 & 0xc0) == 0x80)
|
||||
{
|
||||
c2 = getc (lex->finput);
|
||||
if ((c2 & 0xc0) == 0x80)
|
||||
return (unicode_t)(((c & 0xf) << 12) +
|
||||
(( c1 & 0x3f) << 6) + (c2 & 0x3f));
|
||||
else
|
||||
c = c2;
|
||||
}
|
||||
else
|
||||
c = c1;
|
||||
}
|
||||
|
||||
/* We simply don't support invalid characters. */
|
||||
java_lex_error ("malformed UTF-8 character", 0);
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_ICONV */
|
||||
|
||||
/* We only get here on error. */
|
||||
return UEOF;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -267,56 +391,54 @@ java_store_unicode (l, c, unicode_escape_p)
|
||||
}
|
||||
|
||||
static unicode_t
|
||||
java_read_unicode (term_context, unicode_escape_p)
|
||||
int term_context;
|
||||
int *unicode_escape_p;
|
||||
java_read_unicode (lex, term_context, unicode_escape_p)
|
||||
java_lexer *lex;
|
||||
int term_context;
|
||||
int *unicode_escape_p;
|
||||
{
|
||||
unicode_t c;
|
||||
long i, base;
|
||||
|
||||
c = java_read_char ();
|
||||
c = java_read_char (lex);
|
||||
*unicode_escape_p = 0;
|
||||
|
||||
if (c != '\\')
|
||||
return ((term_context ? c :
|
||||
java_lineterminator (c) ? '\n' : (unicode_t)c));
|
||||
|
||||
/* Count the number of preceding '\' */
|
||||
for (base = ftell (finput), i = base-2; c == '\\';)
|
||||
{
|
||||
fseek (finput, i--, SEEK_SET);
|
||||
c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
|
||||
}
|
||||
fseek (finput, base, SEEK_SET);
|
||||
if ((base-i-3)%2 == 0) /* If odd number of \ seen */
|
||||
{
|
||||
c = java_read_char ();
|
||||
lex->bs_count = 0;
|
||||
return (term_context ? c : (java_lineterminator (c)
|
||||
? '\n'
|
||||
: (unicode_t) c));
|
||||
}
|
||||
|
||||
++lex->bs_count;
|
||||
if ((lex->bs_count) % 2 == 1)
|
||||
{
|
||||
/* Odd number of \ seen. */
|
||||
c = java_read_char (lex);
|
||||
if (c == 'u')
|
||||
{
|
||||
unsigned short unicode = 0;
|
||||
unicode_t unicode = 0;
|
||||
int shift = 12;
|
||||
/* Next should be 4 hex digits, otherwise it's an error.
|
||||
The hex value is converted into the unicode, pushed into
|
||||
the Unicode stream. */
|
||||
for (shift = 12; shift >= 0; shift -= 4)
|
||||
{
|
||||
if ((c = java_read_char ()) == UEOF)
|
||||
if ((c = java_read_char (lex)) == UEOF)
|
||||
return UEOF;
|
||||
if (c >= '0' && c <= '9')
|
||||
unicode |= (unicode_t)((c-'0') << shift);
|
||||
else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
|
||||
unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
|
||||
else
|
||||
java_lex_error
|
||||
("Non hex digit in Unicode escape sequence", 0);
|
||||
java_lex_error ("Non hex digit in Unicode escape sequence", 0);
|
||||
}
|
||||
*unicode_escape_p = 1;
|
||||
return (term_context ? unicode :
|
||||
(java_lineterminator (c) ? '\n' : unicode));
|
||||
return (term_context
|
||||
? unicode : (java_lineterminator (c) ? '\n' : unicode));
|
||||
}
|
||||
ctxp->unget_utf8_value = c;
|
||||
lex->unget_value = c;
|
||||
}
|
||||
return (unicode_t)'\\';
|
||||
return (unicode_t) '\\';
|
||||
}
|
||||
|
||||
static unicode_t
|
||||
@ -331,7 +453,7 @@ java_get_unicode ()
|
||||
for (;;)
|
||||
{
|
||||
int unicode_escape_p;
|
||||
c = java_read_unicode (0, &unicode_escape_p);
|
||||
c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
|
||||
java_store_unicode (ctxp->c_line, c, unicode_escape_p);
|
||||
if (ctxp->c_line->white_space_only
|
||||
&& !JAVA_WHITE_SPACE_P (c) && c!='\n')
|
||||
@ -354,7 +476,7 @@ java_lineterminator (c)
|
||||
else if (c == '\r') /* CR */
|
||||
{
|
||||
int unicode_escape_p;
|
||||
c = java_read_unicode (1, &unicode_escape_p);
|
||||
c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
|
||||
if (c == '\r')
|
||||
{
|
||||
/* In this case we will have another terminator. For some
|
||||
@ -363,7 +485,7 @@ java_lineterminator (c)
|
||||
up in the actual text of the line, causing an error. So
|
||||
instead we choose a very low-level method. FIXME: this
|
||||
is incredibly ugly. */
|
||||
UNGETC (c);
|
||||
ctxp->lexer->unget_value = c;
|
||||
}
|
||||
else if (c != '\n')
|
||||
{
|
||||
@ -939,7 +1061,7 @@ java_lex (java_lval)
|
||||
char *string;
|
||||
|
||||
for (no_error = 1, c = java_get_unicode ();
|
||||
c != '"' && c != '\n'; c = java_get_unicode ())
|
||||
c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
|
||||
{
|
||||
if (c == '\\')
|
||||
c = java_parse_escape_sequence ();
|
||||
|
@ -35,6 +35,13 @@ extern int lineno;
|
||||
/* A Unicode character, as read from the input file */
|
||||
typedef unsigned short unicode_t;
|
||||
|
||||
#ifdef HAVE_ICONV
|
||||
#include <iconv.h>
|
||||
#endif /* HAVE_ICONV */
|
||||
|
||||
/* Default encoding to use if no encoding is specified. */
|
||||
#define DEFAULT_ENCODING "UTF-8"
|
||||
|
||||
/* Debug macro to print-out what we match */
|
||||
#ifdef JAVA_LEX_DEBUG
|
||||
#ifdef JAVA_LEX_DEBUG_CHAR
|
||||
@ -96,13 +103,39 @@ typedef struct _java_lc {
|
||||
int col;
|
||||
} java_lc;
|
||||
|
||||
/* State of one lexer: the input stream, the backslash and push-back
   bookkeeping, and, when iconv is available, the converter handle
   together with the buffer of raw bytes waiting to be converted.  */
typedef struct java_lexer
|
||||
{
|
||||
/* The file from which we're reading. */
|
||||
FILE *finput;
|
||||
|
||||
/* Number of consecutive backslashes we've read. */
|
||||
int bs_count;
|
||||
|
||||
/* If nonzero, a value that was pushed back. */
|
||||
unicode_t unget_value;
|
||||
|
||||
#ifdef HAVE_ICONV
|
||||
/* The handle for the iconv converter we're using. */
|
||||
iconv_t handle;
|
||||
|
||||
/* Bytes we've read from the file but have not sent to iconv. */
|
||||
char buffer[1024];
|
||||
|
||||
/* Index of first valid character in buffer, -1 if no valid
|
||||
characters. */
|
||||
int first;
|
||||
|
||||
/* Index of last valid character in buffer, plus one. -1 if no
|
||||
valid characters in buffer. */
|
||||
int last;
|
||||
#endif /* HAVE_ICONV */
|
||||
} java_lexer;
|
||||
|
||||
/* Destroy a lexer object. */
|
||||
extern void java_destroy_lexer PARAMS ((java_lexer *));
|
||||
|
||||
#define JAVA_LINE_MAX 80
|
||||
|
||||
/* Macro to read and unread bytes */
|
||||
#define UNGETC(c) ungetc(c, finput)
|
||||
#define GETC() getc(finput)
|
||||
|
||||
/* Build a location compound integer */
|
||||
#define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff))
|
||||
|
||||
|
@ -728,13 +728,12 @@ typedef struct _jdeplist {
|
||||
struct parser_ctxt {
|
||||
|
||||
const char *filename; /* Current filename */
|
||||
FILE *finput; /* Current file input stream */
|
||||
struct parser_ctxt *next;
|
||||
|
||||
java_lexer *lexer; /* Current lexer state */
|
||||
char marker_begining; /* Marker. Should be a sub-struct */
|
||||
struct java_line *p_line, *c_line; /* Previous and current line */
|
||||
java_lc elc; /* Error's line column info */
|
||||
unicode_t unget_utf8_value; /* An unget utf8 value */
|
||||
int ccb_indent; /* Keep track of {} indent, lexer */
|
||||
int first_ccb_indent1; /* First { at ident level 1 */
|
||||
int last_ccb_indent1; /* Last } at ident level 1 */
|
||||
@ -928,7 +927,7 @@ extern void reset_report PARAMS ((void));
|
||||
/* Always in use, no matter what you compile */
|
||||
void java_push_parser_context PARAMS ((void));
|
||||
void java_pop_parser_context PARAMS ((int));
|
||||
void java_init_lex PARAMS ((void));
|
||||
void java_init_lex PARAMS ((FILE *, const char *));
|
||||
extern void java_parser_context_save_global PARAMS ((void));
|
||||
extern void java_parser_context_restore_global PARAMS ((void));
|
||||
int yyparse PARAMS ((void));
|
||||
|
@ -2618,10 +2618,13 @@ java_pop_parser_context (generate)
|
||||
next->incomplete_class = ctxp->incomplete_class;
|
||||
next->gclass_list = ctxp->gclass_list;
|
||||
lineno = ctxp->lineno;
|
||||
finput = ctxp->finput;
|
||||
current_class = ctxp->current_class;
|
||||
}
|
||||
|
||||
/* If the old and new lexers differ, then free the old one. */
|
||||
if (ctxp->lexer && next && ctxp->lexer != next->lexer)
|
||||
java_destroy_lexer (ctxp->lexer);
|
||||
|
||||
/* Set the single import class file flag to 0 for the current list
|
||||
of imported things */
|
||||
for (current = ctxp->import_list; current; current = TREE_CHAIN (current))
|
||||
@ -2661,7 +2664,6 @@ java_parser_context_save_global ()
|
||||
else if (ctxp->saved_data)
|
||||
create_new_parser_context (1);
|
||||
|
||||
ctxp->finput = finput;
|
||||
ctxp->lineno = lineno;
|
||||
ctxp->current_class = current_class;
|
||||
ctxp->filename = input_filename;
|
||||
@ -2675,7 +2677,6 @@ java_parser_context_save_global ()
|
||||
void
|
||||
java_parser_context_restore_global ()
|
||||
{
|
||||
finput = ctxp->finput;
|
||||
lineno = ctxp->lineno;
|
||||
current_class = ctxp->current_class;
|
||||
input_filename = ctxp->filename;
|
||||
|
Loading…
Reference in New Issue
Block a user