lex.c (java_read_char): UNGET invalid non-initial utf8 character.

* lex.c (java_read_char): UNGET invalid non-initial utf8 character. * lex.h (UNGETC): Change misleading macro. From-SVN: r25753
1999-03-13 11:21:38 -08:00 · 1999-03-13 11:21:38 -08:00 · 45ec036eed
commit 45ec036eed
parent b452ec852d
2 changed files with 14 additions and 3 deletions
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -227,6 +227,7 @@ java_read_char ()
          c1 = GETC ();
 	  if ((c1 & 0xc0) == 0x80)
 	    return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
+	  c = c1;
 	}
      else if ((c & 0xf0) == 0xe0)
        {
@@ -237,8 +238,18 @@ java_read_char ()
 	      if ((c2 & 0xc0) == 0x80)
 	        return (unicode_t)(((c & 0xf) << 12) + 
 				   (( c1 & 0x3f) << 6) + (c2 & 0x3f));
+	      else
+		c = c2;
 	    }
+	  else
+	    c = c1;
 	}
+      /* We looked for a UTF8 multi-byte sequence (since we saw an initial
+	 byte with the high bit set), but found invalid bytes instead.
+	 If the most recent byte was ASCII (and not EOF), we should
+	 unget it, in case it was a comment terminator or other delimiter. */
+      if ((c & 0x80) == 0)
+	UNGETC (c);
      return BAD_UTF8_VALUE;
    }
 }
@@ -308,7 +319,7 @@ java_read_unicode (term_context, unicode_escape_p)
 	  return (term_context ? unicode :
 		  (java_lineterminator (c) ? '\n' : unicode));
 	}
-      UNGETC (c);
+      ctxp->unget_utf8_value = c;
    }
  return (unicode_t)'\\';
 }
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -99,8 +99,8 @@ typedef struct _java_lc {

 #define JAVA_LINE_MAX 80

-/* Macro to read and unread chars */
-#define UNGETC(c) ctxp->unget_utf8_value = (c);
+/* Macro to read and unread bytes */
+#define UNGETC(c) ungetc(c, finput)
 #define GETC()    getc(finput)

 /* Build a location compound integer */