re PR libgcj/27171 (UTF8 PrintWriter goes haywire)

PR libgcj/27171:
	* testsuite/libjava.lang/pr27171.java: New file.
	* testsuite/libjava.lang/pr27171.out: New file.
	* gnu/gcj/convert/Output_UTF8.java (havePendingBytes): Return
	true if we've seen a high surrogate.
	(write): Handle high surrogates at the end of the stream.
	Properly emit isolated low surrogates.

From-SVN: r113013
This commit is contained in:
Tom Tromey 2006-04-17 21:41:47 +00:00 committed by Tom Tromey
parent e26303c2f7
commit dff81d4f4c
4 changed files with 57 additions and 8 deletions

View File

@ -1,3 +1,13 @@
2006-04-17 Tom Tromey <tromey@redhat.com>
PR libgcj/27171:
* testsuite/libjava.lang/pr27171.java: New file.
* testsuite/libjava.lang/pr27171.out: New file.
* gnu/gcj/convert/Output_UTF8.java (havePendingBytes): Return
true if we've seen a high surrogate.
(write): Handle high surrogates at the end of the stream.
Properly emit isolated low surrogates.
2006-04-17 Andreas Tobler <a.tobler@schweiz.ch>
* testsuite/libjava.lang/stringconst2.java: Print a stack trace in case

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1999, 2000, 2003 Free Software Foundation
/* Copyright (C) 1999, 2000, 2003, 2006 Free Software Foundation
This file is part of libgcj.
@ -36,7 +36,7 @@ public class Output_UTF8 extends UnicodeToBytes
int avail = buf.length - count;
for (;;)
{
if (avail == 0 || (inlength == 0 && bytes_todo == 0))
if (avail == 0 || (inlength == 0 && bytes_todo == 0 && hi_part == 0))
break;
// The algorithm is made more complicated because we want to write
// at least one byte in the output buffer, if there is room for
@ -61,17 +61,25 @@ public class Output_UTF8 extends UnicodeToBytes
continue;
}
// Handle a high surrogate at the end of the input stream.
if (inlength == 0 && hi_part != 0)
{
buf[count++] = (byte) (0xE0 | (hi_part >> 12));
value = hi_part;
hi_part = 0;
avail--;
bytes_todo = 2;
continue;
}
char ch = inbuffer[inpos++];
inlength--;
if ((hi_part != 0 && (ch <= 0xDBFF || ch > 0xDFFF))
|| (hi_part == 0 && ch >= 0xDC00 && ch <= 0xDFFF))
if (hi_part != 0 && (ch <= 0xDBFF || ch > 0xDFFF))
{
// If the previous character was a high surrogate, and we
// don't now have a low surrogate, we print the high
// surrogate as an isolated character. If this character
// is a low surrogate and we didn't previously see a high
// surrogate, we do the same thing.
// surrogate as an isolated character.
--inpos;
++inlength;
buf[count++] = (byte) (0xE0 | (hi_part >> 12));
@ -80,6 +88,16 @@ public class Output_UTF8 extends UnicodeToBytes
avail--;
bytes_todo = 2;
}
else if (hi_part == 0 && ch >= 0xDC00 && ch <= 0xDFFF)
{
// If this character is a low surrogate and we didn't
// previously see a high surrogate, we do the same thing
// as above.
buf[count++] = (byte) (0xE0 | (ch >> 12));
value = ch;
avail--;
bytes_todo = 2;
}
else if (ch < 128 && (ch != 0 || standardUTF8))
{
avail--;
@ -122,7 +140,7 @@ public class Output_UTF8 extends UnicodeToBytes
public boolean havePendingBytes()
{
return bytes_todo > 0;
return bytes_todo > 0 || hi_part != 0;
}
}

View File

@ -0,0 +1,19 @@
/**
 * Regression test for PR libgcj/27171.
 *
 * Encodes two one-character strings, each holding a single unpaired
 * surrogate, as UTF-8 and prints the number of bytes produced for each.
 */
public class pr27171 {
  /**
   * Prints, on its own line, the number of bytes in the UTF-8 encoding
   * of the one-character string made from {@code unit}.
   */
  private static void reportEncodedLength(char unit)
      throws java.io.UnsupportedEncodingException {
    String text = String.valueOf(unit);
    byte[] encoded = text.getBytes("UTF-8");
    System.out.println(encoded.length);
  }

  public static void main(String[] args) throws Throwable {
    // In both cases below the expected output pins the result of our
    // own implementation; the JDK does something else for these inputs.

    // A low surrogate with no preceding high surrogate.
    char loneLow = 0xdc9e;
    reportEncodedLength(loneLow);

    // A high surrogate that is the last character of the input.
    char loneHigh = 0xdaee;
    reportEncodedLength(loneHigh);
  }
}

View File

@ -0,0 +1,2 @@
3
3