PR libgcj/9715, PR libgcj/19132:

* java/nio/charset/Charset.java (charsetForName): Try default
	provider first.
	(availableCharsets): Re-merged.
	(providers2): Likewise.
	(defaultCharset): Likewise.
	* sources.am, Makefile.in: Rebuilt.
	* gnu/java/nio/charset/Provider.java: Removed.
	* java/io/OutputStreamWriter.java
	(OutputStreamWriter(OutputStream,Charset)): New constructor.
	(OutputStreamWriter(OutputStream,CharsetEncoder)): Likewise.
	* java/io/InputStreamReader.java
	(InputStreamReader(InputStream,CharsetDecoder)): New constructor.
	(InputStreamReader(InputStream,Charset)): Likewise.
	* gnu/gcj/convert/BytesToUnicode.java (getDecoder): Try a
	BytesToCharsetAdaptor.
	* gnu/gcj/convert/UnicodeToBytes.java (getEncoder): Try a
	CharsetToBytesAdaptor.
	* gnu/gcj/convert/CharsetToBytesAdaptor.java: New file.
	* gnu/gcj/convert/BytesToCharsetAdaptor.java: New file.
	* mauve-libgcj: Remove getEncoding exclusion.

Co-Authored-By: Tom Tromey <tromey@redhat.com>

From-SVN: r109294
This commit is contained in:
David Daney 2006-01-03 22:58:31 +00:00 committed by Tom Tromey
parent 368872c315
commit 8ceb88d4cd
12 changed files with 420 additions and 186 deletions

View File

@ -1,3 +1,28 @@
2005-12-24 David Daney <ddaney@avtrex.com>
Tom Tromey <tromey@redhat.com>
PR libgcj/9715, PR libgcj/19132:
* java/nio/charset/Charset.java (charsetForName): Try default
provider first.
(availableCharsets): Re-merged.
(providers2): Likewise.
(defaultCharset): Likewise.
* sources.am, Makefile.in: Rebuilt.
* gnu/java/nio/charset/Provider.java: Removed.
* java/io/OutputStreamWriter.java
(OutputStreamWriter(OutputStream,Charset)): New constructor.
(OutputStreamWriter(OutputStream,CharsetEncoder)): Likewise.
* java/io/InputStreamReader.java
(InputStreamReader(InputStream,CharsetDecoder)): New constructor.
(InputStreamReader(InputStream,Charset)): Likewise.
* gnu/gcj/convert/BytesToUnicode.java (getDecoder): Try a
BytesToCharsetAdaptor.
* gnu/gcj/convert/UnicodeToBytes.java (getEncoder): Try a
CharsetToBytesAdaptor.
* gnu/gcj/convert/CharsetToBytesAdaptor.java: New file.
* gnu/gcj/convert/BytesToCharsetAdaptor.java: New file.
* mauve-libgcj: Remove getEncoding exclusion.
2005-12-28 Anthony Green <green@redhat.com>
* gnu/java/net/natPlainSocketImplWin32.cc (connect): Same

View File

@ -1134,7 +1134,9 @@ gnu/gcj/RawDataManaged.java
gnu_gcj_header_files = $(patsubst %.java,%.h,$(gnu_gcj_source_files))
gnu_gcj_convert_source_files = \
gnu/gcj/convert/BytesToCharsetAdaptor.java \
gnu/gcj/convert/BytesToUnicode.java \
gnu/gcj/convert/CharsetToBytesAdaptor.java \
gnu/gcj/convert/Convert.java \
gnu/gcj/convert/IOConverter.java \
gnu/gcj/convert/Input_8859_1.java \
@ -1708,7 +1710,7 @@ classpath/gnu/java/nio/charset/MacRomania.java \
classpath/gnu/java/nio/charset/MacSymbol.java \
classpath/gnu/java/nio/charset/MacThai.java \
classpath/gnu/java/nio/charset/MacTurkish.java \
gnu/java/nio/charset/Provider.java \
classpath/gnu/java/nio/charset/Provider.java \
classpath/gnu/java/nio/charset/US_ASCII.java \
classpath/gnu/java/nio/charset/UTF_16.java \
classpath/gnu/java/nio/charset/UTF_16BE.java \

View File

@ -0,0 +1,90 @@
/* Copyright (C) 2005 Free Software Foundation
This file is part of libgcj.
This software is copyrighted work licensed under the terms of the
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
details. */
package gnu.gcj.convert;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.CoderResult;
import gnu.java.nio.charset.EncodingHelper;
/**
 * Adaptor that lets any {@link Charset} be used as a
 * BytesToUnicode converter.
 */
public class BytesToCharsetAdaptor extends BytesToUnicode
{
  /**
   * The CharsetDecoder that performs the actual conversion.
   */
  private final CharsetDecoder decoder;

  /**
   * Cached ByteBuffer view of the current input array (inbuffer).
   * It is re-created whenever inbuffer refers to a different array.
   */
  private ByteBuffer inBuf;

  /**
   * Create a new BytesToCharsetAdaptor using a fresh decoder obtained
   * from the given Charset.
   *
   * @param cs the Charset.
   */
  public BytesToCharsetAdaptor(Charset cs)
  {
    this(cs.newDecoder());
  }

  /**
   * Create a new BytesToCharsetAdaptor around the given CharsetDecoder.
   * The decoder's error actions are changed to REPLACE as a side
   * effect.
   *
   * @param dec the CharsetDecoder.
   */
  public BytesToCharsetAdaptor(CharsetDecoder dec)
  {
    // Have the decoder substitute its default replacement for bad
    // input so that read() never has to deal with coding errors.
    dec.onMalformedInput(CodingErrorAction.REPLACE);
    dec.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder = dec;
  }

  /**
   * Return the decoder's name.  This is the backing Charset's name
   * as mapped by EncodingHelper.getOldCanonical.
   *
   * @return The name.
   */
  public String getName()
  {
    String charsetName = decoder.charset().name();
    return EncodingHelper.getOldCanonical(charsetName);
  }

  /**
   * Decode bytes from inbuffer into the given char array.  On return
   * inpos has been advanced past the bytes that were consumed.
   *
   * @param outbuffer destination for the decoded characters.
   * @param outpos index in outbuffer of the first character to store.
   * @param count maximum number of characters to store.
   * @return the number of characters stored in outbuffer.
   */
  public int read(char[] outbuffer, int outpos, int count)
  {
    // Keep the cached wrapper only while it still views inbuffer.
    boolean wrapsCurrentInput
      = inBuf != null && inBuf.hasArray() && inBuf.array() == inbuffer;
    if (! wrapsCurrentInput)
      inBuf = ByteBuffer.wrap(inbuffer);

    // NOTE(review): this treats inlength as a byte count relative to
    // inpos.  If the base class defines inlength as an end index
    // instead, the limit overshoots -- confirm against BytesToUnicode.
    inBuf.limit(inpos + inlength);
    inBuf.position(inpos);

    CharBuffer target = CharBuffer.wrap(outbuffer, outpos, count);
    decoder.decode(inBuf, target, false);

    // Record how far the decoder got in the input.
    inpos = inBuf.position();

    // The wrapper started at outpos, so the difference is the number
    // of characters produced.
    return target.position() - outpos;
  }

  // These aren't cached.
  public void done()
  {
  }
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1999, 2000, 2001 Free Software Foundation
/* Copyright (C) 1999, 2000, 2001, 2005 Free Software Foundation
This file is part of libgcj.
@ -8,6 +8,8 @@ details. */
package gnu.gcj.convert;
import java.nio.charset.Charset;
public abstract class BytesToUnicode extends IOConverter
{
/** Buffer to read bytes from.
@ -104,9 +106,18 @@ public abstract class BytesToUnicode extends IOConverter
try
{
// We pass the original name to iconv and let it handle
// its own aliasing.
// its own aliasing. Note that we intentionally prefer
// iconv over nio.
return new Input_iconv (encoding);
}
catch (Throwable _)
{
// Ignore, and try the next method.
}
try
{
return new BytesToCharsetAdaptor(Charset.forName(encoding));
}
catch (Throwable _)
{
throw new java.io.UnsupportedEncodingException(encoding

View File

@ -0,0 +1,150 @@
/* Copyright (C) 2005, 2006 Free Software Foundation
This file is part of libgcj.
This software is copyrighted work licensed under the terms of the
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
details. */
package gnu.gcj.convert;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.CoderResult;
import gnu.java.nio.charset.EncodingHelper;
/**
 * Adaptor that lets any {@link Charset} be used as a
 * UnicodeToBytes converter.
 */
public class CharsetToBytesAdaptor extends UnicodeToBytes
{
  /**
   * The CharsetEncoder that does all the work.
   */
  private final CharsetEncoder encoder;

  /**
   * ByteBuffer wrapper for this.buf.  It is re-created whenever buf
   * refers to a different array.
   */
  private ByteBuffer outBuf;

  /**
   * True if we've told the CharsetEncoder that there are no more
   * characters available.
   */
  private boolean closedEncoder;

  /**
   * True once flushing the encoder has reported UNDERFLOW, i.e. we're
   * finished.
   */
  private boolean finished;

  /**
   * Create a new CharsetToBytesAdaptor using a fresh encoder obtained
   * from the given Charset.
   *
   * @param cs The Charset.
   */
  public CharsetToBytesAdaptor(Charset cs)
  {
    this(cs.newEncoder());
  }

  /**
   * Create a new CharsetToBytesAdaptor for the given CharsetEncoder.
   * The encoder's error actions are changed to REPLACE as a side
   * effect.
   *
   * @param enc The CharsetEncoder.
   */
  public CharsetToBytesAdaptor(CharsetEncoder enc)
  {
    encoder = enc;
    // Use default replacements on bad input so that we don't have to
    // deal with errors.
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }

  /**
   * Return the encoder's name.  This is the backing Charset's name
   * as mapped by EncodingHelper.getOldCanonical.
   *
   * @return The name.
   */
  public String getName()
  {
    return EncodingHelper.getOldCanonical(encoder.charset().name());
  }

  /**
   * Encode characters from a char array into buf.
   *
   * @param inbuffer source of the characters.
   * @param inpos index of the first character to encode.
   * @param inlength number of characters available starting at inpos.
   * @return the number of characters consumed.
   */
  public int write (char[] inbuffer, int inpos, int inlength)
  {
    // Wrap the char array so it can be used by the encoder.  This
    // overload of wrap takes (array, offset, length).
    CharBuffer b = CharBuffer.wrap(inbuffer, inpos, inlength);
    write(b);
    return b.position() - inpos; // Number of chars consumed.
  }

  /**
   * Encode characters from a String into buf.
   *
   * NOTE(review): the last parameter is a single char.  If this method
   * is meant to override a base-class variant that takes a work
   * buffer, the signature may need to be char[] -- confirm against
   * UnicodeToBytes.
   *
   * @param str source of the characters.
   * @param inpos index of the first character to encode.
   * @param inlength number of characters available starting at inpos.
   * @param work unused.
   * @return the number of characters consumed.
   */
  public int write (String str, int inpos, int inlength, char work)
  {
    // Wrap the String so it can be used by the encoder.  Unlike the
    // char[] overload, CharBuffer.wrap(CharSequence, start, end) takes
    // an END index rather than a length, so convert the length here.
    CharBuffer b = CharBuffer.wrap(str, inpos, inpos + inlength);
    write(b);
    return b.position() - inpos; // Number of chars consumed.
  }

  /**
   * Encode as much of inBuf as will fit in buf.  The number of
   * chars consumed is reflected by the new position of inBuf.  The
   * output is put in buf and count is advanced past the bytes
   * written.
   *
   * @param inBuf The input.
   */
  private void write(CharBuffer inBuf)
  {
    // Reuse existing outBuf if it is still wrapping the same array
    // it was created with.
    if (outBuf == null || !outBuf.hasArray() || outBuf.array() != buf)
      outBuf = ByteBuffer.wrap(buf);

    // Continue writing where the previous output ended.
    outBuf.position(count);

    if (closedEncoder)
      {
        // We've already said that there is no more input available,
        // so simply try to flush again.
        CoderResult result = encoder.flush(outBuf);
        if (result == CoderResult.UNDERFLOW)
          finished = true;
      }
    else
      {
        // Do the conversion.  A call with no characters to write is
        // taken as the end of input.
        closedEncoder = ! inBuf.hasRemaining();
        encoder.encode(inBuf, outBuf, closedEncoder);
      }

    // Mark the new end of buf.
    count = outBuf.position();
  }

  /**
   * Check for cached output in the converter.
   *
   * @return true until a flush of the encoder has reported UNDERFLOW;
   * false afterwards.
   */
  public boolean havePendingBytes()
  {
    return ! finished;
  }

  // These aren't cached.
  public void done()
  {
  }
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1999, 2000, 2001, 2003 Free Software Foundation
/* Copyright (C) 1999, 2000, 2001, 2003, 2005 Free Software Foundation
This file is part of libgcj.
@ -8,6 +8,8 @@ details. */
package gnu.gcj.convert;
import java.nio.charset.Charset;
public abstract class UnicodeToBytes extends IOConverter
{
/** Buffer to emit bytes to.
@ -101,9 +103,21 @@ public abstract class UnicodeToBytes extends IOConverter
try
{
// We pass the original name to iconv and let it handle
// its own aliasing.
// its own aliasing. Note that we intentionally prefer
// iconv over nio.
return new Output_iconv (encoding);
}
catch (Throwable _)
{
// Ignore, and try the next method.
}
try
{
// Try finding a java.nio.charset.Charset and using
// the adaptor. Use the original name as Charsets have
// their own canonical names.
return new CharsetToBytesAdaptor(Charset.forName(encoding));
}
catch (Throwable _)
{
// Put the original exception in the throwable.

View File

@ -1,154 +0,0 @@
/* Provider.java --
Copyright (C) 2002, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.nio.charset;
import java.nio.charset.Charset;
import java.nio.charset.spi.CharsetProvider;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
/**
 * Charset provider for the required charsets.  Used by
 * {@link Charset#charsetForName} and {@link Charset#availableCharsets}.
 *
 * @author Jesse Rosenstock
 * @author Robert Schuster (thebohemian@gmx.net)
 * @see Charset
 */
public final class Provider extends CharsetProvider
{
  /**
   * Lazily created shared instance; see {@link #provider()}.
   */
  private static Provider singleton;

  /**
   * Map from charset name to charset canonical name.  The strings
   * are all lower-case to allow case-insensitive retrieval of
   * Charset instances.
   */
  private final HashMap canonicalNames;

  /**
   * Map from lower-case canonical name to Charset.
   * TODO: We may want to use soft references.  We would then need to keep
   * track of the class name to regenerate the object.
   */
  private final HashMap charsets;

  private Provider ()
  {
    canonicalNames = new HashMap ();
    charsets = new HashMap ();

    // US-ASCII aka ISO646-US
    addCharset (new US_ASCII ());

    // ISO-8859-1 aka ISO-LATIN-1
    addCharset (new ISO_8859_1 ());

    // UTF-8
    addCharset (new UTF_8 ());

    // UTF-16BE
    addCharset (new UTF_16BE ());

    // UTF-16LE
    addCharset (new UTF_16LE ());

    // UTF-16
    addCharset (new UTF_16 ());
  }

  /**
   * Returns a read-only iterator over the registered charsets.
   *
   * @return an iterator over the Charset instances held by this provider.
   */
  public Iterator charsets ()
  {
    return Collections.unmodifiableCollection (charsets.values ())
                      .iterator ();
  }

  /**
   * Returns a Charset instance by converting the given
   * name to lower-case, looking up the canonical charset
   * name and finally looking up the Charset with that name.
   *
   * <p>The lookup is therefore case-insensitive.</p>
   *
   * @param charsetName the canonical name or an alias of the charset.
   * @return The Charset having <code>charsetName</code>
   * as its name or alias, or null if no such Charset exists.
   */
  public Charset charsetForName (String charsetName)
  {
    return (Charset) charsets.get(canonicalNames.get(lowerCase(charsetName)));
  }

  /**
   * Puts a Charset under its canonical name into the 'charsets' map.
   * Then puts a mapping from all its alias names to the canonical name.
   *
   * <p>All names are converted to lower-case.</p>
   *
   * @param cs the Charset to register.
   */
  private void addCharset (Charset cs)
  {
    String canonicalName = lowerCase(cs.name());
    charsets.put (canonicalName, cs);

    /* Adds a mapping between the canonical name
     * itself making a lookup using that name
     * no special case.
     */
    canonicalNames.put(canonicalName, canonicalName);

    for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
      canonicalNames.put (lowerCase((String) i.next()), canonicalName);
  }

  /**
   * Lower-cases a charset name independently of the default locale.
   * With the locale-sensitive String.toLowerCase() an upper-case 'I'
   * maps to a dotless i under e.g. a Turkish default locale, so
   * "ISO-8859-1" and "iso-8859-1" would no longer yield the same key.
   * The class name is spelled out in full so that no additional import
   * is required.
   */
  private static String lowerCase (String name)
  {
    return name.toLowerCase(java.util.Locale.ENGLISH);
  }

  /**
   * Returns the shared Provider, creating it on first use.
   *
   * @return the singleton Provider instance.
   */
  public static synchronized Provider provider ()
  {
    if (singleton == null)
      singleton = new Provider ();
    return singleton;
  }
}

View File

@ -39,6 +39,8 @@ exception statement from your version. */
package java.io;
import gnu.gcj.convert.*;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
/**
* This class reads characters from a byte input stream. The characters
@ -131,6 +133,25 @@ public class InputStreamReader extends Reader
this(in, BytesToUnicode.getDecoder(encoding_name));
}
/**
 * Creates an InputStreamReader that uses a decoder of the given
 * charset to decode the bytes in the InputStream into
 * characters.  The charset is wrapped in a BytesToCharsetAdaptor,
 * which obtains a new decoder from it and configures that decoder to
 * replace malformed and unmappable input.
 *
 * @param in the InputStream to read bytes from
 * @param charset the Charset whose decoder is used
 */
public InputStreamReader(InputStream in, Charset charset)
{
this(in, new BytesToCharsetAdaptor(charset));
}
/**
 * Creates an InputStreamReader that uses the given charset decoder
 * to decode the bytes in the InputStream into characters.  The decoder
 * is wrapped in a BytesToCharsetAdaptor; note that the adaptor sets the
 * supplied decoder's malformed-input and unmappable-character actions
 * to REPLACE, so the caller's decoder object is reconfigured.
 *
 * @param in the InputStream to read bytes from
 * @param decoder the CharsetDecoder to decode the input with
 */
public InputStreamReader(InputStream in, CharsetDecoder decoder)
{
this(in, new BytesToCharsetAdaptor(decoder));
}
private InputStreamReader(InputStream in, BytesToUnicode decoder)
{
// FIXME: someone could pass in a BufferedInputStream whose buffer

View File

@ -39,6 +39,9 @@ exception statement from your version. */
package java.io;
import gnu.gcj.convert.UnicodeToBytes;
import gnu.gcj.convert.CharsetToBytesAdaptor;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
/**
* This class writes characters to an output stream that is byte oriented
@ -128,6 +131,31 @@ public class OutputStreamWriter extends Writer
this(out, UnicodeToBytes.getDefaultEncoder());
}
/**
 * This method initializes a new instance of <code>OutputStreamWriter</code>
 * to write to the specified stream using a given <code>Charset</code>.
 * The charset is wrapped in a <code>CharsetToBytesAdaptor</code>, which
 * obtains a new encoder from it and configures that encoder to replace
 * malformed and unmappable input.
 *
 * @param out The <code>OutputStream</code> to write to
 * @param cs The <code>Charset</code> of the encoding to use
 */
public OutputStreamWriter(OutputStream out, Charset cs)
{
this(out, new CharsetToBytesAdaptor(cs));
}
/**
 * This method initializes a new instance of <code>OutputStreamWriter</code>
 * to write to the specified stream using a given
 * <code>CharsetEncoder</code>.  The encoder is wrapped in a
 * <code>CharsetToBytesAdaptor</code>; note that the adaptor sets the
 * supplied encoder's malformed-input and unmappable-character actions
 * to REPLACE, so the caller's encoder object is reconfigured.
 *
 * @param out The <code>OutputStream</code> to write to
 * @param enc The <code>CharsetEncoder</code> to encode the output with
 */
public OutputStreamWriter(OutputStream out, CharsetEncoder enc)
{
this(out, new CharsetToBytesAdaptor(enc));
}
/**
* This method closes this stream, and the underlying
* <code>OutputStream</code>

View File

@ -38,6 +38,8 @@ exception statement from your version. */
package java.nio.charset;
import gnu.classpath.ServiceFactory;
import gnu.classpath.SystemProperties;
import gnu.java.nio.charset.Provider;
import java.io.BufferedReader;
@ -116,6 +118,53 @@ public abstract class Charset implements Comparable
}
}
/**
 * Returns the system default charset.
 *
 * The charset name is taken from the file.encoding property, which
 * may be set by the user or the VM.  If reading the property fails,
 * "ISO-8859-1" is used as the name instead.
 *
 * @return the Charset registered under the resulting name
 * @throws IllegalStateException if no charset can be obtained for
 * that name
 * @since 1.5
 */
public static Charset defaultCharset()
{
  // Start out with the fallback; it is only overwritten when the
  // property lookup completes normally.
  String encoding = "ISO-8859-1";
  try
    {
      encoding = SystemProperties.getProperty("file.encoding");
    }
  catch (SecurityException e)
    {
      // Keep the fallback.
    }
  catch (IllegalArgumentException e)
    {
      // Keep the fallback.
    }

  try
    {
      return forName(encoding);
    }
  catch (IllegalArgumentException e)
    {
      // Ignore.  UnsupportedCharsetException and
      // IllegalCharsetNameException are both subclasses of
      // IllegalArgumentException, so this one clause covers them too.
    }

  throw new IllegalStateException("Can't get default charset!");
}
public static boolean isSupported (String charsetName)
{
return charsetForName (charsetName) != null;
@ -155,7 +204,12 @@ public abstract class Charset implements Comparable
private static Charset charsetForName(String charsetName)
{
checkName (charsetName);
Charset cs = null;
// Try the default provider first
// (so we don't need to load external providers unless really necessary)
// if it is an exotic charset try loading the external providers.
Charset cs = provider().charsetForName(charsetName);
if (cs == null)
{
CharsetProvider[] providers = providers2();
for (int i = 0; i < providers.length; i++)
{
@ -163,12 +217,18 @@ public abstract class Charset implements Comparable
if (cs != null)
break;
}
}
return cs;
}
public static SortedMap availableCharsets()
{
TreeMap charsets = new TreeMap(String.CASE_INSENSITIVE_ORDER);
for (Iterator i = provider().charsets(); i.hasNext(); )
{
Charset cs = (Charset) i.next();
charsets.put(cs.name(), cs);
}
CharsetProvider[] providers = providers2();
for (int j = 0; j < providers.length; j++)
@ -206,7 +266,7 @@ public abstract class Charset implements Comparable
/**
* We need to support multiple providers, reading them from
* java.nio.charset.spi.CharsetProvider in the resource directory
* META-INF/services.
* META-INF/services. This returns the "extra" charset providers.
*/
private static CharsetProvider[] providers2()
{
@ -214,24 +274,10 @@ public abstract class Charset implements Comparable
{
try
{
Enumeration en = ClassLoader.getSystemResources
("META-INF/services/java.nio.charset.spi.CharsetProvider");
Iterator i = ServiceFactory.lookupProviders(CharsetProvider.class);
LinkedHashSet set = new LinkedHashSet();
set.add(provider());
while (en.hasMoreElements())
{
BufferedReader rdr = new BufferedReader(new InputStreamReader
(((URL) (en.nextElement())).openStream()));
while (true)
{
String s = rdr.readLine();
if (s == null)
break;
CharsetProvider p =
(CharsetProvider) ((Class.forName(s)).newInstance());
set.add(p);
}
}
while (i.hasNext())
set.add(i.next());
providers = new CharsetProvider[set.size()];
set.toArray(providers);

View File

@ -23,4 +23,3 @@ JDBC2.0
!java.awt.event.MouseEvent.modifiersEx
!org.omg.
!javax.rmi
!java.io.InputStreamReader.getEncoding

View File

@ -497,7 +497,9 @@ gnu/gcj.list: $(gnu_gcj_source_files)
gnu_gcj_convert_source_files = \
gnu/gcj/convert/BytesToCharsetAdaptor.java \
gnu/gcj/convert/BytesToUnicode.java \
gnu/gcj/convert/CharsetToBytesAdaptor.java \
gnu/gcj/convert/Convert.java \
gnu/gcj/convert/IOConverter.java \
gnu/gcj/convert/Input_8859_1.java \
@ -1350,7 +1352,7 @@ classpath/gnu/java/nio/charset/MacRomania.java \
classpath/gnu/java/nio/charset/MacSymbol.java \
classpath/gnu/java/nio/charset/MacThai.java \
classpath/gnu/java/nio/charset/MacTurkish.java \
gnu/java/nio/charset/Provider.java \
classpath/gnu/java/nio/charset/Provider.java \
classpath/gnu/java/nio/charset/US_ASCII.java \
classpath/gnu/java/nio/charset/UTF_16.java \
classpath/gnu/java/nio/charset/UTF_16BE.java \