// Character.java - Character class. /* Copyright (C) 1998, 1999 Cygnus Solutions This file is part of libgcj. This software is copyrighted work licensed under the terms of the Libgcj License. Please consult the file "LIBGCJ_LICENSE" for details. */ package java.lang; import java.io.Serializable; /** * @author Tom Tromey * @date September 10, 1998 */ /* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3 * "The Java Language Specification", ISBN 0-201-63451-1, * online API docs for JDK 1.2 beta from http://www.javasoft.com, * and The Unicode Standard Version 2.0. * Status: Believed complete and correct for JDK 1.1; 1.2 methods * unimplemented. */ public final class Character implements Serializable, Comparable { public static final char MIN_VALUE = '\u0000'; public static final char MAX_VALUE = '\uffff'; public static final int MIN_RADIX = 2; public static final int MAX_RADIX = 36; // This initialization is seemingly circular, but it is accepted // by javac, and is handled specially by gcc. public static final Class TYPE = char.class; // Space. public static final byte SPACE_SEPARATOR = 12; public static final byte LINE_SEPARATOR = 13; public static final byte PARAGRAPH_SEPARATOR = 14; // Letters. public static final byte UPPERCASE_LETTER = 1; public static final byte LOWERCASE_LETTER = 2; public static final byte TITLECASE_LETTER = 3; public static final byte MODIFIER_LETTER = 4; public static final byte OTHER_LETTER = 5; // Numbers. public static final byte DECIMAL_DIGIT_NUMBER = 9; public static final byte LETTER_NUMBER = 10; public static final byte OTHER_NUMBER = 11; // Marks. public static final byte NON_SPACING_MARK = 6; public static final byte ENCLOSING_MARK = 7; public static final byte COMBINING_SPACING_MARK = 8; // Punctuation. public static final byte DASH_PUNCTUATION = 20; public static final byte START_PUNCTUATION = 21; public static final byte END_PUNCTUATION = 22; public static final byte CONNECTOR_PUNCTUATION = 23; public static final byte OTHER_PUNCTUATION = 24; // Symbols. public static final byte MATH_SYMBOL = 25; public static final byte CURRENCY_SYMBOL = 26; public static final byte MODIFIER_SYMBOL = 27; public static final byte OTHER_SYMBOL = 28; // Format controls. public static final byte CONTROL = 15; // Note: The JCL book says that both FORMAT and PRIVATE_USE are 18. // However, FORMAT is actually 16. public static final byte FORMAT = 16; // Others. public static final byte UNASSIGNED = 0; public static final byte PRIVATE_USE = 18; public static final byte SURROGATE = 19; public Character (char ch) { value = ch; } public char charValue () { return value; } // See if a character is a digit. If so, return the corresponding // value. Otherwise return -1. private static native int digit_value (char ch); public static int digit (char ch, int radix) { if (radix < MIN_RADIX || radix > MAX_RADIX) return -1; int d = digit_value (ch); if (d == -1) { if (ch >= 'A' && ch <= 'Z') d = ch - 'A' + 10; else if (ch >= 'a' && ch <= 'z') d = ch - 'a' + 10; else return -1; } return d >= radix ? -1 : d; } public boolean equals (Object obj) { // Don't need to compare OBJ to null as instanceof will do this. if (obj instanceof Character) return value == ((Character) obj).value; return false; } public static char forDigit (int d, int rdx) { if (d < 0 || d >= rdx || rdx < MIN_RADIX || rdx > MAX_RADIX) return '\u0000'; if (d < 10) return (char) ('0' + d); // The Java Language Spec says to use lowercase, while the JCL // says to use uppercase. We go with the former. return (char) ('a' + d - 10); } public static native int getNumericValue (char ch); public static native int getType (char ch); public int hashCode () { return value; } public static boolean isDefined (char ch) { return getType (ch) != UNASSIGNED; } public static boolean isDigit (char ch) { return digit_value (ch) != -1; } // The JCL book says that the argument here is a Character. That is // wrong. public static boolean isIdentifierIgnorable (char ch) { // This information comes from the Unicode Standard. It isn't // auto-generated as it doesn't appear in the unidata table. return ((ch >= '\u0000' && ch <= '\u0008') || (ch >= '\u000e' && ch <= '\u001b') // JDK 1.2 docs say that these are ignorable. The Unicode // Standard is somewhat ambiguous on this issue. || (ch >= '\u007f' && ch <= '\u009f') || (ch >= '\u200c' && ch <= '\u200f') // JCl says 200a through 200e, but that is a typo. The // Unicode standard says the bidi controls are 202a // through 202e. || (ch >= '\u202a' && ch <= '\u202e') || (ch >= '\u206a' && ch <= '\u206f') || ch == '\ufeff'); } public static boolean isISOControl (char c) { return ((c >= '\u0000' && c <= '\u001f') || (c >= '\u007f' && c <= '\u009f')); } public static boolean isJavaIdentifierPart (char ch) { if (isIdentifierIgnorable (ch) || isDigit (ch)) return true; int type = getType (ch); return (type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER || type == TITLECASE_LETTER || type == MODIFIER_LETTER || type == OTHER_LETTER || type == LETTER_NUMBER); } public static boolean isJavaIdentifierStart (char ch) { int type = getType (ch); return (type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER || type == TITLECASE_LETTER || type == MODIFIER_LETTER || type == OTHER_LETTER); } // Deprecated in 1.2. public static boolean isJavaLetter (char ch) { return ch == '$' || ch == '_' || isLetter (ch); } // Deprecated in 1.2. public static boolean isJavaLetterOrDigit (char ch) { return ch == '$' || ch == '_' || isLetterOrDigit (ch); } public static boolean isLetter (char ch) { int type = getType (ch); return (type == UPPERCASE_LETTER || type == LOWERCASE_LETTER || type == TITLECASE_LETTER || type == MODIFIER_LETTER || type == OTHER_LETTER); } public static boolean isLetterOrDigit (char ch) { return isDigit (ch) || isLetter (ch); } public static native boolean isLowerCase (char ch); // Deprecated in JCL. public static boolean isSpace (char ch) { return ch == '\n' || ch == '\t' || ch == '\f' || ch == '\r' || ch == ' '; } public static native boolean isSpaceChar (char ch); public static native boolean isTitleCase (char ch); public static boolean isUnicodeIdentifierPart (char ch) { if (isIdentifierIgnorable (ch) || isDigit (ch)) return true; int type = getType (ch); return (type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER || type == TITLECASE_LETTER || type == MODIFIER_LETTER || type == OTHER_LETTER); } public static boolean isUnicodeIdentifierStart (char ch) { return isLetter (ch); } public static native boolean isUpperCase (char ch); public static boolean isWhitespace (char ch) { return ((ch >= '\u0009' && ch <= '\r') || (ch >= '\u001c' && ch <= '\u001f') || (ch != '\u00a0' && ch != '\ufeff' && isSpaceChar (ch))); } public static native char toLowerCase (char ch); public static native char toTitleCase (char ch); public static native char toUpperCase (char ch); public String toString () { return String.valueOf(value); } public int compareTo (Character anotherCharacter) { return value - anotherCharacter.value; } public int compareTo (Object o) { return compareTo ((Character) o); } // Private data. private char value; }