unicode-decomp.pl: Move from chartables.pl...
2002-03-06 Eric Blake <ebb9@email.byu.edu> * scripts/unicode-decomp.pl: Move from chartables.pl, and remove the code for generating include/java-chartables.h. * scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and merge with Classpath. * scripts/unicode-muncher.pl: Copy from Classpath. * scripts/MakeCharTables.java: New file. * gnu/gcj/convert/Blocks-3.txt: New file. * gnu/gcj/convert/UnicodeData-3.0.0.txt: New file. * gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file. * gnu/java/lang/CharData.java: Copy from Classpath. * Makefile.am (ordinary_java_source_files): Add gnu/java/lang/CharData.java. * configure.in: Remove --enable-fast-character option. * java/lang/Character.java: Merge algorithms and Javadoc with Classpath. * java/lang/natCharacter.cc: Implement Unicode lookup table more efficiently. * include/java-chardecomp.h: Regenerate. * include/java-chartables.h: Regenerate. From-SVN: r50370
This commit is contained in:
parent
90681dec69
commit
74b1875a09
|
@ -1,3 +1,25 @@
|
|||
2002-03-06 Eric Blake <ebb9@email.byu.edu>
|
||||
|
||||
* scripts/unicode-decomp.pl: Move from chartables.pl, and remove
|
||||
the code for generating include/java-chartables.h.
|
||||
* scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and
|
||||
merge with Classpath.
|
||||
* scripts/unicode-muncher.pl: Copy from Classpath.
|
||||
* scripts/MakeCharTables.java: New file.
|
||||
* gnu/gcj/convert/Blocks-3.txt: New file.
|
||||
* gnu/gcj/convert/UnicodeData-3.0.0.txt: New file.
|
||||
* gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file.
|
||||
* gnu/java/lang/CharData.java: Copy from Classpath.
|
||||
* Makefile.am (ordinary_java_source_files): Add
|
||||
gnu/java/lang/CharData.java.
|
||||
* configure.in: Remove --enable-fast-character option.
|
||||
* java/lang/Character.java: Merge algorithms and Javadoc with
|
||||
Classpath.
|
||||
* java/lang/natCharacter.cc: Implement Unicode lookup table more
|
||||
efficiently.
|
||||
* include/java-chardecomp.h: Regenerate.
|
||||
* include/java-chartables.h: Regenerate.
|
||||
|
||||
2002-03-06 Bryce McKinlay <bryce@waitaki.otago.ac.nz>
|
||||
|
||||
* java/awt/MediaTracker.java: Implemented.
|
||||
|
|
|
@ -1288,6 +1288,7 @@ gnu/java/io/NullOutputStream.java \
|
|||
gnu/java/io/ObjectIdentityWrapper.java \
|
||||
gnu/java/lang/ArrayHelper.java \
|
||||
gnu/java/lang/ClassHelper.java \
|
||||
gnu/java/lang/CharData.java \
|
||||
gnu/java/lang/reflect/TypeSignature.java \
|
||||
gnu/java/locale/Calendar.java \
|
||||
gnu/java/locale/Calendar_de.java \
|
||||
|
|
|
@ -1,965 +0,0 @@
|
|||
# chartables.pl - A perl program to generate tables for use by the
|
||||
# Character class.
|
||||
|
||||
# Copyright (C) 1998, 1999 Red Hat, Inc.
|
||||
#
|
||||
# This file is part of libjava.
|
||||
#
|
||||
# This software is copyrighted work licensed under the terms of the
|
||||
# Libjava License. Please consult the file "LIBJAVA_LICENSE" for
|
||||
# details.
|
||||
|
||||
# This program requires a `unidata.txt' file of the form distributed
|
||||
# on the Unicode 2.0 CD ROM. Or, get it more conveniently here:
|
||||
# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt
|
||||
# Version `2.1.8' of this file was last used to update the Character class.
|
||||
|
||||
# Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
|
||||
# "The Java Language Specification", ISBN 0-201-63451-1
|
||||
# plus online API docs for JDK 1.2 beta from http://www.javasoft.com.
|
||||
|
||||
# Usage: perl chartables.pl [-n] UnicodeData-VERSION.txt
|
||||
# If this exits with nonzero status, then you must investigate the
|
||||
# cause of the problem.
|
||||
# Diagnostics and other information to stderr.
|
||||
# This creates the new include/java-chartables.h and
|
||||
# include/java-chardecomp.h files directly.
|
||||
# With -n, the files are not created, but all processing
|
||||
# still occurs.
|
||||
|
||||
# Fields in the table.
|
||||
$CODE = 0;
|
||||
$NAME = 1;
|
||||
$CATEGORY = 2;
|
||||
$DECOMPOSITION = 5;
|
||||
$DECIMAL = 6;
|
||||
$DIGIT = 7;
|
||||
$NUMERIC = 8;
|
||||
$UPPERCASE = 12;
|
||||
$LOWERCASE = 13;
|
||||
$TITLECASE = 14;
|
||||
|
||||
# A special case.
|
||||
$TAMIL_DIGIT_ONE = 0x0be7;
|
||||
$TAMIL_DIGIT_NINE = 0x0bef;
|
||||
|
||||
# These are endpoints of legitimate gaps in the tables.
|
||||
$CJK_IDEOGRAPH_END = 0x9fa5;
|
||||
$HANGUL_END = 0xd7a3;
|
||||
$HIGH_SURROGATE_END = 0xdb7f;
|
||||
$PRIVATE_HIGH_SURROGATE_END = 0xdbff;
|
||||
$LOW_SURROGATE_END = 0xdfff;
|
||||
$PRIVATE_END = 0xf8ff;
|
||||
|
||||
%title_to_upper = ();
|
||||
%title_to_lower = ();
|
||||
%numerics = ();
|
||||
%name = ();
|
||||
|
||||
@digit_start = ();
|
||||
@digit_end = ();
|
||||
|
||||
@space_start = ();
|
||||
@space_end = ();
|
||||
|
||||
# @letter_start = ();
|
||||
# @letter_end = ();
|
||||
|
||||
@all_start = ();
|
||||
@all_end = ();
|
||||
@all_cats = ();
|
||||
|
||||
@upper_start = ();
|
||||
@upper_end = ();
|
||||
@upper_map = ();
|
||||
%upper_anom = ();
|
||||
|
||||
@lower_start = ();
|
||||
@lower_end = ();
|
||||
@lower_map = ();
|
||||
%lower_anom = ();
|
||||
|
||||
@attributes = ();
|
||||
|
||||
# There are a few characters which actually need two attributes.
|
||||
# These are special-cased.
|
||||
$ROMAN_START = 0x2160;
|
||||
$ROMAN_END = 0x217f;
|
||||
%second_attributes = ();
|
||||
|
||||
$prevcode = -1;
|
||||
$status = 0;
|
||||
|
||||
%category_map =
|
||||
(
|
||||
'Mn' => 'NON_SPACING_MARK',
|
||||
'Mc' => 'COMBINING_SPACING_MARK',
|
||||
'Me' => 'ENCLOSING_MARK',
|
||||
'Nd' => 'DECIMAL_DIGIT_NUMBER',
|
||||
'Nl' => 'LETTER_NUMBER',
|
||||
'No' => 'OTHER_NUMBER',
|
||||
'Zs' => 'SPACE_SEPARATOR',
|
||||
'Zl' => 'LINE_SEPARATOR',
|
||||
'Zp' => 'PARAGRAPH_SEPARATOR',
|
||||
'Cc' => 'CONTROL',
|
||||
'Cf' => 'FORMAT',
|
||||
'Cs' => 'SURROGATE',
|
||||
'Co' => 'PRIVATE_USE',
|
||||
'Cn' => 'UNASSIGNED',
|
||||
'Lu' => 'UPPERCASE_LETTER',
|
||||
'Ll' => 'LOWERCASE_LETTER',
|
||||
'Lt' => 'TITLECASE_LETTER',
|
||||
'Lm' => 'MODIFIER_LETTER',
|
||||
'Lo' => 'OTHER_LETTER',
|
||||
'Pc' => 'CONNECTOR_PUNCTUATION',
|
||||
'Pd' => 'DASH_PUNCTUATION',
|
||||
'Ps' => 'START_PUNCTUATION',
|
||||
'Pe' => 'END_PUNCTUATION',
|
||||
'Pi' => 'START_PUNCTUATION',
|
||||
'Pf' => 'END_PUNCTUATION',
|
||||
'Po' => 'OTHER_PUNCTUATION',
|
||||
'Sm' => 'MATH_SYMBOL',
|
||||
'Sc' => 'CURRENCY_SYMBOL',
|
||||
'Sk' => 'MODIFIER_SYMBOL',
|
||||
'So' => 'OTHER_SYMBOL'
|
||||
);
|
||||
|
||||
# These map characters to their decompositions.
|
||||
%canonical_decomposition = ();
|
||||
%full_decomposition = ();
|
||||
|
||||
|
||||
# Handle `-n' and open output files.
# By default the two generated headers are written in place; with `-n'
# as the first argument both outputs are redirected to /dev/null so that
# all processing still happens but nothing is created.
local ($f1, $f2) = ('include/java-chartables.h',
                    'include/java-chardecomp.h');
if ($ARGV[0] eq '-n')
{
    shift @ARGV;
    $f1 = '/dev/null';
    $f2 = '/dev/null';
}

# Fail loudly if an output file cannot be created; otherwise every later
# print would silently go nowhere and the script could still exit 0.
open (CHARTABLE, "> $f1") || die "chartables.pl: cannot write $f1: $!\n";
open (DECOMP, "> $f2") || die "chartables.pl: cannot write $f2: $!\n";
|
||||
|
||||
# Process the Unicode file.
# Each input line is one semicolon-separated record.  Records are handed
# to process_char in code-point order; any hole between consecutive
# records is first filled in by process_gap.
while (<>)
{
    # Remove only the line terminator.  A blind `chop' would eat a data
    # character (or the final `;') if the last line had no newline.
    s/\r?\n$//;
    # Specify a limit for split so that we pick up trailing fields.
    # We make the limit larger than we need, to catch the case where
    # there are extra fields.
    @fields = split (';', $_, 30);
    # Convert code to number.
    $ncode = hex ($fields[$CODE]);

    if ($#fields != 14)
    {
        print STDERR ("Entry for \\u", $fields[$CODE],
                      " has wrong number of fields: ", $#fields, "\n");
    }

    $name{$fields[$CODE]} = $fields[$NAME];

    # If we've found a gap in the table, fill it in.
    if ($ncode != $prevcode + 1)
    {
        &process_gap (*fields, $prevcode, $ncode);
    }

    &process_char (*fields, $ncode);

    $prevcode = $ncode;
}

# Fill in everything after the last record, up to and including 0xffff.
if ($prevcode != 0xffff)
{
    # Setting of `fields' parameter doesn't matter here.
    &process_gap (*fields, $prevcode, 0x10000);
}
|
||||
|
||||
print CHARTABLE "// java-chartables.h - Character tables for java.lang.Character -*- c++ -*-\n\n";
|
||||
print CHARTABLE "#ifndef __JAVA_CHARTABLES_H__\n";
|
||||
print CHARTABLE "#define __JAVA_CHARTABLES_H__\n\n";
|
||||
print CHARTABLE "// These tables are automatically generated by the chartables.pl\n";
|
||||
print CHARTABLE "// script. DO NOT EDIT the tables. Instead, fix the script\n";
|
||||
print CHARTABLE "// and run it again.\n\n";
|
||||
print CHARTABLE "// This file should only be included by natCharacter.cc\n\n";
|
||||
|
||||
|
||||
$bytes = 0;
|
||||
|
||||
# Titlecase mapping tables.
# %title_to_lower and %title_to_upper are hashes keyed by code, so their
# sizes are compared through their key counts.  (`$#title_to_lower' would
# refer to an unrelated array of the same name and always be -1, which
# made this check unable to fail.)
if (scalar (keys %title_to_lower) != scalar (keys %title_to_upper))
{
    # If this fails we need to reimplement toTitleCase.
    print STDERR "titlecase mappings have different sizes\n";
    $status = 1;
}
# Also ensure that the tables are entirely parallel.
foreach $key (sort keys %title_to_lower)
{
    if (! defined $title_to_upper{$key})
    {
        print STDERR "titlecase mappings have different entries\n";
        $status = 1;
    }
}
&print_single_map ("title_to_lower_table", %title_to_lower);
&print_single_map ("title_to_upper_table", %title_to_upper);
|
||||
|
||||
print CHARTABLE "#ifdef COMPACT_CHARACTER\n\n";
|
||||
|
||||
printf CHARTABLE "#define TAMIL_DIGIT_ONE 0x%04x\n\n", $TAMIL_DIGIT_ONE;
|
||||
|
||||
# All numeric values.
|
||||
&print_numerics;
|
||||
|
||||
# Digits only.
|
||||
&print_block ("digit_table", *digit_start, *digit_end);
|
||||
|
||||
# Space characters.
|
||||
&print_block ("space_table", *space_start, *space_end);
|
||||
|
||||
# Letters. We used to generate a separate letter table. But this
|
||||
# doesn't really seem worthwhile. Simply using `all_table' saves us
|
||||
# about 800 bytes, and only adds 3 table probes to isLetter.
|
||||
# &print_block ("letter_table", *letter_start, *letter_end);
|
||||
|
||||
# Case tables.
|
||||
&print_case_table ("upper", *upper_start, *upper_end, *upper_map, *upper_anom);
|
||||
&print_case_table ("lower", *lower_start, *lower_end, *lower_map, *lower_anom);
|
||||
|
||||
# Everything else.
|
||||
&print_all_block (*all_start, *all_end, *all_cats);
|
||||
|
||||
print CHARTABLE "#else /* COMPACT_CHARACTER */\n\n";
|
||||
|
||||
printf CHARTABLE "#define ROMAN_START 0x%04x\n", $ROMAN_START;
|
||||
printf CHARTABLE "#define ROMAN_END 0x%04x\n\n", $ROMAN_END;
|
||||
|
||||
&print_fast_tables (*all_start, *all_end, *all_cats,
|
||||
*attributes, *second_attributes);
|
||||
|
||||
print CHARTABLE "#endif /* COMPACT_CHARACTER */\n\n";
|
||||
|
||||
print CHARTABLE "#endif /* __JAVA_CHARTABLES_H__ */\n";
|
||||
|
||||
printf STDERR "Approximately %d bytes of data generated (compact case)\n",
|
||||
$bytes;
|
||||
|
||||
|
||||
# Now generate decomposition tables.
|
||||
printf DECOMP "// java-chardecomp.h - Decomposition character tables -*- c++ -*-\n\n";
|
||||
printf DECOMP "#ifndef __JAVA_CHARDECOMP_H__\n";
|
||||
printf DECOMP "#define __JAVA_CHARDECOMP_H__\n\n";
|
||||
print DECOMP "// These tables are automatically generated by the chartables.pl\n";
|
||||
print DECOMP "// script. DO NOT EDIT the tables. Instead, fix the script\n";
|
||||
print DECOMP "// and run it again.\n\n";
|
||||
print DECOMP "// This file should only be included by natCollator.cc\n\n";
|
||||
|
||||
print DECOMP "struct decomp_entry\n{\n";
|
||||
print DECOMP " jchar key;\n";
|
||||
print DECOMP " const char *value;\n";
|
||||
print DECOMP "};\n\n";
|
||||
|
||||
&write_decompositions;
|
||||
|
||||
printf DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";
|
||||
|
||||
|
||||
close (CHARTABLE);
|
||||
close (DECOMP);
|
||||
|
||||
exit $status;
|
||||
|
||||
|
||||
# Synthesize records for the code points strictly between PREVCODE and
# NCODE, which have no line of their own in the input, and feed each one
# to process_char.  FIELDS (a typeglob) is the record that ends the gap.
sub process_gap {
    local (*fields, $prevcode, $ncode) = @_;
    local (@gap_fields, $i);

    # Does the record ending the gap sit on one of the known range
    # endpoints?
    local ($known_end) = scalar (grep ($_ == $ncode,
                                       $CJK_IDEOGRAPH_END,
                                       $HANGUL_END,
                                       $HIGH_SURROGATE_END,
                                       $PRIVATE_HIGH_SURROGATE_END,
                                       $LOW_SURROGATE_END,
                                       $PRIVATE_END));

    if ($known_end) {
        # Characters inside these gaps are known to share the properties
        # of the character that closes the gap, so start from its record.
        @gap_fields = @fields;
    } else {
        # Anything else is unassigned: fifteen empty fields with category
        # `Cn'.  (Reporting each such gap on stderr prints too much to be
        # enabled.)
        @gap_fields = ('', '', 'Cn', ('') x 12);
    }

    $i = $prevcode + 1;
    while ($i < $ncode) {
        # Give the synthetic record its own code and a placeholder name.
        $gap_fields[$CODE] = sprintf ("%04x", $i);
        $gap_fields[$NAME] = "CHARACTER " . $gap_fields[$CODE];
        &process_char (*gap_fields, $i);
        ++$i;
    }
}
|
||||
|
||||
# Process a single character.
# FIELDS is a typeglob naming the array of record fields for the
# character and NCODE is its numeric code.  Depending on the fields this
# records decompositions, titlecase/upper/lower case mappings, numeric
# values, digit and space ranges, and finally the category range in the
# `all' table.  Problems are reported on stderr; those that would break
# Character set $status to 1.
sub process_char
{
    local (*fields, $ncode) = @_;

    if ($fields[$DECOMPOSITION] ne '')
    {
        &add_decomposition ($ncode, $fields[$DECOMPOSITION]);
    }

    # If this is a titlecase character, mark it.
    if ($fields[$CATEGORY] eq 'Lt')
    {
        $title_to_upper{$fields[$CODE]} = $fields[$UPPERCASE];
        $title_to_lower{$fields[$CODE]} = $fields[$LOWERCASE];
    }
    else
    {
        # For upper and lower case mappings, we try to build compact
        # tables that map range onto range.  We specifically want to
        # avoid titlecase characters.  Java specifies a range check to
        # make sure the character is not between 0x2000 and 0x2fff.
        # We avoid that here because we need to generate table entries
        # -- toLower and toUpper still work in that range.
        if ($fields[$UPPERCASE] eq ''
            && ($fields[$LOWERCASE] ne ''
                || $fields[$NAME] =~ /CAPITAL (LETTER|LIGATURE)/))
        {
            if ($fields[$LOWERCASE] ne '')
            {
                &update_case_block (*upper_start, *upper_end, *upper_map,
                                    $fields[$CODE], $fields[$LOWERCASE]);
                &set_attribute ($ncode, hex ($fields[$LOWERCASE]));
            }
            else
            {
                $upper_anom{$fields[$CODE]} = 1;
            }
        }
        elsif ($fields[$LOWERCASE] ne '')
        {
            print STDERR ("Java missed upper case char \\u",
                          $fields[$CODE], "\n");
        }
        elsif ($fields[$CATEGORY] eq 'Lu')
        {
            # This case is for letters which are marked as upper case
            # but for which there is no lower case equivalent.  For
            # instance, LATIN LETTER YR.
        }

        if ($fields[$LOWERCASE] eq ''
            && ($fields[$UPPERCASE] ne ''
                || $fields[$NAME] =~ /SMALL (LETTER|LIGATURE)/))
        {
            if ($fields[$UPPERCASE] ne '')
            {
                &update_case_block (*lower_start, *lower_end, *lower_map,
                                    $fields[$CODE], $fields[$UPPERCASE]);
                &set_attribute ($ncode, hex ($fields[$UPPERCASE]));
            }
            else
            {
                $lower_anom{$fields[$CODE]} = 1;
            }
        }
        elsif ($fields[$UPPERCASE] ne '')
        {
            print STDERR ("Java missed lower case char \\u",
                          $fields[$CODE], "\n");
        }
        elsif ($fields[$CATEGORY] eq 'Ll')
        {
            # This case is for letters which are marked as lower case
            # but for which there is no upper case equivalent.  For
            # instance, FEMININE ORDINAL INDICATOR.
        }
    }


    # If we have a non-decimal numeric value, add it to the list.
    if ($fields[$CATEGORY] eq 'Nd'
        && ($ncode < 0x2000 || $ncode > 0x2fff)
        && $fields[$NAME] =~ /DIGIT/)
    {
        # This is a digit character that is handled elsewhere.
    }
    elsif ($fields[$DIGIT] ne '' || $fields[$NUMERIC] ne '')
    {
        # Do a simple check.
        if ($fields[$DECIMAL] ne '')
        {
            # This catches bugs in an earlier implementation of
            # chartables.pl.  Now it is here for historical interest
            # only.
            # print STDERR ("Character \\u", $fields[$CODE],
            #               " would have been missed as digit\n");
        }

        local ($val) = $fields[$DIGIT];
        $val = $fields[$NUMERIC] if $val eq '';
        local ($ok) = 1;

        # If we have a value which is not a positive integer, then we
        # set the value to -2 to make life easier for
        # Character.getNumericValue.
        if ($val !~ m/^[0-9]+$/)
        {
            if ($fields[$CATEGORY] ne 'Nl'
                && $fields[$CATEGORY] ne 'No')
            {
                # This shows a few errors in the Unicode table.  These
                # characters have a missing Numeric field, and the `N'
                # for the mirrored field shows up there instead.  I
                # reported these characters to errata@unicode.org on
                # Thu Sep 10 1998.  They said it will be fixed in the
                # 2.1.6 release of the tables.
                # Note the doubled backslash: a bare "\u" in a
                # double-quoted string is Perl's uppercase-next-character
                # escape and would print nothing.
                print STDERR ("Character \\u", $fields[$CODE],
                              " has value but is not numeric; val = '",
                              $val, "'\n");
                # We skip these.
                $ok = 0;
            }
            $val = "-2";
        }

        if ($ok)
        {
            $numerics{$fields[$CODE]} = $val;
            &set_attribute ($ncode, $val);
        }
    }

    # We build a table that lists ranges of ordinary decimal values.
    # At each step we make sure that the digits are in the correct
    # order, with no holes, as this is assumed by Character.  If this
    # fails, reimplementation is required.  This implementation
    # dovetails nicely with the Java Spec, which has strange rules for
    # what constitutes a decimal value.  In particular the Unicode
    # name must contain the word `DIGIT'.  The spec doesn't directly
    # say that digits must have type `Nd' (or that their value must be
    # an integer), but that can be inferred from the list of digits in
    # the book(s).  Currently the only Unicode characters whose name
    # includes `DIGIT' which would not fit are the Tibetan "half"
    # digits.
    if ($fields[$CATEGORY] eq 'Nd')
    {
        if (($ncode < 0x2000 || $ncode > 0x2fff)
            && $fields[$NAME] =~ /DIGIT/)
        {
            &update_digit_block (*digit_start, *digit_end, $fields[$CODE],
                                 $fields[$DECIMAL]);
            &set_attribute ($ncode, $fields[$DECIMAL]);
        }
        else
        {
            # If this fails then Character.getType will fail.  We
            # assume that things in `digit_table' are the only
            # category `Nd' characters.
            print STDERR ("Character \\u", $fields[$CODE],
                          " is class Nd but not in digit table\n");
            $status = 1;
        }
    }

    # Keep track of space characters.
    if ($fields[$CATEGORY] =~ /Z[slp]/)
    {
        &update_block (*space_start, *space_end, $fields[$CODE]);
    }

    # Keep track of letters.
    # if ($fields[$CATEGORY] =~ /L[ultmo]/)
    #   {
    #     &update_letter_block (*letter_start, *letter_end, $fields[$CODE],
    #                           $fields[$CATEGORY]);
    #   }

    # Keep track of all characters.  You might think we wouldn't have
    # to do this for uppercase letters, or other characters we already
    # "classify".  The problem is that this classification is
    # different.  E.g., \u216f is uppercase by Java rules, but is a
    # LETTER_NUMBER here.
    &update_all_block (*all_start, *all_end, *all_cats,
                       $fields[$CODE], $fields[$CATEGORY]);
}
|
||||
|
||||
|
||||
# Record the decomposition VALUE (space-separated hex codes, optionally
# led by a `<tag>' word) for character NCODE.  A leading tag selects
# %full_decomposition and is dropped; otherwise the entry goes into
# %canonical_decomposition.
sub add_decomposition {
    local ($ncode, $value) = @_;
    local (@tokens) = split (' ', $value);
    local ($is_full) = 0;
    local (@decomp) = ();
    local ($tok);

    # Only the very first word may be a <tag>.
    if (@tokens && $tokens[0] =~ /^\<.*\>$/) {
        $is_full = 1;
        shift (@tokens);
    }

    foreach $tok (@tokens) {
        push (@decomp, hex ($tok));
    }

    # The code list is stored packed into one string, which keeps the
    # script within Perl 4 features.
    local ($s) = pack "I*", @decomp;
    if ($is_full) {
        $full_decomposition{$ncode} = $s;
    } else {
        $canonical_decomposition{$ncode} = $s;
    }
}
|
||||
|
||||
# Emit one `NAME_decomposition' array of decomp_entry initializers to
# DECOMP from TABLE (code => packed expansion), in increasing code order
# over 0..65535.  IS_CANON is accepted but not consulted.
sub write_single_decomposition {
    local ($name, $is_canon, %table) = @_;
    local ($key, @expansion, $char);
    local ($separator) = '';

    printf DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";

    $key = 0;
    while ($key <= 65535) {
        if (defined $table{$key}) {
            # Entries after the first are preceded by ",\n".
            print DECOMP $separator;
            $separator = ",\n";

            printf DECOMP " { 0x%04x, \"", $key;

            # Each expansion character is written as two \x byte escapes,
            # high byte first.  Expansions vary a lot in length (\uFDFA
            # is very long), so a variable-length string wastes less
            # space than a fixed-size slot would.
            @expansion = unpack "I*", $table{$key};
            foreach $char (@expansion) {
                printf DECOMP "\\x%02x\\x%02x", ($char >> 8), ($char & 0xff);
            }

            printf DECOMP "\" }";
        }
        ++$key;
    }

    printf DECOMP "\n};\n\n";
}
|
||||
|
||||
# Write both decomposition tables to DECOMP: the canonical table first,
# then the full one.
sub write_decompositions {
    &write_single_decomposition ('canonical', 1, %canonical_decomposition);
    &write_single_decomposition ('full', 0, %full_decomposition);
}
|
||||
|
||||
# A set of characters is kept as two parallel arrays: START[k] and END[k]
# are the first and last code of the k'th contiguous run.  Add CHAR (a
# hex string) to the pair passed as typeglobs.  Returns 1 when the most
# recent run was extended, 0 when a new run was started.
sub update_block {
    local (*start, *end, $char) = @_;

    local ($nchar) = hex ($char);
    local ($last) = $#end;

    if ($last < 0 || $end[$last] != $nchar - 1) {
        # Not directly after the latest run: begin a one-character run.
        $start[$last + 1] = $nchar;
        $end[$last + 1] = $nchar;
        return 0;
    }

    # Directly after the latest run: stretch it by one.
    $end[$last] = $nchar;
    return 1;
}
|
||||
|
||||
# Predicate: true when the numeric code CHAR comes immediately after the
# last entry of END (passed as a typeglob), i.e. when adding it would
# extend the current run rather than start a new one.
sub block_append_p {
    local (*end, $char) = @_;
    local ($last) = $#end;
    return $last >= 0 && $end[$last] == $char - 1;
}
|
||||
|
||||
# This updates the digit block.  This table is much like an ordinary
# block, but it has an extra constraint: within a run, a digit's offset
# from the start of the run must equal its decimal VALUE.  START and END
# are typeglobs, CHAR is the code as a hex string.  A violation is
# reported on stderr and sets $status to 1.
sub update_digit_block
{
    local (*start, *end, $char, $value) = @_;

    # Hand the typeglobs on, as the other callers of update_block do.
    # Passing `$start, $end' (two undefined scalars) only worked because
    # update_block's own `local (*start, *end)' happened to inherit this
    # sub's aliases.
    &update_block (*start, *end, $char);
    local ($nchar) = hex ($char);

    # We want to make sure that the new digit's value is correct for
    # its place in the block.  However, we special-case Tamil digits,
    # since Tamil does not have a digit `0'.
    local ($count) = $#start;
    if (($nchar < $TAMIL_DIGIT_ONE || $nchar > $TAMIL_DIGIT_NINE)
        && $nchar - $start[$count] != $value)
    {
        # If this fails then Character.digit_value will be wrong.
        print STDERR "Character \\u", $char, " violates digit constraint\n";
        $status = 1;
    }
}
|
||||
|
||||
# Add CHAR to the letter table held in START/END (typeglobs).  Every
# letter is tracked alike: CATEGORY is accepted but not consulted, which
# is simpler than trying to leave out upper or lower case letters.
sub update_letter_block {
    local (*start, *end, $char, $category) = @_;

    return &update_block (*start, *end, $char);
}
|
||||
|
||||
# Add CHAR (hex string) with general CATEGORY to the `all' table, held as
# three parallel arrays passed as typeglobs: run START, run END and the
# category CATS of each run.  A character extends the latest run only
# when it is adjacent to it and has the same category.
# FIXME: if a given type has very few characters, we should just inline
# the code.  E.g., there is only one paragraph separator.
sub update_all_block {
    local (*start, *end, *cats, $char, $category) = @_;

    local ($nchar) = hex ($char);
    local ($last) = $#end;
    local ($extends) = 0;

    if ($last >= 0 && $end[$last] == $nchar - 1) {
        $extends = ($cats[$last] eq $category);
    }

    if ($extends) {
        $end[$last] = $nchar;
    } else {
        $start[$last + 1] = $nchar;
        $end[$last + 1] = $nchar;
        $cats[$last + 1] = $category;
    }
}
|
||||
|
||||
# Add the mapping CHAR -> MAPCHAR (both hex strings) to a case table held
# as three parallel arrays passed as typeglobs: run START, run END and
# MAP, the image of each run's first character.  A whole run maps onto a
# run of the other case, so the latest run is extended only when CHAR is
# adjacent to it and MAPCHAR lies at the same offset from the run's
# image; otherwise a new run is started.
sub update_case_block {
    local (*start, *end, *map, $char, $mapchar) = @_;

    local ($nchar) = hex ($char);
    local ($nmap) = hex ($mapchar);
    local ($last) = $#end;
    local ($extends) = 0;

    if ($last >= 0 && $end[$last] == $nchar - 1) {
        $extends = ($nchar - $start[$last] == $nmap - $map[$last]);
    }

    if ($extends) {
        $end[$last] = $nchar;
    } else {
        $start[$last + 1] = $nchar;
        $end[$last + 1] = $nchar;
        $map[$last + 1] = $nmap;
    }
}
|
||||
|
||||
# Store ATTR as the attribute of character NCODE in %attributes.  A
# character normally has a single attribute: a second, different value is
# kept in %second_attributes only for codes in ROMAN_START..ROMAN_END and
# is otherwise reported on stderr.  Values outside -32768..65535 are
# reported and not stored.
sub set_attribute {
    local ($ncode, $attr) = @_;
    local ($previous) = $attributes{$ncode};

    if ($previous eq '' || $previous eq $attr) {
        # Attributes can be interpreted as unsigned in some situations,
        # so the upper bound is 65535.  This could cause errors -- the
        # interpretation needs to be checked.
        if ($attr >= -32768 && $attr <= 65535) {
            $attributes{$ncode} = $attr;
        } else {
            printf STDERR "attribute out of range for character \\u%04x\n", $ncode;
        }
    } elsif ($ncode >= $ROMAN_START && $ncode <= $ROMAN_END) {
        $second_attributes{$ncode} = $attr;
    } else {
        printf STDERR "character \\u%04x already has attribute\n", $ncode;
    }
}
|
||||
|
||||
|
||||
# Write the runs held in START/END (typeglobs) to CHARTABLE as a
# `static const jchar TITLE[][2]' initializer, one { first, last } pair
# per line, and add four bytes per pair to $bytes.
sub print_block {
    local ($title, *start, *end) = @_;
    local ($i, $final);

    print CHARTABLE "static const jchar ", $title, "[][2] =\n";
    print CHARTABLE " {\n";

    $final = $#start;
    for ($i = 0; $i <= $final; ++$i) {
        print CHARTABLE " { ";
        &print_char ($start[$i]);
        print CHARTABLE ", ";
        &print_char ($end[$i]);
        print CHARTABLE " }";
        # Every pair but the last is followed by a comma.
        print CHARTABLE "," unless ($i == $final);
        print CHARTABLE "\n";
        $bytes += 4;                # Two bytes per char.
    }

    print CHARTABLE " };\n\n";
}
|
||||
|
||||
# Write two parallel arrays to CHARTABLE from %numerics: numeric_table
# (the character codes, in sorted key order, five per line) and
# numeric_value (their values, ten per line).  A value that does not fit
# a short is reported on stderr and sets $status to 1.  Each element adds
# two bytes to $bytes.
sub print_numerics {
    local ($i, $key, $count, @keys);

    @keys = sort keys %numerics;
    $count = @keys;

    print CHARTABLE "static const jchar numeric_table[] =\n";
    print CHARTABLE " { ";
    # $i counts the elements written so far, including the current one.
    for ($i = 1; $i <= $count; ++$i) {
        &print_char (hex ($keys[$i - 1]));
        print CHARTABLE ", " if $i < $count;
        # Print 5 per line.
        print CHARTABLE "\n " if ($i % 5 == 0);
        $bytes += 2;                # One character.
    }
    print CHARTABLE " };\n\n";

    print CHARTABLE "static const jshort numeric_value[] =\n";
    print CHARTABLE " { ";
    for ($i = 1; $i <= $count; ++$i) {
        $key = $keys[$i - 1];
        print CHARTABLE $numerics{$key};
        unless ($numerics{$key} <= 32767 && $numerics{$key} >= -32768) {
            # This means our generated type info is incorrect.  It could
            # be detected and worked around here instead.
            print STDERR "numeric value won't fit in a short\n";
            $status = 1;
        }
        print CHARTABLE ", " if $i < $count;
        # Print 10 per line.
        print CHARTABLE "\n " if ($i % 10 == 0);
        $bytes += 2;                # One short.
    }
    print CHARTABLE " };\n\n";
}
|
||||
|
||||
# Print a table that maps one single letter onto another.  It assumes
# the map is indexed by char code (as a hex string).  TITLE names the
# generated `static const jchar TITLE[][2]' array; each { from, to } pair
# is followed by a comment holding the character's name from %name, and
# adds four bytes to $bytes.
sub print_single_map
{
    local ($title, %map) = @_;

    local (@keys) = sort keys %map;
    # Keep the counters local, like the other print_* subs do, so that a
    # caller's own $i, $num or $key is not clobbered.
    local ($num, $i, $key);

    $num = @keys;
    print CHARTABLE "static const jchar ", $title, "[][2] =\n";
    print CHARTABLE " {\n";
    $i = 0;
    for $key (@keys)
    {
        print CHARTABLE " { ";
        &print_char (hex ($key));
        print CHARTABLE ", ";
        &print_char (hex ($map{$key}));
        print CHARTABLE " }";
        ++$i;
        # A comma after every pair but the last; a blank in its place on
        # the last one.
        if ($i < $num)
        {
            print CHARTABLE ",";
        }
        else
        {
            print CHARTABLE " ";
        }
        print CHARTABLE " // ", $name{$key}, "\n";
        $bytes += 4;                # Two bytes per char.
    }
    print CHARTABLE " };\n\n";
}
|
||||
|
||||
# Write the `all' table to CHARTABLE: first the runs themselves as
# all_table (via print_block), then category_table, which gives the
# java::lang::Character category constant of each run.  START, END and
# CATS are typeglobs.  Each category adds one byte to $bytes.
sub print_all_block {
    local (*start, *end, *cats) = @_;
    local ($i, $sum);

    &print_block ("all_table", *start, *end);

    # Count the characters covered, just to make sure it isn't cheaper
    # to simply list all the characters individually.
    $sum = 0;
    for ($i = 0; $i <= $#start; ++$i) {
        $sum += $end[$i] - $start[$i] + 1;
    }
    printf STDERR ("all_table encodes %d characters in %d entries\n",
                   $sum, $#start + 1);

    print CHARTABLE "static const jbyte category_table[] =\n";
    print CHARTABLE " { ";

    # Two neighbouring runs may carry the same category.  That isn't an
    # error: the runs are not adjacent, and the characters between them
    # are left out of the table for other reasons.  This could be
    # exploited to make the table a little smaller.
    for ($i = 0; $i <= $#cats; ++$i) {
        print CHARTABLE 'java::lang::Character::', $category_map{$cats[$i]};
        print CHARTABLE ", " if ($i < $#cats);
        print CHARTABLE "\n ";
        ++$bytes;
    }
    print CHARTABLE " };\n\n";
}
|
||||
|
||||
# Write one case table to CHARTABLE.  TITLE is `upper' or `lower'; START,
# END and MAP are the parallel run arrays and ANOMALOUS the hash of
# characters of that case with no mapping, all passed as typeglobs.
# Output is TITLE_case_table (the runs), TITLE_case_map_table (the image
# of each run's start, five per line) and, except for `upper',
# TITLE_anomalous_table.  Anomalous upper case characters are instead
# reported on stderr and set $status to 1.
sub print_case_table {
    local ($title, *start, *end, *map, *anomalous) = @_;
    local ($i, $key, @keys);

    &print_block ($title . '_case_table', *start, *end);

    print CHARTABLE "static const jchar ", $title, "_case_map_table[] =\n";
    print CHARTABLE " { ";
    for ($i = 0; $i <= $#map; ++$i) {
        &print_char ($map[$i]);
        print CHARTABLE ", " if $i < $#map;
        # Break the line after every fifth element.
        print CHARTABLE "\n " if ($i + 1) % 5 == 0;
        $bytes += 2;
    }
    print CHARTABLE " };\n";

    @keys = sort keys %anomalous;

    if ($title ne 'upper') {
        print CHARTABLE "\n";
        print CHARTABLE "static const jchar ", $title, "_anomalous_table[] =\n";
        print CHARTABLE " { ";
        for ($i = 0; $i <= $#keys; ++$i) {
            &print_char (hex ($keys[$i]));
            print CHARTABLE ", " if $i < $#keys;
            print CHARTABLE "\n " if ($i + 1) % 5 == 0;
            $bytes += 2;
        }
        print CHARTABLE " };\n";
    } elsif ($#keys >= 0) {
        # If these are found we need to change Character.isUpperCase.
        print STDERR "Found anomalous upper case characters\n";
        $status = 1;
    }

    print CHARTABLE "\n";
}
|
||||
|
||||
# Write the tables for the fast (non-compact) version to CHARTABLE:
# type_table, with one category constant per character of every run;
# attribute_table, with one entry for each code 0..0xffff (missing
# entries are set to 0 in ATTS as a side effect); and
# secondary_attribute_table for ROMAN_START..ROMAN_END.  START, END and
# CATS are the `all' run arrays, ATTS and SECOND_ATTS the attribute
# hashes, all passed as typeglobs.
sub print_fast_tables {
    local (*start, *end, *cats, *atts, *second_atts) = @_;
    local ($i, $j);

    print CHARTABLE "static const jbyte type_table[] =\n{ ";
    $i = 0;
    while ($i <= $#cats) {
        $j = $start[$i];
        while ($j <= $end[$i]) {
            print CHARTABLE 'java::lang::Character::', $category_map{$cats[$i]};
            # Only the very last character of the last run has no comma.
            print CHARTABLE "," unless ($i >= $#cats && $j >= $end[$i]);
            print CHARTABLE "\n ";
            ++$j;
        }
        ++$i;
    }
    print CHARTABLE "\n };\n\n";

    print CHARTABLE "static const jshort attribute_table[] =\n{ ";
    $i = 0;
    while ($i <= 0xffff) {
        $atts{$i} = 0 unless defined $atts{$i};
        print CHARTABLE $atts{$i};
        print CHARTABLE ", " if $i < 0xffff;
        print CHARTABLE "\n " if $i % 5 == 1;
        ++$i;
    }
    print CHARTABLE "\n };\n\n";

    print CHARTABLE "static const jshort secondary_attribute_table[] =\n{ ";
    $i = $ROMAN_START;
    while ($i <= $ROMAN_END) {
        print CHARTABLE $second_atts{$i};
        print CHARTABLE ", " if $i < $ROMAN_END;
        print CHARTABLE "\n " if $i % 5 == 1;
        ++$i;
    }
    print CHARTABLE "\n };\n\n";
}
|
||||
|
||||
# Write the numeric code NCODE to CHARTABLE as a C hexadecimal constant
# of at least four digits, e.g. 0x0041.
sub print_char {
    local ($ncode) = @_;
    print CHARTABLE sprintf ("0x%04x", $ncode);
}
|
|
@ -42,13 +42,6 @@ AC_SUBST(COMPPATH)
|
|||
dnl The -no-testsuite modules omit the test subdir.
|
||||
AM_CONDITIONAL(TESTSUBDIR, test -d $srcdir/testsuite)
|
||||
|
||||
dnl See whether the user prefers size or speed for Character.
|
||||
dnl The default is size.
|
||||
AC_ARG_ENABLE(fast-character,
|
||||
[ --enable-fast-character prefer speed over size for Character],
|
||||
# Nothing
|
||||
, AC_DEFINE(COMPACT_CHARACTER))
|
||||
|
||||
dnl Should the runtime set system properties by examining the
|
||||
dnl environment variable GCJ_PROPERTIES?
|
||||
AC_ARG_ENABLE(getenv-properties,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,12 +1,39 @@
|
|||
// natCharacter.cc - Native part of Character class.
|
||||
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
|
||||
Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
|
||||
|
||||
/* Copyright (C) 1998, 1999 Free Software Foundation
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
This file is part of libgcj.
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
|
@ -18,267 +45,69 @@ details. */
|
|||
|
||||
|
||||
|
||||
#define asize(x) ((sizeof (x)) / sizeof (x[0]))
|
||||
|
||||
static jchar
|
||||
to_lower_title (jchar ch)
|
||||
jchar
|
||||
java::lang::Character::readChar(jchar ch)
|
||||
{
|
||||
for (unsigned int i = 0; i < asize (title_to_upper_table); ++i)
|
||||
{
|
||||
// We can assume that the entries in the two tables are
|
||||
// parallel. This is checked in the script.
|
||||
if (title_to_upper_table[i][1] == ch
|
||||
|| title_to_upper_table[i][0] == ch)
|
||||
return title_to_lower_table[i][1];
|
||||
}
|
||||
return ch;
|
||||
// Perform 16-bit addition to find the correct entry in data.
|
||||
return data[(jchar) (blocks[ch >> SHIFT] + ch)];
|
||||
}
|
||||
|
||||
static jchar
|
||||
to_upper_title (jchar ch)
|
||||
jint
|
||||
java::lang::Character::getType(jchar ch)
|
||||
{
|
||||
for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
|
||||
{
|
||||
// We can assume that the entries in the two tables are
|
||||
// parallel. This is checked in the script.
|
||||
if (title_to_lower_table[i][1] == ch
|
||||
|| title_to_lower_table[i][0] == ch)
|
||||
return title_to_upper_table[i][1];
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
jboolean
|
||||
java::lang::Character::isTitleCase (jchar ch)
|
||||
{
|
||||
for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
|
||||
{
|
||||
if (title_to_lower_table[i][0] == ch)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
// Perform 16-bit addition to find the correct entry in data.
|
||||
return (jint) (data[(jchar) (blocks[ch >> SHIFT] + ch)] & TYPE_MASK);
|
||||
}
|
||||
|
||||
jchar
|
||||
java::lang::Character::toTitleCase (jchar ch)
|
||||
java::lang::Character::toLowerCase(jchar ch)
|
||||
{
|
||||
// Both titlecase mapping tables have the same length. This is
|
||||
// checked in the chartables script.
|
||||
for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
|
||||
{
|
||||
if (title_to_lower_table[i][0] == ch)
|
||||
return ch;
|
||||
if (title_to_lower_table[i][1] == ch)
|
||||
return title_to_lower_table[i][0];
|
||||
if (title_to_upper_table[i][1] == ch)
|
||||
return title_to_upper_table[i][0];
|
||||
}
|
||||
return toUpperCase (ch);
|
||||
}
|
||||
|
||||
#ifdef COMPACT_CHARACTER
|
||||
|
||||
static int
|
||||
table_search (const jchar table[][2], int table_len, jchar ch)
|
||||
{
|
||||
int low, high, i, old;
|
||||
|
||||
low = 0;
|
||||
high = table_len;
|
||||
i = high / 2;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (ch < table[i][0])
|
||||
high = i;
|
||||
else if (ch > table[i][1])
|
||||
low = i;
|
||||
else
|
||||
return i;
|
||||
|
||||
old = i;
|
||||
i = (high + low) / 2;
|
||||
if (i == old)
|
||||
break;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Return the decimal value of the digit CH, or -1 if CH is not covered
// by any range in digit_table.  The value is CH's offset from the first
// character of its range.
jint
java::lang::Character::digit_value (jchar ch)
{
  const int slot = table_search (digit_table, asize (digit_table), ch);
  if (slot != -1)
    {
      const jchar first = digit_table[slot][0];
      // The Tamil range starts at digit one (Tamil has no digit `0'),
      // so its offsets are shifted up by one.
      if (first == TAMIL_DIGIT_ONE)
        return ch - first + 1;
      return ch - first;
    }
  return -1;
}
|
||||
|
||||
// Return the numeric value of CH.  A radix-36 digit value takes
// precedence; otherwise CH is looked up linearly in numeric_table and
// the parallel numeric_value entry is returned.  Returns -1 if CH has
// no numeric value in either place.
jint
java::lang::Character::getNumericValue (jchar ch)
{
  const jint as_digit = digit (ch, 36);
  if (as_digit == -1)
    {
      const unsigned int count = asize (numeric_table);
      unsigned int pos = 0;
      while (pos < count)
        {
          if (numeric_table[pos] == ch)
            return numeric_value[pos];
          ++pos;
        }
      return -1;
    }
  return as_digit;
}
|
||||
|
||||
// Return the general category of CH: the category_table entry that
// parallels CH's range in all_table, or UNASSIGNED when no range in
// all_table contains CH.
jint
java::lang::Character::getType (jchar ch)
{
  const int slot = table_search (all_table, asize (all_table), ch);
  if (slot == -1)
    return UNASSIGNED;
  return category_table[slot];
}
|
||||
|
||||
// Return true if CH is a lower case character.
//
// Characters in 0x2000 .. 0x2fff are never reported as lower case.
// Otherwise CH is lower case if it falls in a lower_case_table range or
// appears in lower_anomalous_table, a flat array of individual
// characters that is binary-searched here (NOTE(review): assumes that
// array is sorted ascending and non-empty -- confirm against the
// generator script).
jboolean
java::lang::Character::isLowerCase (jchar ch)
{
  if (ch >= 0x2000 && ch <= 0x2fff)
    return false;
  if (table_search (lower_case_table, asize (lower_case_table), ch) != -1)
    return true;

  int lo = 0;
  int hi = asize (lower_anomalous_table);
  int probe = hi / 2;

  for (;;)
    {
      const jchar entry = lower_anomalous_table[probe];
      if (ch == entry)
        return true;

      if (ch < entry)
        hi = probe;
      else
        lo = probe;

      // Stop once the midpoint no longer moves: nothing is left to probe.
      const int next = (hi + lo) / 2;
      if (next == probe)
        return false;
      probe = next;
    }
}
|
||||
|
||||
// Return true if CH falls inside one of the ranges listed in
// space_table.
jboolean
java::lang::Character::isSpaceChar (jchar ch)
{
  const int slot = table_search (space_table, asize (space_table), ch);
  return slot != -1;
}
|
||||
|
||||
jboolean
|
||||
java::lang::Character::isUpperCase (jchar ch)
|
||||
{
|
||||
if (ch >= 0x2000 && ch <= 0x2fff)
|
||||
return false;
|
||||
return table_search (upper_case_table, asize (upper_case_table), ch) != -1;
|
||||
return (jchar) (ch + lower[readChar(ch) >> 7]);
|
||||
}
|
||||
|
||||
jchar
|
||||
java::lang::Character::toLowerCase (jchar ch)
|
||||
java::lang::Character::toUpperCase(jchar ch)
|
||||
{
|
||||
int index = table_search (upper_case_table, asize (upper_case_table), ch);
|
||||
if (index == -1)
|
||||
return to_lower_title (ch);
|
||||
return (jchar) (ch - upper_case_table[index][0]
|
||||
+ upper_case_map_table[index]);
|
||||
return (jchar) (ch + upper[readChar(ch) >> 7]);
|
||||
}
|
||||
|
||||
jchar
|
||||
java::lang::Character::toUpperCase (jchar ch)
|
||||
java::lang::Character::toTitleCase(jchar ch)
|
||||
{
|
||||
int index = table_search (lower_case_table, asize (lower_case_table), ch);
|
||||
if (index == -1)
|
||||
return to_upper_title (ch);
|
||||
return (jchar) (ch - lower_case_table[index][0]
|
||||
+ lower_case_map_table[index]);
|
||||
}
|
||||
|
||||
#else /* COMPACT_CHARACTER */
|
||||
|
||||
jint
|
||||
java::lang::Character::digit_value (jchar ch)
|
||||
{
|
||||
if (type_table[ch] == DECIMAL_DIGIT_NUMBER)
|
||||
return attribute_table[ch];
|
||||
return -1;
|
||||
// As title is short, it doesn't hurt to exhaustively iterate over it.
|
||||
for (int i = title_length - 2; i >= 0; i -= 2)
|
||||
if (title[i] == ch)
|
||||
return title[i + 1];
|
||||
return toUpperCase(ch);
|
||||
}
|
||||
|
||||
jint
|
||||
java::lang::Character::getNumericValue (jchar ch)
|
||||
java::lang::Character::digit(jchar ch, jint radix)
|
||||
{
|
||||
jint d = digit (ch, 36);
|
||||
if (d != -1)
|
||||
return d;
|
||||
|
||||
// Some characters require two attributes. We special-case them here.
|
||||
if (ch >= ROMAN_START && ch <= ROMAN_END)
|
||||
return secondary_attribute_table[ch - ROMAN_START];
|
||||
if (type_table[ch] == LETTER_NUMBER || type_table[ch] == OTHER_NUMBER)
|
||||
return attribute_table[ch];
|
||||
return -1;
|
||||
if (radix < MIN_RADIX || radix > MAX_RADIX)
|
||||
return (jint) -1;
|
||||
jchar attr = readChar(ch);
|
||||
if (((1 << (attr & TYPE_MASK))
|
||||
& ((1 << UPPERCASE_LETTER)
|
||||
| (1 << LOWERCASE_LETTER)
|
||||
| (1 << DECIMAL_DIGIT_NUMBER))))
|
||||
{
|
||||
// Signedness doesn't matter; 0xffff vs. -1 are both rejected.
|
||||
jint digit = (jint) numValue[attr >> 7];
|
||||
return (digit >= 0 && digit < radix) ? digit : (jint) -1;
|
||||
}
|
||||
return (jint) -1;
|
||||
}
|
||||
|
||||
jint
|
||||
java::lang::Character::getType (jchar ch)
|
||||
java::lang::Character::getNumericValue(jchar ch)
|
||||
{
|
||||
return type_table[ch];
|
||||
// numValue is stored as an array of jshort, since 10000 is the maximum.
|
||||
return (jint) numValue[readChar(ch) >> 7];
|
||||
}
|
||||
|
||||
jboolean
|
||||
java::lang::Character::isLowerCase (jchar ch)
|
||||
jbyte
|
||||
java::lang::Character::getDirectionality(jchar ch)
|
||||
{
|
||||
if (ch >= 0x2000 && ch <= 0x2fff)
|
||||
return false;
|
||||
return type_table[ch] == LOWERCASE_LETTER;
|
||||
return direction[readChar(ch) >> 7];
|
||||
}
|
||||
|
||||
// Return true if type_table classifies CH as a space, line or
// paragraph separator.
jboolean
java::lang::Character::isSpaceChar (jchar ch)
{
  const jbyte kind = type_table[ch];
  if (kind == SPACE_SEPARATOR)
    return true;
  if (kind == LINE_SEPARATOR)
    return true;
  return kind == PARAGRAPH_SEPARATOR;
}
|
||||
|
||||
// Return true if type_table classifies CH as an upper case letter.
// Characters in 0x2000 .. 0x2fff are never reported as upper case,
// whatever their table entry says.
jboolean
java::lang::Character::isUpperCase (jchar ch)
{
  const bool excluded = ch >= 0x2000 && ch <= 0x2fff;
  return ! excluded && type_table[ch] == UPPERCASE_LETTER;
}
|
||||
|
||||
// Return the lower case form of CH.  For an upper case letter the
// mapping is stored directly in attribute_table; every other character
// is handed to to_lower_title.
jchar
java::lang::Character::toLowerCase (jchar ch)
{
  if (type_table[ch] != UPPERCASE_LETTER)
    return to_lower_title (ch);
  return attribute_table[ch];
}
|
||||
|
||||
// Return the upper case form of CH.  For a lower case letter the
// mapping is stored directly in attribute_table; every other character
// is handed to to_upper_title.
jchar
java::lang::Character::toUpperCase (jchar ch)
{
  if (type_table[ch] != LOWERCASE_LETTER)
    return to_upper_title (ch);
  return attribute_table[ch];
}
|
||||
|
||||
#endif /* COMPACT_CHARACTER */
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
#! /usr/bin/perl

# Generate Java source for the UnicodeBlock constants from a Unicode
# block list in "START; END; Name" form, one block per line.
#
# Usage: blocks.pl [FILE]
#
# With no argument (or an empty one) the script reads 'Blocks.txt' from
# the current directory, fetching it with wget first if it is missing.
# Output goes to stdout: one "public static final UnicodeBlock"
# declaration per block, followed by a "blocks" array naming every
# emitted symbol in input order.

use strict;
use warnings;

my $file;
if (! @ARGV || $ARGV[0] eq '')
  {
    $file = 'Blocks.txt';
    if (! -f $file)
      {
        # Too painful to figure out how to get Perl to do it.
        system ('wget -o .wget-log http://www.unicode.org/Public/UNIDATA/Blocks.txt') == 0
          or die "couldn't download $file (wget status $?)\n";
      }
  }
else
  {
    $file = $ARGV[0];
  }

open (my $input, '<', $file) or die "couldn't open $file: $!";

my @symbols = ();
while (my $line = <$input>)
  {
    next if $line =~ /^#/;
    # Strip only a real line terminator.  chop would also eat the last
    # character of a final line that has no trailing newline.
    $line =~ s/\r?\n\z//;
    next if $line =~ /^\s*$/;

    my ($start, $to, $text) = split (/; /, $line);
    if (! defined $text)
      {
        warn "$file:$.: skipping malformed line: $line\n";
        next;
      }

    # Derive the Java identifier: upper-case, with '-' and ' ' as '_'.
    (my $symbol = $text) =~ tr/a-z/A-Z/;
    $symbol =~ s/[- ]/_/g;

    # Special case for one of the SPECIALS.
    next if $start eq 'FEFF';

    # Special case some areas that our heuristic mishandles.
    if ($symbol eq 'HIGH_SURROGATES')
      {
        # Fold every surrogate block into one area that ends at DFFF.
        $symbol = 'SURROGATES_AREA';
        $text = 'Surrogates Area';
        $to = 'DFFF';
      }
    elsif ($symbol =~ /SURROGATES/)
      {
        # Already covered by SURROGATES_AREA above.
        next;
      }
    elsif ($symbol eq 'PRIVATE_USE')
      {
        $symbol .= '_AREA';
        $text = 'Private Use Area';
      }

    printf " public static final UnicodeBlock %s = new UnicodeBlock (\"%s\", '\\u%s', '\\u%s');\n",
      $symbol, $text, $start, $to;

    push (@symbols, $symbol);
  }

# Separate the array entries with commas by position, rather than
# assuming that SPECIALS is always the last symbol.
print " private static final UnicodeBlock[] blocks = {\n";
print join (",\n", map { " $_" } @symbols), "\n" if @symbols;
print " };\n";

close ($input);
|
Loading…
Reference in New Issue