2000-08-02 21:56:53 +02:00
|
|
|
# encodings.pl - Download IANA text and compute alias list.
# Assumes you are running this program from gnu/gcj/convert/.
# Output is suitable for direct inclusion in IOConverter.java.
|
|
|
|
|
|
|
|
# Translation table from IANA canonical charset names (left column)
# to the canonical names we use ourselves (right column).
#
# NOTE(review): the IANA registry appears to spell the last two names
# "UTF-16LE" / "UTF-16BE"; confirm whether the spellings used here
# are intentional.
%map = qw(
    ANSI_X3.4-1968                                  ASCII
    ISO_8859-1:1987                                 8859_1
    UTF-8                                           UTF8
    Shift_JIS                                       SJIS
    Extended_UNIX_Code_Packed_Format_for_Japanese   EUCJIS
    UTF16-LE                                        UnicodeLittle
    UTF16-BE                                        UnicodeBig
);
|
|
|
|
|
|
|
|
# Decide which copy of the IANA character-set list to read and leave
# its path in the global $file.
#
# A non-empty first command-line argument is taken as the path.
# Otherwise the list is expected in 'character-sets' in the current
# directory; when no plain file of that name exists it is fetched
# with wget first.  Dies if wget cannot be run or reports failure, so
# that a failed download is reported as such instead of surfacing
# later as a confusing open error.
if (@ARGV && $ARGV[0] ne '')
{
    $file = $ARGV[0];
}
else
{
    $file = 'character-sets';
    if (! -f $file)
    {
        # Too painful to figure out how to get Perl to do it.
        # The list form of system bypasses the shell; wget writes its
        # log to .wget-log.
        my @fetch = ('wget', '-o', '.wget-log',
                     'http://www.iana.org/assignments/character-sets');
        my $status = system (@fetch);
        die "couldn't run wget: $!\n" if $status == -1;
        die "wget failed (wait status $status); see .wget-log\n"
            if $status != 0;
    }
}
|
|
|
|
|
2005-11-04 16:08:18 +01:00
|
|
|
# Seed the alias table with the canonical names themselves: every key
# of %map, lower-cased, maps to the corresponding %map value.  keys
# and values walk the unmodified hash in the same order, so the slice
# pairs them up correctly.
@output{ map { lc ($_) } keys %map } = values %map;
|
|
|
|
|
2000-08-02 21:56:53 +02:00
|
|
|
# Scan the IANA list in $file and record aliases in %output.
#
# Lines before the first one starting with "Name:" are skipped.
# From then on an empty line ends the current record.  A "Name:" line
# looks its name up in %map; when that yields a true value, the
# lower-cased name is recorded and the value becomes the target for
# the "Alias:" lines that follow in the same record.
#
# Dies if $file cannot be opened.  The three-argument open treats
# $file purely as a file name, whatever characters it contains.
open (my $input, '<', $file) or die "couldn't open $file: $!";

my $body = 0;        # true once the first "Name:" line has been seen
my $current = '';    # our canonical name for the record being read
while (my $line = <$input>)
{
    # chomp rather than chop: a last line that lacks a trailing
    # newline must not lose its final character.
    chomp $line;
    $body = 1 if $line =~ /^Name:/;
    next unless $body;

    if ($line =~ /^$/)
    {
        $current = '';
        next;
    }

    # Only the first two whitespace-separated fields are of interest.
    my ($type, $name) = split (/\s+/, $line);
    # Encoding names are case-insensitive.  We do all processing on
    # the lower-case form.
    my $lower = lc ($name);

    if ($type eq 'Name:')
    {
        # The lookup itself uses the name exactly as spelled in the list.
        $current = $map{$name};
        $output{$lower} = $current if $current;
    }
    elsif ($type eq 'Alias:')
    {
        # The IANA list has some ugliness: skip aliases that are
        # missing or spelled "None" (in any case), and aliases of
        # charsets we have no mapping for.
        if ($name ne '' && $lower ne 'none' && $current)
        {
            $output{$lower} = $current;
        }
    }
}

close ($input);
|
2005-11-04 16:08:18 +01:00
|
|
|
|
|
|
|
# Print one hash.put line per entry of %output, ordered by the default
# string sort of the alias names.
for my $alias (sort keys %output)
{
    print qq{ hash.put ("$alias", "$output{$alias}");\n};
}
|