JIS0201.h: New file, generated from Unicode table.
* gnu/gcj/convert/JIS0201.h: New file, generated from Unicode table. * gnu/gcj/convert/Input_JavaSrc.java: New BytesToUnicode class. * gnu/gcj/convert/Input_SJIS.java: New BytesToUnicode class. * gnu/gcj/convert/Output_EUCJIS.java: New UnicodeToBytes class. * gnu/gcj/convert/Output_SJIS.java: New UnicodeToBytes class. * gnu/gcj/convert/natInput_EUCJIS.cc: New file. * gnu/gcj/convert/natInput_SJIS.cc: New file. * gnu/gcj/convert/natOutput_EUCJIS.cc: New file. * gnu/gcj/convert/natOutput_SJIS.cc: New file. * gnu/gcj/convert/make-trie.c: New file: functions to make a trie. * gnu/gcj/convert/gen-from-JIS.c: Invoke make-trie for output. * gnu/gcj/convert/Unicode_to_JIS.cc: New generated trie table. From-SVN: r26502
This commit is contained in:
parent
7b824de381
commit
0ff6b2f10b
89
libjava/gnu/gcj/convert/Input_JavaSrc.java
Normal file
89
libjava/gnu/gcj/convert/Input_JavaSrc.java
Normal file
@ -0,0 +1,89 @@
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
/**
|
||||
* Convert Ascii with \ u XXXX-escapes to Unicode.
|
||||
* @author Per Bothner <bothner@cygnus.com>
|
||||
* @date April 1999.
|
||||
*/
|
||||
|
||||
public class Input_JavaSrc extends BytesToUnicode
|
||||
{
|
||||
public String getName() { return "JavaSrc"; }
|
||||
|
||||
// 0: normal
|
||||
// 1: seen '\\'
|
||||
// 2: seen '\\' and 'u'
|
||||
// 3: seen '\\' and need to emit value.
|
||||
// 4, 5, 6, 7: seen '\\u', 'u' and (state-3) hex digits.
|
||||
int state = 0;
|
||||
|
||||
int value;
|
||||
|
||||
public int read (char[] outbuffer, int outpos, int outlength)
|
||||
{
|
||||
int origpos = outpos;
|
||||
for (;;)
|
||||
{
|
||||
if (inpos >= inlength)
|
||||
break;
|
||||
if (outpos >= outlength)
|
||||
break;
|
||||
char b = (char) (inbuffer[inpos++] & 0xFF);
|
||||
switch (state)
|
||||
{
|
||||
case 0:
|
||||
if (b == '\\')
|
||||
{
|
||||
state = 1;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (b == 'u')
|
||||
{
|
||||
state = 2;
|
||||
continue;
|
||||
}
|
||||
if (b != '\\')
|
||||
{
|
||||
value = b;
|
||||
b = '\\';
|
||||
state = 3;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
b = (char) value;
|
||||
break;
|
||||
default: // case 4: case 5: case 6: case 7:
|
||||
int digit = Character.digit(b, 16);
|
||||
if (digit < 0)
|
||||
{
|
||||
b = '\uFFFD';
|
||||
state = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
value = value * 16 + digit;
|
||||
if (state < 7)
|
||||
{
|
||||
state++;
|
||||
continue;
|
||||
}
|
||||
b = (char) value;
|
||||
}
|
||||
state = 0;
|
||||
}
|
||||
outbuffer[outpos++] = b;
|
||||
}
|
||||
return outpos - origpos;
|
||||
}
|
||||
}
|
||||
|
26
libjava/gnu/gcj/convert/Output_SJIS.java
Normal file
26
libjava/gnu/gcj/convert/Output_SJIS.java
Normal file
@ -0,0 +1,26 @@
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
/**
|
||||
* Convert Unicode to SJIS (Shift JIS, used on Japanese MS-Windows).
|
||||
* @author Per Bothner <bothner@cygnus.com>
|
||||
* @date April 1999.
|
||||
*/
|
||||
|
||||
public class Output_SJIS extends UnicodeToBytes
|
||||
{
|
||||
public String getName() { return "SJIS"; }
|
||||
|
||||
public native int write (char[] inbuffer, int inpos, int inlength);
|
||||
|
||||
public native int write (String str, int inpos, int inlength, char[] work);
|
||||
|
||||
int pending = -1;
|
||||
}
|
23139
libjava/gnu/gcj/convert/Unicode_to_JIS.cc
Normal file
23139
libjava/gnu/gcj/convert/Unicode_to_JIS.cc
Normal file
File diff suppressed because it is too large
Load Diff
76
libjava/gnu/gcj/convert/natInput_SJIS.cc
Normal file
76
libjava/gnu/gcj/convert/natInput_SJIS.cc
Normal file
@ -0,0 +1,76 @@
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
#include <config.h>
|
||||
#include <cni.h>
|
||||
#include <gnu/gcj/convert/Input_SJIS.h>
|
||||
|
||||
#define ERROR_CHAR 0xFFFD
|
||||
|
||||
extern unsigned short JIS0208_to_Unicode[84][94];
|
||||
extern unsigned short JIS0212_to_Unicode[76][94];
|
||||
|
||||
// Decode Shift-JIS bytes from inbuffer into Unicode chars in outbuffer.
//
// Bytes are consumed from inbuffer[inpos] up to (not including) inlength,
// and inpos is advanced past every byte consumed.  Decoding stops as soon
// as outpos reaches outlength (treated as an exclusive end index) or the
// input is exhausted.  Returns the number of chars stored.
//
// first_byte holds the lead byte of a two-byte character whose trail byte
// has not been read yet (0 when none is pending), so a character split
// across two calls is still decoded.  NOTE(review): first_byte, inbuffer,
// inpos and inlength are not declared here; presumably they are fields
// declared in the generated Input_SJIS.h or a base class.
jint
gnu::gcj::convert::Input_SJIS::read(jcharArray outbuffer, jint outpos,
                                    jint outlength)
{
  jint start_outpos = outpos;
  for (;;)
    {
      if (outpos >= outlength)
        break;
      if (inpos >= inlength)
        break;
      int b = ((unsigned char*) elements(inbuffer))[inpos++];
      if (first_byte == 0)
        {
          if (b < 128)
            {
#if 1
              // Technically, we should translate 0x5c to Yen symbol;
              // in practice, it is not clear.
              if (b == 0x5c)
                b = 0x00A5; // Yen sign.
#endif
              // Store b directly, as the other branches do.  Casting
              // through (char) would sign-extend 0xA5 to 0xFFA5 where
              // char is signed.
              elements(outbuffer)[outpos++] = b;
            }
          else if (b >= 0xA1 && b <= 0xDF)
            {
              // Single-byte range mapped onto the block starting at
              // U+FF61 (half-width katakana).
              b += 0xFF61 - 0xA1;
              elements(outbuffer)[outpos++] = b;
            }
          else
            // Lead byte of a two-byte character; wait for the trail byte.
            first_byte = b;
        }
      else
        {
          // 0x7F is never a valid trail byte; without this check it
          // would decode to the same cell as 0x80.
          bool bad_trail = b == 0x7F;

          // From Lunde: "CJKV Information Processing", O'Reilly, 1999, p 420:
          bool adjust = b < 159;
          int rowOffset = first_byte < 160 ? 112 : 176;
          int cellOffset = adjust ? (b > 127 ? 32 : 31) : 126;
          first_byte = ((first_byte - rowOffset) << 1) - adjust;
          b -= cellOffset;

          // Convert to zero-based row and cell indexes into the table.
          first_byte -= 33;
          b -= 33;

          // The unsigned comparisons also reject negative indexes.
          if (bad_trail
              || (unsigned) first_byte >= 84 || (unsigned) b >= 94)
            b = ERROR_CHAR;
          else
            {
              b = JIS0208_to_Unicode[first_byte][b];
              if (b == 0)  // Table entry 0 is treated as "no mapping".
                b = ERROR_CHAR;
            }
          elements(outbuffer)[outpos++] = b;

          first_byte = 0;
        }
    }
  return outpos - start_outpos;
}
|
Loading…
Reference in New Issue
Block a user