Update.
* charmaps/GBK: Add commented mappings for GBK characters not yet in Unicode. 2000-09-23 Bruno Haible <haible@clisp.cons.org>
This commit is contained in:
parent
fbb1f75f1e
commit
a2aa7df3d6
@ -65,9 +65,11 @@
|
||||
None of these characters are defined in GB2312. Besides this \
|
||||
there is an incompatibility in the mapping. The Unicode tables \
|
||||
say that 0xA1A4 maps in GB2312 to U30FB while in GBK it maps to \
|
||||
U00B7. Since we are free to do whatever we want if a mapping \
|
||||
is not available we will not flag this as an error but instead \
|
||||
map the two positions. But this means that the mapping \
|
||||
U00B7. Similarly, 0xA1AA maps in GB2312 to U2015 while in GBK \
|
||||
it maps to U2014. Since we are free to do whatever we want if \
|
||||
a mapping is not available we will not flag this as an error \
|
||||
but instead map the two positions. But this means that the \
|
||||
mapping \
|
||||
\
|
||||
UCS4 -> GB2312 -> GBK -> UCS4 \
|
||||
\
|
||||
@ -89,6 +91,10 @@
|
||||
\
|
||||
ch = (ch << 8) | inptr[1]; \
|
||||
\
|
||||
/* Map 0xA844 (U2015 in GBK) to 0xA1AA (U2015 in GB2312). */ \
|
||||
if (__builtin_expect (ch == 0xa844, 0)) \
|
||||
ch = 0xa1aa; \
|
||||
\
|
||||
/* Now determine whether the character is valid. */ \
|
||||
if (__builtin_expect (ch, 0xa1a1) < 0xa1a1 \
|
||||
|| __builtin_expect (ch, 0xa1a1) > 0xf7fe \
|
||||
@ -123,8 +129,8 @@
|
||||
#define BODY \
|
||||
{ \
|
||||
/* We don't have to care about characters we cannot map. The only \
|
||||
problem is the mapping of 0xA1A4 but as explained above we do not \
|
||||
do anything special here. */ \
|
||||
problems are the mappings of 0xA1A4 and 0xA1AA but as explained above \
|
||||
we do not do anything special here. */ \
|
||||
unsigned char ch = *inptr++; \
|
||||
\
|
||||
if (ch > 0x7f) \
|
||||
|
@ -1570,7 +1570,7 @@ static const uint16_t __gbk_to_ucs[] =
|
||||
[0x17fb] = 0x72d6, [0x17fc] = 0x72d8, [0x17fd] = 0x72da, [0x17fe] = 0x72db,
|
||||
[0x1861] = 0x3000, [0x1862] = 0x3001, [0x1863] = 0x3002, [0x1864] = 0x00b7,
|
||||
[0x1865] = 0x02c9, [0x1866] = 0x02c7, [0x1867] = 0x00a8, [0x1868] = 0x3003,
|
||||
[0x1869] = 0x3005, [0x186a] = 0x2015, [0x186b] = 0xff5e, [0x186c] = 0x2016,
|
||||
[0x1869] = 0x3005, [0x186a] = 0x2014, [0x186b] = 0xff5e, [0x186c] = 0x2016,
|
||||
[0x186d] = 0x2026, [0x186e] = 0x2018, [0x186f] = 0x2019, [0x1870] = 0x201c,
|
||||
[0x1871] = 0x201d, [0x1872] = 0x3014, [0x1873] = 0x3015, [0x1874] = 0x3008,
|
||||
[0x1875] = 0x3009, [0x1876] = 0x300a, [0x1877] = 0x300b, [0x1878] = 0x300c,
|
||||
@ -1712,7 +1712,7 @@ static const uint16_t __gbk_to_ucs[] =
|
||||
[0x1d2a] = 0x0448, [0x1d2b] = 0x0449, [0x1d2c] = 0x044a, [0x1d2d] = 0x044b,
|
||||
[0x1d2e] = 0x044c, [0x1d2f] = 0x044d, [0x1d30] = 0x044e, [0x1d31] = 0x044f,
|
||||
[0x1d40] = 0x02ca, [0x1d41] = 0x02cb, [0x1d42] = 0x02d9, [0x1d43] = 0x2013,
|
||||
[0x1d44] = 0x2014, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
|
||||
[0x1d44] = 0x2015, [0x1d45] = 0x2025, [0x1d46] = 0x2035, [0x1d47] = 0x2105,
|
||||
[0x1d48] = 0x2109, [0x1d49] = 0x2196, [0x1d4a] = 0x2197, [0x1d4b] = 0x2198,
|
||||
[0x1d4c] = 0x2199, [0x1d4d] = 0x2215, [0x1d4e] = 0x221f, [0x1d4f] = 0x2223,
|
||||
[0x1d50] = 0x2252, [0x1d51] = 0x2266, [0x1d52] = 0x2267, [0x1d53] = 0x22bf,
|
||||
@ -5661,8 +5661,8 @@ static const char __gbk_from_ucs4_tab3[][2] =
|
||||
*/
|
||||
static const char __gbk_from_ucs4_tab4[][2] =
|
||||
{
|
||||
[0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa8\x44",
|
||||
[0x0005] = "\xa1\xaa", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
|
||||
[0x0000] = "\xa9\x5c", [0x0003] = "\xa8\x43", [0x0004] = "\xa1\xaa",
|
||||
[0x0005] = "\xa8\x44", [0x0006] = "\xa1\xac", [0x0008] = "\xa1\xae",
|
||||
[0x0009] = "\xa1\xaf", [0x000c] = "\xa1\xb0", [0x000d] = "\xa1\xb1",
|
||||
[0x0015] = "\xa8\x45", [0x0016] = "\xa1\xad", [0x0020] = "\xa1\xeb",
|
||||
[0x0022] = "\xa1\xe4", [0x0023] = "\xa1\xe5", [0x0025] = "\xa8\x46",
|
||||
@ -13153,8 +13153,10 @@ static const char __gbk_from_ucs4_tab12[][2] =
|
||||
\
|
||||
ch2 = inptr[1]; \
|
||||
\
|
||||
/* All second bytes of a multibyte character must be >= 0x40. */ \
|
||||
if (__builtin_expect (ch2, 0x41) < 0x40) \
|
||||
/* All second bytes of a multibyte character must be >= 0x40, and \
|
||||
the __gbk_to_ucs table only covers the range up to 0xfe 0xa0. */ \
|
||||
if (__builtin_expect (ch2, 0x41) < 0x40 \
|
||||
|| (__builtin_expect (ch, 0x81) == 0xfe && ch2 > 0xa0)) \
|
||||
{ \
|
||||
/* This is an illegal character. */ \
|
||||
if (! ignore_errors_p ()) \
|
||||
|
4
iconvdata/testdata/GBK..UTF8
vendored
4
iconvdata/testdata/GBK..UTF8
vendored
@ -389,7 +389,7 @@
|
||||
犘 犙 犚 犛 犜 犝 犞 犠 犡 犢 犣 犤 犥 犦 犧 犨
|
||||
犩 犪 犫 犮 犱 犲 犳 犵 犺 犻 犼 犽 犾 犿 狀 狅
|
||||
狆 狇 狉 狊 狋 狌 狏 狑 狓 狔 狕 狖 狘 狚 狛
|
||||
、 。 · ˉ ˇ ¨ 〃 々 ― ~ ‖ … ‘ ’
|
||||
、 。 · ˉ ˇ ¨ 〃 々 — ~ ‖ … ‘ ’
|
||||
“ ” 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】
|
||||
± × ÷ ∶ ∧ ∨ ∑ ∏ ∪ ∩ ∈ ∷ √ ⊥ ∥ ∠
|
||||
⌒ ⊙ ∫ ∮ ≡ ≌ ≈ ∽ ∝ ≠ ≮ ≯ ≤ ≥ ∞ ∵
|
||||
@ -431,7 +431,7 @@
|
||||
а б в г д е ё ж з и й к л м н
|
||||
о п р с т у ф х ц ч ш щ ъ ы ь э
|
||||
ю я
|
||||
ˊ ˋ ˙ – — ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣
|
||||
ˊ ˋ ˙ – ― ‥ ‵ ℅ ℉ ↖ ↗ ↘ ↙ ∕ ∟ ∣
|
||||
≒ ≦ ≧ ⊿ ═ ║ ╒ ╓ ╔ ╕ ╖ ╗ ╘ ╙ ╚ ╛
|
||||
╜ ╝ ╞ ╟ ╠ ╡ ╢ ╣ ╤ ╥ ╦ ╧ ╨ ╩ ╪ ╫
|
||||
╬ ╭ ╮ ╯ ╰ ╱ ╲ ╳ ▁ ▂ ▃ ▄ ▅ ▆ ▇
|
||||
|
@ -1,3 +1,8 @@
|
||||
2000-09-23 Bruno Haible <haible@clisp.cons.org>
|
||||
|
||||
* charmaps/GBK: Add commented mappings for GBK characters not yet in
|
||||
Unicode.
|
||||
|
||||
2000-09-23 Bruno Haible <haible@clisp.cons.org>
|
||||
|
||||
* charmaps/GBK: Remove /x80 entry.
|
||||
|
@ -6898,8 +6898,10 @@ CHARMAP
|
||||
<U00FC> /xa8/xb9 LATIN SMALL LETTER U WITH DIAERESIS
|
||||
<U00EA> /xa8/xba LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
<U0251> /xa8/xbb LATIN SMALL LETTER ALPHA
|
||||
% <UE7C7> /xa8/xbc
|
||||
<U0144> /xa8/xbd LATIN SMALL LETTER N WITH ACUTE
|
||||
<U0148> /xa8/xbe LATIN SMALL LETTER N WITH CARON
|
||||
% <UE7C8> /xa8/xbf
|
||||
<U0261> /xa8/xc0 LATIN SMALL LETTER SCRIPT G
|
||||
<U3105> /xa8/xc5 BOPOMOFO LETTER B
|
||||
<U3106> /xa8/xc6 BOPOMOFO LETTER P
|
||||
@ -7005,6 +7007,19 @@ CHARMAP
|
||||
<UFE69> /xa9/x86 SMALL DOLLAR SIGN
|
||||
<UFE6A> /xa9/x87 SMALL PERCENT SIGN
|
||||
<UFE6B> /xa9/x88 SMALL COMMERCIAL AT
|
||||
% <UE7E7> /xa9/x89
|
||||
% <UE7E8> /xa9/x8a
|
||||
% <UE7E9> /xa9/x8b
|
||||
% <UE7EA> /xa9/x8c
|
||||
% <UE7EB> /xa9/x8d
|
||||
% <UE7EC> /xa9/x8e
|
||||
% <UE7ED> /xa9/x8f
|
||||
% <UE7EE> /xa9/x90
|
||||
% <UE7EF> /xa9/x91
|
||||
% <UE7F0> /xa9/x92
|
||||
% <UE7F1> /xa9/x93
|
||||
% <UE7F2> /xa9/x94
|
||||
% <UE7F3> /xa9/x95
|
||||
<U3007> /xa9/x96 IDEOGRAPHIC NUMBER ZERO
|
||||
<U2500> /xa9/xa4 BOX DRAWINGS LIGHT HORIZONTAL
|
||||
<U2501> /xa9/xa5 BOX DRAWINGS HEAVY HORIZONTAL
|
||||
@ -21925,6 +21940,86 @@ CHARMAP
|
||||
<UFA27> /xfe/x4d <CJK>
|
||||
<UFA28> /xfe/x4e <CJK>
|
||||
<UFA29> /xfe/x4f <CJK>
|
||||
% <UE815> /xfe/x50
|
||||
% <UE816> /xfe/x51
|
||||
% <UE817> /xfe/x52
|
||||
% <UE818> /xfe/x53
|
||||
% <UE819> /xfe/x54
|
||||
% <UE81A> /xfe/x55
|
||||
% <UE81B> /xfe/x56
|
||||
% <UE81C> /xfe/x57
|
||||
% <UE81D> /xfe/x58
|
||||
% <UE81E> /xfe/x59
|
||||
% <UE81F> /xfe/x5a
|
||||
% <UE820> /xfe/x5b
|
||||
% <UE821> /xfe/x5c
|
||||
% <UE822> /xfe/x5d
|
||||
% <UE823> /xfe/x5e
|
||||
% <UE824> /xfe/x5f
|
||||
% <UE825> /xfe/x60
|
||||
% <UE826> /xfe/x61
|
||||
% <UE827> /xfe/x62
|
||||
% <UE828> /xfe/x63
|
||||
% <UE829> /xfe/x64
|
||||
% <UE82A> /xfe/x65
|
||||
% <UE82B> /xfe/x66
|
||||
% <UE82C> /xfe/x67
|
||||
% <UE82D> /xfe/x68
|
||||
% <UE82E> /xfe/x69
|
||||
% <UE82F> /xfe/x6a
|
||||
% <UE830> /xfe/x6b
|
||||
% <UE831> /xfe/x6c
|
||||
% <UE832> /xfe/x6d
|
||||
% <UE833> /xfe/x6e
|
||||
% <UE834> /xfe/x6f
|
||||
% <UE835> /xfe/x70
|
||||
% <UE836> /xfe/x71
|
||||
% <UE837> /xfe/x72
|
||||
% <UE838> /xfe/x73
|
||||
% <UE839> /xfe/x74
|
||||
% <UE83A> /xfe/x75
|
||||
% <UE83B> /xfe/x76
|
||||
% <UE83C> /xfe/x77
|
||||
% <UE83D> /xfe/x78
|
||||
% <UE83E> /xfe/x79
|
||||
% <UE83F> /xfe/x7a
|
||||
% <UE840> /xfe/x7b
|
||||
% <UE841> /xfe/x7c
|
||||
% <UE842> /xfe/x7d
|
||||
% <UE843> /xfe/x7e
|
||||
% <UE844> /xfe/x80
|
||||
% <UE845> /xfe/x81
|
||||
% <UE846> /xfe/x82
|
||||
% <UE847> /xfe/x83
|
||||
% <UE848> /xfe/x84
|
||||
% <UE849> /xfe/x85
|
||||
% <UE84A> /xfe/x86
|
||||
% <UE84B> /xfe/x87
|
||||
% <UE84C> /xfe/x88
|
||||
% <UE84D> /xfe/x89
|
||||
% <UE84E> /xfe/x8a
|
||||
% <UE84F> /xfe/x8b
|
||||
% <UE850> /xfe/x8c
|
||||
% <UE851> /xfe/x8d
|
||||
% <UE852> /xfe/x8e
|
||||
% <UE853> /xfe/x8f
|
||||
% <UE854> /xfe/x90
|
||||
% <UE855> /xfe/x91
|
||||
% <UE856> /xfe/x92
|
||||
% <UE857> /xfe/x93
|
||||
% <UE858> /xfe/x94
|
||||
% <UE859> /xfe/x95
|
||||
% <UE85A> /xfe/x96
|
||||
% <UE85B> /xfe/x97
|
||||
% <UE85C> /xfe/x98
|
||||
% <UE85D> /xfe/x99
|
||||
% <UE85E> /xfe/x9a
|
||||
% <UE85F> /xfe/x9b
|
||||
% <UE860> /xfe/x9c
|
||||
% <UE861> /xfe/x9d
|
||||
% <UE862> /xfe/x9e
|
||||
% <UE863> /xfe/x9f
|
||||
% <UE864> /xfe/xa0
|
||||
END CHARMAP
|
||||
|
||||
WIDTH
|
||||
|
Loading…
Reference in New Issue
Block a user