qjson: to_json() case QTYPE_QSTRING is buggy, rewrite
Known bugs in to_json(): * A start byte for a three-byte sequence followed by less than two continuation bytes is split into one-byte sequences. * Start bytes for sequences longer than three bytes get misinterpreted as start bytes for three-byte sequences. Continuation bytes beyond byte three become one-byte sequences. This means all characters outside the BMP are decoded incorrectly. * One-byte sequences with the MSB are put into the JSON string verbatim when char is unsigned, producing invalid UTF-8. When char is signed, they're replaced by "\\uFFFF" instead. This includes \xFE, \xFF, and stray continuation bytes. * Overlong sequences are happily accepted, unless screwed up by the bugs above. * Likewise, sequences encoding surrogate code points or noncharacters. * Unlike other control characters, ASCII DEL is not escaped. Except in overlong encodings. My rewrite fixes them as follows: * Malformed UTF-8 sequences are replaced. Except the overlong encoding \xC0\x80 of U+0000 is still accepted. Permits embedding NUL characters in C strings. This trick is known as "Modified UTF-8". * Sequences encoding code points beyond Unicode range are replaced. * Sequences encoding code points beyond the BMP produce a surrogate pair. * Sequences encoding surrogate code points are replaced. * Sequences encoding noncharacters are replaced. * ASCII DEL is now always escaped. The replacement character is U+FFFD. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Laszlo Ersek <lersek@redhat.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
This commit is contained in:
parent
1d50c8e947
commit
e2ec3f9768
@ -136,33 +136,16 @@ static void to_json(const QObject *obj, QString *str, int pretty, int indent)
|
||||
case QTYPE_QSTRING: {
|
||||
QString *val = qobject_to_qstring(obj);
|
||||
const char *ptr;
|
||||
int cp;
|
||||
char buf[16];
|
||||
char *end;
|
||||
|
||||
ptr = qstring_get_str(val);
|
||||
qstring_append(str, "\"");
|
||||
while (*ptr) {
|
||||
if ((ptr[0] & 0xE0) == 0xE0 &&
|
||||
(ptr[1] & 0x80) && (ptr[2] & 0x80)) {
|
||||
uint16_t wchar;
|
||||
char escape[7];
|
||||
|
||||
wchar = (ptr[0] & 0x0F) << 12;
|
||||
wchar |= (ptr[1] & 0x3F) << 6;
|
||||
wchar |= (ptr[2] & 0x3F);
|
||||
ptr += 2;
|
||||
|
||||
snprintf(escape, sizeof(escape), "\\u%04X", wchar);
|
||||
qstring_append(str, escape);
|
||||
} else if ((ptr[0] & 0xE0) == 0xC0 && (ptr[1] & 0x80)) {
|
||||
uint16_t wchar;
|
||||
char escape[7];
|
||||
|
||||
wchar = (ptr[0] & 0x1F) << 6;
|
||||
wchar |= (ptr[1] & 0x3F);
|
||||
ptr++;
|
||||
|
||||
snprintf(escape, sizeof(escape), "\\u%04X", wchar);
|
||||
qstring_append(str, escape);
|
||||
} else switch (ptr[0]) {
|
||||
for (; *ptr; ptr = end) {
|
||||
cp = mod_utf8_codepoint(ptr, 6, &end);
|
||||
switch (cp) {
|
||||
case '\"':
|
||||
qstring_append(str, "\\\"");
|
||||
break;
|
||||
@ -184,20 +167,25 @@ static void to_json(const QObject *obj, QString *str, int pretty, int indent)
|
||||
case '\t':
|
||||
qstring_append(str, "\\t");
|
||||
break;
|
||||
default: {
|
||||
if (ptr[0] <= 0x1F) {
|
||||
char escape[7];
|
||||
snprintf(escape, sizeof(escape), "\\u%04X", ptr[0]);
|
||||
qstring_append(str, escape);
|
||||
default:
|
||||
if (cp < 0) {
|
||||
cp = 0xFFFD; /* replacement character */
|
||||
}
|
||||
if (cp > 0xFFFF) {
|
||||
/* beyond BMP; need a surrogate pair */
|
||||
snprintf(buf, sizeof(buf), "\\u%04X\\u%04X",
|
||||
0xD800 + ((cp - 0x10000) >> 10),
|
||||
0xDC00 + ((cp - 0x10000) & 0x3FF));
|
||||
} else if (cp < 0x20 || cp >= 0x7F) {
|
||||
snprintf(buf, sizeof(buf), "\\u%04X", cp);
|
||||
} else {
|
||||
char buf[2] = { ptr[0], 0 };
|
||||
buf[0] = cp;
|
||||
buf[1] = 0;
|
||||
}
|
||||
qstring_append(str, buf);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
};
|
||||
|
||||
qstring_append(str, "\"");
|
||||
break;
|
||||
}
|
||||
|
@ -144,13 +144,10 @@ static void utf8_string(void)
|
||||
* The JSON parser rejects some invalid sequences, but accepts
|
||||
* others without correcting the problem.
|
||||
*
|
||||
* The JSON formatter replaces some invalid sequences by U+FFFF (a
|
||||
* noncharacter), and goes wonky for others.
|
||||
*
|
||||
* For both directions, we should either reject all invalid
|
||||
* sequences, or minimize overlong sequences and replace all other
|
||||
* invalid sequences by a suitable replacement character. A
|
||||
* common choice for replacement is U+FFFD.
|
||||
* We should either reject all invalid sequences, or minimize
|
||||
* overlong sequences and replace all other invalid sequences by a
|
||||
* suitable replacement character. A common choice for
|
||||
* replacement is U+FFFD.
|
||||
*
|
||||
* Problem: we can't easily deal with embedded U+0000. Parsing
|
||||
* the JSON string "this \\u0000" is fun" yields "this \0 is fun",
|
||||
@ -175,16 +172,10 @@ static void utf8_string(void)
|
||||
* - bug: rejected
|
||||
* JSON parser rejects invalid sequence(s)
|
||||
* We may choose to define this as feature
|
||||
* - bug: want "\"...\""
|
||||
* JSON formatter produces incorrect result, this is the
|
||||
* correct one, assuming replacement character U+FFFF
|
||||
* - bug: want "..." (no \")
|
||||
* - bug: want "..."
|
||||
* JSON parser produces incorrect result, this is the
|
||||
* correct one, assuming replacement character U+FFFF
|
||||
* We may choose to reject instead of replace
|
||||
* Not marked explicitly, but trivial to find:
|
||||
* - JSON formatter replacing invalid sequence by \\uFFFF is a
|
||||
* bug if we want it to fail for invalid sequences.
|
||||
*/
|
||||
|
||||
/* 1 Some correct UTF-8 text */
|
||||
@ -209,7 +200,8 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\\u0000\"",
|
||||
"", /* bug: want overlong "\xC0\x80" */
|
||||
"\"\"", /* bug: want "\"\\u0000\"" */
|
||||
"\"\\u0000\"",
|
||||
"\xC0\x80",
|
||||
},
|
||||
/* 2.1.2 2 bytes U+0080 */
|
||||
{
|
||||
@ -227,20 +219,20 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\xF0\x90\x80\x80\"",
|
||||
"\xF0\x90\x80\x80",
|
||||
"\"\\u0400\\uFFFF\"", /* bug: want "\"\\uD800\\uDC00\"" */
|
||||
"\"\\uD800\\uDC00\"",
|
||||
},
|
||||
/* 2.1.5 5 bytes U+200000 */
|
||||
{
|
||||
"\"\xF8\x88\x80\x80\x80\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u8200\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xF8\x88\x80\x80\x80",
|
||||
},
|
||||
/* 2.1.6 6 bytes U+4000000 */
|
||||
{
|
||||
"\"\xFC\x84\x80\x80\x80\x80\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uC100\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFC\x84\x80\x80\x80\x80",
|
||||
},
|
||||
/* 2.2 Last possible sequence of a certain length */
|
||||
@ -248,7 +240,7 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\x7F\"",
|
||||
"\x7F",
|
||||
"\"\177\"",
|
||||
"\"\\u007F\"",
|
||||
},
|
||||
/* 2.2.2 2 bytes U+07FF */
|
||||
{
|
||||
@ -274,21 +266,21 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\xF7\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u7FFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xF7\xBF\xBF\xBF",
|
||||
},
|
||||
/* 2.2.5 5 bytes U+3FFFFFF */
|
||||
{
|
||||
"\"\xFB\xBF\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uBFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFB\xBF\xBF\xBF\xBF",
|
||||
},
|
||||
/* 2.2.6 6 bytes U+7FFFFFFF */
|
||||
{
|
||||
"\"\xFD\xBF\xBF\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uDFFF\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFD\xBF\xBF\xBF\xBF\xBF",
|
||||
},
|
||||
/* 2.3 Other boundary conditions */
|
||||
@ -314,13 +306,13 @@ static void utf8_string(void)
|
||||
/* last one in last plane: U+10FFFD */
|
||||
"\"\xF4\x8F\xBF\xBD\"",
|
||||
"\xF4\x8F\xBF\xBD",
|
||||
"\"\\u43FF\\uFFFF\"", /* bug: want "\"\\uDBFF\\uDFFD\"" */
|
||||
"\"\\uDBFF\\uDFFD\""
|
||||
},
|
||||
{
|
||||
/* first one beyond Unicode range: U+110000 */
|
||||
"\"\xF4\x90\x80\x80\"",
|
||||
"\xF4\x90\x80\x80",
|
||||
"\"\\u4400\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 3 Malformed sequences */
|
||||
/* 3.1 Unexpected continuation bytes */
|
||||
@ -328,49 +320,49 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\x80\"",
|
||||
"\x80", /* bug: not corrected */
|
||||
"\"\\uFFFF\"",
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.2 Last continuation byte */
|
||||
{
|
||||
"\"\xBF\"",
|
||||
"\xBF", /* bug: not corrected */
|
||||
"\"\\uFFFF\"",
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.3 2 continuation bytes */
|
||||
{
|
||||
"\"\x80\xBF\"",
|
||||
"\x80\xBF", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.4 3 continuation bytes */
|
||||
{
|
||||
"\"\x80\xBF\x80\"",
|
||||
"\x80\xBF\x80", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.5 4 continuation bytes */
|
||||
{
|
||||
"\"\x80\xBF\x80\xBF\"",
|
||||
"\x80\xBF\x80\xBF", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.6 5 continuation bytes */
|
||||
{
|
||||
"\"\x80\xBF\x80\xBF\x80\"",
|
||||
"\x80\xBF\x80\xBF\x80", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.7 6 continuation bytes */
|
||||
{
|
||||
"\"\x80\xBF\x80\xBF\x80\xBF\"",
|
||||
"\x80\xBF\x80\xBF\x80\xBF", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.8 7 continuation bytes */
|
||||
{
|
||||
"\"\x80\xBF\x80\xBF\x80\xBF\x80\"",
|
||||
"\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
/* 3.1.9 Sequence of all 64 possible continuation bytes */
|
||||
{
|
||||
@ -391,14 +383,14 @@ static void utf8_string(void)
|
||||
"\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
|
||||
"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
|
||||
"\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
|
||||
"\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\""
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\""
|
||||
},
|
||||
/* 3.2 Lonely start characters */
|
||||
/* 3.2.1 All 32 first bytes of 2-byte sequences, followed by space */
|
||||
@ -408,10 +400,10 @@ static void utf8_string(void)
|
||||
"\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
|
||||
"\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF \"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
|
||||
"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
|
||||
"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
|
||||
"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
|
||||
"\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
|
||||
"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
|
||||
"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
|
||||
"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
|
||||
"\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 "
|
||||
"\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
|
||||
"\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
|
||||
@ -424,28 +416,28 @@ static void utf8_string(void)
|
||||
/* bug: not corrected */
|
||||
"\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
|
||||
"\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
|
||||
"\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
|
||||
"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
|
||||
"\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
|
||||
"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
|
||||
},
|
||||
/* 3.2.3 All 8 first bytes of 4-byte sequences, followed by space */
|
||||
{
|
||||
"\"\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 \"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
|
||||
"\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
|
||||
"\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
|
||||
},
|
||||
/* 3.2.4 All 4 first bytes of 5-byte sequences, followed by space */
|
||||
{
|
||||
"\"\xF8 \xF9 \xFA \xFB \"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
|
||||
"\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
|
||||
"\xF8 \xF9 \xFA \xFB ",
|
||||
},
|
||||
/* 3.2.5 All 2 first bytes of 6-byte sequences, followed by space */
|
||||
{
|
||||
"\"\xFC \xFD \"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uFFFF \\uFFFF \"",
|
||||
"\"\\uFFFD \\uFFFD \"",
|
||||
"\xFC \xFD ",
|
||||
},
|
||||
/* 3.3 Sequences with last continuation byte missing */
|
||||
@ -453,66 +445,66 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\xC0\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uFFFF\"",
|
||||
"\"\\uFFFD\"",
|
||||
"\xC0",
|
||||
},
|
||||
/* 3.3.2 3-byte sequence with last byte missing (U+0000) */
|
||||
{
|
||||
"\"\xE0\x80\"",
|
||||
"\xE0\x80", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 3.3.3 4-byte sequence with last byte missing (U+0000) */
|
||||
{
|
||||
"\"\xF0\x80\x80\"",
|
||||
"\xF0\x80\x80", /* bug: not corrected */
|
||||
"\"\\u0000\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 3.3.4 5-byte sequence with last byte missing (U+0000) */
|
||||
{
|
||||
"\"\xF8\x80\x80\x80\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u8000\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xF8\x80\x80\x80",
|
||||
},
|
||||
/* 3.3.5 6-byte sequence with last byte missing (U+0000) */
|
||||
{
|
||||
"\"\xFC\x80\x80\x80\x80\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uC000\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFC\x80\x80\x80\x80",
|
||||
},
|
||||
/* 3.3.6 2-byte sequence with last byte missing (U+07FF) */
|
||||
{
|
||||
"\"\xDF\"",
|
||||
"\xDF", /* bug: not corrected */
|
||||
"\"\\uFFFF\"",
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 3.3.7 3-byte sequence with last byte missing (U+FFFF) */
|
||||
{
|
||||
"\"\xEF\xBF\"",
|
||||
"\xEF\xBF", /* bug: not corrected */
|
||||
"\"\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 3.3.8 4-byte sequence with last byte missing (U+1FFFFF) */
|
||||
{
|
||||
"\"\xF7\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u7FFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xF7\xBF\xBF",
|
||||
},
|
||||
/* 3.3.9 5-byte sequence with last byte missing (U+3FFFFFF) */
|
||||
{
|
||||
"\"\xFB\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uBFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFB\xBF\xBF\xBF",
|
||||
},
|
||||
/* 3.3.10 6-byte sequence with last byte missing (U+7FFFFFFF) */
|
||||
{
|
||||
"\"\xFD\xBF\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uDFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"", */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFD\xBF\xBF\xBF\xBF",
|
||||
},
|
||||
/* 3.4 Concatenation of incomplete sequences */
|
||||
@ -520,10 +512,8 @@ static void utf8_string(void)
|
||||
"\"\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
|
||||
"\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
/* bug: want "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
|
||||
"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"" */
|
||||
"\"\\u0020\\uFFFF\\u0000\\u8000\\uFFFF\\uC000\\uFFFF\\uFFFF"
|
||||
"\\u07EF\\uFFFF\\u7FFF\\uBFFF\\uFFFF\\uDFFF\\uFFFF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
"\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
|
||||
"\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
|
||||
},
|
||||
@ -531,20 +521,19 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\xFE\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uFFFF\"",
|
||||
"\"\\uFFFD\"",
|
||||
"\xFE",
|
||||
},
|
||||
{
|
||||
"\"\xFF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uFFFF\"",
|
||||
"\"\\uFFFD\"",
|
||||
"\xFF",
|
||||
},
|
||||
{
|
||||
"\"\xFE\xFE\xFF\xFF\"",
|
||||
NULL, /* bug: rejected */
|
||||
/* bug: want "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uEFBF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
"\xFE\xFE\xFF\xFF",
|
||||
},
|
||||
/* 4 Overlong sequences */
|
||||
@ -552,29 +541,29 @@ static void utf8_string(void)
|
||||
{
|
||||
"\"\xC0\xAF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u002F\"", /* bug: want "\"/\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xC0\xAF",
|
||||
},
|
||||
{
|
||||
"\"\xE0\x80\xAF\"",
|
||||
"\xE0\x80\xAF", /* bug: not corrected */
|
||||
"\"\\u002F\"", /* bug: want "\"/\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
"\"\xF0\x80\x80\xAF\"",
|
||||
"\xF0\x80\x80\xAF", /* bug: not corrected */
|
||||
"\"\\u0000\\uFFFF\"" /* bug: want "\"/\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
"\"\xF8\x80\x80\x80\xAF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u8000\\uFFFF\\uFFFF\"", /* bug: want "\"/\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xF8\x80\x80\x80\xAF",
|
||||
},
|
||||
{
|
||||
"\"\xFC\x80\x80\x80\x80\xAF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uC000\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"/\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFC\x80\x80\x80\x80\xAF",
|
||||
},
|
||||
/*
|
||||
@ -587,14 +576,14 @@ static void utf8_string(void)
|
||||
/* \U+007F */
|
||||
"\"\xC1\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u007F\"", /* bug: want "\"\177\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xC1\xBF",
|
||||
},
|
||||
{
|
||||
/* \U+07FF */
|
||||
"\"\xE0\x9F\xBF\"",
|
||||
"\xE0\x9F\xBF", /* bug: not corrected */
|
||||
"\"\\u07FF\"",
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/*
|
||||
@ -605,20 +594,20 @@ static void utf8_string(void)
|
||||
*/
|
||||
"\"\xF0\x8F\xBF\xBC\"",
|
||||
"\xF0\x8F\xBF\xBC", /* bug: not corrected */
|
||||
"\"\\u03FF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+1FFFFF */
|
||||
"\"\xF8\x87\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u81FF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xF8\x87\xBF\xBF\xBF",
|
||||
},
|
||||
{
|
||||
/* \U+3FFFFFF */
|
||||
"\"\xFC\x83\xBF\xBF\xBF\xBF\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uC0FF\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFC\x83\xBF\xBF\xBF\xBF",
|
||||
},
|
||||
/* 4.3 Overlong representation of the NUL character */
|
||||
@ -633,26 +622,26 @@ static void utf8_string(void)
|
||||
/* \U+0000 */
|
||||
"\"\xE0\x80\x80\"",
|
||||
"\xE0\x80\x80", /* bug: not corrected */
|
||||
"\"\\u0000\"",
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+0000 */
|
||||
"\"\xF0\x80\x80\x80\"",
|
||||
"\xF0\x80\x80\x80", /* bug: not corrected */
|
||||
"\"\\u0000\\uFFFF\"", /* bug: want "\"\\u0000\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+0000 */
|
||||
"\"\xF8\x80\x80\x80\x80\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\u8000\\uFFFF\\uFFFF\"", /* bug: want "\"\\u0000\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xF8\x80\x80\x80\x80",
|
||||
},
|
||||
{
|
||||
/* \U+0000 */
|
||||
"\"\xFC\x80\x80\x80\x80\x80\"",
|
||||
NULL, /* bug: rejected */
|
||||
"\"\\uC000\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\u0000\"" */
|
||||
"\"\\uFFFD\"",
|
||||
"\xFC\x80\x80\x80\x80\x80",
|
||||
},
|
||||
/* 5 Illegal code positions */
|
||||
@ -661,92 +650,92 @@ static void utf8_string(void)
|
||||
/* \U+D800 */
|
||||
"\"\xED\xA0\x80\"",
|
||||
"\xED\xA0\x80", /* bug: not corrected */
|
||||
"\"\\uD800\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DB7F */
|
||||
"\"\xED\xAD\xBF\"",
|
||||
"\xED\xAD\xBF", /* bug: not corrected */
|
||||
"\"\\uDB7F\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DB80 */
|
||||
"\"\xED\xAE\x80\"",
|
||||
"\xED\xAE\x80", /* bug: not corrected */
|
||||
"\"\\uDB80\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DBFF */
|
||||
"\"\xED\xAF\xBF\"",
|
||||
"\xED\xAF\xBF", /* bug: not corrected */
|
||||
"\"\\uDBFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DC00 */
|
||||
"\"\xED\xB0\x80\"",
|
||||
"\xED\xB0\x80", /* bug: not corrected */
|
||||
"\"\\uDC00\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DF80 */
|
||||
"\"\xED\xBE\x80\"",
|
||||
"\xED\xBE\x80", /* bug: not corrected */
|
||||
"\"\\uDF80\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DFFF */
|
||||
"\"\xED\xBF\xBF\"",
|
||||
"\xED\xBF\xBF", /* bug: not corrected */
|
||||
"\"\\uDFFF\"", /* bug: want "\"\\uFFFF\"" */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* 5.2 Paired UTF-16 surrogates */
|
||||
{
|
||||
/* \U+D800\U+DC00 */
|
||||
"\"\xED\xA0\x80\xED\xB0\x80\"",
|
||||
"\xED\xA0\x80\xED\xB0\x80", /* bug: not corrected */
|
||||
"\"\\uD800\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+D800\U+DFFF */
|
||||
"\"\xED\xA0\x80\xED\xBF\xBF\"",
|
||||
"\xED\xA0\x80\xED\xBF\xBF", /* bug: not corrected */
|
||||
"\"\\uD800\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DB7F\U+DC00 */
|
||||
"\"\xED\xAD\xBF\xED\xB0\x80\"",
|
||||
"\xED\xAD\xBF\xED\xB0\x80", /* bug: not corrected */
|
||||
"\"\\uDB7F\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DB7F\U+DFFF */
|
||||
"\"\xED\xAD\xBF\xED\xBF\xBF\"",
|
||||
"\xED\xAD\xBF\xED\xBF\xBF", /* bug: not corrected */
|
||||
"\"\\uDB7F\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DB80\U+DC00 */
|
||||
"\"\xED\xAE\x80\xED\xB0\x80\"",
|
||||
"\xED\xAE\x80\xED\xB0\x80", /* bug: not corrected */
|
||||
"\"\\uDB80\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DB80\U+DFFF */
|
||||
"\"\xED\xAE\x80\xED\xBF\xBF\"",
|
||||
"\xED\xAE\x80\xED\xBF\xBF", /* bug: not corrected */
|
||||
"\"\\uDB80\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DBFF\U+DC00 */
|
||||
"\"\xED\xAF\xBF\xED\xB0\x80\"",
|
||||
"\xED\xAF\xBF\xED\xB0\x80", /* bug: not corrected */
|
||||
"\"\\uDBFF\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+DBFF\U+DFFF */
|
||||
"\"\xED\xAF\xBF\xED\xBF\xBF\"",
|
||||
"\xED\xAF\xBF\xED\xBF\xBF", /* bug: not corrected */
|
||||
"\"\\uDBFF\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
|
||||
"\"\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
/* 5.3 Other illegal code positions */
|
||||
/* BMP noncharacters */
|
||||
@ -754,25 +743,25 @@ static void utf8_string(void)
|
||||
/* \U+FFFE */
|
||||
"\"\xEF\xBF\xBE\"",
|
||||
"\xEF\xBF\xBE", /* bug: not corrected */
|
||||
"\"\\uFFFE\"", /* bug: not corrected */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* \U+FFFF */
|
||||
"\"\xEF\xBF\xBF\"",
|
||||
"\xEF\xBF\xBF", /* bug: not corrected */
|
||||
"\"\\uFFFF\"", /* bug: not corrected */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* U+FDD0 */
|
||||
"\"\xEF\xB7\x90\"",
|
||||
"\xEF\xB7\x90", /* bug: not corrected */
|
||||
"\"\\uFDD0\"", /* bug: not corrected */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
{
|
||||
/* U+FDEF */
|
||||
"\"\xEF\xB7\xAF\"",
|
||||
"\xEF\xB7\xAF", /* bug: not corrected */
|
||||
"\"\\uFDEF\"", /* bug: not corrected */
|
||||
"\"\\uFFFD\"",
|
||||
},
|
||||
/* Plane 1 .. 16 noncharacters */
|
||||
{
|
||||
@ -810,15 +799,10 @@ static void utf8_string(void)
|
||||
"\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
|
||||
"\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
|
||||
"\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF",
|
||||
/* bug: not corrected */
|
||||
"\"\\u07FF\\uFFFF\\u07FF\\uFFFF\\u0BFF\\uFFFF\\u0BFF\\uFFFF"
|
||||
"\\u0FFF\\uFFFF\\u0FFF\\uFFFF\\u13FF\\uFFFF\\u13FF\\uFFFF"
|
||||
"\\u17FF\\uFFFF\\u17FF\\uFFFF\\u1BFF\\uFFFF\\u1BFF\\uFFFF"
|
||||
"\\u1FFF\\uFFFF\\u1FFF\\uFFFF\\u23FF\\uFFFF\\u23FF\\uFFFF"
|
||||
"\\u27FF\\uFFFF\\u27FF\\uFFFF\\u2BFF\\uFFFF\\u2BFF\\uFFFF"
|
||||
"\\u2FFF\\uFFFF\\u2FFF\\uFFFF\\u33FF\\uFFFF\\u33FF\\uFFFF"
|
||||
"\\u37FF\\uFFFF\\u37FF\\uFFFF\\u3BFF\\uFFFF\\u3BFF\\uFFFF"
|
||||
"\\u3FFF\\uFFFF\\u3FFF\\uFFFF\\u43FF\\uFFFF\\u43FF\\uFFFF\"",
|
||||
"\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
|
||||
"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
|
||||
},
|
||||
{}
|
||||
};
|
||||
@ -856,8 +840,8 @@ static void utf8_string(void)
|
||||
qobject_decref(obj);
|
||||
|
||||
/*
|
||||
* Disabled, because json_out currently contains the crap
|
||||
* qobject_to_json() produces.
|
||||
* Disabled, because qobject_from_json() is buggy, and I can't
|
||||
* be bothered to add the expected incorrect results.
|
||||
* FIXME Enable once these bugs have been fixed.
|
||||
*/
|
||||
if (0 && json_out != json_in) {
|
||||
|
Loading…
Reference in New Issue
Block a user